]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - sys/netinet6/ip6_output.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / sys / netinet6 / ip6_output.c
1 /*-
2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the project nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
30  */
31
32 /*-
33  * Copyright (c) 1982, 1986, 1988, 1990, 1993
34  *      The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61  */
62
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68 #include "opt_ipsec.h"
69 #include "opt_sctp.h"
70
71 #include <sys/param.h>
72 #include <sys/kernel.h>
73 #include <sys/malloc.h>
74 #include <sys/mbuf.h>
75 #include <sys/errno.h>
76 #include <sys/priv.h>
77 #include <sys/proc.h>
78 #include <sys/protosw.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
81 #include <sys/syslog.h>
82 #include <sys/ucred.h>
83
84 #include <net/if.h>
85 #include <net/netisr.h>
86 #include <net/route.h>
87 #include <net/pfil.h>
88 #include <net/vnet.h>
89
90 #include <netinet/in.h>
91 #include <netinet/in_var.h>
92 #include <netinet6/in6_var.h>
93 #include <netinet/ip6.h>
94 #include <netinet/icmp6.h>
95 #include <netinet6/ip6_var.h>
96 #include <netinet/in_pcb.h>
97 #include <netinet/tcp_var.h>
98 #include <netinet6/nd6.h>
99
100 #ifdef IPSEC
101 #include <netipsec/ipsec.h>
102 #include <netipsec/ipsec6.h>
103 #include <netipsec/key.h>
104 #include <netinet6/ip6_ipsec.h>
105 #endif /* IPSEC */
106 #ifdef SCTP
107 #include <netinet/sctp.h>
108 #include <netinet/sctp_crc32.h>
109 #endif
110
111 #include <netinet6/ip6protosw.h>
112 #include <netinet6/scope6_var.h>
113
114 extern int in6_mcast_loop;
115
116 struct ip6_exthdrs {        /* one mbuf slot per header ip6_output() assembles; a NULL slot means "absent" */
117         struct mbuf *ip6e_ip6;          /* mbuf holding the IPv6 header itself */
118         struct mbuf *ip6e_hbh;          /* hop-by-hop options header (chained as IPPROTO_HOPOPTS) */
119         struct mbuf *ip6e_dest1;        /* destination options, 1st part; only built when a routing header is given */
120         struct mbuf *ip6e_rthdr;        /* routing header (chained as IPPROTO_ROUTING) */
121         struct mbuf *ip6e_dest2;        /* destination options, 2nd part; not counted in the unfragmentable length */
122 };
123
124 static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
125                            struct ucred *, int));
126 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
127         struct socket *, struct sockopt *));
128 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
129 static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *,
130         struct ucred *, int, int, int));
131
132 static int ip6_copyexthdr(struct mbuf **, caddr_t, int); /* called by MAKE_EXTHDR; nonzero return is treated as an error */
133 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
134         struct ip6_frag **));
135 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); /* called by ip6_output() when plen > IPV6_MAXPACKET */
136 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); /* called by ip6_output() to separate the IPv6 header from the payload */
137 static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
138         struct ifnet *, struct in6_addr *, u_long *, int *)); /* called by ip6_output() to determine the path MTU */
139 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
140
141
142 /*
143  * Make an extension header from option data.  hp is the source, and
144  * mp is the destination (passed straight through as the first argument
     * of ip6_copyexthdr(), i.e. a struct mbuf **).
     *
     * A NULL hp is skipped.  Otherwise hp is read as a struct ip6_ext and
     * (ip6e_len + 1) << 3 bytes are handed to ip6_copyexthdr().  A nonzero
     * result is stored in the caller's `error' variable and control jumps
     * to the caller's `freehdrs' label, so both must exist in the
     * enclosing function.
145  */
146 #define MAKE_EXTHDR(hp, mp)                                             \
147     do {                                                                \
148         if (hp) {                                                       \
149                 struct ip6_ext *eh = (struct ip6_ext *)(hp);            \
150                 error = ip6_copyexthdr((mp), (caddr_t)(hp),             \
151                     ((eh)->ip6e_len + 1) << 3);                         \
152                 if (error)                                              \
153                         goto freehdrs;                                  \
154         }                                                               \
155     } while (/*CONSTCOND*/ 0)
156
157 /*
158  * Form a chain of extension headers.
159  * m is the extension header mbuf to link in; a NULL m makes this a no-op.
160  * mp is the previous mbuf in the chain; m is linked after it and mp is
     *    then reassigned to m, so mp must be a modifiable lvalue.
161  * p is a u_char pointer to the next-header byte of the preceding header.
     *    Its old value is copied into the first byte of m, *p is set to i,
     *    and p is then re-pointed at the first byte of m (p must be an
     *    lvalue too).
162  * i is the next-header value written for this header (callers pass
     *    constants such as IPPROTO_HOPOPTS).
     *
     * The macro reads the caller's `hdrsplit' variable and panics if it is
     * zero while m is non-NULL.
163  */
164 #define MAKE_CHAIN(m, mp, p, i)\
165     do {\
166         if (m) {\
167                 if (!hdrsplit) \
168                         panic("assumption failed: hdr not split"); \
169                 *mtod((m), u_char *) = *(p);\
170                 *(p) = (i);\
171                 p = mtod((m), u_char *);\
172                 (m)->m_next = (mp)->m_next;\
173                 (mp)->m_next = (m);\
174                 (mp) = (m);\
175         }\
176     } while (/*CONSTCOND*/ 0)
177
178 /*
179  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
180  * header (with pri, len, nxt, hlim, src, dst).
181  * This function may modify ver and hlim only.
182  * The mbuf chain containing the packet will be freed.
183  * The mbuf opt, if present, will not be freed.
184  *
185  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
186  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
187  * which is rt_rmx.rmx_mtu.
188  *
189  * ifpp - XXX: just for statistics
190  */
191 int
192 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
193     struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
194     struct ifnet **ifpp, struct inpcb *inp)
195 {
196         struct ip6_hdr *ip6, *mhip6;
197         struct ifnet *ifp, *origifp;
198         struct mbuf *m = m0;
199         struct mbuf *mprev = NULL;
200         int hlen, tlen, len, off;
201         struct route_in6 ip6route;
202         struct rtentry *rt = NULL;
203         struct sockaddr_in6 *dst, src_sa, dst_sa;
204         struct in6_addr odst;
205         int error = 0;
206         struct in6_ifaddr *ia = NULL;
207         u_long mtu;
208         int alwaysfrag, dontfrag;
209         u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
210         struct ip6_exthdrs exthdrs;
211         struct in6_addr finaldst, src0, dst0;
212         u_int32_t zone;
213         struct route_in6 *ro_pmtu = NULL;
214         int hdrsplit = 0;
215         int needipsec = 0;
216 #ifdef SCTP
217         int sw_csum;
218 #endif
219 #ifdef IPSEC
220         struct ipsec_output_state state;
221         struct ip6_rthdr *rh = NULL;
222         int needipsectun = 0;
223         int segleft_org = 0;
224         struct secpolicy *sp = NULL;
225 #endif /* IPSEC */
226
227         ip6 = mtod(m, struct ip6_hdr *);
228         if (ip6 == NULL) {
229                 printf ("ip6 is NULL");
230                 goto bad;
231         }
232
233         finaldst = ip6->ip6_dst;
234
235         bzero(&exthdrs, sizeof(exthdrs));
236
237         if (opt) {
238                 /* Hop-by-Hop options header */
239                 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
240                 /* Destination options header(1st part) */
241                 if (opt->ip6po_rthdr) {
242                         /*
243                          * Destination options header(1st part)
244                          * This only makes sense with a routing header.
245                          * See Section 9.2 of RFC 3542.
246                          * Disabling this part just for MIP6 convenience is
247                          * a bad idea.  We need to think carefully about a
248                          * way to make the advanced API coexist with MIP6
249                          * options, which might automatically be inserted in
250                          * the kernel.
251                          */
252                         MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
253                 }
254                 /* Routing header */
255                 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
256                 /* Destination options header(2nd part) */
257                 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
258         }
259
260         /*
261          * IPSec checking which handles several cases.
262          * FAST IPSEC: We re-injected the packet.
263          */
264 #ifdef IPSEC
265         switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
266         {
267         case 1:                 /* Bad packet */
268                 goto freehdrs;
269         case -1:                /* Do IPSec */
270                 needipsec = 1;
271         case 0:                 /* No IPSec */
272         default:
273                 break;
274         }
275 #endif /* IPSEC */
276
277         /*
278          * Calculate the total length of the extension header chain.
279          * Keep the length of the unfragmentable part for fragmentation.
280          */
281         optlen = 0;
282         if (exthdrs.ip6e_hbh)
283                 optlen += exthdrs.ip6e_hbh->m_len;
284         if (exthdrs.ip6e_dest1)
285                 optlen += exthdrs.ip6e_dest1->m_len;
286         if (exthdrs.ip6e_rthdr)
287                 optlen += exthdrs.ip6e_rthdr->m_len;
288         unfragpartlen = optlen + sizeof(struct ip6_hdr);
289
290         /* NOTE: we don't add AH/ESP length here. do that later. */
291         if (exthdrs.ip6e_dest2)
292                 optlen += exthdrs.ip6e_dest2->m_len;
293
294         /*
295          * If we need IPsec, or there is at least one extension header,
296          * separate IP6 header from the payload.
297          */
298         if ((needipsec || optlen) && !hdrsplit) {
299                 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
300                         m = NULL;
301                         goto freehdrs;
302                 }
303                 m = exthdrs.ip6e_ip6;
304                 hdrsplit++;
305         }
306
307         /* adjust pointer */
308         ip6 = mtod(m, struct ip6_hdr *);
309
310         /* adjust mbuf packet header length */
311         m->m_pkthdr.len += optlen;
312         plen = m->m_pkthdr.len - sizeof(*ip6);
313
314         /* If this is a jumbo payload, insert a jumbo payload option. */
315         if (plen > IPV6_MAXPACKET) {
316                 if (!hdrsplit) {
317                         if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
318                                 m = NULL;
319                                 goto freehdrs;
320                         }
321                         m = exthdrs.ip6e_ip6;
322                         hdrsplit++;
323                 }
324                 /* adjust pointer */
325                 ip6 = mtod(m, struct ip6_hdr *);
326                 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
327                         goto freehdrs;
328                 ip6->ip6_plen = 0;
329         } else
330                 ip6->ip6_plen = htons(plen);
331
332         /*
333          * Concatenate headers and fill in next header fields.
334          * Here we have, on "m"
335          *      IPv6 payload
336          * and we insert headers accordingly.  Finally, we should be getting:
337          *      IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
338          *
339          * during the header composing process, "m" points to IPv6 header.
340          * "mprev" points to an extension header prior to esp.
341          */
342         u_char *nexthdrp = &ip6->ip6_nxt;
343         mprev = m;
344
345         /*
346          * we treat dest2 specially.  this makes IPsec processing
347          * much easier.  the goal here is to make mprev point the
348          * mbuf prior to dest2.
349          *
350          * result: IPv6 dest2 payload
351          * m and mprev will point to IPv6 header.
352          */
353         if (exthdrs.ip6e_dest2) {
354                 if (!hdrsplit)
355                         panic("assumption failed: hdr not split");
356                 exthdrs.ip6e_dest2->m_next = m->m_next;
357                 m->m_next = exthdrs.ip6e_dest2;
358                 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
359                 ip6->ip6_nxt = IPPROTO_DSTOPTS;
360         }
361
362         /*
363          * result: IPv6 hbh dest1 rthdr dest2 payload
364          * m will point to IPv6 header.  mprev will point to the
365          * extension header prior to dest2 (rthdr in the above case).
366          */
367         MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
368         MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
369                    IPPROTO_DSTOPTS);
370         MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
371                    IPPROTO_ROUTING);
372
373 #ifdef IPSEC
374         if (!needipsec)
375                 goto skip_ipsec2;
376
377         /*
378          * pointers after IPsec headers are not valid any more.
379          * other pointers need a great care too.
380          * (IPsec routines should not mangle mbufs prior to AH/ESP)
381          */
382         exthdrs.ip6e_dest2 = NULL;
383
384         if (exthdrs.ip6e_rthdr) {
385                 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
386                 segleft_org = rh->ip6r_segleft;
387                 rh->ip6r_segleft = 0;
388         }
389
390         bzero(&state, sizeof(state));
391         state.m = m;
392         error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
393                                     &needipsectun);
394         m = state.m;
395         if (error == EJUSTRETURN) {
396                 /*
397                  * We had a SP with a level of 'use' and no SA. We
398                  * will just continue to process the packet without
399                  * IPsec processing.
400                  */
401                 ;
402         } else if (error) {
403                 /* mbuf is already reclaimed in ipsec6_output_trans. */
404                 m = NULL;
405                 switch (error) {
406                 case EHOSTUNREACH:
407                 case ENETUNREACH:
408                 case EMSGSIZE:
409                 case ENOBUFS:
410                 case ENOMEM:
411                         break;
412                 default:
413                         printf("[%s:%d] (ipsec): error code %d\n",
414                             __func__, __LINE__, error);
415                         /* FALLTHROUGH */
416                 case ENOENT:
417                         /* don't show these error codes to the user */
418                         error = 0;
419                         break;
420                 }
421                 goto bad;
422         } else if (!needipsectun) {
423                 /*
424                  * In the FAST IPSec case we have already
425                  * re-injected the packet and it has been freed
426                  * by the ipsec_done() function.  So, just clean
427                  * up after ourselves.
428                  */
429                 m = NULL;
430                 goto done;
431         }
432         if (exthdrs.ip6e_rthdr) {
433                 /* ah6_output doesn't modify mbuf chain */
434                 rh->ip6r_segleft = segleft_org;
435         }
436 skip_ipsec2:;
437 #endif /* IPSEC */
438
439         /*
440          * If there is a routing header, discard the packet.
441          */
442         if (exthdrs.ip6e_rthdr) {
443                  error = EINVAL;
444                  goto bad;
445         }
446
447         /* Source address validation */
448         if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
449             (flags & IPV6_UNSPECSRC) == 0) {
450                 error = EOPNOTSUPP;
451                 V_ip6stat.ip6s_badscope++;
452                 goto bad;
453         }
454         if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
455                 error = EOPNOTSUPP;
456                 V_ip6stat.ip6s_badscope++;
457                 goto bad;
458         }
459
460         V_ip6stat.ip6s_localout++;
461
462         /*
463          * Route packet.
464          */
465         if (ro == 0) {
466                 ro = &ip6route;
467                 bzero((caddr_t)ro, sizeof(*ro));
468         }
469         ro_pmtu = ro;
470         if (opt && opt->ip6po_rthdr)
471                 ro = &opt->ip6po_route;
472         dst = (struct sockaddr_in6 *)&ro->ro_dst;
473
474 again:
475         /*
476          * if specified, try to fill in the traffic class field.
477          * do not override if a non-zero value is already set.
478          * we check the diffserv field and the ecn field separately.
479          */
480         if (opt && opt->ip6po_tclass >= 0) {
481                 int mask = 0;
482
483                 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
484                         mask |= 0xfc;
485                 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
486                         mask |= 0x03;
487                 if (mask != 0)
488                         ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
489         }
490
491         /* fill in or override the hop limit field, if necessary. */
492         if (opt && opt->ip6po_hlim != -1)
493                 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
494         else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
495                 if (im6o != NULL)
496                         ip6->ip6_hlim = im6o->im6o_multicast_hlim;
497                 else
498                         ip6->ip6_hlim = V_ip6_defmcasthlim;
499         }
500
501 #ifdef IPSEC
502         /*
503          * We may re-inject packets into the stack here.
504          */
505         if (needipsec && needipsectun) {
506                 struct ipsec_output_state state;
507
508                 /*
509                  * All the extension headers will become inaccessible
510                  * (since they can be encrypted).
511                  * Don't panic, we need no more updates to extension headers
512                  * on inner IPv6 packet (since they are now encapsulated).
513                  *
514                  * IPv6 [ESP|AH] IPv6 [extension headers] payload
515                  */
516                 bzero(&exthdrs, sizeof(exthdrs));
517                 exthdrs.ip6e_ip6 = m;
518
519                 bzero(&state, sizeof(state));
520                 state.m = m;
521                 state.ro = (struct route *)ro;
522                 state.dst = (struct sockaddr *)dst;
523
524                 error = ipsec6_output_tunnel(&state, sp, flags);
525
526                 m = state.m;
527                 ro = (struct route_in6 *)state.ro;
528                 dst = (struct sockaddr_in6 *)state.dst;
529                 if (error == EJUSTRETURN) {
530                         /*
531                          * We had a SP with a level of 'use' and no SA. We
532                          * will just continue to process the packet without
533                          * IPsec processing.
534                          */
535                         ;
536                 } else if (error) {
537                         /* mbuf is already reclaimed in ipsec6_output_tunnel. */
538                         m0 = m = NULL;
539                         m = NULL;
540                         switch (error) {
541                         case EHOSTUNREACH:
542                         case ENETUNREACH:
543                         case EMSGSIZE:
544                         case ENOBUFS:
545                         case ENOMEM:
546                                 break;
547                         default:
548                                 printf("[%s:%d] (ipsec): error code %d\n",
549                                     __func__, __LINE__, error);
550                                 /* FALLTHROUGH */
551                         case ENOENT:
552                                 /* don't show these error codes to the user */
553                                 error = 0;
554                                 break;
555                         }
556                         goto bad;
557                 } else {
558                         /*
559                          * In the FAST IPSec case we have already
560                          * re-injected the packet and it has been freed
561                          * by the ipsec_done() function.  So, just clean
562                          * up after ourselves.
563                          */
564                         m = NULL;
565                         goto done;
566                 }
567
568                 exthdrs.ip6e_ip6 = m;
569         }
570 #endif /* IPSEC */
571
572         /* adjust pointer */
573         ip6 = mtod(m, struct ip6_hdr *);
574
575         bzero(&dst_sa, sizeof(dst_sa));
576         dst_sa.sin6_family = AF_INET6;
577         dst_sa.sin6_len = sizeof(dst_sa);
578         dst_sa.sin6_addr = ip6->ip6_dst;
579         if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
580             &ifp, &rt)) != 0) {
581                 switch (error) {
582                 case EHOSTUNREACH:
583                         V_ip6stat.ip6s_noroute++;
584                         break;
585                 case EADDRNOTAVAIL:
586                 default:
587                         break; /* XXX statistics? */
588                 }
589                 if (ifp != NULL)
590                         in6_ifstat_inc(ifp, ifs6_out_discard);
591                 goto bad;
592         }
593         if (rt == NULL) {
594                 /*
595                  * If in6_selectroute() does not return a route entry,
596                  * dst may not have been updated.
597                  */
598                 *dst = dst_sa;  /* XXX */
599         }
600
601         /*
602          * then rt (for unicast) and ifp must be non-NULL valid values.
603          */
604         if ((flags & IPV6_FORWARDING) == 0) {
605                 /* XXX: the FORWARDING flag can be set for mrouting. */
606                 in6_ifstat_inc(ifp, ifs6_out_request);
607         }
608         if (rt != NULL) {
609                 ia = (struct in6_ifaddr *)(rt->rt_ifa);
610                 rt->rt_use++;
611         }
612
613
614         /*
615          * The outgoing interface must be in the zone of source and
616          * destination addresses.  
617          */
618         origifp = ifp;
619
620         src0 = ip6->ip6_src;
621         if (in6_setscope(&src0, origifp, &zone))
622                 goto badscope;
623         bzero(&src_sa, sizeof(src_sa));
624         src_sa.sin6_family = AF_INET6;
625         src_sa.sin6_len = sizeof(src_sa);
626         src_sa.sin6_addr = ip6->ip6_src;
627         if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
628                 goto badscope;
629
630         dst0 = ip6->ip6_dst;
631         if (in6_setscope(&dst0, origifp, &zone))
632                 goto badscope;
633         /* re-initialize to be sure */
634         bzero(&dst_sa, sizeof(dst_sa));
635         dst_sa.sin6_family = AF_INET6;
636         dst_sa.sin6_len = sizeof(dst_sa);
637         dst_sa.sin6_addr = ip6->ip6_dst;
638         if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
639                 goto badscope;
640         }
641
642         /* We should use ia_ifp to support the case of 
643          * sending packets to an address of our own.
644          */
645         if (ia != NULL && ia->ia_ifp)
646                 ifp = ia->ia_ifp;
647
648         /* scope check is done. */
649         goto routefound;
650
651   badscope:
652         V_ip6stat.ip6s_badscope++;
653         in6_ifstat_inc(origifp, ifs6_out_discard);
654         if (error == 0)
655                 error = EHOSTUNREACH; /* XXX */
656         goto bad;
657
658   routefound:
659         if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
660                 if (opt && opt->ip6po_nextroute.ro_rt) {
661                         /*
662                          * The nexthop is explicitly specified by the
663                          * application.  We assume the next hop is an IPv6
664                          * address.
665                          */
666                         dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
667                 }
668                 else if ((rt->rt_flags & RTF_GATEWAY))
669                         dst = (struct sockaddr_in6 *)rt->rt_gateway;
670         }
671
672         if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
673                 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
674         } else {
675                 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
676                 in6_ifstat_inc(ifp, ifs6_out_mcast);
677                 /*
678                  * Confirm that the outgoing interface supports multicast.
679                  */
680                 if (!(ifp->if_flags & IFF_MULTICAST)) {
681                         V_ip6stat.ip6s_noroute++;
682                         in6_ifstat_inc(ifp, ifs6_out_discard);
683                         error = ENETUNREACH;
684                         goto bad;
685                 }
686                 if ((im6o == NULL && in6_mcast_loop) ||
687                     (im6o && im6o->im6o_multicast_loop)) {
688                         /*
689                          * Loop back multicast datagram if not expressly
690                          * forbidden to do so, even if we have not joined
691                          * the address; protocols will filter it later,
692                          * thus deferring a hash lookup and lock acquisition
693                          * at the expense of an m_copym().
694                          */
695                         ip6_mloopback(ifp, m, dst);
696                 } else {
697                         /*
698                          * If we are acting as a multicast router, perform
699                          * multicast forwarding as if the packet had just
700                          * arrived on the interface to which we are about
701                          * to send.  The multicast forwarding function
702                          * recursively calls this function, using the
703                          * IPV6_FORWARDING flag to prevent infinite recursion.
704                          *
705                          * Multicasts that are looped back by ip6_mloopback(),
706                          * above, will be forwarded by the ip6_input() routine,
707                          * if necessary.
708                          */
709                         if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
710                                 /*
711                                  * XXX: ip6_mforward expects that rcvif is NULL
712                                  * when it is called from the originating path.
713                                  * However, it is not always the case, since
714                                  * some versions of MGETHDR() do not
715                                  * initialize the field.
716                                  */
717                                 m->m_pkthdr.rcvif = NULL;
718                                 if (ip6_mforward(ip6, ifp, m) != 0) {
719                                         m_freem(m);
720                                         goto done;
721                                 }
722                         }
723                 }
724                 /*
725                  * Multicasts with a hoplimit of zero may be looped back,
726                  * above, but must not be transmitted on a network.
727                  * Also, multicasts addressed to the loopback interface
728                  * are not sent -- the above call to ip6_mloopback() will
729                  * loop back a copy if this host actually belongs to the
730                  * destination group on the loopback interface.
731                  */
732                 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
733                     IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
734                         m_freem(m);
735                         goto done;
736                 }
737         }
738
739         /*
740          * Fill the outgoing interface to tell the upper layer
741          * to increment per-interface statistics.
742          */
743         if (ifpp)
744                 *ifpp = ifp;
745
746         /* Determine path MTU. */
747         if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
748             &alwaysfrag)) != 0)
749                 goto bad;
750
751         /*
752          * The caller of this function may specify to use the minimum MTU
753          * in some cases.
754          * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
755          * setting.  The logic is a bit complicated; by default, unicast
756          * packets will follow path MTU while multicast packets will be sent at
757          * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
758          * including unicast ones will be sent at the minimum MTU.  Multicast
759          * packets will always be sent at the minimum MTU unless
760          * IP6PO_MINMTU_DISABLE is explicitly specified.
761          * See RFC 3542 for more details.
762          */
763         if (mtu > IPV6_MMTU) {
764                 if ((flags & IPV6_MINMTU))
765                         mtu = IPV6_MMTU;
766                 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
767                         mtu = IPV6_MMTU;
768                 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
769                          (opt == NULL ||
770                           opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
771                         mtu = IPV6_MMTU;
772                 }
773         }
774
775         /*
776          * clear embedded scope identifiers if necessary.
777          * in6_clearscope will touch the addresses only when necessary.
778          */
779         in6_clearscope(&ip6->ip6_src);
780         in6_clearscope(&ip6->ip6_dst);
781
782         /*
783          * If the outgoing packet contains a hop-by-hop options header,
784          * it must be examined and processed even by the source node.
785          * (RFC 2460, section 4.)
786          */
787         if (exthdrs.ip6e_hbh) {
788                 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
789                 u_int32_t dummy; /* XXX unused */
790                 u_int32_t plen = 0; /* XXX: ip6_process will check the value */
791
792 #ifdef DIAGNOSTIC
793                 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
794                         panic("ip6e_hbh is not continuous");
795 #endif
796                 /*
797                  *  XXX: if we have to send an ICMPv6 error to the sender,
798                  *       we need the M_LOOP flag since icmp6_error() expects
799                  *       the IPv6 and the hop-by-hop options header are
800                  *       continuous unless the flag is set.
801                  */
802                 m->m_flags |= M_LOOP;
803                 m->m_pkthdr.rcvif = ifp;
804                 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
805                     ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
806                     &dummy, &plen) < 0) {
807                         /* m was already freed at this point */
808                         error = EINVAL;/* better error? */
809                         goto done;
810                 }
811                 m->m_flags &= ~M_LOOP; /* XXX */
812                 m->m_pkthdr.rcvif = NULL;
813         }
814
815         /* Jump over all PFIL processing if hooks are not active. */
816         if (!PFIL_HOOKED(&V_inet6_pfil_hook))
817                 goto passout;
818
819         odst = ip6->ip6_dst;
820         /* Run through list of hooks for output packets. */
821         error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
822         if (error != 0 || m == NULL)
823                 goto done;
824         ip6 = mtod(m, struct ip6_hdr *);
825
826         /* See if destination IP address was changed by packet filter. */
827         if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
828                 m->m_flags |= M_SKIP_FIREWALL;
829                 /* If destination is now ourself drop to ip6_input(). */
830                 if (in6_localaddr(&ip6->ip6_dst)) {
831                         if (m->m_pkthdr.rcvif == NULL)
832                                 m->m_pkthdr.rcvif = V_loif;
833                         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
834                                 m->m_pkthdr.csum_flags |=
835                                     CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
836                                 m->m_pkthdr.csum_data = 0xffff;
837                         }
838                         m->m_pkthdr.csum_flags |=
839                             CSUM_IP_CHECKED | CSUM_IP_VALID;
840 #ifdef SCTP
841                         if (m->m_pkthdr.csum_flags & CSUM_SCTP)
842                                 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
843 #endif
844                         error = netisr_queue(NETISR_IPV6, m);
845                         goto done;
846                 } else
847                         goto again;     /* Redo the routing table lookup. */
848         }
849
850         /* XXX: IPFIREWALL_FORWARD */
851
852 passout:
853         /*
854          * Send the packet to the outgoing interface.
855          * If necessary, do IPv6 fragmentation before sending.
856          *
857          * the logic here is rather complex:
858          * 1: normal case (dontfrag == 0, alwaysfrag == 0)
859          * 1-a: send as is if tlen <= path mtu
860          * 1-b: fragment if tlen > path mtu
861          *
862          * 2: if user asks us not to fragment (dontfrag == 1)
863          * 2-a: send as is if tlen <= interface mtu
864          * 2-b: error if tlen > interface mtu
865          *
866          * 3: if we always need to attach fragment header (alwaysfrag == 1)
867          *      always fragment
868          *
869          * 4: if dontfrag == 1 && alwaysfrag == 1
870          *      error, as we cannot handle this conflicting request
871          */
872 #ifdef SCTP
873         sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
874         if (sw_csum & CSUM_SCTP) {
875                 sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
876                 sw_csum &= ~CSUM_SCTP;
877         }
878 #endif
879         tlen = m->m_pkthdr.len;
880
881         if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
882                 dontfrag = 1;
883         else
884                 dontfrag = 0;
885         if (dontfrag && alwaysfrag) {   /* case 4 */
886                 /* conflicting request - can't transmit */
887                 error = EMSGSIZE;
888                 goto bad;
889         }
890         if (dontfrag && tlen > IN6_LINKMTU(ifp)) {      /* case 2-b */
891                 /*
892                  * Even if the DONTFRAG option is specified, we cannot send the
893                  * packet when the data length is larger than the MTU of the
894                  * outgoing interface.
895                  * Notify the error by sending IPV6_PATHMTU ancillary data as
896                  * well as returning an error code (the latter is not described
897                  * in the API spec.)
898                  */
899                 u_int32_t mtu32;
900                 struct ip6ctlparam ip6cp;
901
902                 mtu32 = (u_int32_t)mtu;
903                 bzero(&ip6cp, sizeof(ip6cp));
904                 ip6cp.ip6c_cmdarg = (void *)&mtu32;
905                 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
906                     (void *)&ip6cp);
907
908                 error = EMSGSIZE;
909                 goto bad;
910         }
911
912         /*
913          * transmit packet without fragmentation
914          */
915         if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
916                 struct in6_ifaddr *ia6;
917
918                 ip6 = mtod(m, struct ip6_hdr *);
919                 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
920                 if (ia6) {
921                         /* Record statistics for this interface address. */
922                         ia6->ia_ifa.if_opackets++;
923                         ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
924                         ifa_free(&ia6->ia_ifa);
925                 }
926                 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
927                 goto done;
928         }
929
930         /*
931          * try to fragment the packet.  case 1-b and 3
932          */
933         if (mtu < IPV6_MMTU) {
934                 /* path MTU cannot be less than IPV6_MMTU */
935                 error = EMSGSIZE;
936                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
937                 goto bad;
938         } else if (ip6->ip6_plen == 0) {
939                 /* jumbo payload cannot be fragmented */
940                 error = EMSGSIZE;
941                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
942                 goto bad;
943         } else {
944                 struct mbuf **mnext, *m_frgpart;
945                 struct ip6_frag *ip6f;
946                 u_int32_t id = htonl(ip6_randomid());
947                 u_char nextproto;
948
949                 int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
950
951                 /*
952                  * Too large for the destination or interface;
953                  * fragment if possible.
954                  * Must be able to put at least 8 bytes per fragment.
955                  */
956                 hlen = unfragpartlen;
957                 if (mtu > IPV6_MAXPACKET)
958                         mtu = IPV6_MAXPACKET;
959
960                 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
961                 if (len < 8) {
962                         error = EMSGSIZE;
963                         in6_ifstat_inc(ifp, ifs6_out_fragfail);
964                         goto bad;
965                 }
966
967                 /*
968                  * Verify that we have any chance at all of being able to queue
969                  *      the packet or packet fragments
970                  */
971                 if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
972                     < tlen  /* - hlen */)) {
973                         error = ENOBUFS;
974                         V_ip6stat.ip6s_odropped++;
975                         goto bad;
976                 }
977
978                 mnext = &m->m_nextpkt;
979
980                 /*
981                  * Change the next header field of the last header in the
982                  * unfragmentable part.
983                  */
984                 if (exthdrs.ip6e_rthdr) {
985                         nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
986                         *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
987                 } else if (exthdrs.ip6e_dest1) {
988                         nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
989                         *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
990                 } else if (exthdrs.ip6e_hbh) {
991                         nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
992                         *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
993                 } else {
994                         nextproto = ip6->ip6_nxt;
995                         ip6->ip6_nxt = IPPROTO_FRAGMENT;
996                 }
997
998                 /*
999                  * Loop through length of segment after first fragment,
1000                  * make new header and copy data of each part and link onto
1001                  * chain.
1002                  */
1003                 m0 = m;
1004                 for (off = hlen; off < tlen; off += len) {
1005                         MGETHDR(m, M_DONTWAIT, MT_HEADER);
1006                         if (!m) {
1007                                 error = ENOBUFS;
1008                                 V_ip6stat.ip6s_odropped++;
1009                                 goto sendorfree;
1010                         }
1011                         m->m_pkthdr.rcvif = NULL;
1012                         m->m_flags = m0->m_flags & M_COPYFLAGS;
1013                         *mnext = m;
1014                         mnext = &m->m_nextpkt;
1015                         m->m_data += max_linkhdr;
1016                         mhip6 = mtod(m, struct ip6_hdr *);
1017                         *mhip6 = *ip6;
1018                         m->m_len = sizeof(*mhip6);
1019                         error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1020                         if (error) {
1021                                 V_ip6stat.ip6s_odropped++;
1022                                 goto sendorfree;
1023                         }
1024                         ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1025                         if (off + len >= tlen)
1026                                 len = tlen - off;
1027                         else
1028                                 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1029                         mhip6->ip6_plen = htons((u_short)(len + hlen +
1030                             sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1031                         if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1032                                 error = ENOBUFS;
1033                                 V_ip6stat.ip6s_odropped++;
1034                                 goto sendorfree;
1035                         }
1036                         m_cat(m, m_frgpart);
1037                         m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1038                         m->m_pkthdr.rcvif = NULL;
1039                         ip6f->ip6f_reserved = 0;
1040                         ip6f->ip6f_ident = id;
1041                         ip6f->ip6f_nxt = nextproto;
1042                         V_ip6stat.ip6s_ofragments++;
1043                         in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1044                 }
1045
1046                 in6_ifstat_inc(ifp, ifs6_out_fragok);
1047         }
1048
1049         /*
1050          * Remove leading garbages.
1051          */
1052 sendorfree:
1053         m = m0->m_nextpkt;
1054         m0->m_nextpkt = 0;
1055         m_freem(m0);
1056         for (m0 = m; m; m = m0) {
1057                 m0 = m->m_nextpkt;
1058                 m->m_nextpkt = 0;
1059                 if (error == 0) {
1060                         /* Record statistics for this interface address. */
1061                         if (ia) {
1062                                 ia->ia_ifa.if_opackets++;
1063                                 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1064                         }
1065                         error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1066                 } else
1067                         m_freem(m);
1068         }
1069
1070         if (error == 0)
1071                 V_ip6stat.ip6s_fragmented++;
1072
1073 done:
1074         if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1075                 RTFREE(ro->ro_rt);
1076         } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1077                 RTFREE(ro_pmtu->ro_rt);
1078         }
1079 #ifdef IPSEC
1080         if (sp != NULL)
1081                 KEY_FREESP(&sp);
1082 #endif
1083
1084         return (error);
1085
1086 freehdrs:
1087         m_freem(exthdrs.ip6e_hbh);      /* m_freem will check if mbuf is 0 */
1088         m_freem(exthdrs.ip6e_dest1);
1089         m_freem(exthdrs.ip6e_rthdr);
1090         m_freem(exthdrs.ip6e_dest2);
1091         /* FALLTHROUGH */
1092 bad:
1093         if (m)
1094                 m_freem(m);
1095         goto done;
1096 }
1097
1098 static int
1099 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1100 {
1101         struct mbuf *m;
1102
1103         if (hlen > MCLBYTES)
1104                 return (ENOBUFS); /* XXX */
1105
1106         MGET(m, M_DONTWAIT, MT_DATA);
1107         if (!m)
1108                 return (ENOBUFS);
1109
1110         if (hlen > MLEN) {
1111                 MCLGET(m, M_DONTWAIT);
1112                 if ((m->m_flags & M_EXT) == 0) {
1113                         m_free(m);
1114                         return (ENOBUFS);
1115                 }
1116         }
1117         m->m_len = hlen;
1118         if (hdr)
1119                 bcopy(hdr, mtod(m, caddr_t), hlen);
1120
1121         *mp = m;
1122         return (0);
1123 }
1124
1125 /*
1126  * Insert jumbo payload option.
1127  */
1128 static int
1129 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1130 {
1131         struct mbuf *mopt;
1132         u_char *optbuf;
1133         u_int32_t v;
1134
1135 #define JUMBOOPTLEN     8       /* length of jumbo payload option and padding */
1136
1137         /*
1138          * If there is no hop-by-hop options header, allocate new one.
1139          * If there is one but it doesn't have enough space to store the
1140          * jumbo payload option, allocate a cluster to store the whole options.
1141          * Otherwise, use it to store the options.
1142          */
1143         if (exthdrs->ip6e_hbh == 0) {
1144                 MGET(mopt, M_DONTWAIT, MT_DATA);
1145                 if (mopt == 0)
1146                         return (ENOBUFS);
1147                 mopt->m_len = JUMBOOPTLEN;
1148                 optbuf = mtod(mopt, u_char *);
1149                 optbuf[1] = 0;  /* = ((JUMBOOPTLEN) >> 3) - 1 */
1150                 exthdrs->ip6e_hbh = mopt;
1151         } else {
1152                 struct ip6_hbh *hbh;
1153
1154                 mopt = exthdrs->ip6e_hbh;
1155                 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1156                         /*
1157                          * XXX assumption:
1158                          * - exthdrs->ip6e_hbh is not referenced from places
1159                          *   other than exthdrs.
1160                          * - exthdrs->ip6e_hbh is not an mbuf chain.
1161                          */
1162                         int oldoptlen = mopt->m_len;
1163                         struct mbuf *n;
1164
1165                         /*
1166                          * XXX: give up if the whole (new) hbh header does
1167                          * not fit even in an mbuf cluster.
1168                          */
1169                         if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1170                                 return (ENOBUFS);
1171
1172                         /*
1173                          * As a consequence, we must always prepare a cluster
1174                          * at this point.
1175                          */
1176                         MGET(n, M_DONTWAIT, MT_DATA);
1177                         if (n) {
1178                                 MCLGET(n, M_DONTWAIT);
1179                                 if ((n->m_flags & M_EXT) == 0) {
1180                                         m_freem(n);
1181                                         n = NULL;
1182                                 }
1183                         }
1184                         if (!n)
1185                                 return (ENOBUFS);
1186                         n->m_len = oldoptlen + JUMBOOPTLEN;
1187                         bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1188                             oldoptlen);
1189                         optbuf = mtod(n, caddr_t) + oldoptlen;
1190                         m_freem(mopt);
1191                         mopt = exthdrs->ip6e_hbh = n;
1192                 } else {
1193                         optbuf = mtod(mopt, u_char *) + mopt->m_len;
1194                         mopt->m_len += JUMBOOPTLEN;
1195                 }
1196                 optbuf[0] = IP6OPT_PADN;
1197                 optbuf[1] = 1;
1198
1199                 /*
1200                  * Adjust the header length according to the pad and
1201                  * the jumbo payload option.
1202                  */
1203                 hbh = mtod(mopt, struct ip6_hbh *);
1204                 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1205         }
1206
1207         /* fill in the option. */
1208         optbuf[2] = IP6OPT_JUMBO;
1209         optbuf[3] = 4;
1210         v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1211         bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1212
1213         /* finally, adjust the packet header length */
1214         exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1215
1216         return (0);
1217 #undef JUMBOOPTLEN
1218 }
1219
1220 /*
1221  * Insert fragment header and copy unfragmentable header portions.
1222  */
1223 static int
1224 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1225     struct ip6_frag **frghdrp)
1226 {
1227         struct mbuf *n, *mlast;
1228
1229         if (hlen > sizeof(struct ip6_hdr)) {
1230                 n = m_copym(m0, sizeof(struct ip6_hdr),
1231                     hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1232                 if (n == 0)
1233                         return (ENOBUFS);
1234                 m->m_next = n;
1235         } else
1236                 n = m;
1237
1238         /* Search for the last mbuf of unfragmentable part. */
1239         for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1240                 ;
1241
1242         if ((mlast->m_flags & M_EXT) == 0 &&
1243             M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1244                 /* use the trailing space of the last mbuf for the fragment hdr */
1245                 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1246                     mlast->m_len);
1247                 mlast->m_len += sizeof(struct ip6_frag);
1248                 m->m_pkthdr.len += sizeof(struct ip6_frag);
1249         } else {
1250                 /* allocate a new mbuf for the fragment header */
1251                 struct mbuf *mfrg;
1252
1253                 MGET(mfrg, M_DONTWAIT, MT_DATA);
1254                 if (mfrg == 0)
1255                         return (ENOBUFS);
1256                 mfrg->m_len = sizeof(struct ip6_frag);
1257                 *frghdrp = mtod(mfrg, struct ip6_frag *);
1258                 mlast->m_next = mfrg;
1259         }
1260
1261         return (0);
1262 }
1263
1264 static int
1265 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1266     struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
1267     int *alwaysfragp)
1268 {
1269         u_int32_t mtu = 0;
1270         int alwaysfrag = 0;
1271         int error = 0;
1272
1273         if (ro_pmtu != ro) {
1274                 /* The first hop and the final destination may differ. */
1275                 struct sockaddr_in6 *sa6_dst =
1276                     (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1277                 if (ro_pmtu->ro_rt &&
1278                     ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1279                      !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1280                         RTFREE(ro_pmtu->ro_rt);
1281                         ro_pmtu->ro_rt = (struct rtentry *)NULL;
1282                 }
1283                 if (ro_pmtu->ro_rt == NULL) {
1284                         bzero(sa6_dst, sizeof(*sa6_dst));
1285                         sa6_dst->sin6_family = AF_INET6;
1286                         sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1287                         sa6_dst->sin6_addr = *dst;
1288
1289                         rtalloc((struct route *)ro_pmtu);
1290                 }
1291         }
1292         if (ro_pmtu->ro_rt) {
1293                 u_int32_t ifmtu;
1294                 struct in_conninfo inc;
1295
1296                 bzero(&inc, sizeof(inc));
1297                 inc.inc_flags |= INC_ISIPV6;
1298                 inc.inc6_faddr = *dst;
1299
1300                 if (ifp == NULL)
1301                         ifp = ro_pmtu->ro_rt->rt_ifp;
1302                 ifmtu = IN6_LINKMTU(ifp);
1303                 mtu = tcp_hc_getmtu(&inc);
1304                 if (mtu)
1305                         mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1306                 else
1307                         mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1308                 if (mtu == 0)
1309                         mtu = ifmtu;
1310                 else if (mtu < IPV6_MMTU) {
1311                         /*
1312                          * RFC2460 section 5, last paragraph:
1313                          * if we record ICMPv6 too big message with
1314                          * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1315                          * or smaller, with framgent header attached.
1316                          * (fragment header is needed regardless from the
1317                          * packet size, for translators to identify packets)
1318                          */
1319                         alwaysfrag = 1;
1320                         mtu = IPV6_MMTU;
1321                 } else if (mtu > ifmtu) {
1322                         /*
1323                          * The MTU on the route is larger than the MTU on
1324                          * the interface!  This shouldn't happen, unless the
1325                          * MTU of the interface has been changed after the
1326                          * interface was brought up.  Change the MTU in the
1327                          * route to match the interface MTU (as long as the
1328                          * field isn't locked).
1329                          */
1330                         mtu = ifmtu;
1331                         ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1332                 }
1333         } else if (ifp) {
1334                 mtu = IN6_LINKMTU(ifp);
1335         } else
1336                 error = EHOSTUNREACH; /* XXX */
1337
1338         *mtup = mtu;
1339         if (alwaysfragp)
1340                 *alwaysfragp = alwaysfrag;
1341         return (error);
1342 }
1343
1344 /*
1345  * IP6 socket option processing.
1346  */
1347 int
1348 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1349 {
1350         int optdatalen, uproto;
1351         void *optdata;
1352         struct inpcb *in6p = sotoinpcb(so);
1353         int error, optval;
1354         int level, op, optname;
1355         int optlen;
1356         struct thread *td;
1357
1358         level = sopt->sopt_level;
1359         op = sopt->sopt_dir;
1360         optname = sopt->sopt_name;
1361         optlen = sopt->sopt_valsize;
1362         td = sopt->sopt_td;
1363         error = 0;
1364         optval = 0;
1365         uproto = (int)so->so_proto->pr_protocol;
1366
1367         if (level == IPPROTO_IPV6) {
1368                 switch (op) {
1369
1370                 case SOPT_SET:
1371                         switch (optname) {
1372                         case IPV6_2292PKTOPTIONS:
1373 #ifdef IPV6_PKTOPTIONS
1374                         case IPV6_PKTOPTIONS:
1375 #endif
1376                         {
1377                                 struct mbuf *m;
1378
1379                                 error = soopt_getm(sopt, &m); /* XXX */
1380                                 if (error != 0)
1381                                         break;
1382                                 error = soopt_mcopyin(sopt, m); /* XXX */
1383                                 if (error != 0)
1384                                         break;
1385                                 error = ip6_pcbopts(&in6p->in6p_outputopts,
1386                                                     m, so, sopt);
1387                                 m_freem(m); /* XXX */
1388                                 break;
1389                         }
1390
1391                         /*
1392                          * Use of some Hop-by-Hop options or some
1393                          * Destination options, might require special
1394                          * privilege.  That is, normal applications
1395                          * (without special privilege) might be forbidden
1396                          * from setting certain options in outgoing packets,
1397                          * and might never see certain options in received
1398                          * packets. [RFC 2292 Section 6]
1399                          * KAME specific note:
1400                          *  KAME prevents non-privileged users from sending or
1401                          *  receiving ANY hbh/dst options in order to avoid
1402                          *  overhead of parsing options in the kernel.
1403                          */
1404                         case IPV6_RECVHOPOPTS:
1405                         case IPV6_RECVDSTOPTS:
1406                         case IPV6_RECVRTHDRDSTOPTS:
1407                                 if (td != NULL) {
1408                                         error = priv_check(td,
1409                                             PRIV_NETINET_SETHDROPTS);
1410                                         if (error)
1411                                                 break;
1412                                 }
1413                                 /* FALLTHROUGH */
1414                         case IPV6_UNICAST_HOPS:
1415                         case IPV6_HOPLIMIT:
1416                         case IPV6_FAITH:
1417
1418                         case IPV6_RECVPKTINFO:
1419                         case IPV6_RECVHOPLIMIT:
1420                         case IPV6_RECVRTHDR:
1421                         case IPV6_RECVPATHMTU:
1422                         case IPV6_RECVTCLASS:
1423                         case IPV6_V6ONLY:
1424                         case IPV6_AUTOFLOWLABEL:
1425                         case IPV6_BINDANY:
1426                                 if (optname == IPV6_BINDANY && td != NULL) {
1427                                         error = priv_check(td,
1428                                             PRIV_NETINET_BINDANY);
1429                                         if (error)
1430                                                 break;
1431                                 }
1432
1433                                 if (optlen != sizeof(int)) {
1434                                         error = EINVAL;
1435                                         break;
1436                                 }
1437                                 error = sooptcopyin(sopt, &optval,
1438                                         sizeof optval, sizeof optval);
1439                                 if (error)
1440                                         break;
1441                                 switch (optname) {
1442
1443                                 case IPV6_UNICAST_HOPS:
1444                                         if (optval < -1 || optval >= 256)
1445                                                 error = EINVAL;
1446                                         else {
1447                                                 /* -1 = kernel default */
1448                                                 in6p->in6p_hops = optval;
1449                                                 if ((in6p->inp_vflag &
1450                                                      INP_IPV4) != 0)
1451                                                         in6p->inp_ip_ttl = optval;
1452                                         }
1453                                         break;
1454 #define OPTSET(bit) \
1455 do { \
1456         if (optval) \
1457                 in6p->inp_flags |= (bit); \
1458         else \
1459                 in6p->inp_flags &= ~(bit); \
1460 } while (/*CONSTCOND*/ 0)
1461 #define OPTSET2292(bit) \
1462 do { \
1463         in6p->inp_flags |= IN6P_RFC2292; \
1464         if (optval) \
1465                 in6p->inp_flags |= (bit); \
1466         else \
1467                 in6p->inp_flags &= ~(bit); \
1468 } while (/*CONSTCOND*/ 0)
1469 #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
1470
1471                                 case IPV6_RECVPKTINFO:
1472                                         /* cannot mix with RFC2292 */
1473                                         if (OPTBIT(IN6P_RFC2292)) {
1474                                                 error = EINVAL;
1475                                                 break;
1476                                         }
1477                                         OPTSET(IN6P_PKTINFO);
1478                                         break;
1479
1480                                 case IPV6_HOPLIMIT:
1481                                 {
1482                                         struct ip6_pktopts **optp;
1483
1484                                         /* cannot mix with RFC2292 */
1485                                         if (OPTBIT(IN6P_RFC2292)) {
1486                                                 error = EINVAL;
1487                                                 break;
1488                                         }
1489                                         optp = &in6p->in6p_outputopts;
1490                                         error = ip6_pcbopt(IPV6_HOPLIMIT,
1491                                             (u_char *)&optval, sizeof(optval),
1492                                             optp, (td != NULL) ? td->td_ucred :
1493                                             NULL, uproto);
1494                                         break;
1495                                 }
1496
1497                                 case IPV6_RECVHOPLIMIT:
1498                                         /* cannot mix with RFC2292 */
1499                                         if (OPTBIT(IN6P_RFC2292)) {
1500                                                 error = EINVAL;
1501                                                 break;
1502                                         }
1503                                         OPTSET(IN6P_HOPLIMIT);
1504                                         break;
1505
1506                                 case IPV6_RECVHOPOPTS:
1507                                         /* cannot mix with RFC2292 */
1508                                         if (OPTBIT(IN6P_RFC2292)) {
1509                                                 error = EINVAL;
1510                                                 break;
1511                                         }
1512                                         OPTSET(IN6P_HOPOPTS);
1513                                         break;
1514
1515                                 case IPV6_RECVDSTOPTS:
1516                                         /* cannot mix with RFC2292 */
1517                                         if (OPTBIT(IN6P_RFC2292)) {
1518                                                 error = EINVAL;
1519                                                 break;
1520                                         }
1521                                         OPTSET(IN6P_DSTOPTS);
1522                                         break;
1523
1524                                 case IPV6_RECVRTHDRDSTOPTS:
1525                                         /* cannot mix with RFC2292 */
1526                                         if (OPTBIT(IN6P_RFC2292)) {
1527                                                 error = EINVAL;
1528                                                 break;
1529                                         }
1530                                         OPTSET(IN6P_RTHDRDSTOPTS);
1531                                         break;
1532
1533                                 case IPV6_RECVRTHDR:
1534                                         /* cannot mix with RFC2292 */
1535                                         if (OPTBIT(IN6P_RFC2292)) {
1536                                                 error = EINVAL;
1537                                                 break;
1538                                         }
1539                                         OPTSET(IN6P_RTHDR);
1540                                         break;
1541
1542                                 case IPV6_FAITH:
1543                                         OPTSET(INP_FAITH);
1544                                         break;
1545
1546                                 case IPV6_RECVPATHMTU:
1547                                         /*
1548                                          * We ignore this option for TCP
1549                                          * sockets.
1550                                          * (RFC3542 leaves this case
1551                                          * unspecified.)
1552                                          */
1553                                         if (uproto != IPPROTO_TCP)
1554                                                 OPTSET(IN6P_MTU);
1555                                         break;
1556
1557                                 case IPV6_V6ONLY:
1558                                         /*
1559                                          * make setsockopt(IPV6_V6ONLY)
1560                                          * available only prior to bind(2).
1561                                          * see ipng mailing list, Jun 22 2001.
1562                                          */
1563                                         if (in6p->inp_lport ||
1564                                             !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1565                                                 error = EINVAL;
1566                                                 break;
1567                                         }
1568                                         OPTSET(IN6P_IPV6_V6ONLY);
1569                                         if (optval)
1570                                                 in6p->inp_vflag &= ~INP_IPV4;
1571                                         else
1572                                                 in6p->inp_vflag |= INP_IPV4;
1573                                         break;
1574                                 case IPV6_RECVTCLASS:
1575                                         /* cannot mix with RFC2292 XXX */
1576                                         if (OPTBIT(IN6P_RFC2292)) {
1577                                                 error = EINVAL;
1578                                                 break;
1579                                         }
1580                                         OPTSET(IN6P_TCLASS);
1581                                         break;
1582                                 case IPV6_AUTOFLOWLABEL:
1583                                         OPTSET(IN6P_AUTOFLOWLABEL);
1584                                         break;
1585
1586                                 case IPV6_BINDANY:
1587                                         OPTSET(INP_BINDANY);
1588                                         break;
1589                                 }
1590                                 break;
1591
1592                         case IPV6_TCLASS:
1593                         case IPV6_DONTFRAG:
1594                         case IPV6_USE_MIN_MTU:
1595                         case IPV6_PREFER_TEMPADDR:
1596                                 if (optlen != sizeof(optval)) {
1597                                         error = EINVAL;
1598                                         break;
1599                                 }
1600                                 error = sooptcopyin(sopt, &optval,
1601                                         sizeof optval, sizeof optval);
1602                                 if (error)
1603                                         break;
1604                                 {
1605                                         struct ip6_pktopts **optp;
1606                                         optp = &in6p->in6p_outputopts;
1607                                         error = ip6_pcbopt(optname,
1608                                             (u_char *)&optval, sizeof(optval),
1609                                             optp, (td != NULL) ? td->td_ucred :
1610                                             NULL, uproto);
1611                                         break;
1612                                 }
1613
1614                         case IPV6_2292PKTINFO:
1615                         case IPV6_2292HOPLIMIT:
1616                         case IPV6_2292HOPOPTS:
1617                         case IPV6_2292DSTOPTS:
1618                         case IPV6_2292RTHDR:
1619                                 /* RFC 2292 */
1620                                 if (optlen != sizeof(int)) {
1621                                         error = EINVAL;
1622                                         break;
1623                                 }
1624                                 error = sooptcopyin(sopt, &optval,
1625                                         sizeof optval, sizeof optval);
1626                                 if (error)
1627                                         break;
1628                                 switch (optname) {
1629                                 case IPV6_2292PKTINFO:
1630                                         OPTSET2292(IN6P_PKTINFO);
1631                                         break;
1632                                 case IPV6_2292HOPLIMIT:
1633                                         OPTSET2292(IN6P_HOPLIMIT);
1634                                         break;
1635                                 case IPV6_2292HOPOPTS:
1636                                         /*
1637                                          * Check super-user privilege.
1638                                          * See comments for IPV6_RECVHOPOPTS.
1639                                          */
1640                                         if (td != NULL) {
1641                                                 error = priv_check(td,
1642                                                     PRIV_NETINET_SETHDROPTS);
1643                                                 if (error)
1644                                                         return (error);
1645                                         }
1646                                         OPTSET2292(IN6P_HOPOPTS);
1647                                         break;
1648                                 case IPV6_2292DSTOPTS:
1649                                         if (td != NULL) {
1650                                                 error = priv_check(td,
1651                                                     PRIV_NETINET_SETHDROPTS);
1652                                                 if (error)
1653                                                         return (error);
1654                                         }
1655                                         OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1656                                         break;
1657                                 case IPV6_2292RTHDR:
1658                                         OPTSET2292(IN6P_RTHDR);
1659                                         break;
1660                                 }
1661                                 break;
1662                         case IPV6_PKTINFO:
1663                         case IPV6_HOPOPTS:
1664                         case IPV6_RTHDR:
1665                         case IPV6_DSTOPTS:
1666                         case IPV6_RTHDRDSTOPTS:
1667                         case IPV6_NEXTHOP:
1668                         {
1669                                 /* new advanced API (RFC3542) */
1670                                 u_char *optbuf;
1671                                 u_char optbuf_storage[MCLBYTES];
1672                                 int optlen;
1673                                 struct ip6_pktopts **optp;
1674
1675                                 /* cannot mix with RFC2292 */
1676                                 if (OPTBIT(IN6P_RFC2292)) {
1677                                         error = EINVAL;
1678                                         break;
1679                                 }
1680
1681                                 /*
1682                                  * We only ensure valsize is not too large
1683                                  * here.  Further validation will be done
1684                                  * later.
1685                                  */
1686                                 error = sooptcopyin(sopt, optbuf_storage,
1687                                     sizeof(optbuf_storage), 0);
1688                                 if (error)
1689                                         break;
1690                                 optlen = sopt->sopt_valsize;
1691                                 optbuf = optbuf_storage;
1692                                 optp = &in6p->in6p_outputopts;
1693                                 error = ip6_pcbopt(optname, optbuf, optlen,
1694                                     optp, (td != NULL) ? td->td_ucred : NULL,
1695                                     uproto);
1696                                 break;
1697                         }
1698 #undef OPTSET
1699
1700                         case IPV6_MULTICAST_IF:
1701                         case IPV6_MULTICAST_HOPS:
1702                         case IPV6_MULTICAST_LOOP:
1703                         case IPV6_JOIN_GROUP:
1704                         case IPV6_LEAVE_GROUP:
1705                         case IPV6_MSFILTER:
1706                         case MCAST_BLOCK_SOURCE:
1707                         case MCAST_UNBLOCK_SOURCE:
1708                         case MCAST_JOIN_GROUP:
1709                         case MCAST_LEAVE_GROUP:
1710                         case MCAST_JOIN_SOURCE_GROUP:
1711                         case MCAST_LEAVE_SOURCE_GROUP:
1712                                 error = ip6_setmoptions(in6p, sopt);
1713                                 break;
1714
1715                         case IPV6_PORTRANGE:
1716                                 error = sooptcopyin(sopt, &optval,
1717                                     sizeof optval, sizeof optval);
1718                                 if (error)
1719                                         break;
1720
1721                                 switch (optval) {
1722                                 case IPV6_PORTRANGE_DEFAULT:
1723                                         in6p->inp_flags &= ~(INP_LOWPORT);
1724                                         in6p->inp_flags &= ~(INP_HIGHPORT);
1725                                         break;
1726
1727                                 case IPV6_PORTRANGE_HIGH:
1728                                         in6p->inp_flags &= ~(INP_LOWPORT);
1729                                         in6p->inp_flags |= INP_HIGHPORT;
1730                                         break;
1731
1732                                 case IPV6_PORTRANGE_LOW:
1733                                         in6p->inp_flags &= ~(INP_HIGHPORT);
1734                                         in6p->inp_flags |= INP_LOWPORT;
1735                                         break;
1736
1737                                 default:
1738                                         error = EINVAL;
1739                                         break;
1740                                 }
1741                                 break;
1742
1743 #ifdef IPSEC
1744                         case IPV6_IPSEC_POLICY:
1745                         {
1746                                 caddr_t req;
1747                                 struct mbuf *m;
1748
1749                                 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1750                                         break;
1751                                 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1752                                         break;
1753                                 req = mtod(m, caddr_t);
1754                                 error = ipsec_set_policy(in6p, optname, req,
1755                                     m->m_len, (sopt->sopt_td != NULL) ?
1756                                     sopt->sopt_td->td_ucred : NULL);
1757                                 m_freem(m);
1758                                 break;
1759                         }
1760 #endif /* IPSEC */
1761
1762                         default:
1763                                 error = ENOPROTOOPT;
1764                                 break;
1765                         }
1766                         break;
1767
1768                 case SOPT_GET:
1769                         switch (optname) {
1770
1771                         case IPV6_2292PKTOPTIONS:
1772 #ifdef IPV6_PKTOPTIONS
1773                         case IPV6_PKTOPTIONS:
1774 #endif
1775                                 /*
1776                                  * RFC3542 (effectively) deprecated the
1777                                  * semantics of the 2292-style pktoptions.
1778                                  * Since it was not reliable in nature (i.e.,
1779                                  * applications had to expect the lack of some
1780                                  * information after all), it would make sense
1781                                  * to simplify this part by always returning
1782                                  * empty data.
1783                                  */
1784                                 sopt->sopt_valsize = 0;
1785                                 break;
1786
1787                         case IPV6_RECVHOPOPTS:
1788                         case IPV6_RECVDSTOPTS:
1789                         case IPV6_RECVRTHDRDSTOPTS:
1790                         case IPV6_UNICAST_HOPS:
1791                         case IPV6_RECVPKTINFO:
1792                         case IPV6_RECVHOPLIMIT:
1793                         case IPV6_RECVRTHDR:
1794                         case IPV6_RECVPATHMTU:
1795
1796                         case IPV6_FAITH:
1797                         case IPV6_V6ONLY:
1798                         case IPV6_PORTRANGE:
1799                         case IPV6_RECVTCLASS:
1800                         case IPV6_AUTOFLOWLABEL:
1801                                 switch (optname) {
1802
1803                                 case IPV6_RECVHOPOPTS:
1804                                         optval = OPTBIT(IN6P_HOPOPTS);
1805                                         break;
1806
1807                                 case IPV6_RECVDSTOPTS:
1808                                         optval = OPTBIT(IN6P_DSTOPTS);
1809                                         break;
1810
1811                                 case IPV6_RECVRTHDRDSTOPTS:
1812                                         optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1813                                         break;
1814
1815                                 case IPV6_UNICAST_HOPS:
1816                                         optval = in6p->in6p_hops;
1817                                         break;
1818
1819                                 case IPV6_RECVPKTINFO:
1820                                         optval = OPTBIT(IN6P_PKTINFO);
1821                                         break;
1822
1823                                 case IPV6_RECVHOPLIMIT:
1824                                         optval = OPTBIT(IN6P_HOPLIMIT);
1825                                         break;
1826
1827                                 case IPV6_RECVRTHDR:
1828                                         optval = OPTBIT(IN6P_RTHDR);
1829                                         break;
1830
1831                                 case IPV6_RECVPATHMTU:
1832                                         optval = OPTBIT(IN6P_MTU);
1833                                         break;
1834
1835                                 case IPV6_FAITH:
1836                                         optval = OPTBIT(INP_FAITH);
1837                                         break;
1838
1839                                 case IPV6_V6ONLY:
1840                                         optval = OPTBIT(IN6P_IPV6_V6ONLY);
1841                                         break;
1842
1843                                 case IPV6_PORTRANGE:
1844                                     {
1845                                         int flags;
1846                                         flags = in6p->inp_flags;
1847                                         if (flags & INP_HIGHPORT)
1848                                                 optval = IPV6_PORTRANGE_HIGH;
1849                                         else if (flags & INP_LOWPORT)
1850                                                 optval = IPV6_PORTRANGE_LOW;
1851                                         else
1852                                                 optval = 0;
1853                                         break;
1854                                     }
1855                                 case IPV6_RECVTCLASS:
1856                                         optval = OPTBIT(IN6P_TCLASS);
1857                                         break;
1858
1859                                 case IPV6_AUTOFLOWLABEL:
1860                                         optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1861                                         break;
1862
1863                                 case IPV6_BINDANY:
1864                                         optval = OPTBIT(INP_BINDANY);
1865                                         break;
1866                                 }
1867                                 if (error)
1868                                         break;
1869                                 error = sooptcopyout(sopt, &optval,
1870                                         sizeof optval);
1871                                 break;
1872
1873                         case IPV6_PATHMTU:
1874                         {
1875                                 u_long pmtu = 0;
1876                                 struct ip6_mtuinfo mtuinfo;
1877                                 struct route_in6 sro;
1878
1879                                 bzero(&sro, sizeof(sro));
1880
1881                                 if (!(so->so_state & SS_ISCONNECTED))
1882                                         return (ENOTCONN);
1883                                 /*
1884                                  * XXX: we do not consider the case of source
1885                                  * routing, or optional information to specify
1886                                  * the outgoing interface.
1887                                  */
1888                                 error = ip6_getpmtu(&sro, NULL, NULL,
1889                                     &in6p->in6p_faddr, &pmtu, NULL);
1890                                 if (sro.ro_rt)
1891                                         RTFREE(sro.ro_rt);
1892                                 if (error)
1893                                         break;
1894                                 if (pmtu > IPV6_MAXPACKET)
1895                                         pmtu = IPV6_MAXPACKET;
1896
1897                                 bzero(&mtuinfo, sizeof(mtuinfo));
1898                                 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1899                                 optdata = (void *)&mtuinfo;
1900                                 optdatalen = sizeof(mtuinfo);
1901                                 error = sooptcopyout(sopt, optdata,
1902                                     optdatalen);
1903                                 break;
1904                         }
1905
1906                         case IPV6_2292PKTINFO:
1907                         case IPV6_2292HOPLIMIT:
1908                         case IPV6_2292HOPOPTS:
1909                         case IPV6_2292RTHDR:
1910                         case IPV6_2292DSTOPTS:
1911                                 switch (optname) {
1912                                 case IPV6_2292PKTINFO:
1913                                         optval = OPTBIT(IN6P_PKTINFO);
1914                                         break;
1915                                 case IPV6_2292HOPLIMIT:
1916                                         optval = OPTBIT(IN6P_HOPLIMIT);
1917                                         break;
1918                                 case IPV6_2292HOPOPTS:
1919                                         optval = OPTBIT(IN6P_HOPOPTS);
1920                                         break;
1921                                 case IPV6_2292RTHDR:
1922                                         optval = OPTBIT(IN6P_RTHDR);
1923                                         break;
1924                                 case IPV6_2292DSTOPTS:
1925                                         optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1926                                         break;
1927                                 }
1928                                 error = sooptcopyout(sopt, &optval,
1929                                     sizeof optval);
1930                                 break;
1931                         case IPV6_PKTINFO:
1932                         case IPV6_HOPOPTS:
1933                         case IPV6_RTHDR:
1934                         case IPV6_DSTOPTS:
1935                         case IPV6_RTHDRDSTOPTS:
1936                         case IPV6_NEXTHOP:
1937                         case IPV6_TCLASS:
1938                         case IPV6_DONTFRAG:
1939                         case IPV6_USE_MIN_MTU:
1940                         case IPV6_PREFER_TEMPADDR:
1941                                 error = ip6_getpcbopt(in6p->in6p_outputopts,
1942                                     optname, sopt);
1943                                 break;
1944
1945                         case IPV6_MULTICAST_IF:
1946                         case IPV6_MULTICAST_HOPS:
1947                         case IPV6_MULTICAST_LOOP:
1948                         case IPV6_MSFILTER:
1949                                 error = ip6_getmoptions(in6p, sopt);
1950                                 break;
1951
1952 #ifdef IPSEC
1953                         case IPV6_IPSEC_POLICY:
1954                           {
1955                                 caddr_t req = NULL;
1956                                 size_t len = 0;
1957                                 struct mbuf *m = NULL;
1958                                 struct mbuf **mp = &m;
1959                                 size_t ovalsize = sopt->sopt_valsize;
1960                                 caddr_t oval = (caddr_t)sopt->sopt_val;
1961
1962                                 error = soopt_getm(sopt, &m); /* XXX */
1963                                 if (error != 0)
1964                                         break;
1965                                 error = soopt_mcopyin(sopt, m); /* XXX */
1966                                 if (error != 0)
1967                                         break;
1968                                 sopt->sopt_valsize = ovalsize;
1969                                 sopt->sopt_val = oval;
1970                                 if (m) {
1971                                         req = mtod(m, caddr_t);
1972                                         len = m->m_len;
1973                                 }
1974                                 error = ipsec_get_policy(in6p, req, len, mp);
1975                                 if (error == 0)
1976                                         error = soopt_mcopyout(sopt, m); /* XXX */
1977                                 if (error == 0 && m)
1978                                         m_freem(m);
1979                                 break;
1980                           }
1981 #endif /* IPSEC */
1982
1983                         default:
1984                                 error = ENOPROTOOPT;
1985                                 break;
1986                         }
1987                         break;
1988                 }
1989         } else {                /* level != IPPROTO_IPV6 */
1990                 error = EINVAL;
1991         }
1992         return (error);
1993 }
1994
1995 int
1996 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
1997 {
1998         int error = 0, optval, optlen;
1999         const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2000         struct inpcb *in6p = sotoinpcb(so);
2001         int level, op, optname;
2002
2003         level = sopt->sopt_level;
2004         op = sopt->sopt_dir;
2005         optname = sopt->sopt_name;
2006         optlen = sopt->sopt_valsize;
2007
2008         if (level != IPPROTO_IPV6) {
2009                 return (EINVAL);
2010         }
2011
2012         switch (optname) {
2013         case IPV6_CHECKSUM:
2014                 /*
2015                  * For ICMPv6 sockets, no modification allowed for checksum
2016                  * offset, permit "no change" values to help existing apps.
2017                  *
2018                  * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2019                  * for an ICMPv6 socket will fail."
2020                  * The current behavior does not meet RFC3542.
2021                  */
2022                 switch (op) {
2023                 case SOPT_SET:
2024                         if (optlen != sizeof(int)) {
2025                                 error = EINVAL;
2026                                 break;
2027                         }
2028                         error = sooptcopyin(sopt, &optval, sizeof(optval),
2029                                             sizeof(optval));
2030                         if (error)
2031                                 break;
2032                         if ((optval % 2) != 0) {
2033                                 /* the API assumes even offset values */
2034                                 error = EINVAL;
2035                         } else if (so->so_proto->pr_protocol ==
2036                             IPPROTO_ICMPV6) {
2037                                 if (optval != icmp6off)
2038                                         error = EINVAL;
2039                         } else
2040                                 in6p->in6p_cksum = optval;
2041                         break;
2042
2043                 case SOPT_GET:
2044                         if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2045                                 optval = icmp6off;
2046                         else
2047                                 optval = in6p->in6p_cksum;
2048
2049                         error = sooptcopyout(sopt, &optval, sizeof(optval));
2050                         break;
2051
2052                 default:
2053                         error = EINVAL;
2054                         break;
2055                 }
2056                 break;
2057
2058         default:
2059                 error = ENOPROTOOPT;
2060                 break;
2061         }
2062
2063         return (error);
2064 }
2065
2066 /*
2067  * Set up IP6 options in pcb for insertion in output packets or
2068  * specifying behavior of outgoing packets.
2069  */
2070 static int
2071 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2072     struct socket *so, struct sockopt *sopt)
2073 {
2074         struct ip6_pktopts *opt = *pktopt;
2075         int error = 0;
2076         struct thread *td = sopt->sopt_td;
2077
2078         /* turn off any old options. */
2079         if (opt) {
2080 #ifdef DIAGNOSTIC
2081                 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2082                     opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2083                     opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2084                         printf("ip6_pcbopts: all specified options are cleared.\n");
2085 #endif
2086                 ip6_clearpktopts(opt, -1);
2087         } else
2088                 opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2089         *pktopt = NULL;
2090
2091         if (!m || m->m_len == 0) {
2092                 /*
2093                  * Only turning off any previous options, regardless of
2094                  * whether the opt is just created or given.
2095                  */
2096                 free(opt, M_IP6OPT);
2097                 return (0);
2098         }
2099
2100         /*  set options specified by user. */
2101         if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ?
2102             td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) {
2103                 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2104                 free(opt, M_IP6OPT);
2105                 return (error);
2106         }
2107         *pktopt = opt;
2108         return (0);
2109 }
2110
2111 /*
2112  * initialize ip6_pktopts.  beware that there are non-zero default values in
2113  * the struct.
2114  */
2115 void
2116 ip6_initpktopts(struct ip6_pktopts *opt)
2117 {
2118
2119         bzero(opt, sizeof(*opt));
2120         opt->ip6po_hlim = -1;   /* -1 means default hop limit */
2121         opt->ip6po_tclass = -1; /* -1 means default traffic class */
2122         opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2123         opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2124 }
2125
2126 static int
2127 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2128     struct ucred *cred, int uproto)
2129 {
2130         struct ip6_pktopts *opt;
2131
2132         if (*pktopt == NULL) {
2133                 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2134                     M_WAITOK);
2135                 ip6_initpktopts(*pktopt);
2136         }
2137         opt = *pktopt;
2138
2139         return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2140 }
2141
2142 static int
2143 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2144 {
2145         void *optdata = NULL;
2146         int optdatalen = 0;
2147         struct ip6_ext *ip6e;
2148         int error = 0;
2149         struct in6_pktinfo null_pktinfo;
2150         int deftclass = 0, on;
2151         int defminmtu = IP6PO_MINMTU_MCASTONLY;
2152         int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2153
2154         switch (optname) {
2155         case IPV6_PKTINFO:
2156                 if (pktopt && pktopt->ip6po_pktinfo)
2157                         optdata = (void *)pktopt->ip6po_pktinfo;
2158                 else {
2159                         /* XXX: we don't have to do this every time... */
2160                         bzero(&null_pktinfo, sizeof(null_pktinfo));
2161                         optdata = (void *)&null_pktinfo;
2162                 }
2163                 optdatalen = sizeof(struct in6_pktinfo);
2164                 break;
2165         case IPV6_TCLASS:
2166                 if (pktopt && pktopt->ip6po_tclass >= 0)
2167                         optdata = (void *)&pktopt->ip6po_tclass;
2168                 else
2169                         optdata = (void *)&deftclass;
2170                 optdatalen = sizeof(int);
2171                 break;
2172         case IPV6_HOPOPTS:
2173                 if (pktopt && pktopt->ip6po_hbh) {
2174                         optdata = (void *)pktopt->ip6po_hbh;
2175                         ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2176                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2177                 }
2178                 break;
2179         case IPV6_RTHDR:
2180                 if (pktopt && pktopt->ip6po_rthdr) {
2181                         optdata = (void *)pktopt->ip6po_rthdr;
2182                         ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2183                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2184                 }
2185                 break;
2186         case IPV6_RTHDRDSTOPTS:
2187                 if (pktopt && pktopt->ip6po_dest1) {
2188                         optdata = (void *)pktopt->ip6po_dest1;
2189                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2190                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2191                 }
2192                 break;
2193         case IPV6_DSTOPTS:
2194                 if (pktopt && pktopt->ip6po_dest2) {
2195                         optdata = (void *)pktopt->ip6po_dest2;
2196                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2197                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2198                 }
2199                 break;
2200         case IPV6_NEXTHOP:
2201                 if (pktopt && pktopt->ip6po_nexthop) {
2202                         optdata = (void *)pktopt->ip6po_nexthop;
2203                         optdatalen = pktopt->ip6po_nexthop->sa_len;
2204                 }
2205                 break;
2206         case IPV6_USE_MIN_MTU:
2207                 if (pktopt)
2208                         optdata = (void *)&pktopt->ip6po_minmtu;
2209                 else
2210                         optdata = (void *)&defminmtu;
2211                 optdatalen = sizeof(int);
2212                 break;
2213         case IPV6_DONTFRAG:
2214                 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2215                         on = 1;
2216                 else
2217                         on = 0;
2218                 optdata = (void *)&on;
2219                 optdatalen = sizeof(on);
2220                 break;
2221         case IPV6_PREFER_TEMPADDR:
2222                 if (pktopt)
2223                         optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2224                 else
2225                         optdata = (void *)&defpreftemp;
2226                 optdatalen = sizeof(int);
2227                 break;
2228         default:                /* should not happen */
2229 #ifdef DIAGNOSTIC
2230                 panic("ip6_getpcbopt: unexpected option\n");
2231 #endif
2232                 return (ENOPROTOOPT);
2233         }
2234
2235         error = sooptcopyout(sopt, optdata, optdatalen);
2236
2237         return (error);
2238 }
2239
2240 void
2241 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2242 {
2243         if (pktopt == NULL)
2244                 return;
2245
2246         if (optname == -1 || optname == IPV6_PKTINFO) {
2247                 if (pktopt->ip6po_pktinfo)
2248                         free(pktopt->ip6po_pktinfo, M_IP6OPT);
2249                 pktopt->ip6po_pktinfo = NULL;
2250         }
2251         if (optname == -1 || optname == IPV6_HOPLIMIT)
2252                 pktopt->ip6po_hlim = -1;
2253         if (optname == -1 || optname == IPV6_TCLASS)
2254                 pktopt->ip6po_tclass = -1;
2255         if (optname == -1 || optname == IPV6_NEXTHOP) {
2256                 if (pktopt->ip6po_nextroute.ro_rt) {
2257                         RTFREE(pktopt->ip6po_nextroute.ro_rt);
2258                         pktopt->ip6po_nextroute.ro_rt = NULL;
2259                 }
2260                 if (pktopt->ip6po_nexthop)
2261                         free(pktopt->ip6po_nexthop, M_IP6OPT);
2262                 pktopt->ip6po_nexthop = NULL;
2263         }
2264         if (optname == -1 || optname == IPV6_HOPOPTS) {
2265                 if (pktopt->ip6po_hbh)
2266                         free(pktopt->ip6po_hbh, M_IP6OPT);
2267                 pktopt->ip6po_hbh = NULL;
2268         }
2269         if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2270                 if (pktopt->ip6po_dest1)
2271                         free(pktopt->ip6po_dest1, M_IP6OPT);
2272                 pktopt->ip6po_dest1 = NULL;
2273         }
2274         if (optname == -1 || optname == IPV6_RTHDR) {
2275                 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2276                         free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2277                 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2278                 if (pktopt->ip6po_route.ro_rt) {
2279                         RTFREE(pktopt->ip6po_route.ro_rt);
2280                         pktopt->ip6po_route.ro_rt = NULL;
2281                 }
2282         }
2283         if (optname == -1 || optname == IPV6_DSTOPTS) {
2284                 if (pktopt->ip6po_dest2)
2285                         free(pktopt->ip6po_dest2, M_IP6OPT);
2286                 pktopt->ip6po_dest2 = NULL;
2287         }
2288 }
2289
2290 #define PKTOPT_EXTHDRCPY(type) \
2291 do {\
2292         if (src->type) {\
2293                 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2294                 dst->type = malloc(hlen, M_IP6OPT, canwait);\
2295                 if (dst->type == NULL && canwait == M_NOWAIT)\
2296                         goto bad;\
2297                 bcopy(src->type, dst->type, hlen);\
2298         }\
2299 } while (/*CONSTCOND*/ 0)
2300
2301 static int
2302 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2303 {
2304         if (dst == NULL || src == NULL)  {
2305                 printf("ip6_clearpktopts: invalid argument\n");
2306                 return (EINVAL);
2307         }
2308
2309         dst->ip6po_hlim = src->ip6po_hlim;
2310         dst->ip6po_tclass = src->ip6po_tclass;
2311         dst->ip6po_flags = src->ip6po_flags;
2312         if (src->ip6po_pktinfo) {
2313                 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2314                     M_IP6OPT, canwait);
2315                 if (dst->ip6po_pktinfo == NULL)
2316                         goto bad;
2317                 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2318         }
2319         if (src->ip6po_nexthop) {
2320                 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2321                     M_IP6OPT, canwait);
2322                 if (dst->ip6po_nexthop == NULL)
2323                         goto bad;
2324                 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2325                     src->ip6po_nexthop->sa_len);
2326         }
2327         PKTOPT_EXTHDRCPY(ip6po_hbh);
2328         PKTOPT_EXTHDRCPY(ip6po_dest1);
2329         PKTOPT_EXTHDRCPY(ip6po_dest2);
2330         PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2331         return (0);
2332
2333   bad:
2334         ip6_clearpktopts(dst, -1);
2335         return (ENOBUFS);
2336 }
2337 #undef PKTOPT_EXTHDRCPY
2338
2339 struct ip6_pktopts *
2340 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2341 {
2342         int error;
2343         struct ip6_pktopts *dst;
2344
2345         dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2346         if (dst == NULL)
2347                 return (NULL);
2348         ip6_initpktopts(dst);
2349
2350         if ((error = copypktopts(dst, src, canwait)) != 0) {
2351                 free(dst, M_IP6OPT);
2352                 return (NULL);
2353         }
2354
2355         return (dst);
2356 }
2357
2358 void
2359 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2360 {
2361         if (pktopt == NULL)
2362                 return;
2363
2364         ip6_clearpktopts(pktopt, -1);
2365
2366         free(pktopt, M_IP6OPT);
2367 }
2368
2369 /*
2370  * Set IPv6 outgoing packet options based on advanced API.
2371  */
2372 int
2373 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2374     struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
2375 {
2376         struct cmsghdr *cm = 0;
2377
2378         if (control == NULL || opt == NULL)
2379                 return (EINVAL);
2380
2381         ip6_initpktopts(opt);
2382         if (stickyopt) {
2383                 int error;
2384
2385                 /*
2386                  * If stickyopt is provided, make a local copy of the options
2387                  * for this particular packet, then override them by ancillary
2388                  * objects.
2389                  * XXX: copypktopts() does not copy the cached route to a next
2390                  * hop (if any).  This is not very good in terms of efficiency,
2391                  * but we can allow this since this option should be rarely
2392                  * used.
2393                  */
2394                 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2395                         return (error);
2396         }
2397
2398         /*
2399          * XXX: Currently, we assume all the optional information is stored
2400          * in a single mbuf.
2401          */
2402         if (control->m_next)
2403                 return (EINVAL);
2404
2405         for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2406             control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2407                 int error;
2408
2409                 if (control->m_len < CMSG_LEN(0))
2410                         return (EINVAL);
2411
2412                 cm = mtod(control, struct cmsghdr *);
2413                 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2414                         return (EINVAL);
2415                 if (cm->cmsg_level != IPPROTO_IPV6)
2416                         continue;
2417
2418                 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2419                     cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2420                 if (error)
2421                         return (error);
2422         }
2423
2424         return (0);
2425 }
2426
2427 /*
2428  * Set a particular packet option, as a sticky option or an ancillary data
2429  * item.  "len" can be 0 only when it's a sticky option.
2430  * We have 4 cases of combination of "sticky" and "cmsg":
2431  * "sticky=0, cmsg=0": impossible
2432  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2433  * "sticky=1, cmsg=0": RFC3542 socket option
2434  * "sticky=1, cmsg=1": RFC2292 socket option
2435  */
2436 static int
2437 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2438     struct ucred *cred, int sticky, int cmsg, int uproto)
2439 {
2440         int minmtupolicy, preftemp;
2441         int error;
2442
2443         if (!sticky && !cmsg) {
2444 #ifdef DIAGNOSTIC
2445                 printf("ip6_setpktopt: impossible case\n");
2446 #endif
2447                 return (EINVAL);
2448         }
2449
2450         /*
2451          * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2452          * not be specified in the context of RFC3542.  Conversely,
2453          * RFC3542 types should not be specified in the context of RFC2292.
2454          */
2455         if (!cmsg) {
2456                 switch (optname) {
2457                 case IPV6_2292PKTINFO:
2458                 case IPV6_2292HOPLIMIT:
2459                 case IPV6_2292NEXTHOP:
2460                 case IPV6_2292HOPOPTS:
2461                 case IPV6_2292DSTOPTS:
2462                 case IPV6_2292RTHDR:
2463                 case IPV6_2292PKTOPTIONS:
2464                         return (ENOPROTOOPT);
2465                 }
2466         }
2467         if (sticky && cmsg) {
2468                 switch (optname) {
2469                 case IPV6_PKTINFO:
2470                 case IPV6_HOPLIMIT:
2471                 case IPV6_NEXTHOP:
2472                 case IPV6_HOPOPTS:
2473                 case IPV6_DSTOPTS:
2474                 case IPV6_RTHDRDSTOPTS:
2475                 case IPV6_RTHDR:
2476                 case IPV6_USE_MIN_MTU:
2477                 case IPV6_DONTFRAG:
2478                 case IPV6_TCLASS:
2479                 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2480                         return (ENOPROTOOPT);
2481                 }
2482         }
2483
2484         switch (optname) {
2485         case IPV6_2292PKTINFO:
2486         case IPV6_PKTINFO:
2487         {
2488                 struct ifnet *ifp = NULL;
2489                 struct in6_pktinfo *pktinfo;
2490
2491                 if (len != sizeof(struct in6_pktinfo))
2492                         return (EINVAL);
2493
2494                 pktinfo = (struct in6_pktinfo *)buf;
2495
2496                 /*
2497                  * An application can clear any sticky IPV6_PKTINFO option by
2498                  * doing a "regular" setsockopt with ipi6_addr being
2499                  * in6addr_any and ipi6_ifindex being zero.
2500                  * [RFC 3542, Section 6]
2501                  */
2502                 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2503                     pktinfo->ipi6_ifindex == 0 &&
2504                     IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2505                         ip6_clearpktopts(opt, optname);
2506                         break;
2507                 }
2508
2509                 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2510                     sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2511                         return (EINVAL);
2512                 }
2513
2514                 /* validate the interface index if specified. */
2515                 if (pktinfo->ipi6_ifindex > V_if_index ||
2516                     pktinfo->ipi6_ifindex < 0) {
2517                          return (ENXIO);
2518                 }
2519                 if (pktinfo->ipi6_ifindex) {
2520                         ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
2521                         if (ifp == NULL)
2522                                 return (ENXIO);
2523                 }
2524
2525                 /*
2526                  * We store the address anyway, and let in6_selectsrc()
2527                  * validate the specified address.  This is because ipi6_addr
2528                  * may not have enough information about its scope zone, and
2529                  * we may need additional information (such as outgoing
2530                  * interface or the scope zone of a destination address) to
2531                  * disambiguate the scope.
2532                  * XXX: the delay of the validation may confuse the
2533                  * application when it is used as a sticky option.
2534                  */
2535                 if (opt->ip6po_pktinfo == NULL) {
2536                         opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2537                             M_IP6OPT, M_NOWAIT);
2538                         if (opt->ip6po_pktinfo == NULL)
2539                                 return (ENOBUFS);
2540                 }
2541                 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2542                 break;
2543         }
2544
2545         case IPV6_2292HOPLIMIT:
2546         case IPV6_HOPLIMIT:
2547         {
2548                 int *hlimp;
2549
2550                 /*
2551                  * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2552                  * to simplify the ordering among hoplimit options.
2553                  */
2554                 if (optname == IPV6_HOPLIMIT && sticky)
2555                         return (ENOPROTOOPT);
2556
2557                 if (len != sizeof(int))
2558                         return (EINVAL);
2559                 hlimp = (int *)buf;
2560                 if (*hlimp < -1 || *hlimp > 255)
2561                         return (EINVAL);
2562
2563                 opt->ip6po_hlim = *hlimp;
2564                 break;
2565         }
2566
2567         case IPV6_TCLASS:
2568         {
2569                 int tclass;
2570
2571                 if (len != sizeof(int))
2572                         return (EINVAL);
2573                 tclass = *(int *)buf;
2574                 if (tclass < -1 || tclass > 255)
2575                         return (EINVAL);
2576
2577                 opt->ip6po_tclass = tclass;
2578                 break;
2579         }
2580
2581         case IPV6_2292NEXTHOP:
2582         case IPV6_NEXTHOP:
2583                 if (cred != NULL) {
2584                         error = priv_check_cred(cred,
2585                             PRIV_NETINET_SETHDROPTS, 0);
2586                         if (error)
2587                                 return (error);
2588                 }
2589
2590                 if (len == 0) { /* just remove the option */
2591                         ip6_clearpktopts(opt, IPV6_NEXTHOP);
2592                         break;
2593                 }
2594
2595                 /* check if cmsg_len is large enough for sa_len */
2596                 if (len < sizeof(struct sockaddr) || len < *buf)
2597                         return (EINVAL);
2598
2599                 switch (((struct sockaddr *)buf)->sa_family) {
2600                 case AF_INET6:
2601                 {
2602                         struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
2603                         int error;
2604
2605                         if (sa6->sin6_len != sizeof(struct sockaddr_in6))
2606                                 return (EINVAL);
2607
2608                         if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
2609                             IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
2610                                 return (EINVAL);
2611                         }
2612                         if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
2613                             != 0) {
2614                                 return (error);
2615                         }
2616                         break;
2617                 }
2618                 case AF_LINK:   /* should eventually be supported */
2619                 default:
2620                         return (EAFNOSUPPORT);
2621                 }
2622
2623                 /* turn off the previous option, then set the new option. */
2624                 ip6_clearpktopts(opt, IPV6_NEXTHOP);
2625                 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
2626                 if (opt->ip6po_nexthop == NULL)
2627                         return (ENOBUFS);
2628                 bcopy(buf, opt->ip6po_nexthop, *buf);
2629                 break;
2630
2631         case IPV6_2292HOPOPTS:
2632         case IPV6_HOPOPTS:
2633         {
2634                 struct ip6_hbh *hbh;
2635                 int hbhlen;
2636
2637                 /*
2638                  * XXX: We don't allow a non-privileged user to set ANY HbH
2639                  * options, since per-option restriction has too much
2640                  * overhead.
2641                  */
2642                 if (cred != NULL) {
2643                         error = priv_check_cred(cred,
2644                             PRIV_NETINET_SETHDROPTS, 0);
2645                         if (error)
2646                                 return (error);
2647                 }
2648
2649                 if (len == 0) {
2650                         ip6_clearpktopts(opt, IPV6_HOPOPTS);
2651                         break;  /* just remove the option */
2652                 }
2653
2654                 /* message length validation */
2655                 if (len < sizeof(struct ip6_hbh))
2656                         return (EINVAL);
2657                 hbh = (struct ip6_hbh *)buf;
2658                 hbhlen = (hbh->ip6h_len + 1) << 3;
2659                 if (len != hbhlen)
2660                         return (EINVAL);
2661
2662                 /* turn off the previous option, then set the new option. */
2663                 ip6_clearpktopts(opt, IPV6_HOPOPTS);
2664                 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
2665                 if (opt->ip6po_hbh == NULL)
2666                         return (ENOBUFS);
2667                 bcopy(hbh, opt->ip6po_hbh, hbhlen);
2668
2669                 break;
2670         }
2671
2672         case IPV6_2292DSTOPTS:
2673         case IPV6_DSTOPTS:
2674         case IPV6_RTHDRDSTOPTS:
2675         {
2676                 struct ip6_dest *dest, **newdest = NULL;
2677                 int destlen;
2678
2679                 if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
2680                         error = priv_check_cred(cred,
2681                             PRIV_NETINET_SETHDROPTS, 0);
2682                         if (error)
2683                                 return (error);
2684                 }
2685
2686                 if (len == 0) {
2687                         ip6_clearpktopts(opt, optname);
2688                         break;  /* just remove the option */
2689                 }
2690
2691                 /* message length validation */
2692                 if (len < sizeof(struct ip6_dest))
2693                         return (EINVAL);
2694                 dest = (struct ip6_dest *)buf;
2695                 destlen = (dest->ip6d_len + 1) << 3;
2696                 if (len != destlen)
2697                         return (EINVAL);
2698
2699                 /*
2700                  * Determine the position that the destination options header
2701                  * should be inserted; before or after the routing header.
2702                  */
2703                 switch (optname) {
2704                 case IPV6_2292DSTOPTS:
2705                         /*
2706                          * The old advacned API is ambiguous on this point.
2707                          * Our approach is to determine the position based
2708                          * according to the existence of a routing header.
2709                          * Note, however, that this depends on the order of the
2710                          * extension headers in the ancillary data; the 1st
2711                          * part of the destination options header must appear
2712                          * before the routing header in the ancillary data,
2713                          * too.
2714                          * RFC3542 solved the ambiguity by introducing
2715                          * separate ancillary data or option types.
2716                          */
2717                         if (opt->ip6po_rthdr == NULL)
2718                                 newdest = &opt->ip6po_dest1;
2719                         else
2720                                 newdest = &opt->ip6po_dest2;
2721                         break;
2722                 case IPV6_RTHDRDSTOPTS:
2723                         newdest = &opt->ip6po_dest1;
2724                         break;
2725                 case IPV6_DSTOPTS:
2726                         newdest = &opt->ip6po_dest2;
2727                         break;
2728                 }
2729
2730                 /* turn off the previous option, then set the new option. */
2731                 ip6_clearpktopts(opt, optname);
2732                 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
2733                 if (*newdest == NULL)
2734                         return (ENOBUFS);
2735                 bcopy(dest, *newdest, destlen);
2736
2737                 break;
2738         }
2739
2740         case IPV6_2292RTHDR:
2741         case IPV6_RTHDR:
2742         {
2743                 struct ip6_rthdr *rth;
2744                 int rthlen;
2745
2746                 if (len == 0) {
2747                         ip6_clearpktopts(opt, IPV6_RTHDR);
2748                         break;  /* just remove the option */
2749                 }
2750
2751                 /* message length validation */
2752                 if (len < sizeof(struct ip6_rthdr))
2753                         return (EINVAL);
2754                 rth = (struct ip6_rthdr *)buf;
2755                 rthlen = (rth->ip6r_len + 1) << 3;
2756                 if (len != rthlen)
2757                         return (EINVAL);
2758
2759                 switch (rth->ip6r_type) {
2760                 case IPV6_RTHDR_TYPE_0:
2761                         if (rth->ip6r_len == 0) /* must contain one addr */
2762                                 return (EINVAL);
2763                         if (rth->ip6r_len % 2) /* length must be even */
2764                                 return (EINVAL);
2765                         if (rth->ip6r_len / 2 != rth->ip6r_segleft)
2766                                 return (EINVAL);
2767                         break;
2768                 default:
2769                         return (EINVAL);        /* not supported */
2770                 }
2771
2772                 /* turn off the previous option */
2773                 ip6_clearpktopts(opt, IPV6_RTHDR);
2774                 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
2775                 if (opt->ip6po_rthdr == NULL)
2776                         return (ENOBUFS);
2777                 bcopy(rth, opt->ip6po_rthdr, rthlen);
2778
2779                 break;
2780         }
2781
2782         case IPV6_USE_MIN_MTU:
2783                 if (len != sizeof(int))
2784                         return (EINVAL);
2785                 minmtupolicy = *(int *)buf;
2786                 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
2787                     minmtupolicy != IP6PO_MINMTU_DISABLE &&
2788                     minmtupolicy != IP6PO_MINMTU_ALL) {
2789                         return (EINVAL);
2790                 }
2791                 opt->ip6po_minmtu = minmtupolicy;
2792                 break;
2793
2794         case IPV6_DONTFRAG:
2795                 if (len != sizeof(int))
2796                         return (EINVAL);
2797
2798                 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
2799                         /*
2800                          * we ignore this option for TCP sockets.
2801                          * (RFC3542 leaves this case unspecified.)
2802                          */
2803                         opt->ip6po_flags &= ~IP6PO_DONTFRAG;
2804                 } else
2805                         opt->ip6po_flags |= IP6PO_DONTFRAG;
2806                 break;
2807
2808         case IPV6_PREFER_TEMPADDR:
2809                 if (len != sizeof(int))
2810                         return (EINVAL);
2811                 preftemp = *(int *)buf;
2812                 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
2813                     preftemp != IP6PO_TEMPADDR_NOTPREFER &&
2814                     preftemp != IP6PO_TEMPADDR_PREFER) {
2815                         return (EINVAL);
2816                 }
2817                 opt->ip6po_prefer_tempaddr = preftemp;
2818                 break;
2819
2820         default:
2821                 return (ENOPROTOOPT);
2822         } /* end of switch */
2823
2824         return (0);
2825 }
2826
2827 /*
2828  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
2829  * packet to the input queue of a specified interface.  Note that this
2830  * calls the output routine of the loopback "driver", but with an interface
2831  * pointer that might NOT be &loif -- easier than replicating that code here.
2832  */
2833 void
2834 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
2835 {
2836         struct mbuf *copym;
2837         struct ip6_hdr *ip6;
2838
2839         copym = m_copy(m, 0, M_COPYALL);
2840         if (copym == NULL)
2841                 return;
2842
2843         /*
2844          * Make sure to deep-copy IPv6 header portion in case the data
2845          * is in an mbuf cluster, so that we can safely override the IPv6
2846          * header portion later.
2847          */
2848         if ((copym->m_flags & M_EXT) != 0 ||
2849             copym->m_len < sizeof(struct ip6_hdr)) {
2850                 copym = m_pullup(copym, sizeof(struct ip6_hdr));
2851                 if (copym == NULL)
2852                         return;
2853         }
2854
2855 #ifdef DIAGNOSTIC
2856         if (copym->m_len < sizeof(*ip6)) {
2857                 m_freem(copym);
2858                 return;
2859         }
2860 #endif
2861
2862         ip6 = mtod(copym, struct ip6_hdr *);
2863         /*
2864          * clear embedded scope identifiers if necessary.
2865          * in6_clearscope will touch the addresses only when necessary.
2866          */
2867         in6_clearscope(&ip6->ip6_src);
2868         in6_clearscope(&ip6->ip6_dst);
2869
2870         (void)if_simloop(ifp, copym, dst->sin6_family, 0);
2871 }
2872
2873 /*
2874  * Chop IPv6 header off from the payload.
2875  */
2876 static int
2877 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
2878 {
2879         struct mbuf *mh;
2880         struct ip6_hdr *ip6;
2881
2882         ip6 = mtod(m, struct ip6_hdr *);
2883         if (m->m_len > sizeof(*ip6)) {
2884                 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
2885                 if (mh == 0) {
2886                         m_freem(m);
2887                         return ENOBUFS;
2888                 }
2889                 M_MOVE_PKTHDR(mh, m);
2890                 MH_ALIGN(mh, sizeof(*ip6));
2891                 m->m_len -= sizeof(*ip6);
2892                 m->m_data += sizeof(*ip6);
2893                 mh->m_next = m;
2894                 m = mh;
2895                 m->m_len = sizeof(*ip6);
2896                 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
2897         }
2898         exthdrs->ip6e_ip6 = m;
2899         return 0;
2900 }
2901
2902 /*
2903  * Compute IPv6 extension header length.
2904  */
2905 int
2906 ip6_optlen(struct inpcb *in6p)
2907 {
2908         int len;
2909
2910         if (!in6p->in6p_outputopts)
2911                 return 0;
2912
2913         len = 0;
2914 #define elen(x) \
2915     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
2916
2917         len += elen(in6p->in6p_outputopts->ip6po_hbh);
2918         if (in6p->in6p_outputopts->ip6po_rthdr)
2919                 /* dest1 is valid with rthdr only */
2920                 len += elen(in6p->in6p_outputopts->ip6po_dest1);
2921         len += elen(in6p->in6p_outputopts->ip6po_rthdr);
2922         len += elen(in6p->in6p_outputopts->ip6po_dest2);
2923         return len;
2924 #undef elen
2925 }