]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet6/ip6_output.c
add -n option to suppress clearing the build tree and add -DNO_CLEAN
[FreeBSD/FreeBSD.git] / sys / netinet6 / ip6_output.c
1 /*-
2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the project nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
30  */
31
32 /*-
33  * Copyright (c) 1982, 1986, 1988, 1990, 1993
34  *      The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61  */
62
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68 #include "opt_ipsec.h"
69
70 #include <sys/param.h>
71 #include <sys/kernel.h>
72 #include <sys/malloc.h>
73 #include <sys/mbuf.h>
74 #include <sys/errno.h>
75 #include <sys/priv.h>
76 #include <sys/proc.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/ucred.h>
81 #include <sys/vimage.h>
82
83 #include <net/if.h>
84 #include <net/netisr.h>
85 #include <net/route.h>
86 #include <net/pfil.h>
87
88 #include <netinet/in.h>
89 #include <netinet/in_var.h>
90 #include <netinet6/in6_var.h>
91 #include <netinet/ip6.h>
92 #include <netinet/icmp6.h>
93 #include <netinet6/ip6_var.h>
94 #include <netinet/in_pcb.h>
95 #include <netinet/tcp_var.h>
96 #include <netinet6/nd6.h>
97
98 #ifdef IPSEC
99 #include <netipsec/ipsec.h>
100 #include <netipsec/ipsec6.h>
101 #include <netipsec/key.h>
102 #include <netinet6/ip6_ipsec.h>
103 #endif /* IPSEC */
104
105 #include <netinet6/ip6protosw.h>
106 #include <netinet6/scope6_var.h>
107
/*
 * File-local malloc type M_IP6MOPTS (short name "ip6_moptions").
 * NOTE(review): presumably the tag for struct ip6_moptions allocations
 * made further down in this file -- confirm against the users.
 */
108 static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "internet multicast options");
109
/*
 * The mbufs ip6_output() juggles while it assembles the header chain of
 * an outgoing packet.  The structure is zeroed first, so a NULL member
 * means that the corresponding header is absent.
 */
110 struct ip6_exthdrs {
111         struct mbuf *ip6e_ip6;		/* IPv6 header; read back after ip6_splithdr() */
112         struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
113         struct mbuf *ip6e_dest1;	/* Destination options header (1st part) */
114         struct mbuf *ip6e_rthdr;	/* Routing header */
115         struct mbuf *ip6e_dest2;	/* Destination options header (2nd part) */
116 };
117
/*
 * Prototypes of the file-local helpers.  All of them are plain ISO C
 * prototypes; the __P(()) compatibility wrapper is deliberately not used,
 * so the block reads the same way from top to bottom.
 */
118 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
119         struct ucred *, int);
120 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
121         struct socket *, struct sockopt *);
122 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
123 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *,
124         struct ucred *, int, int, int);
125
126 static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
127 static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
128 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
129 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
130         struct ip6_frag **);
131 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
132 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
133 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
134         struct ifnet *, struct in6_addr *, u_long *, int *);
135 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
136
137
138 /*
139  * Make an extension header from option data.  hp is the source, and
140  * mp is the destination.
 *
 * Nothing happens when hp is NULL.  Otherwise hp is viewed as a
 * struct ip6_ext and (ip6e_len + 1) << 3 bytes starting at hp are handed
 * to ip6_copyexthdr() together with mp; i.e. ip6e_len is treated as a
 * count of 8-byte units that does not include the first 8 bytes.
 *
 * The macro is not self-contained: the expanding function must provide a
 * local variable "error" and a label "freehdrs".  The return value of
 * ip6_copyexthdr() is stored in error and, when it is non-zero, control
 * jumps to freehdrs.
141  */
142 #define MAKE_EXTHDR(hp, mp)                                             \
143     do {                                                                \
144         if (hp) {                                                       \
145                 struct ip6_ext *eh = (struct ip6_ext *)(hp);            \
146                 error = ip6_copyexthdr((mp), (caddr_t)(hp),             \
147                     ((eh)->ip6e_len + 1) << 3);                         \
148                 if (error)                                              \
149                         goto freehdrs;                                  \
150         }                                                               \
151     } while (/*CONSTCOND*/ 0)
152
153 /*
154  * Form a chain of extension headers.
155  * m is the extension header mbuf
156  * mp is the previous mbuf in the chain
157  * p points to the next-header byte of the header preceding m
158  * i is the protocol number that identifies the header held in m.
 *
 * Nothing happens when m is NULL.  Otherwise the byte p points at is
 * copied into the first data byte of m, i is stored through p, and p is
 * re-pointed at the first data byte of m.  m is then linked in right
 * after mp, and mp is advanced to m.  Since the macro assigns to p and
 * mp, both arguments must be modifiable lvalues.
 *
 * The expanding function must provide a local "hdrsplit"; the macro
 * panics when it is zero while m is non-NULL.
159  */
160 #define MAKE_CHAIN(m, mp, p, i)\
161     do {\
162         if (m) {\
163                 if (!hdrsplit) \
164                         panic("assumption failed: hdr not split"); \
165                 *mtod((m), u_char *) = *(p);\
166                 *(p) = (i);\
167                 p = mtod((m), u_char *);\
168                 (m)->m_next = (mp)->m_next;\
169                 (mp)->m_next = (m);\
170                 (mp) = (m);\
171         }\
172     } while (/*CONSTCOND*/ 0)
173
174 /*
175  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
176  * header (with pri, len, nxt, hlim, src, dst).
177  * This function may modify ver and hlim only.
178  * The mbuf chain containing the packet will be freed.
179  * The mbuf opt, if present, will not be freed.
180  *
181  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
182  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
183  * which is rt_rmx.rmx_mtu.
184  *
185  * ifpp - XXX: just for statistics
186  */
187 int
188 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
189     struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
190     struct ifnet **ifpp, struct inpcb *inp)
191 {
192         INIT_VNET_NET(curvnet);
193         INIT_VNET_INET6(curvnet);
194         struct ip6_hdr *ip6, *mhip6;
195         struct ifnet *ifp, *origifp;
196         struct mbuf *m = m0;
197         struct mbuf *mprev = NULL;
198         int hlen, tlen, len, off;
199         struct route_in6 ip6route;
200         struct rtentry *rt = NULL;
201         struct sockaddr_in6 *dst, src_sa, dst_sa;
202         struct in6_addr odst;
203         int error = 0;
204         struct in6_ifaddr *ia = NULL;
205         u_long mtu;
206         int alwaysfrag, dontfrag;
207         u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
208         struct ip6_exthdrs exthdrs;
209         struct in6_addr finaldst, src0, dst0;
210         u_int32_t zone;
211         struct route_in6 *ro_pmtu = NULL;
212         int hdrsplit = 0;
213         int needipsec = 0;
214 #ifdef IPSEC
215         struct ipsec_output_state state;
216         struct ip6_rthdr *rh = NULL;
217         int needipsectun = 0;
218         int segleft_org = 0;
219         struct secpolicy *sp = NULL;
220 #endif /* IPSEC */
221
222         ip6 = mtod(m, struct ip6_hdr *);
223         if (ip6 == NULL) {
224                 printf ("ip6 is NULL");
225                 goto bad;
226         }
227
228         finaldst = ip6->ip6_dst;
229
230         bzero(&exthdrs, sizeof(exthdrs));
231
232         if (opt) {
233                 /* Hop-by-Hop options header */
234                 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
235                 /* Destination options header(1st part) */
236                 if (opt->ip6po_rthdr) {
237                         /*
238                          * Destination options header(1st part)
239                          * This only makes sense with a routing header.
240                          * See Section 9.2 of RFC 3542.
241                          * Disabling this part just for MIP6 convenience is
242                          * a bad idea.  We need to think carefully about a
243                          * way to make the advanced API coexist with MIP6
244                          * options, which might automatically be inserted in
245                          * the kernel.
246                          */
247                         MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
248                 }
249                 /* Routing header */
250                 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
251                 /* Destination options header(2nd part) */
252                 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
253         }
254
255         /*
256          * IPSec checking which handles several cases.
257          * FAST IPSEC: We re-injected the packet.
258          */
259 #ifdef IPSEC
260         switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
261         {
262         case 1:                 /* Bad packet */
263                 goto freehdrs;
264         case -1:                /* Do IPSec */
265                 needipsec = 1;
266         case 0:                 /* No IPSec */
267         default:
268                 break;
269         }
270 #endif /* IPSEC */
271
272         /*
273          * Calculate the total length of the extension header chain.
274          * Keep the length of the unfragmentable part for fragmentation.
275          */
276         optlen = 0;
277         if (exthdrs.ip6e_hbh)
278                 optlen += exthdrs.ip6e_hbh->m_len;
279         if (exthdrs.ip6e_dest1)
280                 optlen += exthdrs.ip6e_dest1->m_len;
281         if (exthdrs.ip6e_rthdr)
282                 optlen += exthdrs.ip6e_rthdr->m_len;
283         unfragpartlen = optlen + sizeof(struct ip6_hdr);
284
285         /* NOTE: we don't add AH/ESP length here. do that later. */
286         if (exthdrs.ip6e_dest2)
287                 optlen += exthdrs.ip6e_dest2->m_len;
288
289         /*
290          * If we need IPsec, or there is at least one extension header,
291          * separate IP6 header from the payload.
292          */
293         if ((needipsec || optlen) && !hdrsplit) {
294                 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
295                         m = NULL;
296                         goto freehdrs;
297                 }
298                 m = exthdrs.ip6e_ip6;
299                 hdrsplit++;
300         }
301
302         /* adjust pointer */
303         ip6 = mtod(m, struct ip6_hdr *);
304
305         /* adjust mbuf packet header length */
306         m->m_pkthdr.len += optlen;
307         plen = m->m_pkthdr.len - sizeof(*ip6);
308
309         /* If this is a jumbo payload, insert a jumbo payload option. */
310         if (plen > IPV6_MAXPACKET) {
311                 if (!hdrsplit) {
312                         if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
313                                 m = NULL;
314                                 goto freehdrs;
315                         }
316                         m = exthdrs.ip6e_ip6;
317                         hdrsplit++;
318                 }
319                 /* adjust pointer */
320                 ip6 = mtod(m, struct ip6_hdr *);
321                 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
322                         goto freehdrs;
323                 ip6->ip6_plen = 0;
324         } else
325                 ip6->ip6_plen = htons(plen);
326
327         /*
328          * Concatenate headers and fill in next header fields.
329          * Here we have, on "m"
330          *      IPv6 payload
331          * and we insert headers accordingly.  Finally, we should be getting:
332          *      IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
333          *
334          * during the header composing process, "m" points to IPv6 header.
335          * "mprev" points to an extension header prior to esp.
336          */
337         u_char *nexthdrp = &ip6->ip6_nxt;
338         mprev = m;
339
340         /*
341          * we treat dest2 specially.  this makes IPsec processing
342          * much easier.  the goal here is to make mprev point the
343          * mbuf prior to dest2.
344          *
345          * result: IPv6 dest2 payload
346          * m and mprev will point to IPv6 header.
347          */
348         if (exthdrs.ip6e_dest2) {
349                 if (!hdrsplit)
350                         panic("assumption failed: hdr not split");
351                 exthdrs.ip6e_dest2->m_next = m->m_next;
352                 m->m_next = exthdrs.ip6e_dest2;
353                 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
354                 ip6->ip6_nxt = IPPROTO_DSTOPTS;
355         }
356
357         /*
358          * result: IPv6 hbh dest1 rthdr dest2 payload
359          * m will point to IPv6 header.  mprev will point to the
360          * extension header prior to dest2 (rthdr in the above case).
361          */
362         MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
363         MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
364                    IPPROTO_DSTOPTS);
365         MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
366                    IPPROTO_ROUTING);
367
368 #ifdef IPSEC
369         if (!needipsec)
370                 goto skip_ipsec2;
371
372         /*
373          * pointers after IPsec headers are not valid any more.
374          * other pointers need a great care too.
375          * (IPsec routines should not mangle mbufs prior to AH/ESP)
376          */
377         exthdrs.ip6e_dest2 = NULL;
378
379         if (exthdrs.ip6e_rthdr) {
380                 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
381                 segleft_org = rh->ip6r_segleft;
382                 rh->ip6r_segleft = 0;
383         }
384
385         bzero(&state, sizeof(state));
386         state.m = m;
387         error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
388                                     &needipsectun);
389         m = state.m;
390         if (error == EJUSTRETURN) {
391                 /*
392                  * We had a SP with a level of 'use' and no SA. We
393                  * will just continue to process the packet without
394                  * IPsec processing.
395                  */
396                 ;
397         } else if (error) {
398                 /* mbuf is already reclaimed in ipsec6_output_trans. */
399                 m = NULL;
400                 switch (error) {
401                 case EHOSTUNREACH:
402                 case ENETUNREACH:
403                 case EMSGSIZE:
404                 case ENOBUFS:
405                 case ENOMEM:
406                         break;
407                 default:
408                         printf("[%s:%d] (ipsec): error code %d\n",
409                             __func__, __LINE__, error);
410                         /* FALLTHROUGH */
411                 case ENOENT:
412                         /* don't show these error codes to the user */
413                         error = 0;
414                         break;
415                 }
416                 goto bad;
417         } else if (!needipsectun) {
418                 /*
419                  * In the FAST IPSec case we have already
420                  * re-injected the packet and it has been freed
421                  * by the ipsec_done() function.  So, just clean
422                  * up after ourselves.
423                  */
424                 m = NULL;
425                 goto done;
426         }
427         if (exthdrs.ip6e_rthdr) {
428                 /* ah6_output doesn't modify mbuf chain */
429                 rh->ip6r_segleft = segleft_org;
430         }
431 skip_ipsec2:;
432 #endif /* IPSEC */
433
434         /*
435          * If there is a routing header, replace the destination address field
436          * with the first hop of the routing header.
437          */
438         if (exthdrs.ip6e_rthdr) {
439                 struct ip6_rthdr *rh =
440                         (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
441                                                   struct ip6_rthdr *));
442                 struct ip6_rthdr0 *rh0;
443                 struct in6_addr *addr;
444                 struct sockaddr_in6 sa;
445
446                 switch (rh->ip6r_type) {
447                 case IPV6_RTHDR_TYPE_0:
448                          rh0 = (struct ip6_rthdr0 *)rh;
449                          addr = (struct in6_addr *)(rh0 + 1);
450
451                          /*
452                           * construct a sockaddr_in6 form of
453                           * the first hop.
454                           *
455                           * XXX: we may not have enough
456                           * information about its scope zone;
457                           * there is no standard API to pass
458                           * the information from the
459                           * application.
460                           */
461                          bzero(&sa, sizeof(sa));
462                          sa.sin6_family = AF_INET6;
463                          sa.sin6_len = sizeof(sa);
464                          sa.sin6_addr = addr[0];
465                          if ((error = sa6_embedscope(&sa,
466                              V_ip6_use_defzone)) != 0) {
467                                  goto bad;
468                          }
469                          ip6->ip6_dst = sa.sin6_addr;
470                          bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
471                              * (rh0->ip6r0_segleft - 1));
472                          addr[rh0->ip6r0_segleft - 1] = finaldst;
473                          /* XXX */
474                          in6_clearscope(addr + rh0->ip6r0_segleft - 1);
475                          break;
476                 default:        /* is it possible? */
477                          error = EINVAL;
478                          goto bad;
479                 }
480         }
481
482         /* Source address validation */
483         if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
484             (flags & IPV6_UNSPECSRC) == 0) {
485                 error = EOPNOTSUPP;
486                 V_ip6stat.ip6s_badscope++;
487                 goto bad;
488         }
489         if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
490                 error = EOPNOTSUPP;
491                 V_ip6stat.ip6s_badscope++;
492                 goto bad;
493         }
494
495         V_ip6stat.ip6s_localout++;
496
497         /*
498          * Route packet.
499          */
500         if (ro == 0) {
501                 ro = &ip6route;
502                 bzero((caddr_t)ro, sizeof(*ro));
503         }
504         ro_pmtu = ro;
505         if (opt && opt->ip6po_rthdr)
506                 ro = &opt->ip6po_route;
507         dst = (struct sockaddr_in6 *)&ro->ro_dst;
508
509 again:
510         /*
511          * if specified, try to fill in the traffic class field.
512          * do not override if a non-zero value is already set.
513          * we check the diffserv field and the ecn field separately.
514          */
515         if (opt && opt->ip6po_tclass >= 0) {
516                 int mask = 0;
517
518                 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
519                         mask |= 0xfc;
520                 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
521                         mask |= 0x03;
522                 if (mask != 0)
523                         ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
524         }
525
526         /* fill in or override the hop limit field, if necessary. */
527         if (opt && opt->ip6po_hlim != -1)
528                 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
529         else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
530                 if (im6o != NULL)
531                         ip6->ip6_hlim = im6o->im6o_multicast_hlim;
532                 else
533                         ip6->ip6_hlim = V_ip6_defmcasthlim;
534         }
535
536 #ifdef IPSEC
537         /*
538          * We may re-inject packets into the stack here.
539          */
540         if (needipsec && needipsectun) {
541                 struct ipsec_output_state state;
542
543                 /*
544                  * All the extension headers will become inaccessible
545                  * (since they can be encrypted).
546                  * Don't panic, we need no more updates to extension headers
547                  * on inner IPv6 packet (since they are now encapsulated).
548                  *
549                  * IPv6 [ESP|AH] IPv6 [extension headers] payload
550                  */
551                 bzero(&exthdrs, sizeof(exthdrs));
552                 exthdrs.ip6e_ip6 = m;
553
554                 bzero(&state, sizeof(state));
555                 state.m = m;
556                 state.ro = (struct route *)ro;
557                 state.dst = (struct sockaddr *)dst;
558
559                 error = ipsec6_output_tunnel(&state, sp, flags);
560
561                 m = state.m;
562                 ro = (struct route_in6 *)state.ro;
563                 dst = (struct sockaddr_in6 *)state.dst;
564                 if (error == EJUSTRETURN) {
565                         /*
566                          * We had a SP with a level of 'use' and no SA. We
567                          * will just continue to process the packet without
568                          * IPsec processing.
569                          */
570                         ;
571                 } else if (error) {
572                         /* mbuf is already reclaimed in ipsec6_output_tunnel. */
573                         m0 = m = NULL;
574                         m = NULL;
575                         switch (error) {
576                         case EHOSTUNREACH:
577                         case ENETUNREACH:
578                         case EMSGSIZE:
579                         case ENOBUFS:
580                         case ENOMEM:
581                                 break;
582                         default:
583                                 printf("[%s:%d] (ipsec): error code %d\n",
584                                     __func__, __LINE__, error);
585                                 /* FALLTHROUGH */
586                         case ENOENT:
587                                 /* don't show these error codes to the user */
588                                 error = 0;
589                                 break;
590                         }
591                         goto bad;
592                 } else {
593                         /*
594                          * In the FAST IPSec case we have already
595                          * re-injected the packet and it has been freed
596                          * by the ipsec_done() function.  So, just clean
597                          * up after ourselves.
598                          */
599                         m = NULL;
600                         goto done;
601                 }
602
603                 exthdrs.ip6e_ip6 = m;
604         }
605 #endif /* IPSEC */
606
607         /* adjust pointer */
608         ip6 = mtod(m, struct ip6_hdr *);
609
610         bzero(&dst_sa, sizeof(dst_sa));
611         dst_sa.sin6_family = AF_INET6;
612         dst_sa.sin6_len = sizeof(dst_sa);
613         dst_sa.sin6_addr = ip6->ip6_dst;
614         if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
615             &ifp, &rt, 0)) != 0) {
616                 switch (error) {
617                 case EHOSTUNREACH:
618                         V_ip6stat.ip6s_noroute++;
619                         break;
620                 case EADDRNOTAVAIL:
621                 default:
622                         break; /* XXX statistics? */
623                 }
624                 if (ifp != NULL)
625                         in6_ifstat_inc(ifp, ifs6_out_discard);
626                 goto bad;
627         }
628         if (rt == NULL) {
629                 /*
630                  * If in6_selectroute() does not return a route entry,
631                  * dst may not have been updated.
632                  */
633                 *dst = dst_sa;  /* XXX */
634         }
635
636         /*
637          * then rt (for unicast) and ifp must be non-NULL valid values.
638          */
639         if ((flags & IPV6_FORWARDING) == 0) {
640                 /* XXX: the FORWARDING flag can be set for mrouting. */
641                 in6_ifstat_inc(ifp, ifs6_out_request);
642         }
643         if (rt != NULL) {
644                 ia = (struct in6_ifaddr *)(rt->rt_ifa);
645                 rt->rt_use++;
646         }
647
648         /*
649          * The outgoing interface must be in the zone of source and
650          * destination addresses.  We should use ia_ifp to support the
651          * case of sending packets to an address of our own.
652          */
653         if (ia != NULL && ia->ia_ifp)
654                 origifp = ia->ia_ifp;
655         else
656                 origifp = ifp;
657
658         src0 = ip6->ip6_src;
659         if (in6_setscope(&src0, origifp, &zone))
660                 goto badscope;
661         bzero(&src_sa, sizeof(src_sa));
662         src_sa.sin6_family = AF_INET6;
663         src_sa.sin6_len = sizeof(src_sa);
664         src_sa.sin6_addr = ip6->ip6_src;
665         if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
666                 goto badscope;
667
668         dst0 = ip6->ip6_dst;
669         if (in6_setscope(&dst0, origifp, &zone))
670                 goto badscope;
671         /* re-initialize to be sure */
672         bzero(&dst_sa, sizeof(dst_sa));
673         dst_sa.sin6_family = AF_INET6;
674         dst_sa.sin6_len = sizeof(dst_sa);
675         dst_sa.sin6_addr = ip6->ip6_dst;
676         if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
677                 goto badscope;
678         }
679
680         /* scope check is done. */
681         goto routefound;
682
683   badscope:
684         V_ip6stat.ip6s_badscope++;
685         in6_ifstat_inc(origifp, ifs6_out_discard);
686         if (error == 0)
687                 error = EHOSTUNREACH; /* XXX */
688         goto bad;
689
690   routefound:
691         if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
692                 if (opt && opt->ip6po_nextroute.ro_rt) {
693                         /*
694                          * The nexthop is explicitly specified by the
695                          * application.  We assume the next hop is an IPv6
696                          * address.
697                          */
698                         dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
699                 }
700                 else if ((rt->rt_flags & RTF_GATEWAY))
701                         dst = (struct sockaddr_in6 *)rt->rt_gateway;
702         }
703
704         if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
705                 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
706         } else {
707                 struct  in6_multi *in6m;
708
709                 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
710
711                 in6_ifstat_inc(ifp, ifs6_out_mcast);
712
713                 /*
714                  * Confirm that the outgoing interface supports multicast.
715                  */
716                 if (!(ifp->if_flags & IFF_MULTICAST)) {
717                         V_ip6stat.ip6s_noroute++;
718                         in6_ifstat_inc(ifp, ifs6_out_discard);
719                         error = ENETUNREACH;
720                         goto bad;
721                 }
722                 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
723                 if (in6m != NULL &&
724                    (im6o == NULL || im6o->im6o_multicast_loop)) {
725                         /*
726                          * If we belong to the destination multicast group
727                          * on the outgoing interface, and the caller did not
728                          * forbid loopback, loop back a copy.
729                          */
730                         ip6_mloopback(ifp, m, dst);
731                 } else {
732                         /*
733                          * If we are acting as a multicast router, perform
734                          * multicast forwarding as if the packet had just
735                          * arrived on the interface to which we are about
736                          * to send.  The multicast forwarding function
737                          * recursively calls this function, using the
738                          * IPV6_FORWARDING flag to prevent infinite recursion.
739                          *
740                          * Multicasts that are looped back by ip6_mloopback(),
741                          * above, will be forwarded by the ip6_input() routine,
742                          * if necessary.
743                          */
744                         if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
745                                 /*
746                                  * XXX: ip6_mforward expects that rcvif is NULL
747                                  * when it is called from the originating path.
748                                  * However, it is not always the case, since
749                                  * some versions of MGETHDR() do not
750                                  * initialize the field.
751                                  */
752                                 m->m_pkthdr.rcvif = NULL;
753                                 if (ip6_mforward(ip6, ifp, m) != 0) {
754                                         m_freem(m);
755                                         goto done;
756                                 }
757                         }
758                 }
759                 /*
760                  * Multicasts with a hoplimit of zero may be looped back,
761                  * above, but must not be transmitted on a network.
762                  * Also, multicasts addressed to the loopback interface
763                  * are not sent -- the above call to ip6_mloopback() will
764                  * loop back a copy if this host actually belongs to the
765                  * destination group on the loopback interface.
766                  */
767                 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
768                     IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
769                         m_freem(m);
770                         goto done;
771                 }
772         }
773
774         /*
775          * Fill the outgoing interface to tell the upper layer
776          * to increment per-interface statistics.
777          */
778         if (ifpp)
779                 *ifpp = ifp;
780
781         /* Determine path MTU. */
782         if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
783             &alwaysfrag)) != 0)
784                 goto bad;
785
786         /*
787          * The caller of this function may specify to use the minimum MTU
788          * in some cases.
789          * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
790          * setting.  The logic is a bit complicated; by default, unicast
791          * packets will follow path MTU while multicast packets will be sent at
792          * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
793          * including unicast ones will be sent at the minimum MTU.  Multicast
794          * packets will always be sent at the minimum MTU unless
795          * IP6PO_MINMTU_DISABLE is explicitly specified.
796          * See RFC 3542 for more details.
797          */
798         if (mtu > IPV6_MMTU) {
799                 if ((flags & IPV6_MINMTU))
800                         mtu = IPV6_MMTU;
801                 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
802                         mtu = IPV6_MMTU;
803                 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
804                          (opt == NULL ||
805                           opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
806                         mtu = IPV6_MMTU;
807                 }
808         }
809
810         /*
811          * clear embedded scope identifiers if necessary.
812          * in6_clearscope will touch the addresses only when necessary.
813          */
814         in6_clearscope(&ip6->ip6_src);
815         in6_clearscope(&ip6->ip6_dst);
816
817         /*
818          * If the outgoing packet contains a hop-by-hop options header,
819          * it must be examined and processed even by the source node.
820          * (RFC 2460, section 4.)
821          */
822         if (exthdrs.ip6e_hbh) {
823                 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
824                 u_int32_t dummy; /* XXX unused */
825                 u_int32_t plen = 0; /* XXX: ip6_process will check the value */
826
827 #ifdef DIAGNOSTIC
828                 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
829                         panic("ip6e_hbh is not continuous");
830 #endif
831                 /*
832                  *  XXX: if we have to send an ICMPv6 error to the sender,
833                  *       we need the M_LOOP flag since icmp6_error() expects
834                  *       the IPv6 and the hop-by-hop options header are
835                  *       continuous unless the flag is set.
836                  */
837                 m->m_flags |= M_LOOP;
838                 m->m_pkthdr.rcvif = ifp;
839                 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
840                     ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
841                     &dummy, &plen) < 0) {
842                         /* m was already freed at this point */
843                         error = EINVAL;/* better error? */
844                         goto done;
845                 }
846                 m->m_flags &= ~M_LOOP; /* XXX */
847                 m->m_pkthdr.rcvif = NULL;
848         }
849
850         /* Jump over all PFIL processing if hooks are not active. */
851         if (!PFIL_HOOKED(&inet6_pfil_hook))
852                 goto passout;
853
854         odst = ip6->ip6_dst;
855         /* Run through list of hooks for output packets. */
856         error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
857         if (error != 0 || m == NULL)
858                 goto done;
859         ip6 = mtod(m, struct ip6_hdr *);
860
861         /* See if destination IP address was changed by packet filter. */
862         if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
863                 m->m_flags |= M_SKIP_FIREWALL;
864                 /* If destination is now ourself drop to ip6_input(). */
865                 if (in6_localaddr(&ip6->ip6_dst)) {
866                         if (m->m_pkthdr.rcvif == NULL)
867                                 m->m_pkthdr.rcvif = V_loif;
868                         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
869                                 m->m_pkthdr.csum_flags |=
870                                     CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
871                                 m->m_pkthdr.csum_data = 0xffff;
872                         }
873                         m->m_pkthdr.csum_flags |=
874                             CSUM_IP_CHECKED | CSUM_IP_VALID;
875                         error = netisr_queue(NETISR_IPV6, m);
876                         goto done;
877                 } else
878                         goto again;     /* Redo the routing table lookup. */
879         }
880
881         /* XXX: IPFIREWALL_FORWARD */
882
883 passout:
884         /*
885          * Send the packet to the outgoing interface.
886          * If necessary, do IPv6 fragmentation before sending.
887          *
888          * the logic here is rather complex:
889          * 1: normal case (dontfrag == 0, alwaysfrag == 0)
890          * 1-a: send as is if tlen <= path mtu
891          * 1-b: fragment if tlen > path mtu
892          *
893          * 2: if user asks us not to fragment (dontfrag == 1)
894          * 2-a: send as is if tlen <= interface mtu
895          * 2-b: error if tlen > interface mtu
896          *
897          * 3: if we always need to attach fragment header (alwaysfrag == 1)
898          *      always fragment
899          *
900          * 4: if dontfrag == 1 && alwaysfrag == 1
901          *      error, as we cannot handle this conflicting request
902          */
903         tlen = m->m_pkthdr.len;
904
905         if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
906                 dontfrag = 1;
907         else
908                 dontfrag = 0;
909         if (dontfrag && alwaysfrag) {   /* case 4 */
910                 /* conflicting request - can't transmit */
911                 error = EMSGSIZE;
912                 goto bad;
913         }
914         if (dontfrag && tlen > IN6_LINKMTU(ifp)) {      /* case 2-b */
915                 /*
916                  * Even if the DONTFRAG option is specified, we cannot send the
917                  * packet when the data length is larger than the MTU of the
918                  * outgoing interface.
919                  * Notify the error by sending IPV6_PATHMTU ancillary data as
920                  * well as returning an error code (the latter is not described
921                  * in the API spec.)
922                  */
923                 u_int32_t mtu32;
924                 struct ip6ctlparam ip6cp;
925
926                 mtu32 = (u_int32_t)mtu;
927                 bzero(&ip6cp, sizeof(ip6cp));
928                 ip6cp.ip6c_cmdarg = (void *)&mtu32;
929                 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
930                     (void *)&ip6cp);
931
932                 error = EMSGSIZE;
933                 goto bad;
934         }
935
936         /*
937          * transmit packet without fragmentation
938          */
939         if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
940                 struct in6_ifaddr *ia6;
941
942                 ip6 = mtod(m, struct ip6_hdr *);
943                 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
944                 if (ia6) {
945                         /* Record statistics for this interface address. */
946                         ia6->ia_ifa.if_opackets++;
947                         ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
948                 }
949                 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
950                 goto done;
951         }
952
953         /*
954          * try to fragment the packet.  case 1-b and 3
955          */
956         if (mtu < IPV6_MMTU) {
957                 /* path MTU cannot be less than IPV6_MMTU */
958                 error = EMSGSIZE;
959                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
960                 goto bad;
961         } else if (ip6->ip6_plen == 0) {
962                 /* jumbo payload cannot be fragmented */
963                 error = EMSGSIZE;
964                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
965                 goto bad;
966         } else {
967                 struct mbuf **mnext, *m_frgpart;
968                 struct ip6_frag *ip6f;
969                 u_int32_t id = htonl(ip6_randomid());
970                 u_char nextproto;
971
972                 int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
973
974                 /*
975                  * Too large for the destination or interface;
976                  * fragment if possible.
977                  * Must be able to put at least 8 bytes per fragment.
978                  */
979                 hlen = unfragpartlen;
980                 if (mtu > IPV6_MAXPACKET)
981                         mtu = IPV6_MAXPACKET;
982
983                 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
984                 if (len < 8) {
985                         error = EMSGSIZE;
986                         in6_ifstat_inc(ifp, ifs6_out_fragfail);
987                         goto bad;
988                 }
989
990                 /*
991                  * Verify that we have any chance at all of being able to queue
992                  *      the packet or packet fragments
993                  */
994                 if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
995                     < tlen  /* - hlen */)) {
996                         error = ENOBUFS;
997                         V_ip6stat.ip6s_odropped++;
998                         goto bad;
999                 }
1000
1001                 mnext = &m->m_nextpkt;
1002
1003                 /*
1004                  * Change the next header field of the last header in the
1005                  * unfragmentable part.
1006                  */
1007                 if (exthdrs.ip6e_rthdr) {
1008                         nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1009                         *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1010                 } else if (exthdrs.ip6e_dest1) {
1011                         nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1012                         *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1013                 } else if (exthdrs.ip6e_hbh) {
1014                         nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1015                         *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1016                 } else {
1017                         nextproto = ip6->ip6_nxt;
1018                         ip6->ip6_nxt = IPPROTO_FRAGMENT;
1019                 }
1020
1021                 /*
1022                  * Loop through length of segment after first fragment,
1023                  * make new header and copy data of each part and link onto
1024                  * chain.
1025                  */
1026                 m0 = m;
1027                 for (off = hlen; off < tlen; off += len) {
1028                         MGETHDR(m, M_DONTWAIT, MT_HEADER);
1029                         if (!m) {
1030                                 error = ENOBUFS;
1031                                 V_ip6stat.ip6s_odropped++;
1032                                 goto sendorfree;
1033                         }
1034                         m->m_pkthdr.rcvif = NULL;
1035                         m->m_flags = m0->m_flags & M_COPYFLAGS;
1036                         *mnext = m;
1037                         mnext = &m->m_nextpkt;
1038                         m->m_data += max_linkhdr;
1039                         mhip6 = mtod(m, struct ip6_hdr *);
1040                         *mhip6 = *ip6;
1041                         m->m_len = sizeof(*mhip6);
1042                         error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1043                         if (error) {
1044                                 V_ip6stat.ip6s_odropped++;
1045                                 goto sendorfree;
1046                         }
1047                         ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1048                         if (off + len >= tlen)
1049                                 len = tlen - off;
1050                         else
1051                                 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1052                         mhip6->ip6_plen = htons((u_short)(len + hlen +
1053                             sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1054                         if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1055                                 error = ENOBUFS;
1056                                 V_ip6stat.ip6s_odropped++;
1057                                 goto sendorfree;
1058                         }
1059                         m_cat(m, m_frgpart);
1060                         m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1061                         m->m_pkthdr.rcvif = NULL;
1062                         ip6f->ip6f_reserved = 0;
1063                         ip6f->ip6f_ident = id;
1064                         ip6f->ip6f_nxt = nextproto;
1065                         V_ip6stat.ip6s_ofragments++;
1066                         in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1067                 }
1068
1069                 in6_ifstat_inc(ifp, ifs6_out_fragok);
1070         }
1071
1072         /*
1073          * Remove leading garbages.
1074          */
1075 sendorfree:
1076         m = m0->m_nextpkt;
1077         m0->m_nextpkt = 0;
1078         m_freem(m0);
1079         for (m0 = m; m; m = m0) {
1080                 m0 = m->m_nextpkt;
1081                 m->m_nextpkt = 0;
1082                 if (error == 0) {
1083                         /* Record statistics for this interface address. */
1084                         if (ia) {
1085                                 ia->ia_ifa.if_opackets++;
1086                                 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1087                         }
1088                         error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1089                 } else
1090                         m_freem(m);
1091         }
1092
1093         if (error == 0)
1094                 V_ip6stat.ip6s_fragmented++;
1095
1096 done:
1097         if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1098                 RTFREE(ro->ro_rt);
1099         } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1100                 RTFREE(ro_pmtu->ro_rt);
1101         }
1102 #ifdef IPSEC
1103         if (sp != NULL)
1104                 KEY_FREESP(&sp);
1105 #endif
1106
1107         return (error);
1108
1109 freehdrs:
1110         m_freem(exthdrs.ip6e_hbh);      /* m_freem will check if mbuf is 0 */
1111         m_freem(exthdrs.ip6e_dest1);
1112         m_freem(exthdrs.ip6e_rthdr);
1113         m_freem(exthdrs.ip6e_dest2);
1114         /* FALLTHROUGH */
1115 bad:
1116         if (m)
1117                 m_freem(m);
1118         goto done;
1119 }
1120
1121 static int
1122 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1123 {
1124         struct mbuf *m;
1125
1126         if (hlen > MCLBYTES)
1127                 return (ENOBUFS); /* XXX */
1128
1129         MGET(m, M_DONTWAIT, MT_DATA);
1130         if (!m)
1131                 return (ENOBUFS);
1132
1133         if (hlen > MLEN) {
1134                 MCLGET(m, M_DONTWAIT);
1135                 if ((m->m_flags & M_EXT) == 0) {
1136                         m_free(m);
1137                         return (ENOBUFS);
1138                 }
1139         }
1140         m->m_len = hlen;
1141         if (hdr)
1142                 bcopy(hdr, mtod(m, caddr_t), hlen);
1143
1144         *mp = m;
1145         return (0);
1146 }
1147
1148 /*
1149  * Insert jumbo payload option.
1150  */
1151 static int
1152 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1153 {
1154         struct mbuf *mopt;
1155         u_char *optbuf;
1156         u_int32_t v;
1157
1158 #define JUMBOOPTLEN     8       /* length of jumbo payload option and padding */
1159
1160         /*
1161          * If there is no hop-by-hop options header, allocate new one.
1162          * If there is one but it doesn't have enough space to store the
1163          * jumbo payload option, allocate a cluster to store the whole options.
1164          * Otherwise, use it to store the options.
1165          */
1166         if (exthdrs->ip6e_hbh == 0) {
1167                 MGET(mopt, M_DONTWAIT, MT_DATA);
1168                 if (mopt == 0)
1169                         return (ENOBUFS);
1170                 mopt->m_len = JUMBOOPTLEN;
1171                 optbuf = mtod(mopt, u_char *);
1172                 optbuf[1] = 0;  /* = ((JUMBOOPTLEN) >> 3) - 1 */
1173                 exthdrs->ip6e_hbh = mopt;
1174         } else {
1175                 struct ip6_hbh *hbh;
1176
1177                 mopt = exthdrs->ip6e_hbh;
1178                 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1179                         /*
1180                          * XXX assumption:
1181                          * - exthdrs->ip6e_hbh is not referenced from places
1182                          *   other than exthdrs.
1183                          * - exthdrs->ip6e_hbh is not an mbuf chain.
1184                          */
1185                         int oldoptlen = mopt->m_len;
1186                         struct mbuf *n;
1187
1188                         /*
1189                          * XXX: give up if the whole (new) hbh header does
1190                          * not fit even in an mbuf cluster.
1191                          */
1192                         if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1193                                 return (ENOBUFS);
1194
1195                         /*
1196                          * As a consequence, we must always prepare a cluster
1197                          * at this point.
1198                          */
1199                         MGET(n, M_DONTWAIT, MT_DATA);
1200                         if (n) {
1201                                 MCLGET(n, M_DONTWAIT);
1202                                 if ((n->m_flags & M_EXT) == 0) {
1203                                         m_freem(n);
1204                                         n = NULL;
1205                                 }
1206                         }
1207                         if (!n)
1208                                 return (ENOBUFS);
1209                         n->m_len = oldoptlen + JUMBOOPTLEN;
1210                         bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1211                             oldoptlen);
1212                         optbuf = mtod(n, caddr_t) + oldoptlen;
1213                         m_freem(mopt);
1214                         mopt = exthdrs->ip6e_hbh = n;
1215                 } else {
1216                         optbuf = mtod(mopt, u_char *) + mopt->m_len;
1217                         mopt->m_len += JUMBOOPTLEN;
1218                 }
1219                 optbuf[0] = IP6OPT_PADN;
1220                 optbuf[1] = 1;
1221
1222                 /*
1223                  * Adjust the header length according to the pad and
1224                  * the jumbo payload option.
1225                  */
1226                 hbh = mtod(mopt, struct ip6_hbh *);
1227                 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1228         }
1229
1230         /* fill in the option. */
1231         optbuf[2] = IP6OPT_JUMBO;
1232         optbuf[3] = 4;
1233         v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1234         bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1235
1236         /* finally, adjust the packet header length */
1237         exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1238
1239         return (0);
1240 #undef JUMBOOPTLEN
1241 }
1242
1243 /*
1244  * Insert fragment header and copy unfragmentable header portions.
1245  */
1246 static int
1247 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1248     struct ip6_frag **frghdrp)
1249 {
1250         struct mbuf *n, *mlast;
1251
1252         if (hlen > sizeof(struct ip6_hdr)) {
1253                 n = m_copym(m0, sizeof(struct ip6_hdr),
1254                     hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1255                 if (n == 0)
1256                         return (ENOBUFS);
1257                 m->m_next = n;
1258         } else
1259                 n = m;
1260
1261         /* Search for the last mbuf of unfragmentable part. */
1262         for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1263                 ;
1264
1265         if ((mlast->m_flags & M_EXT) == 0 &&
1266             M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1267                 /* use the trailing space of the last mbuf for the fragment hdr */
1268                 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1269                     mlast->m_len);
1270                 mlast->m_len += sizeof(struct ip6_frag);
1271                 m->m_pkthdr.len += sizeof(struct ip6_frag);
1272         } else {
1273                 /* allocate a new mbuf for the fragment header */
1274                 struct mbuf *mfrg;
1275
1276                 MGET(mfrg, M_DONTWAIT, MT_DATA);
1277                 if (mfrg == 0)
1278                         return (ENOBUFS);
1279                 mfrg->m_len = sizeof(struct ip6_frag);
1280                 *frghdrp = mtod(mfrg, struct ip6_frag *);
1281                 mlast->m_next = mfrg;
1282         }
1283
1284         return (0);
1285 }
1286
1287 static int
1288 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1289     struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
1290     int *alwaysfragp)
1291 {
1292         u_int32_t mtu = 0;
1293         int alwaysfrag = 0;
1294         int error = 0;
1295
1296         if (ro_pmtu != ro) {
1297                 /* The first hop and the final destination may differ. */
1298                 struct sockaddr_in6 *sa6_dst =
1299                     (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1300                 if (ro_pmtu->ro_rt &&
1301                     ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1302                      !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1303                         RTFREE(ro_pmtu->ro_rt);
1304                         ro_pmtu->ro_rt = (struct rtentry *)NULL;
1305                 }
1306                 if (ro_pmtu->ro_rt == NULL) {
1307                         bzero(sa6_dst, sizeof(*sa6_dst));
1308                         sa6_dst->sin6_family = AF_INET6;
1309                         sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1310                         sa6_dst->sin6_addr = *dst;
1311
1312                         rtalloc((struct route *)ro_pmtu);
1313                 }
1314         }
1315         if (ro_pmtu->ro_rt) {
1316                 u_int32_t ifmtu;
1317                 struct in_conninfo inc;
1318
1319                 bzero(&inc, sizeof(inc));
1320                 inc.inc_flags = 1; /* IPv6 */
1321                 inc.inc6_faddr = *dst;
1322
1323                 if (ifp == NULL)
1324                         ifp = ro_pmtu->ro_rt->rt_ifp;
1325                 ifmtu = IN6_LINKMTU(ifp);
1326                 mtu = tcp_hc_getmtu(&inc);
1327                 if (mtu)
1328                         mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1329                 else
1330                         mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1331                 if (mtu == 0)
1332                         mtu = ifmtu;
1333                 else if (mtu < IPV6_MMTU) {
1334                         /*
1335                          * RFC2460 section 5, last paragraph:
1336                          * if we record ICMPv6 too big message with
1337                          * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1338                          * or smaller, with framgent header attached.
1339                          * (fragment header is needed regardless from the
1340                          * packet size, for translators to identify packets)
1341                          */
1342                         alwaysfrag = 1;
1343                         mtu = IPV6_MMTU;
1344                 } else if (mtu > ifmtu) {
1345                         /*
1346                          * The MTU on the route is larger than the MTU on
1347                          * the interface!  This shouldn't happen, unless the
1348                          * MTU of the interface has been changed after the
1349                          * interface was brought up.  Change the MTU in the
1350                          * route to match the interface MTU (as long as the
1351                          * field isn't locked).
1352                          */
1353                         mtu = ifmtu;
1354                         ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1355                 }
1356         } else if (ifp) {
1357                 mtu = IN6_LINKMTU(ifp);
1358         } else
1359                 error = EHOSTUNREACH; /* XXX */
1360
1361         *mtup = mtu;
1362         if (alwaysfragp)
1363                 *alwaysfragp = alwaysfrag;
1364         return (error);
1365 }
1366
1367 /*
1368  * IP6 socket option processing.
1369  */
1370 int
1371 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1372 {
1373         int optdatalen, uproto;
1374         void *optdata;
1375         struct inpcb *in6p = sotoinpcb(so);
1376         int error, optval;
1377         int level, op, optname;
1378         int optlen;
1379         struct thread *td;
1380
1381         level = sopt->sopt_level;
1382         op = sopt->sopt_dir;
1383         optname = sopt->sopt_name;
1384         optlen = sopt->sopt_valsize;
1385         td = sopt->sopt_td;
1386         error = 0;
1387         optval = 0;
1388         uproto = (int)so->so_proto->pr_protocol;
1389
1390         if (level == IPPROTO_IPV6) {
1391                 switch (op) {
1392
1393                 case SOPT_SET:
1394                         switch (optname) {
1395                         case IPV6_2292PKTOPTIONS:
1396 #ifdef IPV6_PKTOPTIONS
1397                         case IPV6_PKTOPTIONS:
1398 #endif
1399                         {
1400                                 struct mbuf *m;
1401
1402                                 error = soopt_getm(sopt, &m); /* XXX */
1403                                 if (error != 0)
1404                                         break;
1405                                 error = soopt_mcopyin(sopt, m); /* XXX */
1406                                 if (error != 0)
1407                                         break;
1408                                 error = ip6_pcbopts(&in6p->in6p_outputopts,
1409                                                     m, so, sopt);
1410                                 m_freem(m); /* XXX */
1411                                 break;
1412                         }
1413
1414                         /*
1415                          * Use of some Hop-by-Hop options or some
1416                          * Destination options, might require special
1417                          * privilege.  That is, normal applications
1418                          * (without special privilege) might be forbidden
1419                          * from setting certain options in outgoing packets,
1420                          * and might never see certain options in received
1421                          * packets. [RFC 2292 Section 6]
1422                          * KAME specific note:
1423                          *  KAME prevents non-privileged users from sending or
1424                          *  receiving ANY hbh/dst options in order to avoid
1425                          *  overhead of parsing options in the kernel.
1426                          */
1427                         case IPV6_RECVHOPOPTS:
1428                         case IPV6_RECVDSTOPTS:
1429                         case IPV6_RECVRTHDRDSTOPTS:
1430                                 if (td != NULL) {
1431                                         error = priv_check(td,
1432                                             PRIV_NETINET_SETHDROPTS);
1433                                         if (error)
1434                                                 break;
1435                                 }
1436                                 /* FALLTHROUGH */
1437                         case IPV6_UNICAST_HOPS:
1438                         case IPV6_HOPLIMIT:
1439                         case IPV6_FAITH:
1440
1441                         case IPV6_RECVPKTINFO:
1442                         case IPV6_RECVHOPLIMIT:
1443                         case IPV6_RECVRTHDR:
1444                         case IPV6_RECVPATHMTU:
1445                         case IPV6_RECVTCLASS:
1446                         case IPV6_V6ONLY:
1447                         case IPV6_AUTOFLOWLABEL:
1448                                 if (optlen != sizeof(int)) {
1449                                         error = EINVAL;
1450                                         break;
1451                                 }
1452                                 error = sooptcopyin(sopt, &optval,
1453                                         sizeof optval, sizeof optval);
1454                                 if (error)
1455                                         break;
1456                                 switch (optname) {
1457
1458                                 case IPV6_UNICAST_HOPS:
1459                                         if (optval < -1 || optval >= 256)
1460                                                 error = EINVAL;
1461                                         else {
1462                                                 /* -1 = kernel default */
1463                                                 in6p->in6p_hops = optval;
1464                                                 if ((in6p->in6p_vflag &
1465                                                      INP_IPV4) != 0)
1466                                                         in6p->inp_ip_ttl = optval;
1467                                         }
1468                                         break;
1469 #define OPTSET(bit) \
1470 do { \
1471         if (optval) \
1472                 in6p->in6p_flags |= (bit); \
1473         else \
1474                 in6p->in6p_flags &= ~(bit); \
1475 } while (/*CONSTCOND*/ 0)
1476 #define OPTSET2292(bit) \
1477 do { \
1478         in6p->in6p_flags |= IN6P_RFC2292; \
1479         if (optval) \
1480                 in6p->in6p_flags |= (bit); \
1481         else \
1482                 in6p->in6p_flags &= ~(bit); \
1483 } while (/*CONSTCOND*/ 0)
1484 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1485
1486                                 case IPV6_RECVPKTINFO:
1487                                         /* cannot mix with RFC2292 */
1488                                         if (OPTBIT(IN6P_RFC2292)) {
1489                                                 error = EINVAL;
1490                                                 break;
1491                                         }
1492                                         OPTSET(IN6P_PKTINFO);
1493                                         break;
1494
1495                                 case IPV6_HOPLIMIT:
1496                                 {
1497                                         struct ip6_pktopts **optp;
1498
1499                                         /* cannot mix with RFC2292 */
1500                                         if (OPTBIT(IN6P_RFC2292)) {
1501                                                 error = EINVAL;
1502                                                 break;
1503                                         }
1504                                         optp = &in6p->in6p_outputopts;
1505                                         error = ip6_pcbopt(IPV6_HOPLIMIT,
1506                                             (u_char *)&optval, sizeof(optval),
1507                                             optp, (td != NULL) ? td->td_ucred :
1508                                             NULL, uproto);
1509                                         break;
1510                                 }
1511
1512                                 case IPV6_RECVHOPLIMIT:
1513                                         /* cannot mix with RFC2292 */
1514                                         if (OPTBIT(IN6P_RFC2292)) {
1515                                                 error = EINVAL;
1516                                                 break;
1517                                         }
1518                                         OPTSET(IN6P_HOPLIMIT);
1519                                         break;
1520
1521                                 case IPV6_RECVHOPOPTS:
1522                                         /* cannot mix with RFC2292 */
1523                                         if (OPTBIT(IN6P_RFC2292)) {
1524                                                 error = EINVAL;
1525                                                 break;
1526                                         }
1527                                         OPTSET(IN6P_HOPOPTS);
1528                                         break;
1529
1530                                 case IPV6_RECVDSTOPTS:
1531                                         /* cannot mix with RFC2292 */
1532                                         if (OPTBIT(IN6P_RFC2292)) {
1533                                                 error = EINVAL;
1534                                                 break;
1535                                         }
1536                                         OPTSET(IN6P_DSTOPTS);
1537                                         break;
1538
1539                                 case IPV6_RECVRTHDRDSTOPTS:
1540                                         /* cannot mix with RFC2292 */
1541                                         if (OPTBIT(IN6P_RFC2292)) {
1542                                                 error = EINVAL;
1543                                                 break;
1544                                         }
1545                                         OPTSET(IN6P_RTHDRDSTOPTS);
1546                                         break;
1547
1548                                 case IPV6_RECVRTHDR:
1549                                         /* cannot mix with RFC2292 */
1550                                         if (OPTBIT(IN6P_RFC2292)) {
1551                                                 error = EINVAL;
1552                                                 break;
1553                                         }
1554                                         OPTSET(IN6P_RTHDR);
1555                                         break;
1556
1557                                 case IPV6_FAITH:
1558                                         OPTSET(IN6P_FAITH);
1559                                         break;
1560
1561                                 case IPV6_RECVPATHMTU:
1562                                         /*
1563                                          * We ignore this option for TCP
1564                                          * sockets.
1565                                          * (RFC3542 leaves this case
1566                                          * unspecified.)
1567                                          */
1568                                         if (uproto != IPPROTO_TCP)
1569                                                 OPTSET(IN6P_MTU);
1570                                         break;
1571
1572                                 case IPV6_V6ONLY:
1573                                         /*
1574                                          * make setsockopt(IPV6_V6ONLY)
1575                                          * available only prior to bind(2).
1576                                          * see ipng mailing list, Jun 22 2001.
1577                                          */
1578                                         if (in6p->in6p_lport ||
1579                                             !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1580                                                 error = EINVAL;
1581                                                 break;
1582                                         }
1583                                         OPTSET(IN6P_IPV6_V6ONLY);
1584                                         if (optval)
1585                                                 in6p->in6p_vflag &= ~INP_IPV4;
1586                                         else
1587                                                 in6p->in6p_vflag |= INP_IPV4;
1588                                         break;
1589                                 case IPV6_RECVTCLASS:
1590                                         /* cannot mix with RFC2292 XXX */
1591                                         if (OPTBIT(IN6P_RFC2292)) {
1592                                                 error = EINVAL;
1593                                                 break;
1594                                         }
1595                                         OPTSET(IN6P_TCLASS);
1596                                         break;
1597                                 case IPV6_AUTOFLOWLABEL:
1598                                         OPTSET(IN6P_AUTOFLOWLABEL);
1599                                         break;
1600
1601                                 }
1602                                 break;
1603
1604                         case IPV6_TCLASS:
1605                         case IPV6_DONTFRAG:
1606                         case IPV6_USE_MIN_MTU:
1607                         case IPV6_PREFER_TEMPADDR:
1608                                 if (optlen != sizeof(optval)) {
1609                                         error = EINVAL;
1610                                         break;
1611                                 }
1612                                 error = sooptcopyin(sopt, &optval,
1613                                         sizeof optval, sizeof optval);
1614                                 if (error)
1615                                         break;
1616                                 {
1617                                         struct ip6_pktopts **optp;
1618                                         optp = &in6p->in6p_outputopts;
1619                                         error = ip6_pcbopt(optname,
1620                                             (u_char *)&optval, sizeof(optval),
1621                                             optp, (td != NULL) ? td->td_ucred :
1622                                             NULL, uproto);
1623                                         break;
1624                                 }
1625
1626                         case IPV6_2292PKTINFO:
1627                         case IPV6_2292HOPLIMIT:
1628                         case IPV6_2292HOPOPTS:
1629                         case IPV6_2292DSTOPTS:
1630                         case IPV6_2292RTHDR:
1631                                 /* RFC 2292 */
1632                                 if (optlen != sizeof(int)) {
1633                                         error = EINVAL;
1634                                         break;
1635                                 }
1636                                 error = sooptcopyin(sopt, &optval,
1637                                         sizeof optval, sizeof optval);
1638                                 if (error)
1639                                         break;
1640                                 switch (optname) {
1641                                 case IPV6_2292PKTINFO:
1642                                         OPTSET2292(IN6P_PKTINFO);
1643                                         break;
1644                                 case IPV6_2292HOPLIMIT:
1645                                         OPTSET2292(IN6P_HOPLIMIT);
1646                                         break;
1647                                 case IPV6_2292HOPOPTS:
1648                                         /*
1649                                          * Check super-user privilege.
1650                                          * See comments for IPV6_RECVHOPOPTS.
1651                                          */
1652                                         if (td != NULL) {
1653                                                 error = priv_check(td,
1654                                                     PRIV_NETINET_SETHDROPTS);
1655                                                 if (error)
1656                                                         return (error);
1657                                         }
1658                                         OPTSET2292(IN6P_HOPOPTS);
1659                                         break;
1660                                 case IPV6_2292DSTOPTS:
1661                                         if (td != NULL) {
1662                                                 error = priv_check(td,
1663                                                     PRIV_NETINET_SETHDROPTS);
1664                                                 if (error)
1665                                                         return (error);
1666                                         }
1667                                         OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1668                                         break;
1669                                 case IPV6_2292RTHDR:
1670                                         OPTSET2292(IN6P_RTHDR);
1671                                         break;
1672                                 }
1673                                 break;
1674                         case IPV6_PKTINFO:
1675                         case IPV6_HOPOPTS:
1676                         case IPV6_RTHDR:
1677                         case IPV6_DSTOPTS:
1678                         case IPV6_RTHDRDSTOPTS:
1679                         case IPV6_NEXTHOP:
1680                         {
1681                                 /* new advanced API (RFC3542) */
1682                                 u_char *optbuf;
1683                                 u_char optbuf_storage[MCLBYTES];
1684                                 int optlen;
1685                                 struct ip6_pktopts **optp;
1686
1687                                 /* cannot mix with RFC2292 */
1688                                 if (OPTBIT(IN6P_RFC2292)) {
1689                                         error = EINVAL;
1690                                         break;
1691                                 }
1692
1693                                 /*
1694                                  * We only ensure valsize is not too large
1695                                  * here.  Further validation will be done
1696                                  * later.
1697                                  */
1698                                 error = sooptcopyin(sopt, optbuf_storage,
1699                                     sizeof(optbuf_storage), 0);
1700                                 if (error)
1701                                         break;
1702                                 optlen = sopt->sopt_valsize;
1703                                 optbuf = optbuf_storage;
1704                                 optp = &in6p->in6p_outputopts;
1705                                 error = ip6_pcbopt(optname, optbuf, optlen,
1706                                     optp, (td != NULL) ? td->td_ucred : NULL,
1707                                     uproto);
1708                                 break;
1709                         }
1710 #undef OPTSET
1711
1712                         case IPV6_MULTICAST_IF:
1713                         case IPV6_MULTICAST_HOPS:
1714                         case IPV6_MULTICAST_LOOP:
1715                         case IPV6_JOIN_GROUP:
1716                         case IPV6_LEAVE_GROUP:
1717                             {
1718                                 if (sopt->sopt_valsize > MLEN) {
1719                                         error = EMSGSIZE;
1720                                         break;
1721                                 }
1722                                 /* XXX */
1723                             }
1724                             /* FALLTHROUGH */
1725                             {
1726                                 struct mbuf *m;
1727
1728                                 if (sopt->sopt_valsize > MCLBYTES) {
1729                                         error = EMSGSIZE;
1730                                         break;
1731                                 }
1732                                 /* XXX */
1733                                 MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
1734                                 if (m == 0) {
1735                                         error = ENOBUFS;
1736                                         break;
1737                                 }
1738                                 if (sopt->sopt_valsize > MLEN) {
1739                                         MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
1740                                         if ((m->m_flags & M_EXT) == 0) {
1741                                                 m_free(m);
1742                                                 error = ENOBUFS;
1743                                                 break;
1744                                         }
1745                                 }
1746                                 m->m_len = sopt->sopt_valsize;
1747                                 error = sooptcopyin(sopt, mtod(m, char *),
1748                                                     m->m_len, m->m_len);
1749                                 if (error) {
1750                                         (void)m_free(m);
1751                                         break;
1752                                 }
1753                                 error = ip6_setmoptions(sopt->sopt_name,
1754                                                         &in6p->in6p_moptions,
1755                                                         m);
1756                                 (void)m_free(m);
1757                             }
1758                                 break;
1759
1760                         case IPV6_PORTRANGE:
1761                                 error = sooptcopyin(sopt, &optval,
1762                                     sizeof optval, sizeof optval);
1763                                 if (error)
1764                                         break;
1765
1766                                 switch (optval) {
1767                                 case IPV6_PORTRANGE_DEFAULT:
1768                                         in6p->in6p_flags &= ~(IN6P_LOWPORT);
1769                                         in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1770                                         break;
1771
1772                                 case IPV6_PORTRANGE_HIGH:
1773                                         in6p->in6p_flags &= ~(IN6P_LOWPORT);
1774                                         in6p->in6p_flags |= IN6P_HIGHPORT;
1775                                         break;
1776
1777                                 case IPV6_PORTRANGE_LOW:
1778                                         in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1779                                         in6p->in6p_flags |= IN6P_LOWPORT;
1780                                         break;
1781
1782                                 default:
1783                                         error = EINVAL;
1784                                         break;
1785                                 }
1786                                 break;
1787
1788 #ifdef IPSEC
1789                         case IPV6_IPSEC_POLICY:
1790                         {
1791                                 caddr_t req;
1792                                 struct mbuf *m;
1793
1794                                 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1795                                         break;
1796                                 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1797                                         break;
1798                                 req = mtod(m, caddr_t);
1799                                 error = ipsec6_set_policy(in6p, optname, req,
1800                                     m->m_len, (sopt->sopt_td != NULL) ?
1801                                     sopt->sopt_td->td_ucred : NULL);
1802                                 m_freem(m);
1803                                 break;
1804                         }
1805 #endif /* IPSEC */
1806
1807                         default:
1808                                 error = ENOPROTOOPT;
1809                                 break;
1810                         }
1811                         break;
1812
1813                 case SOPT_GET:
1814                         switch (optname) {
1815
1816                         case IPV6_2292PKTOPTIONS:
1817 #ifdef IPV6_PKTOPTIONS
1818                         case IPV6_PKTOPTIONS:
1819 #endif
1820                                 /*
1821                                  * RFC3542 (effectively) deprecated the
1822                                  * semantics of the 2292-style pktoptions.
1823                                  * Since it was not reliable in nature (i.e.,
1824                                  * applications had to expect the lack of some
1825                                  * information after all), it would make sense
1826                                  * to simplify this part by always returning
1827                                  * empty data.
1828                                  */
1829                                 sopt->sopt_valsize = 0;
1830                                 break;
1831
1832                         case IPV6_RECVHOPOPTS:
1833                         case IPV6_RECVDSTOPTS:
1834                         case IPV6_RECVRTHDRDSTOPTS:
1835                         case IPV6_UNICAST_HOPS:
1836                         case IPV6_RECVPKTINFO:
1837                         case IPV6_RECVHOPLIMIT:
1838                         case IPV6_RECVRTHDR:
1839                         case IPV6_RECVPATHMTU:
1840
1841                         case IPV6_FAITH:
1842                         case IPV6_V6ONLY:
1843                         case IPV6_PORTRANGE:
1844                         case IPV6_RECVTCLASS:
1845                         case IPV6_AUTOFLOWLABEL:
1846                                 switch (optname) {
1847
1848                                 case IPV6_RECVHOPOPTS:
1849                                         optval = OPTBIT(IN6P_HOPOPTS);
1850                                         break;
1851
1852                                 case IPV6_RECVDSTOPTS:
1853                                         optval = OPTBIT(IN6P_DSTOPTS);
1854                                         break;
1855
1856                                 case IPV6_RECVRTHDRDSTOPTS:
1857                                         optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1858                                         break;
1859
1860                                 case IPV6_UNICAST_HOPS:
1861                                         optval = in6p->in6p_hops;
1862                                         break;
1863
1864                                 case IPV6_RECVPKTINFO:
1865                                         optval = OPTBIT(IN6P_PKTINFO);
1866                                         break;
1867
1868                                 case IPV6_RECVHOPLIMIT:
1869                                         optval = OPTBIT(IN6P_HOPLIMIT);
1870                                         break;
1871
1872                                 case IPV6_RECVRTHDR:
1873                                         optval = OPTBIT(IN6P_RTHDR);
1874                                         break;
1875
1876                                 case IPV6_RECVPATHMTU:
1877                                         optval = OPTBIT(IN6P_MTU);
1878                                         break;
1879
1880                                 case IPV6_FAITH:
1881                                         optval = OPTBIT(IN6P_FAITH);
1882                                         break;
1883
1884                                 case IPV6_V6ONLY:
1885                                         optval = OPTBIT(IN6P_IPV6_V6ONLY);
1886                                         break;
1887
1888                                 case IPV6_PORTRANGE:
1889                                     {
1890                                         int flags;
1891                                         flags = in6p->in6p_flags;
1892                                         if (flags & IN6P_HIGHPORT)
1893                                                 optval = IPV6_PORTRANGE_HIGH;
1894                                         else if (flags & IN6P_LOWPORT)
1895                                                 optval = IPV6_PORTRANGE_LOW;
1896                                         else
1897                                                 optval = 0;
1898                                         break;
1899                                     }
1900                                 case IPV6_RECVTCLASS:
1901                                         optval = OPTBIT(IN6P_TCLASS);
1902                                         break;
1903
1904                                 case IPV6_AUTOFLOWLABEL:
1905                                         optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1906                                         break;
1907                                 }
1908                                 if (error)
1909                                         break;
1910                                 error = sooptcopyout(sopt, &optval,
1911                                         sizeof optval);
1912                                 break;
1913
1914                         case IPV6_PATHMTU:
1915                         {
1916                                 u_long pmtu = 0;
1917                                 struct ip6_mtuinfo mtuinfo;
1918                                 struct route_in6 sro;
1919
1920                                 bzero(&sro, sizeof(sro));
1921
1922                                 if (!(so->so_state & SS_ISCONNECTED))
1923                                         return (ENOTCONN);
1924                                 /*
1925                                  * XXX: we do not consider the case of source
1926                                  * routing, or optional information to specify
1927                                  * the outgoing interface.
1928                                  */
1929                                 error = ip6_getpmtu(&sro, NULL, NULL,
1930                                     &in6p->in6p_faddr, &pmtu, NULL);
1931                                 if (sro.ro_rt)
1932                                         RTFREE(sro.ro_rt);
1933                                 if (error)
1934                                         break;
1935                                 if (pmtu > IPV6_MAXPACKET)
1936                                         pmtu = IPV6_MAXPACKET;
1937
1938                                 bzero(&mtuinfo, sizeof(mtuinfo));
1939                                 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1940                                 optdata = (void *)&mtuinfo;
1941                                 optdatalen = sizeof(mtuinfo);
1942                                 error = sooptcopyout(sopt, optdata,
1943                                     optdatalen);
1944                                 break;
1945                         }
1946
1947                         case IPV6_2292PKTINFO:
1948                         case IPV6_2292HOPLIMIT:
1949                         case IPV6_2292HOPOPTS:
1950                         case IPV6_2292RTHDR:
1951                         case IPV6_2292DSTOPTS:
1952                                 switch (optname) {
1953                                 case IPV6_2292PKTINFO:
1954                                         optval = OPTBIT(IN6P_PKTINFO);
1955                                         break;
1956                                 case IPV6_2292HOPLIMIT:
1957                                         optval = OPTBIT(IN6P_HOPLIMIT);
1958                                         break;
1959                                 case IPV6_2292HOPOPTS:
1960                                         optval = OPTBIT(IN6P_HOPOPTS);
1961                                         break;
1962                                 case IPV6_2292RTHDR:
1963                                         optval = OPTBIT(IN6P_RTHDR);
1964                                         break;
1965                                 case IPV6_2292DSTOPTS:
1966                                         optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1967                                         break;
1968                                 }
1969                                 error = sooptcopyout(sopt, &optval,
1970                                     sizeof optval);
1971                                 break;
1972                         case IPV6_PKTINFO:
1973                         case IPV6_HOPOPTS:
1974                         case IPV6_RTHDR:
1975                         case IPV6_DSTOPTS:
1976                         case IPV6_RTHDRDSTOPTS:
1977                         case IPV6_NEXTHOP:
1978                         case IPV6_TCLASS:
1979                         case IPV6_DONTFRAG:
1980                         case IPV6_USE_MIN_MTU:
1981                         case IPV6_PREFER_TEMPADDR:
1982                                 error = ip6_getpcbopt(in6p->in6p_outputopts,
1983                                     optname, sopt);
1984                                 break;
1985
1986                         case IPV6_MULTICAST_IF:
1987                         case IPV6_MULTICAST_HOPS:
1988                         case IPV6_MULTICAST_LOOP:
1989                         case IPV6_JOIN_GROUP:
1990                         case IPV6_LEAVE_GROUP:
1991                             {
1992                                 struct mbuf *m;
1993                                 error = ip6_getmoptions(sopt->sopt_name,
1994                                     in6p->in6p_moptions, &m);
1995                                 if (error == 0)
1996                                         error = sooptcopyout(sopt,
1997                                             mtod(m, char *), m->m_len);
1998                                 m_freem(m);
1999                             }
2000                                 break;
2001
2002 #ifdef IPSEC
2003                         case IPV6_IPSEC_POLICY:
2004                           {
2005                                 caddr_t req = NULL;
2006                                 size_t len = 0;
2007                                 struct mbuf *m = NULL;
2008                                 struct mbuf **mp = &m;
2009                                 size_t ovalsize = sopt->sopt_valsize;
2010                                 caddr_t oval = (caddr_t)sopt->sopt_val;
2011
2012                                 error = soopt_getm(sopt, &m); /* XXX */
2013                                 if (error != 0)
2014                                         break;
2015                                 error = soopt_mcopyin(sopt, m); /* XXX */
2016                                 if (error != 0)
2017                                         break;
2018                                 sopt->sopt_valsize = ovalsize;
2019                                 sopt->sopt_val = oval;
2020                                 if (m) {
2021                                         req = mtod(m, caddr_t);
2022                                         len = m->m_len;
2023                                 }
2024                                 error = ipsec6_get_policy(in6p, req, len, mp);
2025                                 if (error == 0)
2026                                         error = soopt_mcopyout(sopt, m); /* XXX */
2027                                 if (error == 0 && m)
2028                                         m_freem(m);
2029                                 break;
2030                           }
2031 #endif /* IPSEC */
2032
2033                         default:
2034                                 error = ENOPROTOOPT;
2035                                 break;
2036                         }
2037                         break;
2038                 }
2039         } else {                /* level != IPPROTO_IPV6 */
2040                 error = EINVAL;
2041         }
2042         return (error);
2043 }
2044
2045 int
2046 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2047 {
2048         int error = 0, optval, optlen;
2049         const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2050         struct in6pcb *in6p = sotoin6pcb(so);
2051         int level, op, optname;
2052
2053         level = sopt->sopt_level;
2054         op = sopt->sopt_dir;
2055         optname = sopt->sopt_name;
2056         optlen = sopt->sopt_valsize;
2057
2058         if (level != IPPROTO_IPV6) {
2059                 return (EINVAL);
2060         }
2061
2062         switch (optname) {
2063         case IPV6_CHECKSUM:
2064                 /*
2065                  * For ICMPv6 sockets, no modification allowed for checksum
2066                  * offset, permit "no change" values to help existing apps.
2067                  *
2068                  * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2069                  * for an ICMPv6 socket will fail."
2070                  * The current behavior does not meet RFC3542.
2071                  */
2072                 switch (op) {
2073                 case SOPT_SET:
2074                         if (optlen != sizeof(int)) {
2075                                 error = EINVAL;
2076                                 break;
2077                         }
2078                         error = sooptcopyin(sopt, &optval, sizeof(optval),
2079                                             sizeof(optval));
2080                         if (error)
2081                                 break;
2082                         if ((optval % 2) != 0) {
2083                                 /* the API assumes even offset values */
2084                                 error = EINVAL;
2085                         } else if (so->so_proto->pr_protocol ==
2086                             IPPROTO_ICMPV6) {
2087                                 if (optval != icmp6off)
2088                                         error = EINVAL;
2089                         } else
2090                                 in6p->in6p_cksum = optval;
2091                         break;
2092
2093                 case SOPT_GET:
2094                         if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2095                                 optval = icmp6off;
2096                         else
2097                                 optval = in6p->in6p_cksum;
2098
2099                         error = sooptcopyout(sopt, &optval, sizeof(optval));
2100                         break;
2101
2102                 default:
2103                         error = EINVAL;
2104                         break;
2105                 }
2106                 break;
2107
2108         default:
2109                 error = ENOPROTOOPT;
2110                 break;
2111         }
2112
2113         return (error);
2114 }
2115
2116 /*
2117  * Set up IP6 options in pcb for insertion in output packets or
2118  * specifying behavior of outgoing packets.
2119  */
2120 static int
2121 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2122     struct socket *so, struct sockopt *sopt)
2123 {
2124         struct ip6_pktopts *opt = *pktopt;
2125         int error = 0;
2126         struct thread *td = sopt->sopt_td;
2127
2128         /* turn off any old options. */
2129         if (opt) {
2130 #ifdef DIAGNOSTIC
2131                 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2132                     opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2133                     opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2134                         printf("ip6_pcbopts: all specified options are cleared.\n");
2135 #endif
2136                 ip6_clearpktopts(opt, -1);
2137         } else
2138                 opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2139         *pktopt = NULL;
2140
2141         if (!m || m->m_len == 0) {
2142                 /*
2143                  * Only turning off any previous options, regardless of
2144                  * whether the opt is just created or given.
2145                  */
2146                 free(opt, M_IP6OPT);
2147                 return (0);
2148         }
2149
2150         /*  set options specified by user. */
2151         if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ?
2152             td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) {
2153                 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2154                 free(opt, M_IP6OPT);
2155                 return (error);
2156         }
2157         *pktopt = opt;
2158         return (0);
2159 }
2160
2161 /*
2162  * initialize ip6_pktopts.  beware that there are non-zero default values in
2163  * the struct.
2164  */
2165 void
2166 ip6_initpktopts(struct ip6_pktopts *opt)
2167 {
2168
2169         bzero(opt, sizeof(*opt));
2170         opt->ip6po_hlim = -1;   /* -1 means default hop limit */
2171         opt->ip6po_tclass = -1; /* -1 means default traffic class */
2172         opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2173         opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2174 }
2175
2176 static int
2177 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2178     struct ucred *cred, int uproto)
2179 {
2180         struct ip6_pktopts *opt;
2181
2182         if (*pktopt == NULL) {
2183                 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2184                     M_WAITOK);
2185                 ip6_initpktopts(*pktopt);
2186         }
2187         opt = *pktopt;
2188
2189         return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2190 }
2191
2192 static int
2193 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2194 {
2195         void *optdata = NULL;
2196         int optdatalen = 0;
2197         struct ip6_ext *ip6e;
2198         int error = 0;
2199         struct in6_pktinfo null_pktinfo;
2200         int deftclass = 0, on;
2201         int defminmtu = IP6PO_MINMTU_MCASTONLY;
2202         int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2203
2204         switch (optname) {
2205         case IPV6_PKTINFO:
2206                 if (pktopt && pktopt->ip6po_pktinfo)
2207                         optdata = (void *)pktopt->ip6po_pktinfo;
2208                 else {
2209                         /* XXX: we don't have to do this every time... */
2210                         bzero(&null_pktinfo, sizeof(null_pktinfo));
2211                         optdata = (void *)&null_pktinfo;
2212                 }
2213                 optdatalen = sizeof(struct in6_pktinfo);
2214                 break;
2215         case IPV6_TCLASS:
2216                 if (pktopt && pktopt->ip6po_tclass >= 0)
2217                         optdata = (void *)&pktopt->ip6po_tclass;
2218                 else
2219                         optdata = (void *)&deftclass;
2220                 optdatalen = sizeof(int);
2221                 break;
2222         case IPV6_HOPOPTS:
2223                 if (pktopt && pktopt->ip6po_hbh) {
2224                         optdata = (void *)pktopt->ip6po_hbh;
2225                         ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2226                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2227                 }
2228                 break;
2229         case IPV6_RTHDR:
2230                 if (pktopt && pktopt->ip6po_rthdr) {
2231                         optdata = (void *)pktopt->ip6po_rthdr;
2232                         ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2233                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2234                 }
2235                 break;
2236         case IPV6_RTHDRDSTOPTS:
2237                 if (pktopt && pktopt->ip6po_dest1) {
2238                         optdata = (void *)pktopt->ip6po_dest1;
2239                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2240                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2241                 }
2242                 break;
2243         case IPV6_DSTOPTS:
2244                 if (pktopt && pktopt->ip6po_dest2) {
2245                         optdata = (void *)pktopt->ip6po_dest2;
2246                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2247                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2248                 }
2249                 break;
2250         case IPV6_NEXTHOP:
2251                 if (pktopt && pktopt->ip6po_nexthop) {
2252                         optdata = (void *)pktopt->ip6po_nexthop;
2253                         optdatalen = pktopt->ip6po_nexthop->sa_len;
2254                 }
2255                 break;
2256         case IPV6_USE_MIN_MTU:
2257                 if (pktopt)
2258                         optdata = (void *)&pktopt->ip6po_minmtu;
2259                 else
2260                         optdata = (void *)&defminmtu;
2261                 optdatalen = sizeof(int);
2262                 break;
2263         case IPV6_DONTFRAG:
2264                 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2265                         on = 1;
2266                 else
2267                         on = 0;
2268                 optdata = (void *)&on;
2269                 optdatalen = sizeof(on);
2270                 break;
2271         case IPV6_PREFER_TEMPADDR:
2272                 if (pktopt)
2273                         optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2274                 else
2275                         optdata = (void *)&defpreftemp;
2276                 optdatalen = sizeof(int);
2277                 break;
2278         default:                /* should not happen */
2279 #ifdef DIAGNOSTIC
2280                 panic("ip6_getpcbopt: unexpected option\n");
2281 #endif
2282                 return (ENOPROTOOPT);
2283         }
2284
2285         error = sooptcopyout(sopt, optdata, optdatalen);
2286
2287         return (error);
2288 }
2289
2290 void
2291 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2292 {
2293         if (pktopt == NULL)
2294                 return;
2295
2296         if (optname == -1 || optname == IPV6_PKTINFO) {
2297                 if (pktopt->ip6po_pktinfo)
2298                         free(pktopt->ip6po_pktinfo, M_IP6OPT);
2299                 pktopt->ip6po_pktinfo = NULL;
2300         }
2301         if (optname == -1 || optname == IPV6_HOPLIMIT)
2302                 pktopt->ip6po_hlim = -1;
2303         if (optname == -1 || optname == IPV6_TCLASS)
2304                 pktopt->ip6po_tclass = -1;
2305         if (optname == -1 || optname == IPV6_NEXTHOP) {
2306                 if (pktopt->ip6po_nextroute.ro_rt) {
2307                         RTFREE(pktopt->ip6po_nextroute.ro_rt);
2308                         pktopt->ip6po_nextroute.ro_rt = NULL;
2309                 }
2310                 if (pktopt->ip6po_nexthop)
2311                         free(pktopt->ip6po_nexthop, M_IP6OPT);
2312                 pktopt->ip6po_nexthop = NULL;
2313         }
2314         if (optname == -1 || optname == IPV6_HOPOPTS) {
2315                 if (pktopt->ip6po_hbh)
2316                         free(pktopt->ip6po_hbh, M_IP6OPT);
2317                 pktopt->ip6po_hbh = NULL;
2318         }
2319         if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2320                 if (pktopt->ip6po_dest1)
2321                         free(pktopt->ip6po_dest1, M_IP6OPT);
2322                 pktopt->ip6po_dest1 = NULL;
2323         }
2324         if (optname == -1 || optname == IPV6_RTHDR) {
2325                 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2326                         free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2327                 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2328                 if (pktopt->ip6po_route.ro_rt) {
2329                         RTFREE(pktopt->ip6po_route.ro_rt);
2330                         pktopt->ip6po_route.ro_rt = NULL;
2331                 }
2332         }
2333         if (optname == -1 || optname == IPV6_DSTOPTS) {
2334                 if (pktopt->ip6po_dest2)
2335                         free(pktopt->ip6po_dest2, M_IP6OPT);
2336                 pktopt->ip6po_dest2 = NULL;
2337         }
2338 }
2339
2340 #define PKTOPT_EXTHDRCPY(type) \
2341 do {\
2342         if (src->type) {\
2343                 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2344                 dst->type = malloc(hlen, M_IP6OPT, canwait);\
2345                 if (dst->type == NULL && canwait == M_NOWAIT)\
2346                         goto bad;\
2347                 bcopy(src->type, dst->type, hlen);\
2348         }\
2349 } while (/*CONSTCOND*/ 0)
2350
2351 static int
2352 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2353 {
2354         if (dst == NULL || src == NULL)  {
2355                 printf("ip6_clearpktopts: invalid argument\n");
2356                 return (EINVAL);
2357         }
2358
2359         dst->ip6po_hlim = src->ip6po_hlim;
2360         dst->ip6po_tclass = src->ip6po_tclass;
2361         dst->ip6po_flags = src->ip6po_flags;
2362         if (src->ip6po_pktinfo) {
2363                 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2364                     M_IP6OPT, canwait);
2365                 if (dst->ip6po_pktinfo == NULL)
2366                         goto bad;
2367                 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2368         }
2369         if (src->ip6po_nexthop) {
2370                 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2371                     M_IP6OPT, canwait);
2372                 if (dst->ip6po_nexthop == NULL)
2373                         goto bad;
2374                 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2375                     src->ip6po_nexthop->sa_len);
2376         }
2377         PKTOPT_EXTHDRCPY(ip6po_hbh);
2378         PKTOPT_EXTHDRCPY(ip6po_dest1);
2379         PKTOPT_EXTHDRCPY(ip6po_dest2);
2380         PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2381         return (0);
2382
2383   bad:
2384         ip6_clearpktopts(dst, -1);
2385         return (ENOBUFS);
2386 }
2387 #undef PKTOPT_EXTHDRCPY
2388
2389 struct ip6_pktopts *
2390 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2391 {
2392         int error;
2393         struct ip6_pktopts *dst;
2394
2395         dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2396         if (dst == NULL)
2397                 return (NULL);
2398         ip6_initpktopts(dst);
2399
2400         if ((error = copypktopts(dst, src, canwait)) != 0) {
2401                 free(dst, M_IP6OPT);
2402                 return (NULL);
2403         }
2404
2405         return (dst);
2406 }
2407
2408 void
2409 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2410 {
2411         if (pktopt == NULL)
2412                 return;
2413
2414         ip6_clearpktopts(pktopt, -1);
2415
2416         free(pktopt, M_IP6OPT);
2417 }
2418
2419 /*
2420  * Set the IP6 multicast options in response to user setsockopt().
2421  */
2422 static int
2423 ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
2424 {
2425         INIT_VNET_NET(curvnet);
2426         INIT_VNET_INET6(curvnet);
2427         int error = 0;
2428         u_int loop, ifindex;
2429         struct ipv6_mreq *mreq;
2430         struct ifnet *ifp;
2431         struct ip6_moptions *im6o = *im6op;
2432         struct route_in6 ro;
2433         struct in6_multi_mship *imm;
2434
2435         if (im6o == NULL) {
2436                 /*
2437                  * No multicast option buffer attached to the pcb;
2438                  * allocate one and initialize to default values.
2439                  */
2440                 im6o = (struct ip6_moptions *)
2441                         malloc(sizeof(*im6o), M_IP6MOPTS, M_WAITOK);
2442
2443                 if (im6o == NULL)
2444                         return (ENOBUFS);
2445                 *im6op = im6o;
2446                 im6o->im6o_multicast_ifp = NULL;
2447                 im6o->im6o_multicast_hlim = V_ip6_defmcasthlim;
2448                 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2449                 LIST_INIT(&im6o->im6o_memberships);
2450         }
2451
2452         switch (optname) {
2453
2454         case IPV6_MULTICAST_IF:
2455                 /*
2456                  * Select the interface for outgoing multicast packets.
2457                  */
2458                 if (m == NULL || m->m_len != sizeof(u_int)) {
2459                         error = EINVAL;
2460                         break;
2461                 }
2462                 bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2463                 if (ifindex < 0 || V_if_index < ifindex) {
2464                         error = ENXIO;  /* XXX EINVAL? */
2465                         break;
2466                 }
2467                 ifp = ifnet_byindex(ifindex);
2468                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2469                         error = EADDRNOTAVAIL;
2470                         break;
2471                 }
2472                 im6o->im6o_multicast_ifp = ifp;
2473                 break;
2474
2475         case IPV6_MULTICAST_HOPS:
2476             {
2477                 /*
2478                  * Set the IP6 hoplimit for outgoing multicast packets.
2479                  */
2480                 int optval;
2481                 if (m == NULL || m->m_len != sizeof(int)) {
2482                         error = EINVAL;
2483                         break;
2484                 }
2485                 bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2486                 if (optval < -1 || optval >= 256)
2487                         error = EINVAL;
2488                 else if (optval == -1)
2489                         im6o->im6o_multicast_hlim = V_ip6_defmcasthlim;
2490                 else
2491                         im6o->im6o_multicast_hlim = optval;
2492                 break;
2493             }
2494
2495         case IPV6_MULTICAST_LOOP:
2496                 /*
2497                  * Set the loopback flag for outgoing multicast packets.
2498                  * Must be zero or one.
2499                  */
2500                 if (m == NULL || m->m_len != sizeof(u_int)) {
2501                         error = EINVAL;
2502                         break;
2503                 }
2504                 bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2505                 if (loop > 1) {
2506                         error = EINVAL;
2507                         break;
2508                 }
2509                 im6o->im6o_multicast_loop = loop;
2510                 break;
2511
2512         case IPV6_JOIN_GROUP:
2513                 /*
2514                  * Add a multicast group membership.
2515                  * Group must be a valid IP6 multicast address.
2516                  */
2517                 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2518                         error = EINVAL;
2519                         break;
2520                 }
2521                 mreq = mtod(m, struct ipv6_mreq *);
2522
2523                 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2524                         /*
2525                          * We use the unspecified address to specify to accept
2526                          * all multicast addresses. Only super user is allowed
2527                          * to do this.
2528                          */
2529                         /* XXX-BZ might need a better PRIV_NETINET_x for this */
2530                         error = priv_check(curthread, PRIV_NETINET_MROUTE);
2531                         if (error)
2532                                 break;
2533                 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2534                         error = EINVAL;
2535                         break;
2536                 }
2537
2538                 /*
2539                  * If no interface was explicitly specified, choose an
2540                  * appropriate one according to the given multicast address.
2541                  */
2542                 if (mreq->ipv6mr_interface == 0) {
2543                         struct sockaddr_in6 *dst;
2544
2545                         /*
2546                          * Look up the routing table for the
2547                          * address, and choose the outgoing interface.
2548                          *   XXX: is it a good approach?
2549                          */
2550                         ro.ro_rt = NULL;
2551                         dst = (struct sockaddr_in6 *)&ro.ro_dst;
2552                         bzero(dst, sizeof(*dst));
2553                         dst->sin6_family = AF_INET6;
2554                         dst->sin6_len = sizeof(*dst);
2555                         dst->sin6_addr = mreq->ipv6mr_multiaddr;
2556                         rtalloc((struct route *)&ro);
2557                         if (ro.ro_rt == NULL) {
2558                                 error = EADDRNOTAVAIL;
2559                                 break;
2560                         }
2561                         ifp = ro.ro_rt->rt_ifp;
2562                         RTFREE(ro.ro_rt);
2563                 } else {
2564                         /*
2565                          * If the interface is specified, validate it.
2566                          */
2567                         if (mreq->ipv6mr_interface < 0 ||
2568                             V_if_index < mreq->ipv6mr_interface) {
2569                                 error = ENXIO;  /* XXX EINVAL? */
2570                                 break;
2571                         }
2572                         ifp = ifnet_byindex(mreq->ipv6mr_interface);
2573                         if (!ifp) {
2574                                 error = ENXIO;  /* XXX EINVAL? */
2575                                 break;
2576                         }
2577                 }
2578
2579                 /*
2580                  * See if we found an interface, and confirm that it
2581                  * supports multicast
2582                  */
2583                 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2584                         error = EADDRNOTAVAIL;
2585                         break;
2586                 }
2587
2588                 if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2589                         error = EADDRNOTAVAIL; /* XXX: should not happen */
2590                         break;
2591                 }
2592
2593                 /*
2594                  * See if the membership already exists.
2595                  */
2596                 for (imm = im6o->im6o_memberships.lh_first;
2597                      imm != NULL; imm = imm->i6mm_chain.le_next)
2598                         if (imm->i6mm_maddr->in6m_ifp == ifp &&
2599                             IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2600                                                &mreq->ipv6mr_multiaddr))
2601                                 break;
2602                 if (imm != NULL) {
2603                         error = EADDRINUSE;
2604                         break;
2605                 }
2606                 /*
2607                  * Everything looks good; add a new record to the multicast
2608                  * address list for the given interface.
2609                  */
2610                 imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr,  &error, 0);
2611                 if (imm == NULL)
2612                         break;
2613                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2614                 break;
2615
2616         case IPV6_LEAVE_GROUP:
2617                 /*
2618                  * Drop a multicast group membership.
2619                  * Group must be a valid IP6 multicast address.
2620                  */
2621                 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2622                         error = EINVAL;
2623                         break;
2624                 }
2625                 mreq = mtod(m, struct ipv6_mreq *);
2626
2627                 /*
2628                  * If an interface address was specified, get a pointer
2629                  * to its ifnet structure.
2630                  */
2631                 if (mreq->ipv6mr_interface < 0 ||
2632                     V_if_index < mreq->ipv6mr_interface) {
2633                         error = ENXIO;  /* XXX EINVAL? */
2634                         break;
2635                 }
2636                 if (mreq->ipv6mr_interface == 0)
2637                         ifp = NULL;
2638                 else
2639                         ifp = ifnet_byindex(mreq->ipv6mr_interface);
2640
2641                 /* Fill in the scope zone ID */
2642                 if (ifp) {
2643                         if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2644                                 /* XXX: should not happen */
2645                                 error = EADDRNOTAVAIL;
2646                                 break;
2647                         }
2648                 } else if (mreq->ipv6mr_interface != 0) {
2649                         /*
2650                          * This case happens when the (positive) index is in
2651                          * the valid range, but the corresponding interface has
2652                          * been detached dynamically (XXX).
2653                          */
2654                         error = EADDRNOTAVAIL;
2655                         break;
2656                 } else {        /* ipv6mr_interface == 0 */
2657                         struct sockaddr_in6 sa6_mc;
2658
2659                         /*
2660                          * The API spec says as follows:
2661                          *  If the interface index is specified as 0, the
2662                          *  system may choose a multicast group membership to
2663                          *  drop by matching the multicast address only.
2664                          * On the other hand, we cannot disambiguate the scope
2665                          * zone unless an interface is provided.  Thus, we
2666                          * check if there's ambiguity with the default scope
2667                          * zone as the last resort.
2668                          */
2669                         bzero(&sa6_mc, sizeof(sa6_mc));
2670                         sa6_mc.sin6_family = AF_INET6;
2671                         sa6_mc.sin6_len = sizeof(sa6_mc);
2672                         sa6_mc.sin6_addr = mreq->ipv6mr_multiaddr;
2673                         error = sa6_embedscope(&sa6_mc, V_ip6_use_defzone);
2674                         if (error != 0)
2675                                 break;
2676                         mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
2677                 }
2678
2679                 /*
2680                  * Find the membership in the membership list.
2681                  */
2682                 for (imm = im6o->im6o_memberships.lh_first;
2683                      imm != NULL; imm = imm->i6mm_chain.le_next) {
2684                         if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2685                             IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2686                             &mreq->ipv6mr_multiaddr))
2687                                 break;
2688                 }
2689                 if (imm == NULL) {
2690                         /* Unable to resolve interface */
2691                         error = EADDRNOTAVAIL;
2692                         break;
2693                 }
2694                 /*
2695                  * Give up the multicast address record to which the
2696                  * membership points.
2697                  */
2698                 LIST_REMOVE(imm, i6mm_chain);
2699                 in6_delmulti(imm->i6mm_maddr);
2700                 free(imm, M_IP6MADDR);
2701                 break;
2702
2703         default:
2704                 error = EOPNOTSUPP;
2705                 break;
2706         }
2707
2708         /*
2709          * If all options have default values, no need to keep the mbuf.
2710          */
2711         if (im6o->im6o_multicast_ifp == NULL &&
2712             im6o->im6o_multicast_hlim == V_ip6_defmcasthlim &&
2713             im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2714             im6o->im6o_memberships.lh_first == NULL) {
2715                 free(*im6op, M_IP6MOPTS);
2716                 *im6op = NULL;
2717         }
2718
2719         return (error);
2720 }
2721
2722 /*
2723  * Return the IP6 multicast options in response to user getsockopt().
2724  */
2725 static int
2726 ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
2727 {
2728         INIT_VNET_INET6(curvnet);
2729         u_int *hlim, *loop, *ifindex;
2730
2731         *mp = m_get(M_WAIT, MT_HEADER);         /* XXX */
2732
2733         switch (optname) {
2734
2735         case IPV6_MULTICAST_IF:
2736                 ifindex = mtod(*mp, u_int *);
2737                 (*mp)->m_len = sizeof(u_int);
2738                 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2739                         *ifindex = 0;
2740                 else
2741                         *ifindex = im6o->im6o_multicast_ifp->if_index;
2742                 return (0);
2743
2744         case IPV6_MULTICAST_HOPS:
2745                 hlim = mtod(*mp, u_int *);
2746                 (*mp)->m_len = sizeof(u_int);
2747                 if (im6o == NULL)
2748                         *hlim = V_ip6_defmcasthlim;
2749                 else
2750                         *hlim = im6o->im6o_multicast_hlim;
2751                 return (0);
2752
2753         case IPV6_MULTICAST_LOOP:
2754                 loop = mtod(*mp, u_int *);
2755                 (*mp)->m_len = sizeof(u_int);
2756                 if (im6o == NULL)
2757                         *loop = V_ip6_defmcasthlim;
2758                 else
2759                         *loop = im6o->im6o_multicast_loop;
2760                 return (0);
2761
2762         default:
2763                 return (EOPNOTSUPP);
2764         }
2765 }
2766
2767 /*
2768  * Discard the IP6 multicast options.
2769  */
2770 void
2771 ip6_freemoptions(struct ip6_moptions *im6o)
2772 {
2773         struct in6_multi_mship *imm;
2774
2775         if (im6o == NULL)
2776                 return;
2777
2778         while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2779                 LIST_REMOVE(imm, i6mm_chain);
2780                 if (imm->i6mm_maddr)
2781                         in6_delmulti(imm->i6mm_maddr);
2782                 free(imm, M_IP6MADDR);
2783         }
2784         free(im6o, M_IP6MOPTS);
2785 }
2786
2787 /*
2788  * Set IPv6 outgoing packet options based on advanced API.
2789  */
2790 int
2791 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2792     struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
2793 {
2794         struct cmsghdr *cm = 0;
2795
2796         if (control == NULL || opt == NULL)
2797                 return (EINVAL);
2798
2799         ip6_initpktopts(opt);
2800         if (stickyopt) {
2801                 int error;
2802
2803                 /*
2804                  * If stickyopt is provided, make a local copy of the options
2805                  * for this particular packet, then override them by ancillary
2806                  * objects.
2807                  * XXX: copypktopts() does not copy the cached route to a next
2808                  * hop (if any).  This is not very good in terms of efficiency,
2809                  * but we can allow this since this option should be rarely
2810                  * used.
2811                  */
2812                 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2813                         return (error);
2814         }
2815
2816         /*
2817          * XXX: Currently, we assume all the optional information is stored
2818          * in a single mbuf.
2819          */
2820         if (control->m_next)
2821                 return (EINVAL);
2822
2823         for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2824             control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2825                 int error;
2826
2827                 if (control->m_len < CMSG_LEN(0))
2828                         return (EINVAL);
2829
2830                 cm = mtod(control, struct cmsghdr *);
2831                 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2832                         return (EINVAL);
2833                 if (cm->cmsg_level != IPPROTO_IPV6)
2834                         continue;
2835
2836                 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2837                     cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2838                 if (error)
2839                         return (error);
2840         }
2841
2842         return (0);
2843 }
2844
2845 /*
2846  * Set a particular packet option, as a sticky option or an ancillary data
2847  * item.  "len" can be 0 only when it's a sticky option.
2848  * We have 4 cases of combination of "sticky" and "cmsg":
2849  * "sticky=0, cmsg=0": impossible
2850  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2851  * "sticky=1, cmsg=0": RFC3542 socket option
2852  * "sticky=1, cmsg=1": RFC2292 socket option
2853  */
2854 static int
2855 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2856     struct ucred *cred, int sticky, int cmsg, int uproto)
2857 {
2858         INIT_VNET_NET(curvnet);
2859         INIT_VNET_INET6(curvnet);
2860         int minmtupolicy, preftemp;
2861         int error;
2862
2863         if (!sticky && !cmsg) {
2864 #ifdef DIAGNOSTIC
2865                 printf("ip6_setpktopt: impossible case\n");
2866 #endif
2867                 return (EINVAL);
2868         }
2869
2870         /*
2871          * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2872          * not be specified in the context of RFC3542.  Conversely,
2873          * RFC3542 types should not be specified in the context of RFC2292.
2874          */
2875         if (!cmsg) {
2876                 switch (optname) {
2877                 case IPV6_2292PKTINFO:
2878                 case IPV6_2292HOPLIMIT:
2879                 case IPV6_2292NEXTHOP:
2880                 case IPV6_2292HOPOPTS:
2881                 case IPV6_2292DSTOPTS:
2882                 case IPV6_2292RTHDR:
2883                 case IPV6_2292PKTOPTIONS:
2884                         return (ENOPROTOOPT);
2885                 }
2886         }
2887         if (sticky && cmsg) {
2888                 switch (optname) {
2889                 case IPV6_PKTINFO:
2890                 case IPV6_HOPLIMIT:
2891                 case IPV6_NEXTHOP:
2892                 case IPV6_HOPOPTS:
2893                 case IPV6_DSTOPTS:
2894                 case IPV6_RTHDRDSTOPTS:
2895                 case IPV6_RTHDR:
2896                 case IPV6_USE_MIN_MTU:
2897                 case IPV6_DONTFRAG:
2898                 case IPV6_TCLASS:
2899                 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2900                         return (ENOPROTOOPT);
2901                 }
2902         }
2903
2904         switch (optname) {
2905         case IPV6_2292PKTINFO:
2906         case IPV6_PKTINFO:
2907         {
2908                 struct ifnet *ifp = NULL;
2909                 struct in6_pktinfo *pktinfo;
2910
2911                 if (len != sizeof(struct in6_pktinfo))
2912                         return (EINVAL);
2913
2914                 pktinfo = (struct in6_pktinfo *)buf;
2915
2916                 /*
2917                  * An application can clear any sticky IPV6_PKTINFO option by
2918                  * doing a "regular" setsockopt with ipi6_addr being
2919                  * in6addr_any and ipi6_ifindex being zero.
2920                  * [RFC 3542, Section 6]
2921                  */
2922                 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2923                     pktinfo->ipi6_ifindex == 0 &&
2924                     IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2925                         ip6_clearpktopts(opt, optname);
2926                         break;
2927                 }
2928
2929                 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2930                     sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2931                         return (EINVAL);
2932                 }
2933
2934                 /* validate the interface index if specified. */
2935                 if (pktinfo->ipi6_ifindex > V_if_index ||
2936                     pktinfo->ipi6_ifindex < 0) {
2937                          return (ENXIO);
2938                 }
2939                 if (pktinfo->ipi6_ifindex) {
2940                         ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
2941                         if (ifp == NULL)
2942                                 return (ENXIO);
2943                 }
2944
2945                 /*
2946                  * We store the address anyway, and let in6_selectsrc()
2947                  * validate the specified address.  This is because ipi6_addr
2948                  * may not have enough information about its scope zone, and
2949                  * we may need additional information (such as outgoing
2950                  * interface or the scope zone of a destination address) to
2951                  * disambiguate the scope.
2952                  * XXX: the delay of the validation may confuse the
2953                  * application when it is used as a sticky option.
2954                  */
2955                 if (opt->ip6po_pktinfo == NULL) {
2956                         opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2957                             M_IP6OPT, M_NOWAIT);
2958                         if (opt->ip6po_pktinfo == NULL)
2959                                 return (ENOBUFS);
2960                 }
2961                 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2962                 break;
2963         }
2964
2965         case IPV6_2292HOPLIMIT:
2966         case IPV6_HOPLIMIT:
2967         {
2968                 int *hlimp;
2969
2970                 /*
2971                  * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2972                  * to simplify the ordering among hoplimit options.
2973                  */
2974                 if (optname == IPV6_HOPLIMIT && sticky)
2975                         return (ENOPROTOOPT);
2976
2977                 if (len != sizeof(int))
2978                         return (EINVAL);
2979                 hlimp = (int *)buf;
2980                 if (*hlimp < -1 || *hlimp > 255)
2981                         return (EINVAL);
2982
2983                 opt->ip6po_hlim = *hlimp;
2984                 break;
2985         }
2986
2987         case IPV6_TCLASS:
2988         {
2989                 int tclass;
2990
2991                 if (len != sizeof(int))
2992                         return (EINVAL);
2993                 tclass = *(int *)buf;
2994                 if (tclass < -1 || tclass > 255)
2995                         return (EINVAL);
2996
2997                 opt->ip6po_tclass = tclass;
2998                 break;
2999         }
3000
3001         case IPV6_2292NEXTHOP:
3002         case IPV6_NEXTHOP:
3003                 if (cred != NULL) {
3004                         error = priv_check_cred(cred,
3005                             PRIV_NETINET_SETHDROPTS, 0);
3006                         if (error)
3007                                 return (error);
3008                 }
3009
3010                 if (len == 0) { /* just remove the option */
3011                         ip6_clearpktopts(opt, IPV6_NEXTHOP);
3012                         break;
3013                 }
3014
3015                 /* check if cmsg_len is large enough for sa_len */
3016                 if (len < sizeof(struct sockaddr) || len < *buf)
3017                         return (EINVAL);
3018
3019                 switch (((struct sockaddr *)buf)->sa_family) {
3020                 case AF_INET6:
3021                 {
3022                         struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3023                         int error;
3024
3025                         if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3026                                 return (EINVAL);
3027
3028                         if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3029                             IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3030                                 return (EINVAL);
3031                         }
3032                         if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
3033                             != 0) {
3034                                 return (error);
3035                         }
3036                         break;
3037                 }
3038                 case AF_LINK:   /* should eventually be supported */
3039                 default:
3040                         return (EAFNOSUPPORT);
3041                 }
3042
3043                 /* turn off the previous option, then set the new option. */
3044                 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3045                 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3046                 if (opt->ip6po_nexthop == NULL)
3047                         return (ENOBUFS);
3048                 bcopy(buf, opt->ip6po_nexthop, *buf);
3049                 break;
3050
3051         case IPV6_2292HOPOPTS:
3052         case IPV6_HOPOPTS:
3053         {
3054                 struct ip6_hbh *hbh;
3055                 int hbhlen;
3056
3057                 /*
3058                  * XXX: We don't allow a non-privileged user to set ANY HbH
3059                  * options, since per-option restriction has too much
3060                  * overhead.
3061                  */
3062                 if (cred != NULL) {
3063                         error = priv_check_cred(cred,
3064                             PRIV_NETINET_SETHDROPTS, 0);
3065                         if (error)
3066                                 return (error);
3067                 }
3068
3069                 if (len == 0) {
3070                         ip6_clearpktopts(opt, IPV6_HOPOPTS);
3071                         break;  /* just remove the option */
3072                 }
3073
3074                 /* message length validation */
3075                 if (len < sizeof(struct ip6_hbh))
3076                         return (EINVAL);
3077                 hbh = (struct ip6_hbh *)buf;
3078                 hbhlen = (hbh->ip6h_len + 1) << 3;
3079                 if (len != hbhlen)
3080                         return (EINVAL);
3081
3082                 /* turn off the previous option, then set the new option. */
3083                 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3084                 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3085                 if (opt->ip6po_hbh == NULL)
3086                         return (ENOBUFS);
3087                 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3088
3089                 break;
3090         }
3091
3092         case IPV6_2292DSTOPTS:
3093         case IPV6_DSTOPTS:
3094         case IPV6_RTHDRDSTOPTS:
3095         {
3096                 struct ip6_dest *dest, **newdest = NULL;
3097                 int destlen;
3098
3099                 if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
3100                         error = priv_check_cred(cred,
3101                             PRIV_NETINET_SETHDROPTS, 0);
3102                         if (error)
3103                                 return (error);
3104                 }
3105
3106                 if (len == 0) {
3107                         ip6_clearpktopts(opt, optname);
3108                         break;  /* just remove the option */
3109                 }
3110
3111                 /* message length validation */
3112                 if (len < sizeof(struct ip6_dest))
3113                         return (EINVAL);
3114                 dest = (struct ip6_dest *)buf;
3115                 destlen = (dest->ip6d_len + 1) << 3;
3116                 if (len != destlen)
3117                         return (EINVAL);
3118
3119                 /*
3120                  * Determine the position that the destination options header
3121                  * should be inserted; before or after the routing header.
3122                  */
3123                 switch (optname) {
3124                 case IPV6_2292DSTOPTS:
3125                         /*
3126                          * The old advacned API is ambiguous on this point.
3127                          * Our approach is to determine the position based
3128                          * according to the existence of a routing header.
3129                          * Note, however, that this depends on the order of the
3130                          * extension headers in the ancillary data; the 1st
3131                          * part of the destination options header must appear
3132                          * before the routing header in the ancillary data,
3133                          * too.
3134                          * RFC3542 solved the ambiguity by introducing
3135                          * separate ancillary data or option types.
3136                          */
3137                         if (opt->ip6po_rthdr == NULL)
3138                                 newdest = &opt->ip6po_dest1;
3139                         else
3140                                 newdest = &opt->ip6po_dest2;
3141                         break;
3142                 case IPV6_RTHDRDSTOPTS:
3143                         newdest = &opt->ip6po_dest1;
3144                         break;
3145                 case IPV6_DSTOPTS:
3146                         newdest = &opt->ip6po_dest2;
3147                         break;
3148                 }
3149
3150                 /* turn off the previous option, then set the new option. */
3151                 ip6_clearpktopts(opt, optname);
3152                 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3153                 if (*newdest == NULL)
3154                         return (ENOBUFS);
3155                 bcopy(dest, *newdest, destlen);
3156
3157                 break;
3158         }
3159
3160         case IPV6_2292RTHDR:
3161         case IPV6_RTHDR:
3162         {
3163                 struct ip6_rthdr *rth;
3164                 int rthlen;
3165
3166                 if (len == 0) {
3167                         ip6_clearpktopts(opt, IPV6_RTHDR);
3168                         break;  /* just remove the option */
3169                 }
3170
3171                 /* message length validation */
3172                 if (len < sizeof(struct ip6_rthdr))
3173                         return (EINVAL);
3174                 rth = (struct ip6_rthdr *)buf;
3175                 rthlen = (rth->ip6r_len + 1) << 3;
3176                 if (len != rthlen)
3177                         return (EINVAL);
3178
3179                 switch (rth->ip6r_type) {
3180                 case IPV6_RTHDR_TYPE_0:
3181                         if (rth->ip6r_len == 0) /* must contain one addr */
3182                                 return (EINVAL);
3183                         if (rth->ip6r_len % 2) /* length must be even */
3184                                 return (EINVAL);
3185                         if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3186                                 return (EINVAL);
3187                         break;
3188                 default:
3189                         return (EINVAL);        /* not supported */
3190                 }
3191
3192                 /* turn off the previous option */
3193                 ip6_clearpktopts(opt, IPV6_RTHDR);
3194                 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3195                 if (opt->ip6po_rthdr == NULL)
3196                         return (ENOBUFS);
3197                 bcopy(rth, opt->ip6po_rthdr, rthlen);
3198
3199                 break;
3200         }
3201
3202         case IPV6_USE_MIN_MTU:
3203                 if (len != sizeof(int))
3204                         return (EINVAL);
3205                 minmtupolicy = *(int *)buf;
3206                 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3207                     minmtupolicy != IP6PO_MINMTU_DISABLE &&
3208                     minmtupolicy != IP6PO_MINMTU_ALL) {
3209                         return (EINVAL);
3210                 }
3211                 opt->ip6po_minmtu = minmtupolicy;
3212                 break;
3213
3214         case IPV6_DONTFRAG:
3215                 if (len != sizeof(int))
3216                         return (EINVAL);
3217
3218                 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3219                         /*
3220                          * we ignore this option for TCP sockets.
3221                          * (RFC3542 leaves this case unspecified.)
3222                          */
3223                         opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3224                 } else
3225                         opt->ip6po_flags |= IP6PO_DONTFRAG;
3226                 break;
3227
3228         case IPV6_PREFER_TEMPADDR:
3229                 if (len != sizeof(int))
3230                         return (EINVAL);
3231                 preftemp = *(int *)buf;
3232                 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3233                     preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3234                     preftemp != IP6PO_TEMPADDR_PREFER) {
3235                         return (EINVAL);
3236                 }
3237                 opt->ip6po_prefer_tempaddr = preftemp;
3238                 break;
3239
3240         default:
3241                 return (ENOPROTOOPT);
3242         } /* end of switch */
3243
3244         return (0);
3245 }
3246
3247 /*
3248  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3249  * packet to the input queue of a specified interface.  Note that this
3250  * calls the output routine of the loopback "driver", but with an interface
3251  * pointer that might NOT be &loif -- easier than replicating that code here.
3252  */
3253 void
3254 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
3255 {
3256         struct mbuf *copym;
3257         struct ip6_hdr *ip6;
3258
3259         copym = m_copy(m, 0, M_COPYALL);
3260         if (copym == NULL)
3261                 return;
3262
3263         /*
3264          * Make sure to deep-copy IPv6 header portion in case the data
3265          * is in an mbuf cluster, so that we can safely override the IPv6
3266          * header portion later.
3267          */
3268         if ((copym->m_flags & M_EXT) != 0 ||
3269             copym->m_len < sizeof(struct ip6_hdr)) {
3270                 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3271                 if (copym == NULL)
3272                         return;
3273         }
3274
3275 #ifdef DIAGNOSTIC
3276         if (copym->m_len < sizeof(*ip6)) {
3277                 m_freem(copym);
3278                 return;
3279         }
3280 #endif
3281
3282         ip6 = mtod(copym, struct ip6_hdr *);
3283         /*
3284          * clear embedded scope identifiers if necessary.
3285          * in6_clearscope will touch the addresses only when necessary.
3286          */
3287         in6_clearscope(&ip6->ip6_src);
3288         in6_clearscope(&ip6->ip6_dst);
3289
3290         (void)if_simloop(ifp, copym, dst->sin6_family, 0);
3291 }
3292
3293 /*
3294  * Chop IPv6 header off from the payload.
3295  */
3296 static int
3297 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3298 {
3299         struct mbuf *mh;
3300         struct ip6_hdr *ip6;
3301
3302         ip6 = mtod(m, struct ip6_hdr *);
3303         if (m->m_len > sizeof(*ip6)) {
3304                 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3305                 if (mh == 0) {
3306                         m_freem(m);
3307                         return ENOBUFS;
3308                 }
3309                 M_MOVE_PKTHDR(mh, m);
3310                 MH_ALIGN(mh, sizeof(*ip6));
3311                 m->m_len -= sizeof(*ip6);
3312                 m->m_data += sizeof(*ip6);
3313                 mh->m_next = m;
3314                 m = mh;
3315                 m->m_len = sizeof(*ip6);
3316                 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3317         }
3318         exthdrs->ip6e_ip6 = m;
3319         return 0;
3320 }
3321
3322 /*
3323  * Compute IPv6 extension header length.
3324  */
3325 int
3326 ip6_optlen(struct in6pcb *in6p)
3327 {
3328         int len;
3329
3330         if (!in6p->in6p_outputopts)
3331                 return 0;
3332
3333         len = 0;
3334 #define elen(x) \
3335     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3336
3337         len += elen(in6p->in6p_outputopts->ip6po_hbh);
3338         if (in6p->in6p_outputopts->ip6po_rthdr)
3339                 /* dest1 is valid with rthdr only */
3340                 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3341         len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3342         len += elen(in6p->in6p_outputopts->ip6po_dest2);
3343         return len;
3344 #undef elen
3345 }