]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - sys/netinet6/ip6_output.c
Merge r232054 by kmacy:
[FreeBSD/stable/9.git] / sys / netinet6 / ip6_output.c
1 /*-
2  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the project nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
30  */
31
32 /*-
33  * Copyright (c) 1982, 1986, 1988, 1990, 1993
34  *      The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 4. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
61  */
62
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68 #include "opt_ipfw.h"
69 #include "opt_ipsec.h"
70 #include "opt_sctp.h"
71 #include "opt_route.h"
72
73 #include <sys/param.h>
74 #include <sys/kernel.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/errno.h>
78 #include <sys/priv.h>
79 #include <sys/proc.h>
80 #include <sys/protosw.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/syslog.h>
84 #include <sys/ucred.h>
85
86 #include <machine/in_cksum.h>
87
88 #include <net/if.h>
89 #include <net/netisr.h>
90 #include <net/route.h>
91 #include <net/pfil.h>
92 #include <net/vnet.h>
93
94 #include <netinet/in.h>
95 #include <netinet/in_var.h>
96 #include <netinet/ip_var.h>
97 #include <netinet6/in6_var.h>
98 #include <netinet/ip6.h>
99 #include <netinet/icmp6.h>
100 #include <netinet6/ip6_var.h>
101 #include <netinet/in_pcb.h>
102 #include <netinet/tcp_var.h>
103 #include <netinet6/nd6.h>
104
105 #ifdef IPSEC
106 #include <netipsec/ipsec.h>
107 #include <netipsec/ipsec6.h>
108 #include <netipsec/key.h>
109 #include <netinet6/ip6_ipsec.h>
110 #endif /* IPSEC */
111 #ifdef SCTP
112 #include <netinet/sctp.h>
113 #include <netinet/sctp_crc32.h>
114 #endif
115
116 #include <netinet6/ip6protosw.h>
117 #include <netinet6/scope6_var.h>
118
119 #ifdef FLOWTABLE
120 #include <net/flowtable.h>
121 #endif
122
123 extern int in6_mcast_loop;
124
125 struct ip6_exthdrs {        /* one mbuf pointer per header piece that ip6_output() assembles; NULL when absent */
126         struct mbuf *ip6e_ip6;      /* mbuf holding the IPv6 header; read back by ip6_output() after ip6_splithdr() */
127         struct mbuf *ip6e_hbh;      /* Hop-by-Hop options header (copied from opt->ip6po_hbh) */
128         struct mbuf *ip6e_dest1;    /* Destination options header, 1st part (only built when a routing header is given) */
129         struct mbuf *ip6e_rthdr;    /* Routing header (copied from opt->ip6po_rthdr) */
130         struct mbuf *ip6e_dest2;    /* Destination options header, 2nd part (copied from opt->ip6po_dest2) */
131 };
132
133 static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
134                            struct ucred *, int));
135 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
136         struct socket *, struct sockopt *));
137 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
138 static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *,
139         struct ucred *, int, int, int));
140
141 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
142 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
143         struct ip6_frag **));
144 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
145 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
146 static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
147         struct ifnet *, struct in6_addr *, u_long *, int *, u_int));
148 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
149
150
151 /*
152  * Make an extension header from option data.  hp is the source, and
153  * mp is the destination.
     *
     * hp is a pointer to the option data (viewed as a struct ip6_ext); when it
     * is NULL the macro does nothing.  mp is the address of the mbuf pointer
     * that ip6_copyexthdr() fills in.  The number of bytes copied is
     * (ip6e_len + 1) * 8, taken from the ip6e_len field of hp.
     *
     * The macro relies on the expansion site: it assigns to a variable named
     * "error" and, when ip6_copyexthdr() returns non-zero, jumps to a label
     * named "freehdrs".  Both must exist in the calling function.
154  */
155 #define MAKE_EXTHDR(hp, mp)                                             \
156     do {                                                                \
157         if (hp) {                                                       \
158                 struct ip6_ext *eh = (struct ip6_ext *)(hp);            \
159                 error = ip6_copyexthdr((mp), (caddr_t)(hp),             \
160                     ((eh)->ip6e_len + 1) << 3);                         \
161                 if (error)                                              \
162                         goto freehdrs;                                  \
163         }                                                               \
164     } while (/*CONSTCOND*/ 0)
165
166 /*
167  * Form a chain of extension headers.
168  * m is the extension header mbuf
169  * mp is the previous mbuf in the chain
170  * p is the next header
171  * i is the type of option.
     *
     * When m is NULL the macro does nothing.  Otherwise, in this order:
     *  - it panics unless the caller's "hdrsplit" variable is non-zero;
     *  - the value currently at *p is stored in the first byte of m's data,
     *    and *p is overwritten with i;
     *  - p is repointed at that first byte of m, so the next invocation
     *    patches this header instead;
     *  - m is linked into the mbuf chain right after mp, and mp is advanced
     *    to m.
     *
     * p and mp are assigned to, so both must be modifiable lvalues, and every
     * argument is evaluated more than once.
172  */
173 #define MAKE_CHAIN(m, mp, p, i)\
174     do {\
175         if (m) {\
176                 if (!hdrsplit) \
177                         panic("assumption failed: hdr not split"); \
178                 *mtod((m), u_char *) = *(p);\
179                 *(p) = (i);\
180                 p = mtod((m), u_char *);\
181                 (m)->m_next = (mp)->m_next;\
182                 (mp)->m_next = (m);\
183                 (mp) = (m);\
184         }\
185     } while (/*CONSTCOND*/ 0)
186
187 static void
188 in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
189 {
190         u_short csum;
191
192         csum = in_cksum_skip(m, offset + plen, offset);
193         if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0)
194                 csum = 0xffff;
195         offset += m->m_pkthdr.csum_data;        /* checksum offset */
196
197         if (offset + sizeof(u_short) > m->m_len) {
198                 printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
199                     "csum_flags=0x%04x\n", __func__, m->m_len, plen, offset,
200                     m->m_pkthdr.csum_flags);
201                 /*
202                  * XXX this should not happen, but if it does, the correct
203                  * behavior may be to insert the checksum in the appropriate
204                  * next mbuf in the chain.
205                  */
206                 return;
207         }
208         *(u_short *)(m->m_data + offset) = csum;
209 }
210
211 /*
212  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
213  * header (with pri, len, nxt, hlim, src, dst).
214  * This function may modify ver and hlim only.
215  * The mbuf chain containing the packet will be freed.
216  * The mbuf opt, if present, will not be freed.
217  * If route_in6 ro is present and has ro_rt initialized, route lookup would be
218  * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
219  * then result of route lookup is stored in ro->ro_rt.
220  *
221  * Type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
222  * nd_ifinfo.linkmtu is u_int32_t, so we use u_long, the largest of these
223  * (the type of rt_rmx.rmx_mtu), to hold it.
224  *
225  * ifpp - XXX: just for statistics
226  */
227 int
228 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
229     struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
230     struct ifnet **ifpp, struct inpcb *inp)
231 {
232         struct ip6_hdr *ip6, *mhip6;
233         struct ifnet *ifp, *origifp;
234         struct mbuf *m = m0;
235         struct mbuf *mprev = NULL;
236         int hlen, tlen, len, off;
237         struct route_in6 ip6route;
238         struct rtentry *rt = NULL;
239         struct sockaddr_in6 *dst, src_sa, dst_sa;
240         struct in6_addr odst;
241         int error = 0;
242         struct in6_ifaddr *ia = NULL;
243         u_long mtu;
244         int alwaysfrag, dontfrag;
245         u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
246         struct ip6_exthdrs exthdrs;
247         struct in6_addr finaldst, src0, dst0;
248         u_int32_t zone;
249         struct route_in6 *ro_pmtu = NULL;
250         int hdrsplit = 0;
251         int needipsec = 0;
252         int sw_csum, tso;
253 #ifdef IPSEC
254         struct ipsec_output_state state;
255         struct ip6_rthdr *rh = NULL;
256         int needipsectun = 0;
257         int segleft_org = 0;
258         struct secpolicy *sp = NULL;
259 #endif /* IPSEC */
260 #ifdef IPFIREWALL_FORWARD
261         struct m_tag *fwd_tag;
262 #endif
263
264         ip6 = mtod(m, struct ip6_hdr *);
265         if (ip6 == NULL) {
266                 printf ("ip6 is NULL");
267                 goto bad;
268         }
269
270         if (inp != NULL)
271                 M_SETFIB(m, inp->inp_inc.inc_fibnum);
272
273         finaldst = ip6->ip6_dst;
274         bzero(&exthdrs, sizeof(exthdrs));
275         if (opt) {
276                 /* Hop-by-Hop options header */
277                 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
278                 /* Destination options header(1st part) */
279                 if (opt->ip6po_rthdr) {
280                         /*
281                          * Destination options header(1st part)
282                          * This only makes sense with a routing header.
283                          * See Section 9.2 of RFC 3542.
284                          * Disabling this part just for MIP6 convenience is
285                          * a bad idea.  We need to think carefully about a
286                          * way to make the advanced API coexist with MIP6
287                          * options, which might automatically be inserted in
288                          * the kernel.
289                          */
290                         MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
291                 }
292                 /* Routing header */
293                 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
294                 /* Destination options header(2nd part) */
295                 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
296         }
297
298 #ifdef IPSEC
299         /*
300          * IPSec checking which handles several cases.
301          * FAST IPSEC: We re-injected the packet.
302          */
303         switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
304         {
305         case 1:                 /* Bad packet */
306                 goto freehdrs;
307         case -1:                /* Do IPSec */
308                 needipsec = 1;
309                 /*
310                  * Do delayed checksums now, as we may send before returning.
311                  */
312                 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
313                         plen = m->m_pkthdr.len - sizeof(*ip6);
314                         in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
315                         m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
316                 }
317 #ifdef SCTP
318                 if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
319                         sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
320                         m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
321                 }
322 #endif
323         case 0:                 /* No IPSec */
324         default:
325                 break;
326         }
327 #endif /* IPSEC */
328
329         /*
330          * Calculate the total length of the extension header chain.
331          * Keep the length of the unfragmentable part for fragmentation.
332          */
333         optlen = 0;
334         if (exthdrs.ip6e_hbh)
335                 optlen += exthdrs.ip6e_hbh->m_len;
336         if (exthdrs.ip6e_dest1)
337                 optlen += exthdrs.ip6e_dest1->m_len;
338         if (exthdrs.ip6e_rthdr)
339                 optlen += exthdrs.ip6e_rthdr->m_len;
340         unfragpartlen = optlen + sizeof(struct ip6_hdr);
341
342         /* NOTE: we don't add AH/ESP length here. do that later. */
343         if (exthdrs.ip6e_dest2)
344                 optlen += exthdrs.ip6e_dest2->m_len;
345
346         /*
347          * If we need IPsec, or there is at least one extension header,
348          * separate IP6 header from the payload.
349          */
350         if ((needipsec || optlen) && !hdrsplit) {
351                 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
352                         m = NULL;
353                         goto freehdrs;
354                 }
355                 m = exthdrs.ip6e_ip6;
356                 hdrsplit++;
357         }
358
359         /* adjust pointer */
360         ip6 = mtod(m, struct ip6_hdr *);
361
362         /* adjust mbuf packet header length */
363         m->m_pkthdr.len += optlen;
364         plen = m->m_pkthdr.len - sizeof(*ip6);
365
366         /* If this is a jumbo payload, insert a jumbo payload option. */
367         if (plen > IPV6_MAXPACKET) {
368                 if (!hdrsplit) {
369                         if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
370                                 m = NULL;
371                                 goto freehdrs;
372                         }
373                         m = exthdrs.ip6e_ip6;
374                         hdrsplit++;
375                 }
376                 /* adjust pointer */
377                 ip6 = mtod(m, struct ip6_hdr *);
378                 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
379                         goto freehdrs;
380                 ip6->ip6_plen = 0;
381         } else
382                 ip6->ip6_plen = htons(plen);
383
384         /*
385          * Concatenate headers and fill in next header fields.
386          * Here we have, on "m"
387          *      IPv6 payload
388          * and we insert headers accordingly.  Finally, we should be getting:
389          *      IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
390          *
391          * during the header composing process, "m" points to IPv6 header.
392          * "mprev" points to an extension header prior to esp.
393          */
394         u_char *nexthdrp = &ip6->ip6_nxt;
395         mprev = m;
396
397         /*
398          * we treat dest2 specially.  this makes IPsec processing
399          * much easier.  the goal here is to make mprev point the
400          * mbuf prior to dest2.
401          *
402          * result: IPv6 dest2 payload
403          * m and mprev will point to IPv6 header.
404          */
405         if (exthdrs.ip6e_dest2) {
406                 if (!hdrsplit)
407                         panic("assumption failed: hdr not split");
408                 exthdrs.ip6e_dest2->m_next = m->m_next;
409                 m->m_next = exthdrs.ip6e_dest2;
410                 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
411                 ip6->ip6_nxt = IPPROTO_DSTOPTS;
412         }
413
414         /*
415          * result: IPv6 hbh dest1 rthdr dest2 payload
416          * m will point to IPv6 header.  mprev will point to the
417          * extension header prior to dest2 (rthdr in the above case).
418          */
419         MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
420         MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
421                    IPPROTO_DSTOPTS);
422         MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
423                    IPPROTO_ROUTING);
424
425 #ifdef IPSEC
426         if (!needipsec)
427                 goto skip_ipsec2;
428
429         /*
430          * pointers after IPsec headers are not valid any more.
431          * other pointers need a great care too.
432          * (IPsec routines should not mangle mbufs prior to AH/ESP)
433          */
434         exthdrs.ip6e_dest2 = NULL;
435
436         if (exthdrs.ip6e_rthdr) {
437                 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
438                 segleft_org = rh->ip6r_segleft;
439                 rh->ip6r_segleft = 0;
440         }
441
442         bzero(&state, sizeof(state));
443         state.m = m;
444         error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
445                                     &needipsectun);
446         m = state.m;
447         if (error == EJUSTRETURN) {
448                 /*
449                  * We had a SP with a level of 'use' and no SA. We
450                  * will just continue to process the packet without
451                  * IPsec processing.
452                  */
453                 ;
454         } else if (error) {
455                 /* mbuf is already reclaimed in ipsec6_output_trans. */
456                 m = NULL;
457                 switch (error) {
458                 case EHOSTUNREACH:
459                 case ENETUNREACH:
460                 case EMSGSIZE:
461                 case ENOBUFS:
462                 case ENOMEM:
463                         break;
464                 default:
465                         printf("[%s:%d] (ipsec): error code %d\n",
466                             __func__, __LINE__, error);
467                         /* FALLTHROUGH */
468                 case ENOENT:
469                         /* don't show these error codes to the user */
470                         error = 0;
471                         break;
472                 }
473                 goto bad;
474         } else if (!needipsectun) {
475                 /*
476                  * In the FAST IPSec case we have already
477                  * re-injected the packet and it has been freed
478                  * by the ipsec_done() function.  So, just clean
479                  * up after ourselves.
480                  */
481                 m = NULL;
482                 goto done;
483         }
484         if (exthdrs.ip6e_rthdr) {
485                 /* ah6_output doesn't modify mbuf chain */
486                 rh->ip6r_segleft = segleft_org;
487         }
488 skip_ipsec2:;
489 #endif /* IPSEC */
490
491         /*
492          * If there is a routing header, discard the packet.
493          */
494         if (exthdrs.ip6e_rthdr) {
495                  error = EINVAL;
496                  goto bad;
497         }
498
499         /* Source address validation */
500         if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
501             (flags & IPV6_UNSPECSRC) == 0) {
502                 error = EOPNOTSUPP;
503                 V_ip6stat.ip6s_badscope++;
504                 goto bad;
505         }
506         if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
507                 error = EOPNOTSUPP;
508                 V_ip6stat.ip6s_badscope++;
509                 goto bad;
510         }
511
512         V_ip6stat.ip6s_localout++;
513
514         /*
515          * Route packet.
516          */
517         if (ro == 0) {
518                 ro = &ip6route;
519                 bzero((caddr_t)ro, sizeof(*ro));
520         }
521         ro_pmtu = ro;
522         if (opt && opt->ip6po_rthdr)
523                 ro = &opt->ip6po_route;
524         dst = (struct sockaddr_in6 *)&ro->ro_dst;
525 #ifdef FLOWTABLE
526         if (ro->ro_rt == NULL) {
527                 struct flentry *fle;
528
529                 /*
530                  * The flow table returns route entries valid for up to 30
531                  * seconds; we rely on the remainder of ip_output() taking no
532                  * longer than that long for the stability of ro_rt.  The
533                  * flow ID assignment must have happened before this point.
534                  */
535                 fle = flowtable_lookup_mbuf(V_ip6_ft, m, AF_INET6);
536                 if (fle != NULL)
537                         flow_to_route_in6(fle, ro);
538         }
539 #endif
540 again:
541         /*
542          * if specified, try to fill in the traffic class field.
543          * do not override if a non-zero value is already set.
544          * we check the diffserv field and the ecn field separately.
545          */
546         if (opt && opt->ip6po_tclass >= 0) {
547                 int mask = 0;
548
549                 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
550                         mask |= 0xfc;
551                 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
552                         mask |= 0x03;
553                 if (mask != 0)
554                         ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
555         }
556
557         /* fill in or override the hop limit field, if necessary. */
558         if (opt && opt->ip6po_hlim != -1)
559                 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
560         else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
561                 if (im6o != NULL)
562                         ip6->ip6_hlim = im6o->im6o_multicast_hlim;
563                 else
564                         ip6->ip6_hlim = V_ip6_defmcasthlim;
565         }
566
567 #ifdef IPSEC
568         /*
569          * We may re-inject packets into the stack here.
570          */
571         if (needipsec && needipsectun) {
572                 struct ipsec_output_state state;
573
574                 /*
575                  * All the extension headers will become inaccessible
576                  * (since they can be encrypted).
577                  * Don't panic, we need no more updates to extension headers
578                  * on inner IPv6 packet (since they are now encapsulated).
579                  *
580                  * IPv6 [ESP|AH] IPv6 [extension headers] payload
581                  */
582                 bzero(&exthdrs, sizeof(exthdrs));
583                 exthdrs.ip6e_ip6 = m;
584
585                 bzero(&state, sizeof(state));
586                 state.m = m;
587                 state.ro = (struct route *)ro;
588                 state.dst = (struct sockaddr *)dst;
589
590                 error = ipsec6_output_tunnel(&state, sp, flags);
591
592                 m = state.m;
593                 ro = (struct route_in6 *)state.ro;
594                 dst = (struct sockaddr_in6 *)state.dst;
595                 if (error == EJUSTRETURN) {
596                         /*
597                          * We had a SP with a level of 'use' and no SA. We
598                          * will just continue to process the packet without
599                          * IPsec processing.
600                          */
601                         ;
602                 } else if (error) {
603                         /* mbuf is already reclaimed in ipsec6_output_tunnel. */
604                         m0 = m = NULL;
605                         m = NULL;
606                         switch (error) {
607                         case EHOSTUNREACH:
608                         case ENETUNREACH:
609                         case EMSGSIZE:
610                         case ENOBUFS:
611                         case ENOMEM:
612                                 break;
613                         default:
614                                 printf("[%s:%d] (ipsec): error code %d\n",
615                                     __func__, __LINE__, error);
616                                 /* FALLTHROUGH */
617                         case ENOENT:
618                                 /* don't show these error codes to the user */
619                                 error = 0;
620                                 break;
621                         }
622                         goto bad;
623                 } else {
624                         /*
625                          * In the FAST IPSec case we have already
626                          * re-injected the packet and it has been freed
627                          * by the ipsec_done() function.  So, just clean
628                          * up after ourselves.
629                          */
630                         m = NULL;
631                         goto done;
632                 }
633
634                 exthdrs.ip6e_ip6 = m;
635         }
636 #endif /* IPSEC */
637
638         /* adjust pointer */
639         ip6 = mtod(m, struct ip6_hdr *);
640
641         bzero(&dst_sa, sizeof(dst_sa));
642         dst_sa.sin6_family = AF_INET6;
643         dst_sa.sin6_len = sizeof(dst_sa);
644         dst_sa.sin6_addr = ip6->ip6_dst;
645         if (ro->ro_rt) {
646                 rt = ro->ro_rt;
647                 ifp = ro->ro_rt->rt_ifp;
648         } else if ((error = in6_selectroute_fib(&dst_sa, opt, im6o, ro,
649             &ifp, &rt, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0) {
650                 switch (error) {
651                 case EHOSTUNREACH:
652                         V_ip6stat.ip6s_noroute++;
653                         break;
654                 case EADDRNOTAVAIL:
655                 default:
656                         break; /* XXX statistics? */
657                 }
658                 if (ifp != NULL)
659                         in6_ifstat_inc(ifp, ifs6_out_discard);
660                 goto bad;
661         }
662         if (rt == NULL) {
663                 /*
664                  * If in6_selectroute() does not return a route entry,
665                  * dst may not have been updated.
666                  */
667                 *dst = dst_sa;  /* XXX */
668         }
669
670         /*
671          * then rt (for unicast) and ifp must be non-NULL valid values.
672          */
673         if ((flags & IPV6_FORWARDING) == 0) {
674                 /* XXX: the FORWARDING flag can be set for mrouting. */
675                 in6_ifstat_inc(ifp, ifs6_out_request);
676         }
677         if (rt != NULL) {
678                 ia = (struct in6_ifaddr *)(rt->rt_ifa);
679                 rt->rt_use++;
680         }
681
682
683         /*
684          * The outgoing interface must be in the zone of source and
685          * destination addresses.
686          */
687         origifp = ifp;
688
689         src0 = ip6->ip6_src;
690         if (in6_setscope(&src0, origifp, &zone))
691                 goto badscope;
692         bzero(&src_sa, sizeof(src_sa));
693         src_sa.sin6_family = AF_INET6;
694         src_sa.sin6_len = sizeof(src_sa);
695         src_sa.sin6_addr = ip6->ip6_src;
696         if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
697                 goto badscope;
698
699         dst0 = ip6->ip6_dst;
700         if (in6_setscope(&dst0, origifp, &zone))
701                 goto badscope;
702         /* re-initialize to be sure */
703         bzero(&dst_sa, sizeof(dst_sa));
704         dst_sa.sin6_family = AF_INET6;
705         dst_sa.sin6_len = sizeof(dst_sa);
706         dst_sa.sin6_addr = ip6->ip6_dst;
707         if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
708                 goto badscope;
709         }
710
711         /* We should use ia_ifp to support the case of
712          * sending packets to an address of our own.
713          */
714         if (ia != NULL && ia->ia_ifp)
715                 ifp = ia->ia_ifp;
716
717         /* scope check is done. */
718         goto routefound;
719
720   badscope:
721         V_ip6stat.ip6s_badscope++;
722         in6_ifstat_inc(origifp, ifs6_out_discard);
723         if (error == 0)
724                 error = EHOSTUNREACH; /* XXX */
725         goto bad;
726
727   routefound:
728         if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
729                 if (opt && opt->ip6po_nextroute.ro_rt) {
730                         /*
731                          * The nexthop is explicitly specified by the
732                          * application.  We assume the next hop is an IPv6
733                          * address.
734                          */
735                         dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
736                 }
737                 else if ((rt->rt_flags & RTF_GATEWAY))
738                         dst = (struct sockaddr_in6 *)rt->rt_gateway;
739         }
740
741         if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
742                 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
743         } else {
744                 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
745                 in6_ifstat_inc(ifp, ifs6_out_mcast);
746                 /*
747                  * Confirm that the outgoing interface supports multicast.
748                  */
749                 if (!(ifp->if_flags & IFF_MULTICAST)) {
750                         V_ip6stat.ip6s_noroute++;
751                         in6_ifstat_inc(ifp, ifs6_out_discard);
752                         error = ENETUNREACH;
753                         goto bad;
754                 }
755                 if ((im6o == NULL && in6_mcast_loop) ||
756                     (im6o && im6o->im6o_multicast_loop)) {
757                         /*
758                          * Loop back multicast datagram if not expressly
759                          * forbidden to do so, even if we have not joined
760                          * the address; protocols will filter it later,
761                          * thus deferring a hash lookup and lock acquisition
762                          * at the expense of an m_copym().
763                          */
764                         ip6_mloopback(ifp, m, dst);
765                 } else {
766                         /*
767                          * If we are acting as a multicast router, perform
768                          * multicast forwarding as if the packet had just
769                          * arrived on the interface to which we are about
770                          * to send.  The multicast forwarding function
771                          * recursively calls this function, using the
772                          * IPV6_FORWARDING flag to prevent infinite recursion.
773                          *
774                          * Multicasts that are looped back by ip6_mloopback(),
775                          * above, will be forwarded by the ip6_input() routine,
776                          * if necessary.
777                          */
778                         if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
779                                 /*
780                                  * XXX: ip6_mforward expects that rcvif is NULL
781                                  * when it is called from the originating path.
782                                  * However, it is not always the case, since
783                                  * some versions of MGETHDR() do not
784                                  * initialize the field.
785                                  */
786                                 m->m_pkthdr.rcvif = NULL;
787                                 if (ip6_mforward(ip6, ifp, m) != 0) {
788                                         m_freem(m);
789                                         goto done;
790                                 }
791                         }
792                 }
793                 /*
794                  * Multicasts with a hoplimit of zero may be looped back,
795                  * above, but must not be transmitted on a network.
796                  * Also, multicasts addressed to the loopback interface
797                  * are not sent -- the above call to ip6_mloopback() will
798                  * loop back a copy if this host actually belongs to the
799                  * destination group on the loopback interface.
800                  */
801                 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
802                     IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
803                         m_freem(m);
804                         goto done;
805                 }
806         }
807
808         /*
809          * Fill the outgoing interface to tell the upper layer
810          * to increment per-interface statistics.
811          */
812         if (ifpp)
813                 *ifpp = ifp;
814
815         /* Determine path MTU. */
816         if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
817             &alwaysfrag, inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m))) != 0)
818                 goto bad;
819
820         /*
821          * The caller of this function may specify to use the minimum MTU
822          * in some cases.
823          * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
824          * setting.  The logic is a bit complicated; by default, unicast
825          * packets will follow path MTU while multicast packets will be sent at
826          * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
827          * including unicast ones will be sent at the minimum MTU.  Multicast
828          * packets will always be sent at the minimum MTU unless
829          * IP6PO_MINMTU_DISABLE is explicitly specified.
830          * See RFC 3542 for more details.
831          */
832         if (mtu > IPV6_MMTU) {
833                 if ((flags & IPV6_MINMTU))
834                         mtu = IPV6_MMTU;
835                 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
836                         mtu = IPV6_MMTU;
837                 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
838                          (opt == NULL ||
839                           opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
840                         mtu = IPV6_MMTU;
841                 }
842         }
843
844         /*
845          * clear embedded scope identifiers if necessary.
846          * in6_clearscope will touch the addresses only when necessary.
847          */
848         in6_clearscope(&ip6->ip6_src);
849         in6_clearscope(&ip6->ip6_dst);
850
851         /*
852          * If the outgoing packet contains a hop-by-hop options header,
853          * it must be examined and processed even by the source node.
854          * (RFC 2460, section 4.)
855          */
856         if (exthdrs.ip6e_hbh) {
857                 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
858                 u_int32_t dummy; /* XXX unused */
859                 u_int32_t plen = 0; /* XXX: ip6_process will check the value */
860
861 #ifdef DIAGNOSTIC
862                 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
863                         panic("ip6e_hbh is not contiguous");
864 #endif
865                 /*
866                  *  XXX: if we have to send an ICMPv6 error to the sender,
867                  *       we need the M_LOOP flag since icmp6_error() expects
868                  *       the IPv6 and the hop-by-hop options header are
869                  *       contiguous unless the flag is set.
870                  */
871                 m->m_flags |= M_LOOP;
872                 m->m_pkthdr.rcvif = ifp;
873                 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
874                     ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
875                     &dummy, &plen) < 0) {
876                         /* m was already freed at this point */
877                         error = EINVAL;/* better error? */
878                         goto done;
879                 }
880                 m->m_flags &= ~M_LOOP; /* XXX */
881                 m->m_pkthdr.rcvif = NULL;
882         }
883
884         /* Jump over all PFIL processing if hooks are not active. */
885         if (!PFIL_HOOKED(&V_inet6_pfil_hook))
886                 goto passout;
887
888         odst = ip6->ip6_dst;
889         /* Run through list of hooks for output packets. */
890         error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
891         if (error != 0 || m == NULL)
892                 goto done;
893         ip6 = mtod(m, struct ip6_hdr *);
894
895         /* See if destination IP address was changed by packet filter. */
896         if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
897                 m->m_flags |= M_SKIP_FIREWALL;
898                 /* If destination is now ourself drop to ip6_input(). */
899                 if (in6_localip(&ip6->ip6_dst)) {
900                         m->m_flags |= M_FASTFWD_OURS;
901                         if (m->m_pkthdr.rcvif == NULL)
902                                 m->m_pkthdr.rcvif = V_loif;
903                         if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
904                                 m->m_pkthdr.csum_flags |=
905                                     CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
906                                 m->m_pkthdr.csum_data = 0xffff;
907                         }
908 #ifdef SCTP
909                         if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
910                                 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
911 #endif
912                         error = netisr_queue(NETISR_IPV6, m);
913                         goto done;
914                 } else
915                         goto again;     /* Redo the routing table lookup. */
916         }
917
918 #ifdef IPFIREWALL_FORWARD
919         /* See if local, if yes, send it to netisr. */
920         if (m->m_flags & M_FASTFWD_OURS) {
921                 if (m->m_pkthdr.rcvif == NULL)
922                         m->m_pkthdr.rcvif = V_loif;
923                 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
924                         m->m_pkthdr.csum_flags |=
925                             CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
926                         m->m_pkthdr.csum_data = 0xffff;
927                 }
928 #ifdef SCTP
929                 if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
930                         m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
931 #endif
932                 error = netisr_queue(NETISR_IPV6, m);
933                 goto done;
934         }
935         /* Or forward to some other address? */
936         fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
937         if (fwd_tag) {
938                 dst = (struct sockaddr_in6 *)&ro->ro_dst;
939                 bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in6));
940                 m->m_flags |= M_SKIP_FIREWALL;
941                 m_tag_delete(m, fwd_tag);
942                 goto again;
943         }
944 #endif /* IPFIREWALL_FORWARD */
945
946 passout:
947         /*
948          * Send the packet to the outgoing interface.
949          * If necessary, do IPv6 fragmentation before sending.
950          *
951          * the logic here is rather complex:
952          * 1: normal case (dontfrag == 0, alwaysfrag == 0)
953          * 1-a: send as is if tlen <= path mtu
954          * 1-b: fragment if tlen > path mtu
955          *
956          * 2: if user asks us not to fragment (dontfrag == 1)
957          * 2-a: send as is if tlen <= interface mtu
958          * 2-b: error if tlen > interface mtu
959          *
960          * 3: if we always need to attach fragment header (alwaysfrag == 1)
961          *      always fragment
962          *
963          * 4: if dontfrag == 1 && alwaysfrag == 1
964          *      error, as we cannot handle this conflicting request
965          */
966         sw_csum = m->m_pkthdr.csum_flags;
967         if (!hdrsplit) {
968                 tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0;
969                 sw_csum &= ~ifp->if_hwassist;
970         } else
971                 tso = 0;
972         /*
973          * If we added extension headers, we will not do TSO and calculate the
974          * checksums ourselves for now.
975          * XXX-BZ  Need a framework to know when the NIC can handle it, even
976          * with ext. hdrs.
977          */
978         if (sw_csum & CSUM_DELAY_DATA_IPV6) {
979                 sw_csum &= ~CSUM_DELAY_DATA_IPV6;
980                 in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
981         }
982 #ifdef SCTP
983         if (sw_csum & CSUM_SCTP_IPV6) {
984                 sw_csum &= ~CSUM_SCTP_IPV6;
985                 sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
986         }
987 #endif
988         m->m_pkthdr.csum_flags &= ifp->if_hwassist;
989         tlen = m->m_pkthdr.len;
990
991         if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso)
992                 dontfrag = 1;
993         else
994                 dontfrag = 0;
995         if (dontfrag && alwaysfrag) {   /* case 4 */
996                 /* conflicting request - can't transmit */
997                 error = EMSGSIZE;
998                 goto bad;
999         }
1000         if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) {      /* case 2-b */
1001                 /*
1002                  * Even if the DONTFRAG option is specified, we cannot send the
1003                  * packet when the data length is larger than the MTU of the
1004                  * outgoing interface.
1005                  * Notify the error by sending IPV6_PATHMTU ancillary data as
1006                  * well as returning an error code (the latter is not described
1007                  * in the API spec.)
1008                  */
1009                 u_int32_t mtu32;
1010                 struct ip6ctlparam ip6cp;
1011
1012                 mtu32 = (u_int32_t)mtu;
1013                 bzero(&ip6cp, sizeof(ip6cp));
1014                 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1015                 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1016                     (void *)&ip6cp);
1017
1018                 error = EMSGSIZE;
1019                 goto bad;
1020         }
1021
1022         /*
1023          * transmit packet without fragmentation
1024          */
1025         if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
1026                 struct in6_ifaddr *ia6;
1027
1028                 ip6 = mtod(m, struct ip6_hdr *);
1029                 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1030                 if (ia6) {
1031                         /* Record statistics for this interface address. */
1032                         ia6->ia_ifa.if_opackets++;
1033                         ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
1034                         ifa_free(&ia6->ia_ifa);
1035                 }
1036                 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1037                 goto done;
1038         }
1039
1040         /*
1041          * try to fragment the packet.  case 1-b and 3
1042          */
1043         if (mtu < IPV6_MMTU) {
1044                 /* path MTU cannot be less than IPV6_MMTU */
1045                 error = EMSGSIZE;
1046                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1047                 goto bad;
1048         } else if (ip6->ip6_plen == 0) {
1049                 /* jumbo payload cannot be fragmented */
1050                 error = EMSGSIZE;
1051                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1052                 goto bad;
1053         } else {
1054                 struct mbuf **mnext, *m_frgpart;
1055                 struct ip6_frag *ip6f;
1056                 u_int32_t id = htonl(ip6_randomid());
1057                 u_char nextproto;
1058
1059                 int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
1060
1061                 /*
1062                  * Too large for the destination or interface;
1063                  * fragment if possible.
1064                  * Must be able to put at least 8 bytes per fragment.
1065                  */
1066                 hlen = unfragpartlen;
1067                 if (mtu > IPV6_MAXPACKET)
1068                         mtu = IPV6_MAXPACKET;
1069
1070                 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1071                 if (len < 8) {
1072                         error = EMSGSIZE;
1073                         in6_ifstat_inc(ifp, ifs6_out_fragfail);
1074                         goto bad;
1075                 }
1076
1077                 /*
1078                  * Verify that we have any chance at all of being able to queue
1079                  *      the packet or packet fragments
1080                  */
1081                 if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
1082                     < tlen  /* - hlen */)) {
1083                         error = ENOBUFS;
1084                         V_ip6stat.ip6s_odropped++;
1085                         goto bad;
1086                 }
1087
1088
1089                 /*
1090                  * If the interface will not calculate checksums on
1091                  * fragmented packets, then do it here.
1092                  * XXX-BZ handle the hw offloading case.  Need flags.
1093                  */
1094                 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
1095                         in6_delayed_cksum(m, plen, hlen);
1096                         m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
1097                 }
1098 #ifdef SCTP
1099                 if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
1100                         sctp_delayed_cksum(m, hlen);
1101                         m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
1102                 }
1103 #endif
1104                 mnext = &m->m_nextpkt;
1105
1106                 /*
1107                  * Change the next header field of the last header in the
1108                  * unfragmentable part.
1109                  */
1110                 if (exthdrs.ip6e_rthdr) {
1111                         nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1112                         *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1113                 } else if (exthdrs.ip6e_dest1) {
1114                         nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1115                         *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1116                 } else if (exthdrs.ip6e_hbh) {
1117                         nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1118                         *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1119                 } else {
1120                         nextproto = ip6->ip6_nxt;
1121                         ip6->ip6_nxt = IPPROTO_FRAGMENT;
1122                 }
1123
1124                 /*
1125                  * Loop through length of segment after first fragment,
1126                  * make new header and copy data of each part and link onto
1127                  * chain.
1128                  */
1129                 m0 = m;
1130                 for (off = hlen; off < tlen; off += len) {
1131                         MGETHDR(m, M_DONTWAIT, MT_HEADER);
1132                         if (!m) {
1133                                 error = ENOBUFS;
1134                                 V_ip6stat.ip6s_odropped++;
1135                                 goto sendorfree;
1136                         }
1137                         m->m_pkthdr.rcvif = NULL;
1138                         m->m_flags = m0->m_flags & M_COPYFLAGS; /* incl. FIB */
1139                         *mnext = m;
1140                         mnext = &m->m_nextpkt;
1141                         m->m_data += max_linkhdr;
1142                         mhip6 = mtod(m, struct ip6_hdr *);
1143                         *mhip6 = *ip6;
1144                         m->m_len = sizeof(*mhip6);
1145                         error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1146                         if (error) {
1147                                 V_ip6stat.ip6s_odropped++;
1148                                 goto sendorfree;
1149                         }
1150                         ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1151                         if (off + len >= tlen)
1152                                 len = tlen - off;
1153                         else
1154                                 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1155                         mhip6->ip6_plen = htons((u_short)(len + hlen +
1156                             sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1157                         if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1158                                 error = ENOBUFS;
1159                                 V_ip6stat.ip6s_odropped++;
1160                                 goto sendorfree;
1161                         }
1162                         m_cat(m, m_frgpart);
1163                         m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1164                         m->m_pkthdr.rcvif = NULL;
1165                         ip6f->ip6f_reserved = 0;
1166                         ip6f->ip6f_ident = id;
1167                         ip6f->ip6f_nxt = nextproto;
1168                         V_ip6stat.ip6s_ofragments++;
1169                         in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1170                 }
1171
1172                 in6_ifstat_inc(ifp, ifs6_out_fragok);
1173         }
1174
1175         /*
1176          * Remove leading garbages.
1177          */
1178 sendorfree:
1179         m = m0->m_nextpkt;
1180         m0->m_nextpkt = 0;
1181         m_freem(m0);
1182         for (m0 = m; m; m = m0) {
1183                 m0 = m->m_nextpkt;
1184                 m->m_nextpkt = 0;
1185                 if (error == 0) {
1186                         /* Record statistics for this interface address. */
1187                         if (ia) {
1188                                 ia->ia_ifa.if_opackets++;
1189                                 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1190                         }
1191                         error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1192                 } else
1193                         m_freem(m);
1194         }
1195
1196         if (error == 0)
1197                 V_ip6stat.ip6s_fragmented++;
1198
1199 done:
1200         if (ro == &ip6route)
1201                 RO_RTFREE(ro);
1202         if (ro_pmtu == &ip6route)
1203                 RO_RTFREE(ro_pmtu);
1204 #ifdef IPSEC
1205         if (sp != NULL)
1206                 KEY_FREESP(&sp);
1207 #endif
1208
1209         return (error);
1210
1211 freehdrs:
1212         m_freem(exthdrs.ip6e_hbh);      /* m_freem will check if mbuf is 0 */
1213         m_freem(exthdrs.ip6e_dest1);
1214         m_freem(exthdrs.ip6e_rthdr);
1215         m_freem(exthdrs.ip6e_dest2);
1216         /* FALLTHROUGH */
1217 bad:
1218         if (m)
1219                 m_freem(m);
1220         goto done;
1221 }
1222
1223 static int
1224 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1225 {
1226         struct mbuf *m;
1227
1228         if (hlen > MCLBYTES)
1229                 return (ENOBUFS); /* XXX */
1230
1231         MGET(m, M_DONTWAIT, MT_DATA);
1232         if (!m)
1233                 return (ENOBUFS);
1234
1235         if (hlen > MLEN) {
1236                 MCLGET(m, M_DONTWAIT);
1237                 if ((m->m_flags & M_EXT) == 0) {
1238                         m_free(m);
1239                         return (ENOBUFS);
1240                 }
1241         }
1242         m->m_len = hlen;
1243         if (hdr)
1244                 bcopy(hdr, mtod(m, caddr_t), hlen);
1245
1246         *mp = m;
1247         return (0);
1248 }
1249
1250 /*
1251  * Insert jumbo payload option.
1252  */
1253 static int
1254 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1255 {
1256         struct mbuf *mopt;
1257         u_char *optbuf;
1258         u_int32_t v;
1259
1260 #define JUMBOOPTLEN     8       /* length of jumbo payload option and padding */
1261
1262         /*
1263          * If there is no hop-by-hop options header, allocate new one.
1264          * If there is one but it doesn't have enough space to store the
1265          * jumbo payload option, allocate a cluster to store the whole options.
1266          * Otherwise, use it to store the options.
1267          */
1268         if (exthdrs->ip6e_hbh == 0) {
1269                 MGET(mopt, M_DONTWAIT, MT_DATA);
1270                 if (mopt == 0)
1271                         return (ENOBUFS);
1272                 mopt->m_len = JUMBOOPTLEN;
1273                 optbuf = mtod(mopt, u_char *);
1274                 optbuf[1] = 0;  /* = ((JUMBOOPTLEN) >> 3) - 1 */
1275                 exthdrs->ip6e_hbh = mopt;
1276         } else {
1277                 struct ip6_hbh *hbh;
1278
1279                 mopt = exthdrs->ip6e_hbh;
1280                 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1281                         /*
1282                          * XXX assumption:
1283                          * - exthdrs->ip6e_hbh is not referenced from places
1284                          *   other than exthdrs.
1285                          * - exthdrs->ip6e_hbh is not an mbuf chain.
1286                          */
1287                         int oldoptlen = mopt->m_len;
1288                         struct mbuf *n;
1289
1290                         /*
1291                          * XXX: give up if the whole (new) hbh header does
1292                          * not fit even in an mbuf cluster.
1293                          */
1294                         if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1295                                 return (ENOBUFS);
1296
1297                         /*
1298                          * As a consequence, we must always prepare a cluster
1299                          * at this point.
1300                          */
1301                         MGET(n, M_DONTWAIT, MT_DATA);
1302                         if (n) {
1303                                 MCLGET(n, M_DONTWAIT);
1304                                 if ((n->m_flags & M_EXT) == 0) {
1305                                         m_freem(n);
1306                                         n = NULL;
1307                                 }
1308                         }
1309                         if (!n)
1310                                 return (ENOBUFS);
1311                         n->m_len = oldoptlen + JUMBOOPTLEN;
1312                         bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1313                             oldoptlen);
1314                         optbuf = mtod(n, caddr_t) + oldoptlen;
1315                         m_freem(mopt);
1316                         mopt = exthdrs->ip6e_hbh = n;
1317                 } else {
1318                         optbuf = mtod(mopt, u_char *) + mopt->m_len;
1319                         mopt->m_len += JUMBOOPTLEN;
1320                 }
1321                 optbuf[0] = IP6OPT_PADN;
1322                 optbuf[1] = 1;
1323
1324                 /*
1325                  * Adjust the header length according to the pad and
1326                  * the jumbo payload option.
1327                  */
1328                 hbh = mtod(mopt, struct ip6_hbh *);
1329                 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1330         }
1331
1332         /* fill in the option. */
1333         optbuf[2] = IP6OPT_JUMBO;
1334         optbuf[3] = 4;
1335         v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1336         bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1337
1338         /* finally, adjust the packet header length */
1339         exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1340
1341         return (0);
1342 #undef JUMBOOPTLEN
1343 }
1344
1345 /*
1346  * Insert fragment header and copy unfragmentable header portions.
1347  */
1348 static int
1349 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1350     struct ip6_frag **frghdrp)
1351 {
1352         struct mbuf *n, *mlast;
1353
1354         if (hlen > sizeof(struct ip6_hdr)) {
1355                 n = m_copym(m0, sizeof(struct ip6_hdr),
1356                     hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1357                 if (n == 0)
1358                         return (ENOBUFS);
1359                 m->m_next = n;
1360         } else
1361                 n = m;
1362
1363         /* Search for the last mbuf of unfragmentable part. */
1364         for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1365                 ;
1366
1367         if ((mlast->m_flags & M_EXT) == 0 &&
1368             M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1369                 /* use the trailing space of the last mbuf for the fragment hdr */
1370                 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1371                     mlast->m_len);
1372                 mlast->m_len += sizeof(struct ip6_frag);
1373                 m->m_pkthdr.len += sizeof(struct ip6_frag);
1374         } else {
1375                 /* allocate a new mbuf for the fragment header */
1376                 struct mbuf *mfrg;
1377
1378                 MGET(mfrg, M_DONTWAIT, MT_DATA);
1379                 if (mfrg == 0)
1380                         return (ENOBUFS);
1381                 mfrg->m_len = sizeof(struct ip6_frag);
1382                 *frghdrp = mtod(mfrg, struct ip6_frag *);
1383                 mlast->m_next = mfrg;
1384         }
1385
1386         return (0);
1387 }
1388
1389 static int
1390 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1391     struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
1392     int *alwaysfragp, u_int fibnum)
1393 {
1394         u_int32_t mtu = 0;
1395         int alwaysfrag = 0;
1396         int error = 0;
1397
1398         if (ro_pmtu != ro) {
1399                 /* The first hop and the final destination may differ. */
1400                 struct sockaddr_in6 *sa6_dst =
1401                     (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1402                 if (ro_pmtu->ro_rt &&
1403                     ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1404                      !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1405                         RTFREE(ro_pmtu->ro_rt);
1406                         ro_pmtu->ro_rt = (struct rtentry *)NULL;
1407                 }
1408                 if (ro_pmtu->ro_rt == NULL) {
1409                         bzero(sa6_dst, sizeof(*sa6_dst));
1410                         sa6_dst->sin6_family = AF_INET6;
1411                         sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1412                         sa6_dst->sin6_addr = *dst;
1413
1414                         in6_rtalloc(ro_pmtu, fibnum);
1415                 }
1416         }
1417         if (ro_pmtu->ro_rt) {
1418                 u_int32_t ifmtu;
1419                 struct in_conninfo inc;
1420
1421                 bzero(&inc, sizeof(inc));
1422                 inc.inc_flags |= INC_ISIPV6;
1423                 inc.inc6_faddr = *dst;
1424
1425                 if (ifp == NULL)
1426                         ifp = ro_pmtu->ro_rt->rt_ifp;
1427                 ifmtu = IN6_LINKMTU(ifp);
1428                 mtu = tcp_hc_getmtu(&inc);
1429                 if (mtu)
1430                         mtu = min(mtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);
1431                 else
1432                         mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1433                 if (mtu == 0)
1434                         mtu = ifmtu;
1435                 else if (mtu < IPV6_MMTU) {
1436                         /*
1437                          * RFC2460 section 5, last paragraph:
1438                          * if we record ICMPv6 too big message with
1439                          * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1440                          * or smaller, with framgent header attached.
1441                          * (fragment header is needed regardless from the
1442                          * packet size, for translators to identify packets)
1443                          */
1444                         alwaysfrag = 1;
1445                         mtu = IPV6_MMTU;
1446                 } else if (mtu > ifmtu) {
1447                         /*
1448                          * The MTU on the route is larger than the MTU on
1449                          * the interface!  This shouldn't happen, unless the
1450                          * MTU of the interface has been changed after the
1451                          * interface was brought up.  Change the MTU in the
1452                          * route to match the interface MTU (as long as the
1453                          * field isn't locked).
1454                          */
1455                         mtu = ifmtu;
1456                         ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1457                 }
1458         } else if (ifp) {
1459                 mtu = IN6_LINKMTU(ifp);
1460         } else
1461                 error = EHOSTUNREACH; /* XXX */
1462
1463         *mtup = mtu;
1464         if (alwaysfragp)
1465                 *alwaysfragp = alwaysfrag;
1466         return (error);
1467 }
1468
1469 /*
1470  * IP6 socket option processing.
1471  */
1472 int
1473 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1474 {
1475         int optdatalen, uproto;
1476         void *optdata;
1477         struct inpcb *in6p = sotoinpcb(so);
1478         int error, optval;
1479         int level, op, optname;
1480         int optlen;
1481         struct thread *td;
1482
1483         level = sopt->sopt_level;
1484         op = sopt->sopt_dir;
1485         optname = sopt->sopt_name;
1486         optlen = sopt->sopt_valsize;
1487         td = sopt->sopt_td;
1488         error = 0;
1489         optval = 0;
1490         uproto = (int)so->so_proto->pr_protocol;
1491
1492         if (level != IPPROTO_IPV6) {
1493                 error = EINVAL;
1494
1495                 if (sopt->sopt_level == SOL_SOCKET &&
1496                     sopt->sopt_dir == SOPT_SET) {
1497                         switch (sopt->sopt_name) {
1498                         case SO_REUSEADDR:
1499                                 INP_WLOCK(in6p);
1500                                 if (IN_MULTICAST(ntohl(in6p->inp_laddr.s_addr))) {
1501                                         if ((so->so_options &
1502                                             (SO_REUSEADDR | SO_REUSEPORT)) != 0)
1503                                                 in6p->inp_flags2 |= INP_REUSEPORT;
1504                                         else
1505                                                 in6p->inp_flags2 &= ~INP_REUSEPORT;
1506                                 }
1507                                 INP_WUNLOCK(in6p);
1508                                 error = 0;
1509                                 break;
1510                         case SO_REUSEPORT:
1511                                 INP_WLOCK(in6p);
1512                                 if ((so->so_options & SO_REUSEPORT) != 0)
1513                                         in6p->inp_flags2 |= INP_REUSEPORT;
1514                                 else
1515                                         in6p->inp_flags2 &= ~INP_REUSEPORT;
1516                                 INP_WUNLOCK(in6p);
1517                                 error = 0;
1518                                 break;
1519                         case SO_SETFIB:
1520                                 INP_WLOCK(in6p);
1521                                 in6p->inp_inc.inc_fibnum = so->so_fibnum;
1522                                 INP_WUNLOCK(in6p);
1523                                 error = 0;
1524                                 break;
1525                         default:
1526                                 break;
1527                         }
1528                 }
1529         } else {                /* level == IPPROTO_IPV6 */
1530                 switch (op) {
1531
1532                 case SOPT_SET:
1533                         switch (optname) {
1534                         case IPV6_2292PKTOPTIONS:
1535 #ifdef IPV6_PKTOPTIONS
1536                         case IPV6_PKTOPTIONS:
1537 #endif
1538                         {
1539                                 struct mbuf *m;
1540
1541                                 error = soopt_getm(sopt, &m); /* XXX */
1542                                 if (error != 0)
1543                                         break;
1544                                 error = soopt_mcopyin(sopt, m); /* XXX */
1545                                 if (error != 0)
1546                                         break;
1547                                 error = ip6_pcbopts(&in6p->in6p_outputopts,
1548                                                     m, so, sopt);
1549                                 m_freem(m); /* XXX */
1550                                 break;
1551                         }
1552
1553                         /*
1554                          * Use of some Hop-by-Hop options or some
1555                          * Destination options, might require special
1556                          * privilege.  That is, normal applications
1557                          * (without special privilege) might be forbidden
1558                          * from setting certain options in outgoing packets,
1559                          * and might never see certain options in received
1560                          * packets. [RFC 2292 Section 6]
1561                          * KAME specific note:
1562                          *  KAME prevents non-privileged users from sending or
1563                          *  receiving ANY hbh/dst options in order to avoid
1564                          *  overhead of parsing options in the kernel.
1565                          */
1566                         case IPV6_RECVHOPOPTS:
1567                         case IPV6_RECVDSTOPTS:
1568                         case IPV6_RECVRTHDRDSTOPTS:
1569                                 if (td != NULL) {
1570                                         error = priv_check(td,
1571                                             PRIV_NETINET_SETHDROPTS);
1572                                         if (error)
1573                                                 break;
1574                                 }
1575                                 /* FALLTHROUGH */
1576                         case IPV6_UNICAST_HOPS:
1577                         case IPV6_HOPLIMIT:
1578                         case IPV6_FAITH:
1579
1580                         case IPV6_RECVPKTINFO:
1581                         case IPV6_RECVHOPLIMIT:
1582                         case IPV6_RECVRTHDR:
1583                         case IPV6_RECVPATHMTU:
1584                         case IPV6_RECVTCLASS:
1585                         case IPV6_V6ONLY:
1586                         case IPV6_AUTOFLOWLABEL:
1587                         case IPV6_BINDANY:
1588                                 if (optname == IPV6_BINDANY && td != NULL) {
1589                                         error = priv_check(td,
1590                                             PRIV_NETINET_BINDANY);
1591                                         if (error)
1592                                                 break;
1593                                 }
1594
1595                                 if (optlen != sizeof(int)) {
1596                                         error = EINVAL;
1597                                         break;
1598                                 }
1599                                 error = sooptcopyin(sopt, &optval,
1600                                         sizeof optval, sizeof optval);
1601                                 if (error)
1602                                         break;
1603                                 switch (optname) {
1604
1605                                 case IPV6_UNICAST_HOPS:
1606                                         if (optval < -1 || optval >= 256)
1607                                                 error = EINVAL;
1608                                         else {
1609                                                 /* -1 = kernel default */
1610                                                 in6p->in6p_hops = optval;
1611                                                 if ((in6p->inp_vflag &
1612                                                      INP_IPV4) != 0)
1613                                                         in6p->inp_ip_ttl = optval;
1614                                         }
1615                                         break;
1616 #define OPTSET(bit) \
1617 do { \
1618         INP_WLOCK(in6p); \
1619         if (optval) \
1620                 in6p->inp_flags |= (bit); \
1621         else \
1622                 in6p->inp_flags &= ~(bit); \
1623         INP_WUNLOCK(in6p); \
1624 } while (/*CONSTCOND*/ 0)
1625 #define OPTSET2292(bit) \
1626 do { \
1627         INP_WLOCK(in6p); \
1628         in6p->inp_flags |= IN6P_RFC2292; \
1629         if (optval) \
1630                 in6p->inp_flags |= (bit); \
1631         else \
1632                 in6p->inp_flags &= ~(bit); \
1633         INP_WUNLOCK(in6p); \
1634 } while (/*CONSTCOND*/ 0)
1635 #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
1636
1637                                 case IPV6_RECVPKTINFO:
1638                                         /* cannot mix with RFC2292 */
1639                                         if (OPTBIT(IN6P_RFC2292)) {
1640                                                 error = EINVAL;
1641                                                 break;
1642                                         }
1643                                         OPTSET(IN6P_PKTINFO);
1644                                         break;
1645
1646                                 case IPV6_HOPLIMIT:
1647                                 {
1648                                         struct ip6_pktopts **optp;
1649
1650                                         /* cannot mix with RFC2292 */
1651                                         if (OPTBIT(IN6P_RFC2292)) {
1652                                                 error = EINVAL;
1653                                                 break;
1654                                         }
1655                                         optp = &in6p->in6p_outputopts;
1656                                         error = ip6_pcbopt(IPV6_HOPLIMIT,
1657                                             (u_char *)&optval, sizeof(optval),
1658                                             optp, (td != NULL) ? td->td_ucred :
1659                                             NULL, uproto);
1660                                         break;
1661                                 }
1662
1663                                 case IPV6_RECVHOPLIMIT:
1664                                         /* cannot mix with RFC2292 */
1665                                         if (OPTBIT(IN6P_RFC2292)) {
1666                                                 error = EINVAL;
1667                                                 break;
1668                                         }
1669                                         OPTSET(IN6P_HOPLIMIT);
1670                                         break;
1671
1672                                 case IPV6_RECVHOPOPTS:
1673                                         /* cannot mix with RFC2292 */
1674                                         if (OPTBIT(IN6P_RFC2292)) {
1675                                                 error = EINVAL;
1676                                                 break;
1677                                         }
1678                                         OPTSET(IN6P_HOPOPTS);
1679                                         break;
1680
1681                                 case IPV6_RECVDSTOPTS:
1682                                         /* cannot mix with RFC2292 */
1683                                         if (OPTBIT(IN6P_RFC2292)) {
1684                                                 error = EINVAL;
1685                                                 break;
1686                                         }
1687                                         OPTSET(IN6P_DSTOPTS);
1688                                         break;
1689
1690                                 case IPV6_RECVRTHDRDSTOPTS:
1691                                         /* cannot mix with RFC2292 */
1692                                         if (OPTBIT(IN6P_RFC2292)) {
1693                                                 error = EINVAL;
1694                                                 break;
1695                                         }
1696                                         OPTSET(IN6P_RTHDRDSTOPTS);
1697                                         break;
1698
1699                                 case IPV6_RECVRTHDR:
1700                                         /* cannot mix with RFC2292 */
1701                                         if (OPTBIT(IN6P_RFC2292)) {
1702                                                 error = EINVAL;
1703                                                 break;
1704                                         }
1705                                         OPTSET(IN6P_RTHDR);
1706                                         break;
1707
1708                                 case IPV6_FAITH:
1709                                         OPTSET(INP_FAITH);
1710                                         break;
1711
1712                                 case IPV6_RECVPATHMTU:
1713                                         /*
1714                                          * We ignore this option for TCP
1715                                          * sockets.
1716                                          * (RFC3542 leaves this case
1717                                          * unspecified.)
1718                                          */
1719                                         if (uproto != IPPROTO_TCP)
1720                                                 OPTSET(IN6P_MTU);
1721                                         break;
1722
1723                                 case IPV6_V6ONLY:
1724                                         /*
1725                                          * make setsockopt(IPV6_V6ONLY)
1726                                          * available only prior to bind(2).
1727                                          * see ipng mailing list, Jun 22 2001.
1728                                          */
1729                                         if (in6p->inp_lport ||
1730                                             !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1731                                                 error = EINVAL;
1732                                                 break;
1733                                         }
1734                                         OPTSET(IN6P_IPV6_V6ONLY);
1735                                         if (optval)
1736                                                 in6p->inp_vflag &= ~INP_IPV4;
1737                                         else
1738                                                 in6p->inp_vflag |= INP_IPV4;
1739                                         break;
1740                                 case IPV6_RECVTCLASS:
1741                                         /* cannot mix with RFC2292 XXX */
1742                                         if (OPTBIT(IN6P_RFC2292)) {
1743                                                 error = EINVAL;
1744                                                 break;
1745                                         }
1746                                         OPTSET(IN6P_TCLASS);
1747                                         break;
1748                                 case IPV6_AUTOFLOWLABEL:
1749                                         OPTSET(IN6P_AUTOFLOWLABEL);
1750                                         break;
1751
1752                                 case IPV6_BINDANY:
1753                                         OPTSET(INP_BINDANY);
1754                                         break;
1755                                 }
1756                                 break;
1757
1758                         case IPV6_TCLASS:
1759                         case IPV6_DONTFRAG:
1760                         case IPV6_USE_MIN_MTU:
1761                         case IPV6_PREFER_TEMPADDR:
1762                                 if (optlen != sizeof(optval)) {
1763                                         error = EINVAL;
1764                                         break;
1765                                 }
1766                                 error = sooptcopyin(sopt, &optval,
1767                                         sizeof optval, sizeof optval);
1768                                 if (error)
1769                                         break;
1770                                 {
1771                                         struct ip6_pktopts **optp;
1772                                         optp = &in6p->in6p_outputopts;
1773                                         error = ip6_pcbopt(optname,
1774                                             (u_char *)&optval, sizeof(optval),
1775                                             optp, (td != NULL) ? td->td_ucred :
1776                                             NULL, uproto);
1777                                         break;
1778                                 }
1779
1780                         case IPV6_2292PKTINFO:
1781                         case IPV6_2292HOPLIMIT:
1782                         case IPV6_2292HOPOPTS:
1783                         case IPV6_2292DSTOPTS:
1784                         case IPV6_2292RTHDR:
1785                                 /* RFC 2292 */
1786                                 if (optlen != sizeof(int)) {
1787                                         error = EINVAL;
1788                                         break;
1789                                 }
1790                                 error = sooptcopyin(sopt, &optval,
1791                                         sizeof optval, sizeof optval);
1792                                 if (error)
1793                                         break;
1794                                 switch (optname) {
1795                                 case IPV6_2292PKTINFO:
1796                                         OPTSET2292(IN6P_PKTINFO);
1797                                         break;
1798                                 case IPV6_2292HOPLIMIT:
1799                                         OPTSET2292(IN6P_HOPLIMIT);
1800                                         break;
1801                                 case IPV6_2292HOPOPTS:
1802                                         /*
1803                                          * Check super-user privilege.
1804                                          * See comments for IPV6_RECVHOPOPTS.
1805                                          */
1806                                         if (td != NULL) {
1807                                                 error = priv_check(td,
1808                                                     PRIV_NETINET_SETHDROPTS);
1809                                                 if (error)
1810                                                         return (error);
1811                                         }
1812                                         OPTSET2292(IN6P_HOPOPTS);
1813                                         break;
1814                                 case IPV6_2292DSTOPTS:
1815                                         if (td != NULL) {
1816                                                 error = priv_check(td,
1817                                                     PRIV_NETINET_SETHDROPTS);
1818                                                 if (error)
1819                                                         return (error);
1820                                         }
1821                                         OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1822                                         break;
1823                                 case IPV6_2292RTHDR:
1824                                         OPTSET2292(IN6P_RTHDR);
1825                                         break;
1826                                 }
1827                                 break;
1828                         case IPV6_PKTINFO:
1829                         case IPV6_HOPOPTS:
1830                         case IPV6_RTHDR:
1831                         case IPV6_DSTOPTS:
1832                         case IPV6_RTHDRDSTOPTS:
1833                         case IPV6_NEXTHOP:
1834                         {
1835                                 /* new advanced API (RFC3542) */
1836                                 u_char *optbuf;
1837                                 u_char optbuf_storage[MCLBYTES];
1838                                 int optlen;
1839                                 struct ip6_pktopts **optp;
1840
1841                                 /* cannot mix with RFC2292 */
1842                                 if (OPTBIT(IN6P_RFC2292)) {
1843                                         error = EINVAL;
1844                                         break;
1845                                 }
1846
1847                                 /*
1848                                  * We only ensure valsize is not too large
1849                                  * here.  Further validation will be done
1850                                  * later.
1851                                  */
1852                                 error = sooptcopyin(sopt, optbuf_storage,
1853                                     sizeof(optbuf_storage), 0);
1854                                 if (error)
1855                                         break;
1856                                 optlen = sopt->sopt_valsize;
1857                                 optbuf = optbuf_storage;
1858                                 optp = &in6p->in6p_outputopts;
1859                                 error = ip6_pcbopt(optname, optbuf, optlen,
1860                                     optp, (td != NULL) ? td->td_ucred : NULL,
1861                                     uproto);
1862                                 break;
1863                         }
1864 #undef OPTSET
1865
1866                         case IPV6_MULTICAST_IF:
1867                         case IPV6_MULTICAST_HOPS:
1868                         case IPV6_MULTICAST_LOOP:
1869                         case IPV6_JOIN_GROUP:
1870                         case IPV6_LEAVE_GROUP:
1871                         case IPV6_MSFILTER:
1872                         case MCAST_BLOCK_SOURCE:
1873                         case MCAST_UNBLOCK_SOURCE:
1874                         case MCAST_JOIN_GROUP:
1875                         case MCAST_LEAVE_GROUP:
1876                         case MCAST_JOIN_SOURCE_GROUP:
1877                         case MCAST_LEAVE_SOURCE_GROUP:
1878                                 error = ip6_setmoptions(in6p, sopt);
1879                                 break;
1880
1881                         case IPV6_PORTRANGE:
1882                                 error = sooptcopyin(sopt, &optval,
1883                                     sizeof optval, sizeof optval);
1884                                 if (error)
1885                                         break;
1886
1887                                 INP_WLOCK(in6p);
1888                                 switch (optval) {
1889                                 case IPV6_PORTRANGE_DEFAULT:
1890                                         in6p->inp_flags &= ~(INP_LOWPORT);
1891                                         in6p->inp_flags &= ~(INP_HIGHPORT);
1892                                         break;
1893
1894                                 case IPV6_PORTRANGE_HIGH:
1895                                         in6p->inp_flags &= ~(INP_LOWPORT);
1896                                         in6p->inp_flags |= INP_HIGHPORT;
1897                                         break;
1898
1899                                 case IPV6_PORTRANGE_LOW:
1900                                         in6p->inp_flags &= ~(INP_HIGHPORT);
1901                                         in6p->inp_flags |= INP_LOWPORT;
1902                                         break;
1903
1904                                 default:
1905                                         error = EINVAL;
1906                                         break;
1907                                 }
1908                                 INP_WUNLOCK(in6p);
1909                                 break;
1910
1911 #ifdef IPSEC
1912                         case IPV6_IPSEC_POLICY:
1913                         {
1914                                 caddr_t req;
1915                                 struct mbuf *m;
1916
1917                                 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1918                                         break;
1919                                 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1920                                         break;
1921                                 req = mtod(m, caddr_t);
1922                                 error = ipsec_set_policy(in6p, optname, req,
1923                                     m->m_len, (sopt->sopt_td != NULL) ?
1924                                     sopt->sopt_td->td_ucred : NULL);
1925                                 m_freem(m);
1926                                 break;
1927                         }
1928 #endif /* IPSEC */
1929
1930                         default:
1931                                 error = ENOPROTOOPT;
1932                                 break;
1933                         }
1934                         break;
1935
1936                 case SOPT_GET:
1937                         switch (optname) {
1938
1939                         case IPV6_2292PKTOPTIONS:
1940 #ifdef IPV6_PKTOPTIONS
1941                         case IPV6_PKTOPTIONS:
1942 #endif
1943                                 /*
1944                                  * RFC3542 (effectively) deprecated the
1945                                  * semantics of the 2292-style pktoptions.
1946                                  * Since it was not reliable in nature (i.e.,
1947                                  * applications had to expect the lack of some
1948                                  * information after all), it would make sense
1949                                  * to simplify this part by always returning
1950                                  * empty data.
1951                                  */
1952                                 sopt->sopt_valsize = 0;
1953                                 break;
1954
1955                         case IPV6_RECVHOPOPTS:
1956                         case IPV6_RECVDSTOPTS:
1957                         case IPV6_RECVRTHDRDSTOPTS:
1958                         case IPV6_UNICAST_HOPS:
1959                         case IPV6_RECVPKTINFO:
1960                         case IPV6_RECVHOPLIMIT:
1961                         case IPV6_RECVRTHDR:
1962                         case IPV6_RECVPATHMTU:
1963
1964                         case IPV6_FAITH:
1965                         case IPV6_V6ONLY:
1966                         case IPV6_PORTRANGE:
1967                         case IPV6_RECVTCLASS:
1968                         case IPV6_AUTOFLOWLABEL:
1969                         case IPV6_BINDANY:
1970                                 switch (optname) {
1971
1972                                 case IPV6_RECVHOPOPTS:
1973                                         optval = OPTBIT(IN6P_HOPOPTS);
1974                                         break;
1975
1976                                 case IPV6_RECVDSTOPTS:
1977                                         optval = OPTBIT(IN6P_DSTOPTS);
1978                                         break;
1979
1980                                 case IPV6_RECVRTHDRDSTOPTS:
1981                                         optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1982                                         break;
1983
1984                                 case IPV6_UNICAST_HOPS:
1985                                         optval = in6p->in6p_hops;
1986                                         break;
1987
1988                                 case IPV6_RECVPKTINFO:
1989                                         optval = OPTBIT(IN6P_PKTINFO);
1990                                         break;
1991
1992                                 case IPV6_RECVHOPLIMIT:
1993                                         optval = OPTBIT(IN6P_HOPLIMIT);
1994                                         break;
1995
1996                                 case IPV6_RECVRTHDR:
1997                                         optval = OPTBIT(IN6P_RTHDR);
1998                                         break;
1999
2000                                 case IPV6_RECVPATHMTU:
2001                                         optval = OPTBIT(IN6P_MTU);
2002                                         break;
2003
2004                                 case IPV6_FAITH:
2005                                         optval = OPTBIT(INP_FAITH);
2006                                         break;
2007
2008                                 case IPV6_V6ONLY:
2009                                         optval = OPTBIT(IN6P_IPV6_V6ONLY);
2010                                         break;
2011
2012                                 case IPV6_PORTRANGE:
2013                                     {
2014                                         int flags;
2015                                         flags = in6p->inp_flags;
2016                                         if (flags & INP_HIGHPORT)
2017                                                 optval = IPV6_PORTRANGE_HIGH;
2018                                         else if (flags & INP_LOWPORT)
2019                                                 optval = IPV6_PORTRANGE_LOW;
2020                                         else
2021                                                 optval = 0;
2022                                         break;
2023                                     }
2024                                 case IPV6_RECVTCLASS:
2025                                         optval = OPTBIT(IN6P_TCLASS);
2026                                         break;
2027
2028                                 case IPV6_AUTOFLOWLABEL:
2029                                         optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2030                                         break;
2031
2032                                 case IPV6_BINDANY:
2033                                         optval = OPTBIT(INP_BINDANY);
2034                                         break;
2035                                 }
2036                                 if (error)
2037                                         break;
2038                                 error = sooptcopyout(sopt, &optval,
2039                                         sizeof optval);
2040                                 break;
2041
2042                         case IPV6_PATHMTU:
2043                         {
2044                                 u_long pmtu = 0;
2045                                 struct ip6_mtuinfo mtuinfo;
2046                                 struct route_in6 sro;
2047
2048                                 bzero(&sro, sizeof(sro));
2049
2050                                 if (!(so->so_state & SS_ISCONNECTED))
2051                                         return (ENOTCONN);
2052                                 /*
2053                                  * XXX: we do not consider the case of source
2054                                  * routing, or optional information to specify
2055                                  * the outgoing interface.
2056                                  */
2057                                 error = ip6_getpmtu(&sro, NULL, NULL,
2058                                     &in6p->in6p_faddr, &pmtu, NULL,
2059                                     so->so_fibnum);
2060                                 if (sro.ro_rt)
2061                                         RTFREE(sro.ro_rt);
2062                                 if (error)
2063                                         break;
2064                                 if (pmtu > IPV6_MAXPACKET)
2065                                         pmtu = IPV6_MAXPACKET;
2066
2067                                 bzero(&mtuinfo, sizeof(mtuinfo));
2068                                 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2069                                 optdata = (void *)&mtuinfo;
2070                                 optdatalen = sizeof(mtuinfo);
2071                                 error = sooptcopyout(sopt, optdata,
2072                                     optdatalen);
2073                                 break;
2074                         }
2075
2076                         case IPV6_2292PKTINFO:
2077                         case IPV6_2292HOPLIMIT:
2078                         case IPV6_2292HOPOPTS:
2079                         case IPV6_2292RTHDR:
2080                         case IPV6_2292DSTOPTS:
2081                                 switch (optname) {
2082                                 case IPV6_2292PKTINFO:
2083                                         optval = OPTBIT(IN6P_PKTINFO);
2084                                         break;
2085                                 case IPV6_2292HOPLIMIT:
2086                                         optval = OPTBIT(IN6P_HOPLIMIT);
2087                                         break;
2088                                 case IPV6_2292HOPOPTS:
2089                                         optval = OPTBIT(IN6P_HOPOPTS);
2090                                         break;
2091                                 case IPV6_2292RTHDR:
2092                                         optval = OPTBIT(IN6P_RTHDR);
2093                                         break;
2094                                 case IPV6_2292DSTOPTS:
2095                                         optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2096                                         break;
2097                                 }
2098                                 error = sooptcopyout(sopt, &optval,
2099                                     sizeof optval);
2100                                 break;
2101                         case IPV6_PKTINFO:
2102                         case IPV6_HOPOPTS:
2103                         case IPV6_RTHDR:
2104                         case IPV6_DSTOPTS:
2105                         case IPV6_RTHDRDSTOPTS:
2106                         case IPV6_NEXTHOP:
2107                         case IPV6_TCLASS:
2108                         case IPV6_DONTFRAG:
2109                         case IPV6_USE_MIN_MTU:
2110                         case IPV6_PREFER_TEMPADDR:
2111                                 error = ip6_getpcbopt(in6p->in6p_outputopts,
2112                                     optname, sopt);
2113                                 break;
2114
2115                         case IPV6_MULTICAST_IF:
2116                         case IPV6_MULTICAST_HOPS:
2117                         case IPV6_MULTICAST_LOOP:
2118                         case IPV6_MSFILTER:
2119                                 error = ip6_getmoptions(in6p, sopt);
2120                                 break;
2121
2122 #ifdef IPSEC
2123                         case IPV6_IPSEC_POLICY:
2124                           {
2125                                 caddr_t req = NULL;
2126                                 size_t len = 0;
2127                                 struct mbuf *m = NULL;
2128                                 struct mbuf **mp = &m;
2129                                 size_t ovalsize = sopt->sopt_valsize;
2130                                 caddr_t oval = (caddr_t)sopt->sopt_val;
2131
2132                                 error = soopt_getm(sopt, &m); /* XXX */
2133                                 if (error != 0)
2134                                         break;
2135                                 error = soopt_mcopyin(sopt, m); /* XXX */
2136                                 if (error != 0)
2137                                         break;
2138                                 sopt->sopt_valsize = ovalsize;
2139                                 sopt->sopt_val = oval;
2140                                 if (m) {
2141                                         req = mtod(m, caddr_t);
2142                                         len = m->m_len;
2143                                 }
2144                                 error = ipsec_get_policy(in6p, req, len, mp);
2145                                 if (error == 0)
2146                                         error = soopt_mcopyout(sopt, m); /* XXX */
2147                                 if (error == 0 && m)
2148                                         m_freem(m);
2149                                 break;
2150                           }
2151 #endif /* IPSEC */
2152
2153                         default:
2154                                 error = ENOPROTOOPT;
2155                                 break;
2156                         }
2157                         break;
2158                 }
2159         }
2160         return (error);
2161 }
2162
2163 int
2164 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2165 {
2166         int error = 0, optval, optlen;
2167         const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2168         struct inpcb *in6p = sotoinpcb(so);
2169         int level, op, optname;
2170
2171         level = sopt->sopt_level;
2172         op = sopt->sopt_dir;
2173         optname = sopt->sopt_name;
2174         optlen = sopt->sopt_valsize;
2175
2176         if (level != IPPROTO_IPV6) {
2177                 return (EINVAL);
2178         }
2179
2180         switch (optname) {
2181         case IPV6_CHECKSUM:
2182                 /*
2183                  * For ICMPv6 sockets, no modification allowed for checksum
2184                  * offset, permit "no change" values to help existing apps.
2185                  *
2186                  * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2187                  * for an ICMPv6 socket will fail."
2188                  * The current behavior does not meet RFC3542.
2189                  */
2190                 switch (op) {
2191                 case SOPT_SET:
2192                         if (optlen != sizeof(int)) {
2193                                 error = EINVAL;
2194                                 break;
2195                         }
2196                         error = sooptcopyin(sopt, &optval, sizeof(optval),
2197                                             sizeof(optval));
2198                         if (error)
2199                                 break;
2200                         if ((optval % 2) != 0) {
2201                                 /* the API assumes even offset values */
2202                                 error = EINVAL;
2203                         } else if (so->so_proto->pr_protocol ==
2204                             IPPROTO_ICMPV6) {
2205                                 if (optval != icmp6off)
2206                                         error = EINVAL;
2207                         } else
2208                                 in6p->in6p_cksum = optval;
2209                         break;
2210
2211                 case SOPT_GET:
2212                         if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2213                                 optval = icmp6off;
2214                         else
2215                                 optval = in6p->in6p_cksum;
2216
2217                         error = sooptcopyout(sopt, &optval, sizeof(optval));
2218                         break;
2219
2220                 default:
2221                         error = EINVAL;
2222                         break;
2223                 }
2224                 break;
2225
2226         default:
2227                 error = ENOPROTOOPT;
2228                 break;
2229         }
2230
2231         return (error);
2232 }
2233
2234 /*
2235  * Set up IP6 options in pcb for insertion in output packets or
2236  * specifying behavior of outgoing packets.
2237  */
2238 static int
2239 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2240     struct socket *so, struct sockopt *sopt)
2241 {
2242         struct ip6_pktopts *opt = *pktopt;
2243         int error = 0;
2244         struct thread *td = sopt->sopt_td;
2245
2246         /* turn off any old options. */
2247         if (opt) {
2248 #ifdef DIAGNOSTIC
2249                 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2250                     opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2251                     opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2252                         printf("ip6_pcbopts: all specified options are cleared.\n");
2253 #endif
2254                 ip6_clearpktopts(opt, -1);
2255         } else
2256                 opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2257         *pktopt = NULL;
2258
2259         if (!m || m->m_len == 0) {
2260                 /*
2261                  * Only turning off any previous options, regardless of
2262                  * whether the opt is just created or given.
2263                  */
2264                 free(opt, M_IP6OPT);
2265                 return (0);
2266         }
2267
2268         /*  set options specified by user. */
2269         if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ?
2270             td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) {
2271                 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2272                 free(opt, M_IP6OPT);
2273                 return (error);
2274         }
2275         *pktopt = opt;
2276         return (0);
2277 }
2278
2279 /*
2280  * initialize ip6_pktopts.  beware that there are non-zero default values in
2281  * the struct.
2282  */
2283 void
2284 ip6_initpktopts(struct ip6_pktopts *opt)
2285 {
2286
2287         bzero(opt, sizeof(*opt));
2288         opt->ip6po_hlim = -1;   /* -1 means default hop limit */
2289         opt->ip6po_tclass = -1; /* -1 means default traffic class */
2290         opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2291         opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2292 }
2293
2294 static int
2295 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2296     struct ucred *cred, int uproto)
2297 {
2298         struct ip6_pktopts *opt;
2299
2300         if (*pktopt == NULL) {
2301                 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2302                     M_WAITOK);
2303                 ip6_initpktopts(*pktopt);
2304         }
2305         opt = *pktopt;
2306
2307         return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2308 }
2309
2310 static int
2311 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2312 {
2313         void *optdata = NULL;
2314         int optdatalen = 0;
2315         struct ip6_ext *ip6e;
2316         int error = 0;
2317         struct in6_pktinfo null_pktinfo;
2318         int deftclass = 0, on;
2319         int defminmtu = IP6PO_MINMTU_MCASTONLY;
2320         int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2321
2322         switch (optname) {
2323         case IPV6_PKTINFO:
2324                 if (pktopt && pktopt->ip6po_pktinfo)
2325                         optdata = (void *)pktopt->ip6po_pktinfo;
2326                 else {
2327                         /* XXX: we don't have to do this every time... */
2328                         bzero(&null_pktinfo, sizeof(null_pktinfo));
2329                         optdata = (void *)&null_pktinfo;
2330                 }
2331                 optdatalen = sizeof(struct in6_pktinfo);
2332                 break;
2333         case IPV6_TCLASS:
2334                 if (pktopt && pktopt->ip6po_tclass >= 0)
2335                         optdata = (void *)&pktopt->ip6po_tclass;
2336                 else
2337                         optdata = (void *)&deftclass;
2338                 optdatalen = sizeof(int);
2339                 break;
2340         case IPV6_HOPOPTS:
2341                 if (pktopt && pktopt->ip6po_hbh) {
2342                         optdata = (void *)pktopt->ip6po_hbh;
2343                         ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2344                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2345                 }
2346                 break;
2347         case IPV6_RTHDR:
2348                 if (pktopt && pktopt->ip6po_rthdr) {
2349                         optdata = (void *)pktopt->ip6po_rthdr;
2350                         ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2351                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2352                 }
2353                 break;
2354         case IPV6_RTHDRDSTOPTS:
2355                 if (pktopt && pktopt->ip6po_dest1) {
2356                         optdata = (void *)pktopt->ip6po_dest1;
2357                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2358                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2359                 }
2360                 break;
2361         case IPV6_DSTOPTS:
2362                 if (pktopt && pktopt->ip6po_dest2) {
2363                         optdata = (void *)pktopt->ip6po_dest2;
2364                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2365                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2366                 }
2367                 break;
2368         case IPV6_NEXTHOP:
2369                 if (pktopt && pktopt->ip6po_nexthop) {
2370                         optdata = (void *)pktopt->ip6po_nexthop;
2371                         optdatalen = pktopt->ip6po_nexthop->sa_len;
2372                 }
2373                 break;
2374         case IPV6_USE_MIN_MTU:
2375                 if (pktopt)
2376                         optdata = (void *)&pktopt->ip6po_minmtu;
2377                 else
2378                         optdata = (void *)&defminmtu;
2379                 optdatalen = sizeof(int);
2380                 break;
2381         case IPV6_DONTFRAG:
2382                 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2383                         on = 1;
2384                 else
2385                         on = 0;
2386                 optdata = (void *)&on;
2387                 optdatalen = sizeof(on);
2388                 break;
2389         case IPV6_PREFER_TEMPADDR:
2390                 if (pktopt)
2391                         optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2392                 else
2393                         optdata = (void *)&defpreftemp;
2394                 optdatalen = sizeof(int);
2395                 break;
2396         default:                /* should not happen */
2397 #ifdef DIAGNOSTIC
2398                 panic("ip6_getpcbopt: unexpected option\n");
2399 #endif
2400                 return (ENOPROTOOPT);
2401         }
2402
2403         error = sooptcopyout(sopt, optdata, optdatalen);
2404
2405         return (error);
2406 }
2407
2408 void
2409 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2410 {
2411         if (pktopt == NULL)
2412                 return;
2413
2414         if (optname == -1 || optname == IPV6_PKTINFO) {
2415                 if (pktopt->ip6po_pktinfo)
2416                         free(pktopt->ip6po_pktinfo, M_IP6OPT);
2417                 pktopt->ip6po_pktinfo = NULL;
2418         }
2419         if (optname == -1 || optname == IPV6_HOPLIMIT)
2420                 pktopt->ip6po_hlim = -1;
2421         if (optname == -1 || optname == IPV6_TCLASS)
2422                 pktopt->ip6po_tclass = -1;
2423         if (optname == -1 || optname == IPV6_NEXTHOP) {
2424                 if (pktopt->ip6po_nextroute.ro_rt) {
2425                         RTFREE(pktopt->ip6po_nextroute.ro_rt);
2426                         pktopt->ip6po_nextroute.ro_rt = NULL;
2427                 }
2428                 if (pktopt->ip6po_nexthop)
2429                         free(pktopt->ip6po_nexthop, M_IP6OPT);
2430                 pktopt->ip6po_nexthop = NULL;
2431         }
2432         if (optname == -1 || optname == IPV6_HOPOPTS) {
2433                 if (pktopt->ip6po_hbh)
2434                         free(pktopt->ip6po_hbh, M_IP6OPT);
2435                 pktopt->ip6po_hbh = NULL;
2436         }
2437         if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2438                 if (pktopt->ip6po_dest1)
2439                         free(pktopt->ip6po_dest1, M_IP6OPT);
2440                 pktopt->ip6po_dest1 = NULL;
2441         }
2442         if (optname == -1 || optname == IPV6_RTHDR) {
2443                 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2444                         free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2445                 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2446                 if (pktopt->ip6po_route.ro_rt) {
2447                         RTFREE(pktopt->ip6po_route.ro_rt);
2448                         pktopt->ip6po_route.ro_rt = NULL;
2449                 }
2450         }
2451         if (optname == -1 || optname == IPV6_DSTOPTS) {
2452                 if (pktopt->ip6po_dest2)
2453                         free(pktopt->ip6po_dest2, M_IP6OPT);
2454                 pktopt->ip6po_dest2 = NULL;
2455         }
2456 }
2457
2458 #define PKTOPT_EXTHDRCPY(type) \
2459 do {\
2460         if (src->type) {\
2461                 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2462                 dst->type = malloc(hlen, M_IP6OPT, canwait);\
2463                 if (dst->type == NULL && canwait == M_NOWAIT)\
2464                         goto bad;\
2465                 bcopy(src->type, dst->type, hlen);\
2466         }\
2467 } while (/*CONSTCOND*/ 0)
2468
2469 static int
2470 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2471 {
2472         if (dst == NULL || src == NULL)  {
2473                 printf("ip6_clearpktopts: invalid argument\n");
2474                 return (EINVAL);
2475         }
2476
2477         dst->ip6po_hlim = src->ip6po_hlim;
2478         dst->ip6po_tclass = src->ip6po_tclass;
2479         dst->ip6po_flags = src->ip6po_flags;
2480         dst->ip6po_minmtu = src->ip6po_minmtu;
2481         dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
2482         if (src->ip6po_pktinfo) {
2483                 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2484                     M_IP6OPT, canwait);
2485                 if (dst->ip6po_pktinfo == NULL)
2486                         goto bad;
2487                 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2488         }
2489         if (src->ip6po_nexthop) {
2490                 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2491                     M_IP6OPT, canwait);
2492                 if (dst->ip6po_nexthop == NULL)
2493                         goto bad;
2494                 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2495                     src->ip6po_nexthop->sa_len);
2496         }
2497         PKTOPT_EXTHDRCPY(ip6po_hbh);
2498         PKTOPT_EXTHDRCPY(ip6po_dest1);
2499         PKTOPT_EXTHDRCPY(ip6po_dest2);
2500         PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2501         return (0);
2502
2503   bad:
2504         ip6_clearpktopts(dst, -1);
2505         return (ENOBUFS);
2506 }
2507 #undef PKTOPT_EXTHDRCPY
2508
2509 struct ip6_pktopts *
2510 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2511 {
2512         int error;
2513         struct ip6_pktopts *dst;
2514
2515         dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2516         if (dst == NULL)
2517                 return (NULL);
2518         ip6_initpktopts(dst);
2519
2520         if ((error = copypktopts(dst, src, canwait)) != 0) {
2521                 free(dst, M_IP6OPT);
2522                 return (NULL);
2523         }
2524
2525         return (dst);
2526 }
2527
2528 void
2529 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2530 {
2531         if (pktopt == NULL)
2532                 return;
2533
2534         ip6_clearpktopts(pktopt, -1);
2535
2536         free(pktopt, M_IP6OPT);
2537 }
2538
2539 /*
2540  * Set IPv6 outgoing packet options based on advanced API.
2541  */
2542 int
2543 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2544     struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
2545 {
2546         struct cmsghdr *cm = 0;
2547
2548         if (control == NULL || opt == NULL)
2549                 return (EINVAL);
2550
2551         ip6_initpktopts(opt);
2552         if (stickyopt) {
2553                 int error;
2554
2555                 /*
2556                  * If stickyopt is provided, make a local copy of the options
2557                  * for this particular packet, then override them by ancillary
2558                  * objects.
2559                  * XXX: copypktopts() does not copy the cached route to a next
2560                  * hop (if any).  This is not very good in terms of efficiency,
2561                  * but we can allow this since this option should be rarely
2562                  * used.
2563                  */
2564                 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2565                         return (error);
2566         }
2567
2568         /*
2569          * XXX: Currently, we assume all the optional information is stored
2570          * in a single mbuf.
2571          */
2572         if (control->m_next)
2573                 return (EINVAL);
2574
2575         for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2576             control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2577                 int error;
2578
2579                 if (control->m_len < CMSG_LEN(0))
2580                         return (EINVAL);
2581
2582                 cm = mtod(control, struct cmsghdr *);
2583                 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2584                         return (EINVAL);
2585                 if (cm->cmsg_level != IPPROTO_IPV6)
2586                         continue;
2587
2588                 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2589                     cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2590                 if (error)
2591                         return (error);
2592         }
2593
2594         return (0);
2595 }
2596
2597 /*
2598  * Set a particular packet option, as a sticky option or an ancillary data
2599  * item.  "len" can be 0 only when it's a sticky option.
2600  * We have 4 cases of combination of "sticky" and "cmsg":
2601  * "sticky=0, cmsg=0": impossible
2602  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2603  * "sticky=1, cmsg=0": RFC3542 socket option
2604  * "sticky=1, cmsg=1": RFC2292 socket option
2605  */
2606 static int
2607 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2608     struct ucred *cred, int sticky, int cmsg, int uproto)
2609 {
2610         int minmtupolicy, preftemp;
2611         int error;
2612
2613         if (!sticky && !cmsg) {
2614 #ifdef DIAGNOSTIC
2615                 printf("ip6_setpktopt: impossible case\n");
2616 #endif
2617                 return (EINVAL);
2618         }
2619
2620         /*
2621          * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2622          * not be specified in the context of RFC3542.  Conversely,
2623          * RFC3542 types should not be specified in the context of RFC2292.
2624          */
2625         if (!cmsg) {
2626                 switch (optname) {
2627                 case IPV6_2292PKTINFO:
2628                 case IPV6_2292HOPLIMIT:
2629                 case IPV6_2292NEXTHOP:
2630                 case IPV6_2292HOPOPTS:
2631                 case IPV6_2292DSTOPTS:
2632                 case IPV6_2292RTHDR:
2633                 case IPV6_2292PKTOPTIONS:
2634                         return (ENOPROTOOPT);
2635                 }
2636         }
2637         if (sticky && cmsg) {
2638                 switch (optname) {
2639                 case IPV6_PKTINFO:
2640                 case IPV6_HOPLIMIT:
2641                 case IPV6_NEXTHOP:
2642                 case IPV6_HOPOPTS:
2643                 case IPV6_DSTOPTS:
2644                 case IPV6_RTHDRDSTOPTS:
2645                 case IPV6_RTHDR:
2646                 case IPV6_USE_MIN_MTU:
2647                 case IPV6_DONTFRAG:
2648                 case IPV6_TCLASS:
2649                 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2650                         return (ENOPROTOOPT);
2651                 }
2652         }
2653
2654         switch (optname) {
2655         case IPV6_2292PKTINFO:
2656         case IPV6_PKTINFO:
2657         {
2658                 struct ifnet *ifp = NULL;
2659                 struct in6_pktinfo *pktinfo;
2660
2661                 if (len != sizeof(struct in6_pktinfo))
2662                         return (EINVAL);
2663
2664                 pktinfo = (struct in6_pktinfo *)buf;
2665
2666                 /*
2667                  * An application can clear any sticky IPV6_PKTINFO option by
2668                  * doing a "regular" setsockopt with ipi6_addr being
2669                  * in6addr_any and ipi6_ifindex being zero.
2670                  * [RFC 3542, Section 6]
2671                  */
2672                 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2673                     pktinfo->ipi6_ifindex == 0 &&
2674                     IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2675                         ip6_clearpktopts(opt, optname);
2676                         break;
2677                 }
2678
2679                 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2680                     sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2681                         return (EINVAL);
2682                 }
2683
2684                 /* validate the interface index if specified. */
2685                 if (pktinfo->ipi6_ifindex > V_if_index ||
2686                     pktinfo->ipi6_ifindex < 0) {
2687                          return (ENXIO);
2688                 }
2689                 if (pktinfo->ipi6_ifindex) {
2690                         ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
2691                         if (ifp == NULL)
2692                                 return (ENXIO);
2693                 }
2694
2695                 /*
2696                  * We store the address anyway, and let in6_selectsrc()
2697                  * validate the specified address.  This is because ipi6_addr
2698                  * may not have enough information about its scope zone, and
2699                  * we may need additional information (such as outgoing
2700                  * interface or the scope zone of a destination address) to
2701                  * disambiguate the scope.
2702                  * XXX: the delay of the validation may confuse the
2703                  * application when it is used as a sticky option.
2704                  */
2705                 if (opt->ip6po_pktinfo == NULL) {
2706                         opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2707                             M_IP6OPT, M_NOWAIT);
2708                         if (opt->ip6po_pktinfo == NULL)
2709                                 return (ENOBUFS);
2710                 }
2711                 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2712                 break;
2713         }
2714
2715         case IPV6_2292HOPLIMIT:
2716         case IPV6_HOPLIMIT:
2717         {
2718                 int *hlimp;
2719
2720                 /*
2721                  * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2722                  * to simplify the ordering among hoplimit options.
2723                  */
2724                 if (optname == IPV6_HOPLIMIT && sticky)
2725                         return (ENOPROTOOPT);
2726
2727                 if (len != sizeof(int))
2728                         return (EINVAL);
2729                 hlimp = (int *)buf;
2730                 if (*hlimp < -1 || *hlimp > 255)
2731                         return (EINVAL);
2732
2733                 opt->ip6po_hlim = *hlimp;
2734                 break;
2735         }
2736
2737         case IPV6_TCLASS:
2738         {
2739                 int tclass;
2740
2741                 if (len != sizeof(int))
2742                         return (EINVAL);
2743                 tclass = *(int *)buf;
2744                 if (tclass < -1 || tclass > 255)
2745                         return (EINVAL);
2746
2747                 opt->ip6po_tclass = tclass;
2748                 break;
2749         }
2750
2751         case IPV6_2292NEXTHOP:
2752         case IPV6_NEXTHOP:
2753                 if (cred != NULL) {
2754                         error = priv_check_cred(cred,
2755                             PRIV_NETINET_SETHDROPTS, 0);
2756                         if (error)
2757                                 return (error);
2758                 }
2759
2760                 if (len == 0) { /* just remove the option */
2761                         ip6_clearpktopts(opt, IPV6_NEXTHOP);
2762                         break;
2763                 }
2764
2765                 /* check if cmsg_len is large enough for sa_len */
2766                 if (len < sizeof(struct sockaddr) || len < *buf)
2767                         return (EINVAL);
2768
2769                 switch (((struct sockaddr *)buf)->sa_family) {
2770                 case AF_INET6:
2771                 {
2772                         struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
2773                         int error;
2774
2775                         if (sa6->sin6_len != sizeof(struct sockaddr_in6))
2776                                 return (EINVAL);
2777
2778                         if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
2779                             IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
2780                                 return (EINVAL);
2781                         }
2782                         if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
2783                             != 0) {
2784                                 return (error);
2785                         }
2786                         break;
2787                 }
2788                 case AF_LINK:   /* should eventually be supported */
2789                 default:
2790                         return (EAFNOSUPPORT);
2791                 }
2792
2793                 /* turn off the previous option, then set the new option. */
2794                 ip6_clearpktopts(opt, IPV6_NEXTHOP);
2795                 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
2796                 if (opt->ip6po_nexthop == NULL)
2797                         return (ENOBUFS);
2798                 bcopy(buf, opt->ip6po_nexthop, *buf);
2799                 break;
2800
2801         case IPV6_2292HOPOPTS:
2802         case IPV6_HOPOPTS:
2803         {
2804                 struct ip6_hbh *hbh;
2805                 int hbhlen;
2806
2807                 /*
2808                  * XXX: We don't allow a non-privileged user to set ANY HbH
2809                  * options, since per-option restriction has too much
2810                  * overhead.
2811                  */
2812                 if (cred != NULL) {
2813                         error = priv_check_cred(cred,
2814                             PRIV_NETINET_SETHDROPTS, 0);
2815                         if (error)
2816                                 return (error);
2817                 }
2818
2819                 if (len == 0) {
2820                         ip6_clearpktopts(opt, IPV6_HOPOPTS);
2821                         break;  /* just remove the option */
2822                 }
2823
2824                 /* message length validation */
2825                 if (len < sizeof(struct ip6_hbh))
2826                         return (EINVAL);
2827                 hbh = (struct ip6_hbh *)buf;
2828                 hbhlen = (hbh->ip6h_len + 1) << 3;
2829                 if (len != hbhlen)
2830                         return (EINVAL);
2831
2832                 /* turn off the previous option, then set the new option. */
2833                 ip6_clearpktopts(opt, IPV6_HOPOPTS);
2834                 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
2835                 if (opt->ip6po_hbh == NULL)
2836                         return (ENOBUFS);
2837                 bcopy(hbh, opt->ip6po_hbh, hbhlen);
2838
2839                 break;
2840         }
2841
2842         case IPV6_2292DSTOPTS:
2843         case IPV6_DSTOPTS:
2844         case IPV6_RTHDRDSTOPTS:
2845         {
2846                 struct ip6_dest *dest, **newdest = NULL;
2847                 int destlen;
2848
2849                 if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
2850                         error = priv_check_cred(cred,
2851                             PRIV_NETINET_SETHDROPTS, 0);
2852                         if (error)
2853                                 return (error);
2854                 }
2855
2856                 if (len == 0) {
2857                         ip6_clearpktopts(opt, optname);
2858                         break;  /* just remove the option */
2859                 }
2860
2861                 /* message length validation */
2862                 if (len < sizeof(struct ip6_dest))
2863                         return (EINVAL);
2864                 dest = (struct ip6_dest *)buf;
2865                 destlen = (dest->ip6d_len + 1) << 3;
2866                 if (len != destlen)
2867                         return (EINVAL);
2868
2869                 /*
2870                  * Determine the position that the destination options header
2871                  * should be inserted; before or after the routing header.
2872                  */
2873                 switch (optname) {
2874                 case IPV6_2292DSTOPTS:
2875                         /*
2876                          * The old advanced API is ambiguous on this point.
2877                          * Our approach is to determine the position
2878                          * according to the existence of a routing header.
2879                          * Note, however, that this depends on the order of the
2880                          * extension headers in the ancillary data; the 1st
2881                          * part of the destination options header must appear
2882                          * before the routing header in the ancillary data,
2883                          * too.
2884                          * RFC3542 solved the ambiguity by introducing
2885                          * separate ancillary data or option types.
2886                          */
2887                         if (opt->ip6po_rthdr == NULL)
2888                                 newdest = &opt->ip6po_dest1;
2889                         else
2890                                 newdest = &opt->ip6po_dest2;
2891                         break;
2892                 case IPV6_RTHDRDSTOPTS:
2893                         newdest = &opt->ip6po_dest1;
2894                         break;
2895                 case IPV6_DSTOPTS:
2896                         newdest = &opt->ip6po_dest2;
2897                         break;
2898                 }
2899
2900                 /* turn off the previous option, then set the new option. */
2901                 ip6_clearpktopts(opt, optname);
2902                 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
2903                 if (*newdest == NULL)
2904                         return (ENOBUFS);
2905                 bcopy(dest, *newdest, destlen);
2906
2907                 break;
2908         }
2909
2910         case IPV6_2292RTHDR:
2911         case IPV6_RTHDR:
2912         {
2913                 struct ip6_rthdr *rth;
2914                 int rthlen;
2915
2916                 if (len == 0) {
2917                         ip6_clearpktopts(opt, IPV6_RTHDR);
2918                         break;  /* just remove the option */
2919                 }
2920
2921                 /* message length validation */
2922                 if (len < sizeof(struct ip6_rthdr))
2923                         return (EINVAL);
2924                 rth = (struct ip6_rthdr *)buf;
2925                 rthlen = (rth->ip6r_len + 1) << 3;
2926                 if (len != rthlen)
2927                         return (EINVAL);
2928
2929                 switch (rth->ip6r_type) {
2930                 case IPV6_RTHDR_TYPE_0:
2931                         if (rth->ip6r_len == 0) /* must contain one addr */
2932                                 return (EINVAL);
2933                         if (rth->ip6r_len % 2) /* length must be even */
2934                                 return (EINVAL);
2935                         if (rth->ip6r_len / 2 != rth->ip6r_segleft)
2936                                 return (EINVAL);
2937                         break;
2938                 default:
2939                         return (EINVAL);        /* not supported */
2940                 }
2941
2942                 /* turn off the previous option */
2943                 ip6_clearpktopts(opt, IPV6_RTHDR);
2944                 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
2945                 if (opt->ip6po_rthdr == NULL)
2946                         return (ENOBUFS);
2947                 bcopy(rth, opt->ip6po_rthdr, rthlen);
2948
2949                 break;
2950         }
2951
2952         case IPV6_USE_MIN_MTU:
2953                 if (len != sizeof(int))
2954                         return (EINVAL);
2955                 minmtupolicy = *(int *)buf;
2956                 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
2957                     minmtupolicy != IP6PO_MINMTU_DISABLE &&
2958                     minmtupolicy != IP6PO_MINMTU_ALL) {
2959                         return (EINVAL);
2960                 }
2961                 opt->ip6po_minmtu = minmtupolicy;
2962                 break;
2963
2964         case IPV6_DONTFRAG:
2965                 if (len != sizeof(int))
2966                         return (EINVAL);
2967
2968                 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
2969                         /*
2970                          * we ignore this option for TCP sockets.
2971                          * (RFC3542 leaves this case unspecified.)
2972                          */
2973                         opt->ip6po_flags &= ~IP6PO_DONTFRAG;
2974                 } else
2975                         opt->ip6po_flags |= IP6PO_DONTFRAG;
2976                 break;
2977
2978         case IPV6_PREFER_TEMPADDR:
2979                 if (len != sizeof(int))
2980                         return (EINVAL);
2981                 preftemp = *(int *)buf;
2982                 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
2983                     preftemp != IP6PO_TEMPADDR_NOTPREFER &&
2984                     preftemp != IP6PO_TEMPADDR_PREFER) {
2985                         return (EINVAL);
2986                 }
2987                 opt->ip6po_prefer_tempaddr = preftemp;
2988                 break;
2989
2990         default:
2991                 return (ENOPROTOOPT);
2992         } /* end of switch */
2993
2994         return (0);
2995 }
2996
2997 /*
2998  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
2999  * packet to the input queue of a specified interface.  Note that this
3000  * calls the output routine of the loopback "driver", but with an interface
3001  * pointer that might NOT be &loif -- easier than replicating that code here.
3002  */
3003 void
3004 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
3005 {
3006         struct mbuf *copym;
3007         struct ip6_hdr *ip6;
3008
3009         copym = m_copy(m, 0, M_COPYALL);
3010         if (copym == NULL)
3011                 return;
3012
3013         /*
3014          * Make sure to deep-copy IPv6 header portion in case the data
3015          * is in an mbuf cluster, so that we can safely override the IPv6
3016          * header portion later.
3017          */
3018         if ((copym->m_flags & M_EXT) != 0 ||
3019             copym->m_len < sizeof(struct ip6_hdr)) {
3020                 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3021                 if (copym == NULL)
3022                         return;
3023         }
3024
3025 #ifdef DIAGNOSTIC
3026         if (copym->m_len < sizeof(*ip6)) {
3027                 m_freem(copym);
3028                 return;
3029         }
3030 #endif
3031
3032         ip6 = mtod(copym, struct ip6_hdr *);
3033         /*
3034          * clear embedded scope identifiers if necessary.
3035          * in6_clearscope will touch the addresses only when necessary.
3036          */
3037         in6_clearscope(&ip6->ip6_src);
3038         in6_clearscope(&ip6->ip6_dst);
3039
3040         (void)if_simloop(ifp, copym, dst->sin6_family, 0);
3041 }
3042
3043 /*
3044  * Chop IPv6 header off from the payload.
3045  */
3046 static int
3047 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3048 {
3049         struct mbuf *mh;
3050         struct ip6_hdr *ip6;
3051
3052         ip6 = mtod(m, struct ip6_hdr *);
3053         if (m->m_len > sizeof(*ip6)) {
3054                 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3055                 if (mh == 0) {
3056                         m_freem(m);
3057                         return ENOBUFS;
3058                 }
3059                 M_MOVE_PKTHDR(mh, m);
3060                 MH_ALIGN(mh, sizeof(*ip6));
3061                 m->m_len -= sizeof(*ip6);
3062                 m->m_data += sizeof(*ip6);
3063                 mh->m_next = m;
3064                 m = mh;
3065                 m->m_len = sizeof(*ip6);
3066                 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3067         }
3068         exthdrs->ip6e_ip6 = m;
3069         return 0;
3070 }
3071
3072 /*
3073  * Compute IPv6 extension header length.
3074  */
3075 int
3076 ip6_optlen(struct inpcb *in6p)
3077 {
3078         int len;
3079
3080         if (!in6p->in6p_outputopts)
3081                 return 0;
3082
3083         len = 0;
3084 #define elen(x) \
3085     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3086
3087         len += elen(in6p->in6p_outputopts->ip6po_hbh);
3088         if (in6p->in6p_outputopts->ip6po_rthdr)
3089                 /* dest1 is valid with rthdr only */
3090                 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3091         len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3092         len += elen(in6p->in6p_outputopts->ip6po_dest2);
3093         return len;
3094 #undef elen
3095 }