]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet6/in6_pcb.c
zfs: merge openzfs/zfs@9cd71c860 (master)
[FreeBSD/FreeBSD.git] / sys / netinet6 / in6_pcb.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * Copyright (c) 2010-2011 Juniper Networks, Inc.
6  * All rights reserved.
7  *
8  * Portions of this software were developed by Robert N. M. Watson under
9  * contract to Juniper Networks, Inc.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the project nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *      $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $
36  */
37
38 /*-
39  * Copyright (c) 1982, 1986, 1991, 1993
40  *      The Regents of the University of California.  All rights reserved.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *      @(#)in_pcb.c    8.2 (Berkeley) 1/4/94
67  */
68
69 #include <sys/cdefs.h>
70 __FBSDID("$FreeBSD$");
71
72 #include "opt_inet.h"
73 #include "opt_inet6.h"
74 #include "opt_ipsec.h"
75 #include "opt_route.h"
76 #include "opt_rss.h"
77
78 #include <sys/hash.h>
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/malloc.h>
82 #include <sys/mbuf.h>
83 #include <sys/domain.h>
84 #include <sys/protosw.h>
85 #include <sys/socket.h>
86 #include <sys/socketvar.h>
87 #include <sys/sockio.h>
88 #include <sys/errno.h>
89 #include <sys/time.h>
90 #include <sys/priv.h>
91 #include <sys/proc.h>
92 #include <sys/jail.h>
93
94 #include <vm/uma.h>
95
96 #include <net/if.h>
97 #include <net/if_var.h>
98 #include <net/if_llatbl.h>
99 #include <net/if_types.h>
100 #include <net/route.h>
101 #include <net/route/nhop.h>
102
103 #include <netinet/in.h>
104 #include <netinet/in_var.h>
105 #include <netinet/in_systm.h>
106 #include <netinet/ip6.h>
107 #include <netinet/ip_var.h>
108
109 #include <netinet6/ip6_var.h>
110 #include <netinet6/nd6.h>
111 #include <netinet/in_pcb.h>
112 #include <netinet/in_pcb_var.h>
113 #include <netinet6/in6_pcb.h>
114 #include <netinet6/in6_fib.h>
115 #include <netinet6/scope6_var.h>
116
117 int
118 in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
119 {
120         struct socket *so = inp->inp_socket;
121         u_int16_t lport = 0;
122         int error, lookupflags = 0;
123 #ifdef INVARIANTS
124         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
125 #endif
126
127         INP_WLOCK_ASSERT(inp);
128         INP_HASH_WLOCK_ASSERT(pcbinfo);
129
130         error = prison_local_ip6(cred, laddr,
131             ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0));
132         if (error)
133                 return(error);
134
135         /* XXX: this is redundant when called from in6_pcbbind */
136         if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
137                 lookupflags = INPLOOKUP_WILDCARD;
138
139         inp->inp_flags |= INP_ANONPORT;
140
141         error = in_pcb_lport(inp, NULL, &lport, cred, lookupflags);
142         if (error != 0)
143                 return (error);
144
145         inp->inp_lport = lport;
146         if (in_pcbinshash(inp) != 0) {
147                 inp->in6p_laddr = in6addr_any;
148                 inp->inp_lport = 0;
149                 return (EAGAIN);
150         }
151
152         return (0);
153 }
154
155 int
156 in6_pcbbind(struct inpcb *inp, struct sockaddr *nam,
157     struct ucred *cred)
158 {
159         struct socket *so = inp->inp_socket;
160         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
161         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
162         u_short lport = 0;
163         int error, lookupflags = 0;
164         int reuseport = (so->so_options & SO_REUSEPORT);
165
166         /*
167          * XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here
168          * so that we don't have to add to the (already messy) code below.
169          */
170         int reuseport_lb = (so->so_options & SO_REUSEPORT_LB);
171
172         INP_WLOCK_ASSERT(inp);
173         INP_HASH_WLOCK_ASSERT(pcbinfo);
174
175         if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
176                 return (EINVAL);
177         if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0)
178                 lookupflags = INPLOOKUP_WILDCARD;
179         if (nam == NULL) {
180                 if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
181                     ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
182                         return (error);
183         } else {
184                 sin6 = (struct sockaddr_in6 *)nam;
185                 KASSERT(sin6->sin6_family == AF_INET6,
186                     ("%s: invalid address family for %p", __func__, sin6));
187                 KASSERT(sin6->sin6_len == sizeof(*sin6),
188                     ("%s: invalid address length for %p", __func__, sin6));
189
190                 if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
191                         return(error);
192
193                 if ((error = prison_local_ip6(cred, &sin6->sin6_addr,
194                     ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
195                         return (error);
196
197                 lport = sin6->sin6_port;
198                 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
199                         /*
200                          * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
201                          * allow compepte duplication of binding if
202                          * SO_REUSEPORT is set, or if SO_REUSEADDR is set
203                          * and a multicast address is bound on both
204                          * new and duplicated sockets.
205                          */
206                         if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
207                                 reuseport = SO_REUSEADDR|SO_REUSEPORT;
208                         /*
209                          * XXX: How to deal with SO_REUSEPORT_LB here?
210                          * Treat same as SO_REUSEPORT for now.
211                          */
212                         if ((so->so_options &
213                             (SO_REUSEADDR|SO_REUSEPORT_LB)) != 0)
214                                 reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB;
215                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
216                         struct epoch_tracker et;
217                         struct ifaddr *ifa;
218
219                         sin6->sin6_port = 0;            /* yech... */
220                         NET_EPOCH_ENTER(et);
221                         if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) ==
222                             NULL &&
223                             (inp->inp_flags & INP_BINDANY) == 0) {
224                                 NET_EPOCH_EXIT(et);
225                                 return (EADDRNOTAVAIL);
226                         }
227
228                         /*
229                          * XXX: bind to an anycast address might accidentally
230                          * cause sending a packet with anycast source address.
231                          * We should allow to bind to a deprecated address, since
232                          * the application dares to use it.
233                          */
234                         if (ifa != NULL &&
235                             ((struct in6_ifaddr *)ifa)->ia6_flags &
236                             (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
237                                 NET_EPOCH_EXIT(et);
238                                 return (EADDRNOTAVAIL);
239                         }
240                         NET_EPOCH_EXIT(et);
241                 }
242                 if (lport) {
243                         struct inpcb *t;
244
245                         /* GROSS */
246                         if (ntohs(lport) <= V_ipport_reservedhigh &&
247                             ntohs(lport) >= V_ipport_reservedlow &&
248                             priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT))
249                                 return (EACCES);
250                         if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
251                             priv_check_cred(inp->inp_cred, PRIV_NETINET_REUSEPORT) != 0) {
252                                 t = in6_pcblookup_local(pcbinfo,
253                                     &sin6->sin6_addr, lport,
254                                     INPLOOKUP_WILDCARD, cred);
255                                 if (t &&
256                                     ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
257                                     (so->so_type != SOCK_STREAM ||
258                                      IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
259                                     (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
260                                      !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
261                                      (t->inp_flags2 & INP_REUSEPORT) ||
262                                      (t->inp_flags2 & INP_REUSEPORT_LB) == 0) &&
263                                     (inp->inp_cred->cr_uid !=
264                                      t->inp_cred->cr_uid))
265                                         return (EADDRINUSE);
266
267                                 /*
268                                  * If the socket is a BINDMULTI socket, then
269                                  * the credentials need to match and the
270                                  * original socket also has to have been bound
271                                  * with BINDMULTI.
272                                  */
273                                 if (t && (! in_pcbbind_check_bindmulti(inp, t)))
274                                         return (EADDRINUSE);
275
276 #ifdef INET
277                                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
278                                     IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
279                                         struct sockaddr_in sin;
280
281                                         in6_sin6_2_sin(&sin, sin6);
282                                         t = in_pcblookup_local(pcbinfo,
283                                             sin.sin_addr, lport,
284                                             INPLOOKUP_WILDCARD, cred);
285                                         if (t &&
286                                             ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
287                                             (so->so_type != SOCK_STREAM ||
288                                              ntohl(t->inp_faddr.s_addr) ==
289                                               INADDR_ANY) &&
290                                             (inp->inp_cred->cr_uid !=
291                                              t->inp_cred->cr_uid))
292                                                 return (EADDRINUSE);
293
294                                         if (t && (! in_pcbbind_check_bindmulti(inp, t)))
295                                                 return (EADDRINUSE);
296                                 }
297 #endif
298                         }
299                         t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
300                             lport, lookupflags, cred);
301                         if (t && (reuseport & inp_so_options(t)) == 0 &&
302                             (reuseport_lb & inp_so_options(t)) == 0) {
303                                 return (EADDRINUSE);
304                         }
305 #ifdef INET
306                         if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
307                             IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
308                                 struct sockaddr_in sin;
309
310                                 in6_sin6_2_sin(&sin, sin6);
311                                 t = in_pcblookup_local(pcbinfo, sin.sin_addr,
312                                    lport, lookupflags, cred);
313                                 if (t &&
314                                     (reuseport & inp_so_options(t)) == 0 &&
315                                     (reuseport_lb & inp_so_options(t)) == 0 &&
316                                     (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
317                                         (t->inp_vflag & INP_IPV6PROTO) != 0)) {
318                                         return (EADDRINUSE);
319                                 }
320                         }
321 #endif
322                 }
323                 inp->in6p_laddr = sin6->sin6_addr;
324         }
325         if (lport == 0) {
326                 if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) {
327                         /* Undo an address bind that may have occurred. */
328                         inp->in6p_laddr = in6addr_any;
329                         return (error);
330                 }
331         } else {
332                 inp->inp_lport = lport;
333                 if (in_pcbinshash(inp) != 0) {
334                         inp->in6p_laddr = in6addr_any;
335                         inp->inp_lport = 0;
336                         return (EAGAIN);
337                 }
338         }
339         return (0);
340 }
341
342 /*
343  *   Transform old in6_pcbconnect() into an inner subroutine for new
344  *   in6_pcbconnect(): Do some validity-checking on the remote
345  *   address (in mbuf 'nam') and then determine local host address
346  *   (i.e., which interface) to use to access that remote host.
347  *
348  *   This preserves definition of in6_pcbconnect(), while supporting a
349  *   slightly different version for T/TCP.  (This is more than
350  *   a bit of a kludge, but cleaning up the internal interfaces would
351  *   have forced minor changes in every protocol).
352  */
353 static int
354 in6_pcbladdr(struct inpcb *inp, struct sockaddr_in6 *sin6,
355     struct in6_addr *plocal_addr6)
356 {
357         int error = 0;
358         int scope_ambiguous = 0;
359         struct in6_addr in6a;
360         struct epoch_tracker et;
361
362         INP_WLOCK_ASSERT(inp);
363         INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);        /* XXXRW: why? */
364
365         if (sin6->sin6_port == 0)
366                 return (EADDRNOTAVAIL);
367
368         if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
369                 scope_ambiguous = 1;
370         if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
371                 return(error);
372
373         if (!CK_STAILQ_EMPTY(&V_in6_ifaddrhead)) {
374                 /*
375                  * If the destination address is UNSPECIFIED addr,
376                  * use the loopback addr, e.g ::1.
377                  */
378                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
379                         sin6->sin6_addr = in6addr_loopback;
380         }
381         if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0)
382                 return (error);
383
384         NET_EPOCH_ENTER(et);
385         error = in6_selectsrc_socket(sin6, inp->in6p_outputopts,
386             inp, inp->inp_cred, scope_ambiguous, &in6a, NULL);
387         NET_EPOCH_EXIT(et);
388         if (error)
389                 return (error);
390
391         /*
392          * Do not update this earlier, in case we return with an error.
393          *
394          * XXX: this in6_selectsrc_socket result might replace the bound local
395          * address with the address specified by setsockopt(IPV6_PKTINFO).
396          * Is it the intended behavior?
397          */
398         *plocal_addr6 = in6a;
399
400         /*
401          * Don't do pcblookup call here; return interface in
402          * plocal_addr6
403          * and exit to caller, that will do the lookup.
404          */
405
406         return (0);
407 }
408
409 /*
410  * Outer subroutine:
411  * Connect from a socket to a specified address.
412  * Both address and port must be specified in argument sin.
413  * If don't have a local address for this socket yet,
414  * then pick one.
415  */
416 int
417 in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
418     struct ucred *cred, struct mbuf *m, bool rehash)
419 {
420         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
421         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
422         struct sockaddr_in6 laddr6;
423         int error;
424
425         KASSERT(sin6->sin6_family == AF_INET6,
426             ("%s: invalid address family for %p", __func__, sin6));
427         KASSERT(sin6->sin6_len == sizeof(*sin6),
428             ("%s: invalid address length for %p", __func__, sin6));
429
430         bzero(&laddr6, sizeof(laddr6));
431         laddr6.sin6_family = AF_INET6;
432
433         INP_WLOCK_ASSERT(inp);
434         INP_HASH_WLOCK_ASSERT(pcbinfo);
435
436 #ifdef ROUTE_MPATH
437         if (CALC_FLOWID_OUTBOUND) {
438                 uint32_t hash_type, hash_val;
439
440                 hash_val = fib6_calc_software_hash(&inp->in6p_laddr,
441                     &sin6->sin6_addr, 0, sin6->sin6_port,
442                     inp->inp_socket->so_proto->pr_protocol, &hash_type);
443                 inp->inp_flowid = hash_val;
444                 inp->inp_flowtype = hash_type;
445         }
446 #endif
447         /*
448          * Call inner routine, to assign local interface address.
449          * in6_pcbladdr() may automatically fill in sin6_scope_id.
450          */
451         if ((error = in6_pcbladdr(inp, sin6, &laddr6.sin6_addr)) != 0)
452                 return (error);
453
454         if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr,
455                                sin6->sin6_port,
456                               IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
457                               ? &laddr6.sin6_addr : &inp->in6p_laddr,
458                               inp->inp_lport, 0, NULL, M_NODOM) != NULL) {
459                 return (EADDRINUSE);
460         }
461         if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
462                 if (inp->inp_lport == 0) {
463                         /*
464                          * rehash was required to be true in the past for
465                          * this case; retain that convention.  However,
466                          * we now call in_pcb_lport_dest rather than
467                          * in6_pcbbind; the former does not insert into
468                          * the hash table, the latter does.  Change rehash
469                          * to false to do the in_pcbinshash below.
470                          */
471                         KASSERT(rehash == true,
472                             ("Rehashing required for unbound inps"));
473                         rehash = false;
474                         error = in_pcb_lport_dest(inp,
475                             (struct sockaddr *) &laddr6, &inp->inp_lport,
476                             (struct sockaddr *) sin6, sin6->sin6_port, cred,
477                             INPLOOKUP_WILDCARD);
478                         if (error)
479                                 return (error);
480                 }
481                 inp->in6p_laddr = laddr6.sin6_addr;
482         }
483         inp->in6p_faddr = sin6->sin6_addr;
484         inp->inp_fport = sin6->sin6_port;
485         /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
486         inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
487         if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
488                 inp->inp_flow |=
489                     (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
490
491         if (rehash) {
492                 in_pcbrehash(inp);
493         } else {
494                 in_pcbinshash(inp);
495         }
496
497         return (0);
498 }
499
500 int
501 in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
502 {
503
504         return (in6_pcbconnect_mbuf(inp, nam, cred, NULL, true));
505 }
506
507 void
508 in6_pcbdisconnect(struct inpcb *inp)
509 {
510
511         INP_WLOCK_ASSERT(inp);
512         INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
513
514         bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
515         inp->inp_fport = 0;
516         /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
517         inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
518         in_pcbrehash(inp);
519 }
520
521 struct sockaddr *
522 in6_sockaddr(in_port_t port, struct in6_addr *addr_p)
523 {
524         struct sockaddr_in6 *sin6;
525
526         sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK);
527         bzero(sin6, sizeof *sin6);
528         sin6->sin6_family = AF_INET6;
529         sin6->sin6_len = sizeof(*sin6);
530         sin6->sin6_port = port;
531         sin6->sin6_addr = *addr_p;
532         (void)sa6_recoverscope(sin6); /* XXX: should catch errors */
533
534         return (struct sockaddr *)sin6;
535 }
536
537 struct sockaddr *
538 in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p)
539 {
540         struct sockaddr_in sin;
541         struct sockaddr_in6 *sin6_p;
542
543         bzero(&sin, sizeof sin);
544         sin.sin_family = AF_INET;
545         sin.sin_len = sizeof(sin);
546         sin.sin_port = port;
547         sin.sin_addr = *addr_p;
548
549         sin6_p = malloc(sizeof *sin6_p, M_SONAME,
550                 M_WAITOK);
551         in6_sin_2_v4mapsin6(&sin, sin6_p);
552
553         return (struct sockaddr *)sin6_p;
554 }
555
556 int
557 in6_getsockaddr(struct socket *so, struct sockaddr **nam)
558 {
559         struct inpcb *inp;
560         struct in6_addr addr;
561         in_port_t port;
562
563         inp = sotoinpcb(so);
564         KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL"));
565
566         INP_RLOCK(inp);
567         port = inp->inp_lport;
568         addr = inp->in6p_laddr;
569         INP_RUNLOCK(inp);
570
571         *nam = in6_sockaddr(port, &addr);
572         return 0;
573 }
574
575 int
576 in6_getpeeraddr(struct socket *so, struct sockaddr **nam)
577 {
578         struct inpcb *inp;
579         struct in6_addr addr;
580         in_port_t port;
581
582         inp = sotoinpcb(so);
583         KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL"));
584
585         INP_RLOCK(inp);
586         port = inp->inp_fport;
587         addr = inp->in6p_faddr;
588         INP_RUNLOCK(inp);
589
590         *nam = in6_sockaddr(port, &addr);
591         return 0;
592 }
593
594 int
595 in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
596 {
597         struct  inpcb *inp;
598         int     error;
599
600         inp = sotoinpcb(so);
601         KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL"));
602
603 #ifdef INET
604         if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
605                 error = in_getsockaddr(so, nam);
606                 if (error == 0)
607                         in6_sin_2_v4mapsin6_in_sock(nam);
608         } else
609 #endif
610         {
611                 /* scope issues will be handled in in6_getsockaddr(). */
612                 error = in6_getsockaddr(so, nam);
613         }
614
615         return error;
616 }
617
618 int
619 in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
620 {
621         struct  inpcb *inp;
622         int     error;
623
624         inp = sotoinpcb(so);
625         KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL"));
626
627 #ifdef INET
628         if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
629                 error = in_getpeeraddr(so, nam);
630                 if (error == 0)
631                         in6_sin_2_v4mapsin6_in_sock(nam);
632         } else
633 #endif
634         /* scope issues will be handled in in6_getpeeraddr(). */
635         error = in6_getpeeraddr(so, nam);
636
637         return error;
638 }
639
/*
 * in6_pcbnotify() (defined below, after its iterator filter inp_match6()):
 * Pass some notification to all connections of a protocol
 * associated with address dst.  The local address and/or port numbers
 * may be specified to limit the search.  The "usual action" will be
 * taken, depending on the ctlinput cmd.  The caller must filter any
 * cmds that are uninteresting (e.g., no error in the map).
 * Call the protocol specific routine (if any) to report
 * any errors for each matching socket.
 */
649 static bool
650 inp_match6(const struct inpcb *inp, void *v __unused)
651 {
652
653         return ((inp->inp_vflag & INP_IPV6) != 0);
654 }
655
656 void
657 in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr_in6 *sa6_dst,
658     u_int fport_arg, const struct sockaddr_in6 *src, u_int lport_arg,
659     int errno, void *cmdarg,
660     struct inpcb *(*notify)(struct inpcb *, int))
661 {
662         struct inpcb_iterator inpi = INP_ITERATOR(pcbinfo, INPLOOKUP_WLOCKPCB,
663             inp_match6, NULL);
664         struct inpcb *inp;
665         struct sockaddr_in6 sa6_src;
666         u_short fport = fport_arg, lport = lport_arg;
667         u_int32_t flowinfo;
668
669         if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
670                 return;
671
672         /*
673          * note that src can be NULL when we get notify by local fragmentation.
674          */
675         sa6_src = (src == NULL) ? sa6_any : *src;
676         flowinfo = sa6_src.sin6_flowinfo;
677
678         while ((inp = inp_next(&inpi)) != NULL) {
679                 INP_WLOCK_ASSERT(inp);
680                 /*
681                  * If the error designates a new path MTU for a destination
682                  * and the application (associated with this socket) wanted to
683                  * know the value, notify.
684                  * XXX: should we avoid to notify the value to TCP sockets?
685                  */
686                 if (errno == EMSGSIZE && cmdarg != NULL)
687                         ip6_notify_pmtu(inp, sa6_dst, *(uint32_t *)cmdarg);
688
689                 /*
690                  * Detect if we should notify the error. If no source and
691                  * destination ports are specified, but non-zero flowinfo and
692                  * local address match, notify the error. This is the case
693                  * when the error is delivered with an encrypted buffer
694                  * by ESP. Otherwise, just compare addresses and ports
695                  * as usual.
696                  */
697                 if (lport == 0 && fport == 0 && flowinfo &&
698                     inp->inp_socket != NULL &&
699                     flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) &&
700                     IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr))
701                         goto do_notify;
702                 else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
703                                              &sa6_dst->sin6_addr) ||
704                          inp->inp_socket == 0 ||
705                          (lport && inp->inp_lport != lport) ||
706                          (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
707                           !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
708                                               &sa6_src.sin6_addr)) ||
709                          (fport && inp->inp_fport != fport)) {
710                         continue;
711                 }
712
713           do_notify:
714                 if (notify)
715                         (*notify)(inp, errno);
716         }
717 }
718
719 /*
720  * Lookup a PCB based on the local address and port.  Caller must hold the
721  * hash lock.  No inpcb locks or references are acquired.
722  */
723 struct inpcb *
724 in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
725     u_short lport, int lookupflags, struct ucred *cred)
726 {
727         struct inpcb *inp;
728         int matchwild = 3, wildcard;
729
730         KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
731             ("%s: invalid lookup flags %d", __func__, lookupflags));
732
733         INP_HASH_LOCK_ASSERT(pcbinfo);
734
735         if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
736                 struct inpcbhead *head;
737                 /*
738                  * Look for an unconnected (wildcard foreign addr) PCB that
739                  * matches the local address and port we're looking for.
740                  */
741                 head = &pcbinfo->ipi_hashbase[INP_PCBHASH_WILD(lport,
742                     pcbinfo->ipi_hashmask)];
743                 CK_LIST_FOREACH(inp, head, inp_hash) {
744                         /* XXX inp locking */
745                         if ((inp->inp_vflag & INP_IPV6) == 0)
746                                 continue;
747                         if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
748                             IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
749                             inp->inp_lport == lport) {
750                                 /* Found. */
751                                 if (prison_equal_ip6(cred->cr_prison,
752                                     inp->inp_cred->cr_prison))
753                                         return (inp);
754                         }
755                 }
756                 /*
757                  * Not found.
758                  */
759                 return (NULL);
760         } else {
761                 struct inpcbporthead *porthash;
762                 struct inpcbport *phd;
763                 struct inpcb *match = NULL;
764                 /*
765                  * Best fit PCB lookup.
766                  *
767                  * First see if this local port is in use by looking on the
768                  * port hash list.
769                  */
770                 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
771                     pcbinfo->ipi_porthashmask)];
772                 CK_LIST_FOREACH(phd, porthash, phd_hash) {
773                         if (phd->phd_port == lport)
774                                 break;
775                 }
776                 if (phd != NULL) {
777                         /*
778                          * Port is in use by one or more PCBs. Look for best
779                          * fit.
780                          */
781                         CK_LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
782                                 wildcard = 0;
783                                 if (!prison_equal_ip6(cred->cr_prison,
784                                     inp->inp_cred->cr_prison))
785                                         continue;
786                                 /* XXX inp locking */
787                                 if ((inp->inp_vflag & INP_IPV6) == 0)
788                                         continue;
789                                 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
790                                         wildcard++;
791                                 if (!IN6_IS_ADDR_UNSPECIFIED(
792                                         &inp->in6p_laddr)) {
793                                         if (IN6_IS_ADDR_UNSPECIFIED(laddr))
794                                                 wildcard++;
795                                         else if (!IN6_ARE_ADDR_EQUAL(
796                                             &inp->in6p_laddr, laddr))
797                                                 continue;
798                                 } else {
799                                         if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
800                                                 wildcard++;
801                                 }
802                                 if (wildcard < matchwild) {
803                                         match = inp;
804                                         matchwild = wildcard;
805                                         if (matchwild == 0)
806                                                 break;
807                                 }
808                         }
809                 }
810                 return (match);
811         }
812 }
813
814 static bool
815 in6_multi_match(const struct inpcb *inp, void *v __unused)
816 {
817
818         if ((inp->inp_vflag & INP_IPV6) && inp->in6p_moptions != NULL)
819                 return (true);
820         else
821                 return (false);
822 }
823
824 void
825 in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
826 {
827         struct inpcb_iterator inpi = INP_ITERATOR(pcbinfo, INPLOOKUP_RLOCKPCB,
828             in6_multi_match, NULL);
829         struct inpcb *inp;
830         struct in6_multi *inm;
831         struct in6_mfilter *imf;
832         struct ip6_moptions *im6o;
833
834         IN6_MULTI_LOCK_ASSERT();
835
836         while ((inp = inp_next(&inpi)) != NULL) {
837                 INP_RLOCK_ASSERT(inp);
838
839                 im6o = inp->in6p_moptions;
840                 /*
841                  * Unselect the outgoing ifp for multicast if it
842                  * is being detached.
843                  */
844                 if (im6o->im6o_multicast_ifp == ifp)
845                         im6o->im6o_multicast_ifp = NULL;
846                 /*
847                  * Drop multicast group membership if we joined
848                  * through the interface being detached.
849                  */
850 restart:
851                 IP6_MFILTER_FOREACH(imf, &im6o->im6o_head) {
852                         if ((inm = imf->im6f_in6m) == NULL)
853                                 continue;
854                         if (inm->in6m_ifp != ifp)
855                                 continue;
856                         ip6_mfilter_remove(&im6o->im6o_head, imf);
857                         in6_leavegroup_locked(inm, NULL);
858                         ip6_mfilter_free(imf);
859                         goto restart;
860                 }
861         }
862 }
863
864 /*
865  * Check for alternatives when higher level complains
866  * about service problems.  For now, invalidate cached
867  * routing information.  If the route was created dynamically
868  * (by a redirect), time to try a default gateway again.
869  */
870 void
871 in6_losing(struct inpcb *inp)
872 {
873
874         RO_INVALIDATE_CACHE(&inp->inp_route6);
875 }
876
877 /*
878  * After a routing change, flush old routing
879  * and allocate a (hopefully) better one.
880  */
881 struct inpcb *
882 in6_rtchange(struct inpcb *inp, int errno __unused)
883 {
884
885         RO_INVALIDATE_CACHE(&inp->inp_route6);
886         return inp;
887 }
888
889 static bool
890 in6_pcblookup_lb_numa_match(const struct inpcblbgroup *grp, int domain)
891 {
892         return (domain == M_NODOM || domain == grp->il_numa_domain);
893 }
894
895 static struct inpcb *
896 in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo,
897     const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr,
898     uint16_t fport, int lookupflags, uint8_t domain)
899 {
900         const struct inpcblbgrouphead *hdr;
901         struct inpcblbgroup *grp;
902         struct inpcblbgroup *jail_exact, *jail_wild, *local_exact, *local_wild;
903
904         INP_HASH_LOCK_ASSERT(pcbinfo);
905
906         hdr = &pcbinfo->ipi_lbgrouphashbase[
907             INP_PCBPORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)];
908
909         /*
910          * Search for an LB group match based on the following criteria:
911          * - prefer jailed groups to non-jailed groups
912          * - prefer exact source address matches to wildcard matches
913          * - prefer groups bound to the specified NUMA domain 
914          */
915         jail_exact = jail_wild = local_exact = local_wild = NULL;
916         CK_LIST_FOREACH(grp, hdr, il_list) {
917                 bool injail;
918
919 #ifdef INET
920                 if (!(grp->il_vflag & INP_IPV6))
921                         continue;
922 #endif
923                 if (grp->il_lport != lport)
924                         continue;
925
926                 injail = prison_flag(grp->il_cred, PR_IP6) != 0;
927                 if (injail && prison_check_ip6_locked(grp->il_cred->cr_prison,
928                     laddr) != 0)
929                         continue;
930
931                 if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) {
932                         if (injail) {
933                                 jail_exact = grp;
934                                 if (in6_pcblookup_lb_numa_match(grp, domain))
935                                         /* This is a perfect match. */
936                                         goto out;
937                         } else if (local_exact == NULL ||
938                             in6_pcblookup_lb_numa_match(grp, domain)) {
939                                 local_exact = grp;
940                         }
941                 } else if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) &&
942                     (lookupflags & INPLOOKUP_WILDCARD) != 0) {
943                         if (injail) {
944                                 if (jail_wild == NULL ||
945                                     in6_pcblookup_lb_numa_match(grp, domain))
946                                         jail_wild = grp;
947                         } else if (local_wild == NULL ||
948                             in6_pcblookup_lb_numa_match(grp, domain)) {
949                                 local_wild = grp;
950                         }
951                 }
952         }
953
954         if (jail_exact != NULL)
955                 grp = jail_exact;
956         else if (jail_wild != NULL)
957                 grp = jail_wild;
958         else if (local_exact != NULL)
959                 grp = local_exact;
960         else
961                 grp = local_wild;
962         if (grp == NULL)
963                 return (NULL);
964 out:
965         return (grp->il_inp[INP6_PCBLBGROUP_PKTHASH(faddr, lport, fport) %
966             grp->il_inpcnt]);
967 }
968
969 /*
970  * Lookup PCB in hash list.  Used in in_pcb.c as well as here.
971  */
972 struct inpcb *
973 in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
974     u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
975     int lookupflags, struct ifnet *ifp, uint8_t numa_domain)
976 {
977         struct inpcbhead *head;
978         struct inpcb *inp, *tmpinp;
979         u_short fport = fport_arg, lport = lport_arg;
980
981         KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
982             ("%s: invalid lookup flags %d", __func__, lookupflags));
983
984         INP_HASH_LOCK_ASSERT(pcbinfo);
985
986         /*
987          * First look for an exact match.
988          */
989         tmpinp = NULL;
990         head = &pcbinfo->ipi_hashbase[INP6_PCBHASH(faddr, lport, fport,
991             pcbinfo->ipi_hashmask)];
992         CK_LIST_FOREACH(inp, head, inp_hash) {
993                 /* XXX inp locking */
994                 if ((inp->inp_vflag & INP_IPV6) == 0)
995                         continue;
996                 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
997                     IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
998                     inp->inp_fport == fport &&
999                     inp->inp_lport == lport) {
1000                         /*
1001                          * XXX We should be able to directly return
1002                          * the inp here, without any checks.
1003                          * Well unless both bound with SO_REUSEPORT?
1004                          */
1005                         if (prison_flag(inp->inp_cred, PR_IP6))
1006                                 return (inp);
1007                         if (tmpinp == NULL)
1008                                 tmpinp = inp;
1009                 }
1010         }
1011         if (tmpinp != NULL)
1012                 return (tmpinp);
1013
1014         /*
1015          * Then look for a wildcard match, if requested.
1016          */
1017         if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
1018                 struct inpcb *local_wild = NULL, *local_exact = NULL;
1019                 struct inpcb *jail_wild = NULL;
1020                 int injail;
1021
1022                 /*
1023                  * First see if an LB group matches the request before scanning
1024                  * all sockets on this port.
1025                  */
1026                 inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr,
1027                     fport, lookupflags, numa_domain);
1028                 if (inp != NULL)
1029                         return (inp);
1030
1031                 /*
1032                  * Order of socket selection - we always prefer jails.
1033                  *      1. jailed, non-wild.
1034                  *      2. jailed, wild.
1035                  *      3. non-jailed, non-wild.
1036                  *      4. non-jailed, wild.
1037                  */
1038                 head = &pcbinfo->ipi_hashbase[INP_PCBHASH_WILD(lport,
1039                     pcbinfo->ipi_hashmask)];
1040                 CK_LIST_FOREACH(inp, head, inp_hash) {
1041                         /* XXX inp locking */
1042                         if ((inp->inp_vflag & INP_IPV6) == 0)
1043                                 continue;
1044
1045                         if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
1046                             inp->inp_lport != lport) {
1047                                 continue;
1048                         }
1049
1050                         injail = prison_flag(inp->inp_cred, PR_IP6);
1051                         if (injail) {
1052                                 if (prison_check_ip6_locked(
1053                                     inp->inp_cred->cr_prison, laddr) != 0)
1054                                         continue;
1055                         } else {
1056                                 if (local_exact != NULL)
1057                                         continue;
1058                         }
1059
1060                         if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
1061                                 if (injail)
1062                                         return (inp);
1063                                 else
1064                                         local_exact = inp;
1065                         } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
1066                                 if (injail)
1067                                         jail_wild = inp;
1068                                 else
1069                                         local_wild = inp;
1070                         }
1071                 } /* LIST_FOREACH */
1072
1073                 if (jail_wild != NULL)
1074                         return (jail_wild);
1075                 if (local_exact != NULL)
1076                         return (local_exact);
1077                 if (local_wild != NULL)
1078                         return (local_wild);
1079         } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
1080
1081         /*
1082          * Not found.
1083          */
1084         return (NULL);
1085 }
1086
1087 /*
1088  * Lookup PCB in hash list, using pcbinfo tables.  This variation locks the
1089  * hash list lock, and will return the inpcb locked (i.e., requires
1090  * INPLOOKUP_LOCKPCB).
1091  */
1092 static struct inpcb *
1093 in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
1094     u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
1095     struct ifnet *ifp, uint8_t numa_domain)
1096 {
1097         struct inpcb *inp;
1098
1099         smr_enter(pcbinfo->ipi_smr);
1100         inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
1101             lookupflags & INPLOOKUP_WILDCARD, ifp, numa_domain);
1102         if (inp != NULL) {
1103                 if (__predict_false(inp_smr_lock(inp,
1104                     (lookupflags & INPLOOKUP_LOCKMASK)) == false))
1105                         inp = NULL;
1106         } else
1107                 smr_exit(pcbinfo->ipi_smr);
1108
1109         return (inp);
1110 }
1111
1112 /*
1113  * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
1114  * from which a pre-calculated hash value may be extracted.
1115  */
1116 struct inpcb *
1117 in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
1118     struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
1119 {
1120
1121         KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
1122             ("%s: invalid lookup flags %d", __func__, lookupflags));
1123         KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
1124             ("%s: LOCKPCB not set", __func__));
1125
1126         return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
1127             lookupflags, ifp, M_NODOM));
1128 }
1129
1130 struct inpcb *
1131 in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
1132     u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
1133     struct ifnet *ifp, struct mbuf *m)
1134 {
1135
1136         KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
1137             ("%s: invalid lookup flags %d", __func__, lookupflags));
1138         KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
1139             ("%s: LOCKPCB not set", __func__));
1140
1141         return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
1142             lookupflags, ifp, m->m_pkthdr.numa_domain));
1143 }
1144
1145 void
1146 init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int srcordst)
1147 {
1148         struct ip6_hdr *ip;
1149
1150         ip = mtod(m, struct ip6_hdr *);
1151         bzero(sin6, sizeof(*sin6));
1152         sin6->sin6_len = sizeof(*sin6);
1153         sin6->sin6_family = AF_INET6;
1154         sin6->sin6_addr = srcordst ? ip->ip6_dst : ip->ip6_src;
1155
1156         (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */
1157
1158         return;
1159 }