]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/udp_usrreq.c
MFV r305420:
[FreeBSD/FreeBSD.git] / sys / netinet / udp_usrreq.c
1 /*-
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3  *      The Regents of the University of California.
4  * Copyright (c) 2008 Robert N. M. Watson
5  * Copyright (c) 2010-2011 Juniper Networks, Inc.
6  * Copyright (c) 2014 Kevin Lo
7  * All rights reserved.
8  *
9  * Portions of this software were developed by Robert N. M. Watson under
10  * contract to Juniper Networks, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      @(#)udp_usrreq.c        8.6 (Berkeley) 5/23/95
37  */
38
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41
42 #include "opt_inet.h"
43 #include "opt_inet6.h"
44 #include "opt_ipsec.h"
45 #include "opt_rss.h"
46
47 #include <sys/param.h>
48 #include <sys/domain.h>
49 #include <sys/eventhandler.h>
50 #include <sys/jail.h>
51 #include <sys/kernel.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 #include <sys/priv.h>
56 #include <sys/proc.h>
57 #include <sys/protosw.h>
58 #include <sys/sdt.h>
59 #include <sys/signalvar.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/sx.h>
63 #include <sys/sysctl.h>
64 #include <sys/syslog.h>
65 #include <sys/systm.h>
66
67 #include <vm/uma.h>
68
69 #include <net/if.h>
70 #include <net/if_var.h>
71 #include <net/route.h>
72 #include <net/rss_config.h>
73
74 #include <netinet/in.h>
75 #include <netinet/in_kdtrace.h>
76 #include <netinet/in_pcb.h>
77 #include <netinet/in_systm.h>
78 #include <netinet/in_var.h>
79 #include <netinet/ip.h>
80 #ifdef INET6
81 #include <netinet/ip6.h>
82 #endif
83 #include <netinet/ip_icmp.h>
84 #include <netinet/icmp_var.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/ip_options.h>
87 #ifdef INET6
88 #include <netinet6/ip6_var.h>
89 #endif
90 #include <netinet/udp.h>
91 #include <netinet/udp_var.h>
92 #include <netinet/udplite.h>
93 #include <netinet/in_rss.h>
94
95 #ifdef IPSEC
96 #include <netipsec/ipsec.h>
97 #include <netipsec/esp.h>
98 #endif
99
100 #include <machine/in_cksum.h>
101
102 #include <security/mac/mac_framework.h>
103
104 /*
105  * UDP and UDP-Lite protocols implementation.
106  * Per RFC 768, August, 1980.
107  * Per RFC 3828, July, 2004.
108  */
109
110 /*
111  * BSD 4.2 defaulted the udp checksum to be off.  Turning off udp checksums
112  * removes the only data integrity mechanism for packets and malformed
113  * packets that would otherwise be discarded due to bad checksums, and may
114  * cause problems (especially for NFS data blocks).
115  */
/* Per-vnet knob exported as the net.inet.udp "checksum" sysctl; defaults on. */
116 VNET_DEFINE(int, udp_cksum) = 1;
117 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW,
118     &VNET_NAME(udp_cksum), 0, "compute udp checksum");
119
/*
 * When non-zero, udp_input() logs a "Connection attempt to UDP ..." line
 * for each datagram that matches no PCB.  Global, not per-vnet.
 */
120 int     udp_log_in_vain = 0;
121 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
122     &udp_log_in_vain, 0, "Log all incoming UDP packets");
123
/*
 * When non-zero, udp_input() silently drops datagrams that match no PCB
 * instead of answering with an ICMP port unreachable.
 */
124 VNET_DEFINE(int, udp_blackhole) = 0;
125 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
126     &VNET_NAME(udp_blackhole), 0,
127     "Do not send port unreachables for refused connects");
128
/*
 * When non-zero, udp_input() takes the broadcast delivery path only for
 * packets whose mbuf carries M_BCAST, in addition to the in_broadcast()
 * check on the destination address.
 */
129 static VNET_DEFINE(int, udp_require_l2_bcast) = 0;
130 #define V_udp_require_l2_bcast          VNET(udp_require_l2_bcast)
131 SYSCTL_INT(_net_inet_udp, OID_AUTO, require_l2_bcast, CTLFLAG_VNET | CTLFLAG_RW,
132     &VNET_NAME(udp_require_l2_bcast), 0,
133     "Only treat packets sent to an L2 broadcast address as broadcast packets");
134
/* Exported as the "maxdgram" sysctl; not referenced in this part of the file. */
135 u_long  udp_sendspace = 9216;           /* really max datagram size */
136 SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
137     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
138
/*
 * Default receive space: 40 units of 1024 bytes plus one socket address
 * each (sockaddr_in6 when INET6 is configured, sockaddr_in otherwise).
 */
139 u_long  udp_recvspace = 40 * (1024 +
140 #ifdef INET6
141                                       sizeof(struct sockaddr_in6)
142 #else
143                                       sizeof(struct sockaddr_in)
144 #endif
145                                       );        /* 40 1K datagrams */
146
147 SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
148     &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
149
/*
 * Per-vnet PCB bookkeeping.  udp_init() binds udb to udbinfo and
 * udplite_init() binds ulitecb to ulitecbinfo via in_pcbinfo_init().
 */
150 VNET_DEFINE(struct inpcbhead, udb);             /* from udp_var.h */
151 VNET_DEFINE(struct inpcbinfo, udbinfo);
152 VNET_DEFINE(struct inpcbhead, ulitecb);
153 VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
/* UMA zone for struct udpcb; created in udp_init(), used by udp_newudpcb(). */
154 static VNET_DEFINE(uma_zone_t, udpcb_zone);
155 #define V_udpcb_zone                    VNET(udpcb_zone)
156
/*
 * Size passed (twice) to in_pcbinfo_init() by udp_init() and udplite_init();
 * may be overridden at build time.
 */
157 #ifndef UDBHASHSIZE
158 #define UDBHASHSIZE     128
159 #endif
160
/* UDP statistics counters, exported through the "stats" sysctl. */
161 VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat);          /* from udp_var.h */
162 VNET_PCPUSTAT_SYSINIT(udpstat);
163 SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
164     udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
165
166 #ifdef VIMAGE
167 VNET_PCPUSTAT_SYSUNINIT(udpstat);
168 #endif /* VIMAGE */
169 #ifdef INET
170 static void     udp_detach(struct socket *so);
171 static int      udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
172                     struct mbuf *, struct thread *);
173 #endif
174
175 #ifdef IPSEC
176 #ifdef IPSEC_NAT_T
177 #define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP)
178 #ifdef INET
179 static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int);
180 #endif
181 #endif /* IPSEC_NAT_T */
182 #endif /* IPSEC */
183
184 static void
185 udp_zone_change(void *tag)
186 {
187
188         uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
189         uma_zone_set_max(V_udpcb_zone, maxsockets);
190 }
191
192 static int
193 udp_inpcb_init(void *mem, int size, int flags)
194 {
195         struct inpcb *inp;
196
197         inp = mem;
198         INP_LOCK_INIT(inp, "inp", "udpinp");
199         return (0);
200 }
201
202 static int
203 udplite_inpcb_init(void *mem, int size, int flags)
204 {
205         struct inpcb *inp;
206
207         inp = mem;
208         INP_LOCK_INIT(inp, "inp", "udpliteinp");
209         return (0);
210 }
211
212 void
213 udp_init(void)
214 {
215
216         /*
217          * For now default to 2-tuple UDP hashing - until the fragment
218          * reassembly code can also update the flowid.
219          *
220          * Once we can calculate the flowid that way and re-establish
221          * a 4-tuple, flip this to 4-tuple.
222          */
223         in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
224             "udp_inpcb", udp_inpcb_init, NULL, 0,
225             IPI_HASHFIELDS_2TUPLE);
226         V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
227             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
228         uma_zone_set_max(V_udpcb_zone, maxsockets);
229         uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
230         EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
231             EVENTHANDLER_PRI_ANY);
232 }
233
234 void
235 udplite_init(void)
236 {
237
238         in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
239             UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL,
240             0, IPI_HASHFIELDS_2TUPLE);
241 }
242
243 /*
244  * Kernel module interface for updating udpstat.  The argument is an index
245  * into udpstat treated as an array of u_long.  While this encodes the
246  * general layout of udpstat into the caller, it doesn't encode its location,
247  * so that future changes to add, for example, per-CPU stats support won't
248  * cause binary compatibility problems for kernel modules.
249  */
250 void
251 kmod_udpstat_inc(int statnum)
252 {
253
254         counter_u64_add(VNET(udpstat)[statnum], 1);
255 }
256
257 int
258 udp_newudpcb(struct inpcb *inp)
259 {
260         struct udpcb *up;
261
262         up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO);
263         if (up == NULL)
264                 return (ENOBUFS);
265         inp->inp_ppcb = up;
266         return (0);
267 }
268
269 void
270 udp_discardcb(struct udpcb *up)
271 {
272
273         uma_zfree(V_udpcb_zone, up);
274 }
275
276 #ifdef VIMAGE
277 static void
278 udp_destroy(void *unused __unused)
279 {
280
281         in_pcbinfo_destroy(&V_udbinfo);
282         uma_zdestroy(V_udpcb_zone);
283 }
284 VNET_SYSUNINIT(udp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udp_destroy, NULL);
285
286 static void
287 udplite_destroy(void *unused __unused)
288 {
289
290         in_pcbinfo_destroy(&V_ulitecbinfo);
291 }
292 VNET_SYSUNINIT(udplite, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, udplite_destroy,
293     NULL);
294 #endif
295
296 #ifdef INET
297 /*
298  * Subroutine of udp_input(), which appends the provided mbuf chain to the
299  * passed pcb/socket.  The caller must provide a sockaddr_in via udp_in that
300  * contains the source address.  If the socket ends up being an IPv6 socket,
301  * udp_append() will convert to a sockaddr_in6 before passing the address
302  * into the socket code.
303  *
304  * In the normal case udp_append() will return 0, indicating that you
305  * must unlock the inp. However if a tunneling protocol is in place we increment
306  * the inpcb refcnt and unlock the inp, on return from the tunneling protocol we
307  * then decrement the reference count. If the inp_rele returns 1, indicating the
308  * inp is gone, we return that to the caller to tell them *not* to unlock
309  * the inp. In the case of multi-cast this will cause the distribution
310  * to stop (though most tunneling protocols known currently do *not* use
311  * multicast).
312  */
313 static int
314 udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
315     struct sockaddr_in *udp_in)
316 {
317         struct sockaddr *append_sa;
318         struct socket *so;
319         struct mbuf *opts = NULL;
320 #ifdef INET6
321         struct sockaddr_in6 udp_in6;
322 #endif
323         struct udpcb *up;
324
325         INP_LOCK_ASSERT(inp);
326
327         /*
328          * Engage the tunneling protocol.
329          */
330         up = intoudpcb(inp);
331         if (up->u_tun_func != NULL) {
332                 in_pcbref(inp);
333                 INP_RUNLOCK(inp);
334                 (*up->u_tun_func)(n, off, inp, (struct sockaddr *)udp_in,
335                     up->u_tun_ctx);
336                 INP_RLOCK(inp);
337                 return (in_pcbrele_rlocked(inp));
338         }
339
340         off += sizeof(struct udphdr);
341
342 #ifdef IPSEC
343         /* Check AH/ESP integrity. */
344         if (ipsec4_in_reject(n, inp)) {
345                 m_freem(n);
346                 return (0);
347         }
348 #ifdef IPSEC_NAT_T
349         up = intoudpcb(inp);
350         KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
351         if (up->u_flags & UF_ESPINUDP_ALL) {    /* IPSec UDP encaps. */
352                 n = udp4_espdecap(inp, n, off);
353                 if (n == NULL)                          /* Consumed. */
354                         return (0);
355         }
356 #endif /* IPSEC_NAT_T */
357 #endif /* IPSEC */
358 #ifdef MAC
359         if (mac_inpcb_check_deliver(inp, n) != 0) {
360                 m_freem(n);
361                 return (0);
362         }
363 #endif /* MAC */
364         if (inp->inp_flags & INP_CONTROLOPTS ||
365             inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
366 #ifdef INET6
367                 if (inp->inp_vflag & INP_IPV6)
368                         (void)ip6_savecontrol_v4(inp, n, &opts, NULL);
369                 else
370 #endif /* INET6 */
371                         ip_savecontrol(inp, &opts, ip, n);
372         }
373 #ifdef INET6
374         if (inp->inp_vflag & INP_IPV6) {
375                 bzero(&udp_in6, sizeof(udp_in6));
376                 udp_in6.sin6_len = sizeof(udp_in6);
377                 udp_in6.sin6_family = AF_INET6;
378                 in6_sin_2_v4mapsin6(udp_in, &udp_in6);
379                 append_sa = (struct sockaddr *)&udp_in6;
380         } else
381 #endif /* INET6 */
382                 append_sa = (struct sockaddr *)udp_in;
383         m_adj(n, off);
384
385         so = inp->inp_socket;
386         SOCKBUF_LOCK(&so->so_rcv);
387         if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
388                 SOCKBUF_UNLOCK(&so->so_rcv);
389                 m_freem(n);
390                 if (opts)
391                         m_freem(opts);
392                 UDPSTAT_INC(udps_fullsock);
393         } else
394                 sorwakeup_locked(so);
395         return (0);
396 }
397
398 int
399 udp_input(struct mbuf **mp, int *offp, int proto)
400 {
401         struct ip *ip;
402         struct udphdr *uh;
403         struct ifnet *ifp;
404         struct inpcb *inp;
405         uint16_t len, ip_len;
406         struct inpcbinfo *pcbinfo;
407         struct ip save_ip;
408         struct sockaddr_in udp_in;
409         struct mbuf *m;
410         struct m_tag *fwd_tag;
411         int cscov_partial, iphlen;
412
413         m = *mp;
414         iphlen = *offp;
415         ifp = m->m_pkthdr.rcvif;
416         *mp = NULL;
417         UDPSTAT_INC(udps_ipackets);
418
419         /*
420          * Strip IP options, if any; should skip this, make available to
421          * user, and use on returned packets, but we don't yet have a way to
422          * check the checksum with options still present.
423          */
424         if (iphlen > sizeof (struct ip)) {
425                 ip_stripoptions(m);
426                 iphlen = sizeof(struct ip);
427         }
428
429         /*
430          * Get IP and UDP header together in first mbuf.
431          */
432         ip = mtod(m, struct ip *);
433         if (m->m_len < iphlen + sizeof(struct udphdr)) {
434                 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
435                         UDPSTAT_INC(udps_hdrops);
436                         return (IPPROTO_DONE);
437                 }
438                 ip = mtod(m, struct ip *);
439         }
440         uh = (struct udphdr *)((caddr_t)ip + iphlen);
441         cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0;
442
443         /*
444          * Destination port of 0 is illegal, based on RFC768.
445          */
446         if (uh->uh_dport == 0)
447                 goto badunlocked;
448
449         /*
450          * Construct sockaddr format source address.  Stuff source address
451          * and datagram in user buffer.
452          */
453         bzero(&udp_in, sizeof(udp_in));
454         udp_in.sin_len = sizeof(udp_in);
455         udp_in.sin_family = AF_INET;
456         udp_in.sin_port = uh->uh_sport;
457         udp_in.sin_addr = ip->ip_src;
458
459         /*
460          * Make mbuf data length reflect UDP length.  If not enough data to
461          * reflect UDP length, drop.
462          */
463         len = ntohs((u_short)uh->uh_ulen);
464         ip_len = ntohs(ip->ip_len) - iphlen;
465         if (proto == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) {
466                 /* Zero means checksum over the complete packet. */
467                 if (len == 0)
468                         len = ip_len;
469                 cscov_partial = 0;
470         }
471         if (ip_len != len) {
472                 if (len > ip_len || len < sizeof(struct udphdr)) {
473                         UDPSTAT_INC(udps_badlen);
474                         goto badunlocked;
475                 }
476                 if (proto == IPPROTO_UDP)
477                         m_adj(m, len - ip_len);
478         }
479
480         /*
481          * Save a copy of the IP header in case we want restore it for
482          * sending an ICMP error message in response.
483          */
484         if (!V_udp_blackhole)
485                 save_ip = *ip;
486         else
487                 memset(&save_ip, 0, sizeof(save_ip));
488
489         /*
490          * Checksum extended UDP header and data.
491          */
492         if (uh->uh_sum) {
493                 u_short uh_sum;
494
495                 if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
496                     !cscov_partial) {
497                         if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
498                                 uh_sum = m->m_pkthdr.csum_data;
499                         else
500                                 uh_sum = in_pseudo(ip->ip_src.s_addr,
501                                     ip->ip_dst.s_addr, htonl((u_short)len +
502                                     m->m_pkthdr.csum_data + proto));
503                         uh_sum ^= 0xffff;
504                 } else {
505                         char b[9];
506
507                         bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
508                         bzero(((struct ipovly *)ip)->ih_x1, 9);
509                         ((struct ipovly *)ip)->ih_len = (proto == IPPROTO_UDP) ?
510                             uh->uh_ulen : htons(ip_len);
511                         uh_sum = in_cksum(m, len + sizeof (struct ip));
512                         bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
513                 }
514                 if (uh_sum) {
515                         UDPSTAT_INC(udps_badsum);
516                         m_freem(m);
517                         return (IPPROTO_DONE);
518                 }
519         } else {
520                 if (proto == IPPROTO_UDP) {
521                         UDPSTAT_INC(udps_nosum);
522                 } else {
523                         /* UDPLite requires a checksum */
524                         /* XXX: What is the right UDPLite MIB counter here? */
525                         m_freem(m);
526                         return (IPPROTO_DONE);
527                 }
528         }
529
530         pcbinfo = udp_get_inpcbinfo(proto);
531         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
532             ((!V_udp_require_l2_bcast || m->m_flags & M_BCAST) &&
533             in_broadcast(ip->ip_dst, ifp))) {
534                 struct inpcb *last;
535                 struct inpcbhead *pcblist;
536                 struct ip_moptions *imo;
537
538                 INP_INFO_RLOCK(pcbinfo);
539                 pcblist = udp_get_pcblist(proto);
540                 last = NULL;
541                 LIST_FOREACH(inp, pcblist, inp_list) {
542                         if (inp->inp_lport != uh->uh_dport)
543                                 continue;
544 #ifdef INET6
545                         if ((inp->inp_vflag & INP_IPV4) == 0)
546                                 continue;
547 #endif
548                         if (inp->inp_laddr.s_addr != INADDR_ANY &&
549                             inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
550                                 continue;
551                         if (inp->inp_faddr.s_addr != INADDR_ANY &&
552                             inp->inp_faddr.s_addr != ip->ip_src.s_addr)
553                                 continue;
554                         if (inp->inp_fport != 0 &&
555                             inp->inp_fport != uh->uh_sport)
556                                 continue;
557
558                         INP_RLOCK(inp);
559
560                         /*
561                          * XXXRW: Because we weren't holding either the inpcb
562                          * or the hash lock when we checked for a match
563                          * before, we should probably recheck now that the
564                          * inpcb lock is held.
565                          */
566
567                         /*
568                          * Handle socket delivery policy for any-source
569                          * and source-specific multicast. [RFC3678]
570                          */
571                         imo = inp->inp_moptions;
572                         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
573                                 struct sockaddr_in       group;
574                                 int                      blocked;
575                                 if (imo == NULL) {
576                                         INP_RUNLOCK(inp);
577                                         continue;
578                                 }
579                                 bzero(&group, sizeof(struct sockaddr_in));
580                                 group.sin_len = sizeof(struct sockaddr_in);
581                                 group.sin_family = AF_INET;
582                                 group.sin_addr = ip->ip_dst;
583
584                                 blocked = imo_multi_filter(imo, ifp,
585                                         (struct sockaddr *)&group,
586                                         (struct sockaddr *)&udp_in);
587                                 if (blocked != MCAST_PASS) {
588                                         if (blocked == MCAST_NOTGMEMBER)
589                                                 IPSTAT_INC(ips_notmember);
590                                         if (blocked == MCAST_NOTSMEMBER ||
591                                             blocked == MCAST_MUTED)
592                                                 UDPSTAT_INC(udps_filtermcast);
593                                         INP_RUNLOCK(inp);
594                                         continue;
595                                 }
596                         }
597                         if (last != NULL) {
598                                 struct mbuf *n;
599
600                                 if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
601                                         UDP_PROBE(receive, NULL, last, ip,
602                                             last, uh);
603                                         if (udp_append(last, ip, n, iphlen,
604                                                 &udp_in)) {
605                                                 goto inp_lost;
606                                         }
607                                 }
608                                 INP_RUNLOCK(last);
609                         }
610                         last = inp;
611                         /*
612                          * Don't look for additional matches if this one does
613                          * not have either the SO_REUSEPORT or SO_REUSEADDR
614                          * socket options set.  This heuristic avoids
615                          * searching through all pcbs in the common case of a
616                          * non-shared port.  It assumes that an application
617                          * will never clear these options after setting them.
618                          */
619                         if ((last->inp_socket->so_options &
620                             (SO_REUSEPORT|SO_REUSEADDR)) == 0)
621                                 break;
622                 }
623
624                 if (last == NULL) {
625                         /*
626                          * No matching pcb found; discard datagram.  (No need
627                          * to send an ICMP Port Unreachable for a broadcast
628                          * or multicast datgram.)
629                          */
630                         UDPSTAT_INC(udps_noportbcast);
631                         if (inp)
632                                 INP_RUNLOCK(inp);
633                         INP_INFO_RUNLOCK(pcbinfo);
634                         goto badunlocked;
635                 }
636                 UDP_PROBE(receive, NULL, last, ip, last, uh);
637                 if (udp_append(last, ip, m, iphlen, &udp_in) == 0) 
638                         INP_RUNLOCK(last);
639         inp_lost:
640                 INP_INFO_RUNLOCK(pcbinfo);
641                 return (IPPROTO_DONE);
642         }
643
644         /*
645          * Locate pcb for datagram.
646          */
647
648         /*
649          * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
650          */
651         if ((m->m_flags & M_IP_NEXTHOP) &&
652             (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
653                 struct sockaddr_in *next_hop;
654
655                 next_hop = (struct sockaddr_in *)(fwd_tag + 1);
656
657                 /*
658                  * Transparently forwarded. Pretend to be the destination.
659                  * Already got one like this?
660                  */
661                 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
662                     ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
663                 if (!inp) {
664                         /*
665                          * It's new.  Try to find the ambushing socket.
666                          * Because we've rewritten the destination address,
667                          * any hardware-generated hash is ignored.
668                          */
669                         inp = in_pcblookup(pcbinfo, ip->ip_src,
670                             uh->uh_sport, next_hop->sin_addr,
671                             next_hop->sin_port ? htons(next_hop->sin_port) :
672                             uh->uh_dport, INPLOOKUP_WILDCARD |
673                             INPLOOKUP_RLOCKPCB, ifp);
674                 }
675                 /* Remove the tag from the packet. We don't need it anymore. */
676                 m_tag_delete(m, fwd_tag);
677                 m->m_flags &= ~M_IP_NEXTHOP;
678         } else
679                 inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
680                     ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
681                     INPLOOKUP_RLOCKPCB, ifp, m);
682         if (inp == NULL) {
683                 if (udp_log_in_vain) {
684                         char buf[4*sizeof "123"];
685
686                         strcpy(buf, inet_ntoa(ip->ip_dst));
687                         log(LOG_INFO,
688                             "Connection attempt to UDP %s:%d from %s:%d\n",
689                             buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
690                             ntohs(uh->uh_sport));
691                 }
692                 UDPSTAT_INC(udps_noport);
693                 if (m->m_flags & (M_BCAST | M_MCAST)) {
694                         UDPSTAT_INC(udps_noportbcast);
695                         goto badunlocked;
696                 }
697                 if (V_udp_blackhole)
698                         goto badunlocked;
699                 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
700                         goto badunlocked;
701                 *ip = save_ip;
702                 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
703                 return (IPPROTO_DONE);
704         }
705
706         /*
707          * Check the minimum TTL for socket.
708          */
709         INP_RLOCK_ASSERT(inp);
710         if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
711                 INP_RUNLOCK(inp);
712                 m_freem(m);
713                 return (IPPROTO_DONE);
714         }
715         if (cscov_partial) {
716                 struct udpcb *up;
717
718                 up = intoudpcb(inp);
719                 if (up->u_rxcslen == 0 || up->u_rxcslen > len) {
720                         INP_RUNLOCK(inp);
721                         m_freem(m);
722                         return (IPPROTO_DONE);
723                 }
724         }
725
726         UDP_PROBE(receive, NULL, inp, ip, inp, uh);
727         if (udp_append(inp, ip, m, iphlen, &udp_in) == 0) 
728                 INP_RUNLOCK(inp);
729         return (IPPROTO_DONE);
730
731 badunlocked:
732         m_freem(m);
733         return (IPPROTO_DONE);
734 }
735 #endif /* INET */
736
737 /*
738  * Notify a udp user of an asynchronous error; just wake up so that they can
739  * collect error status.
740  */
741 struct inpcb *
742 udp_notify(struct inpcb *inp, int errno)
743 {
744
745         /*
746          * While udp_ctlinput() always calls udp_notify() with a read lock
747          * when invoking it directly, in_pcbnotifyall() currently uses write
748          * locks due to sharing code with TCP.  For now, accept either a read
749          * or a write lock, but a read lock is sufficient.
750          */
751         INP_LOCK_ASSERT(inp);
752         if ((errno == EHOSTUNREACH || errno == ENETUNREACH ||
753              errno == EHOSTDOWN) && inp->inp_route.ro_rt) {
754                 RTFREE(inp->inp_route.ro_rt);
755                 inp->inp_route.ro_rt = (struct rtentry *)NULL;
756         }
757
758         inp->inp_socket->so_error = errno;
759         sorwakeup(inp->inp_socket);
760         sowwakeup(inp->inp_socket);
761         return (inp);
762 }
763
764 #ifdef INET
765 static void
766 udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
767     struct inpcbinfo *pcbinfo)
768 {
769         struct ip *ip = vip;
770         struct udphdr *uh;
771         struct in_addr faddr;
772         struct inpcb *inp;
773
774         faddr = ((struct sockaddr_in *)sa)->sin_addr;
775         if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
776                 return;
777
778         if (PRC_IS_REDIRECT(cmd)) {
779                 /* signal EHOSTDOWN, as it flushes the cached route */
780                 in_pcbnotifyall(&V_udbinfo, faddr, EHOSTDOWN, udp_notify);
781                 return;
782         }
783
784         /*
785          * Hostdead is ugly because it goes linearly through all PCBs.
786          *
787          * XXX: We never get this from ICMP, otherwise it makes an excellent
788          * DoS attack on machines with many connections.
789          */
790         if (cmd == PRC_HOSTDEAD)
791                 ip = NULL;
792         else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
793                 return;
794         if (ip != NULL) {
795                 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
796                 inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
797                     ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
798                 if (inp != NULL) {
799                         INP_RLOCK_ASSERT(inp);
800                         if (inp->inp_socket != NULL) {
801                                 udp_notify(inp, inetctlerrmap[cmd]);
802                         }
803                         INP_RUNLOCK(inp);
804                 } else {
805                         inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
806                                            ip->ip_src, uh->uh_sport,
807                                            INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
808                         if (inp != NULL) {
809                                 struct udpcb *up;
810
811                                 up = intoudpcb(inp);
812                                 if (up->u_icmp_func != NULL) {
813                                         INP_RUNLOCK(inp);
814                                         (*up->u_icmp_func)(cmd, sa, vip, up->u_tun_ctx);
815                                 } else {
816                                         INP_RUNLOCK(inp);
817                                 }
818                         }
819                 }
820         } else
821                 in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
822                     udp_notify);
823 }
824 void
825 udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
826 {
827
828         return (udp_common_ctlinput(cmd, sa, vip, &V_udbinfo));
829 }
830
831 void
832 udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
833 {
834
835         return (udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo));
836 }
837 #endif /* INET */
838
839 static int
840 udp_pcblist(SYSCTL_HANDLER_ARGS)
841 {
842         int error, i, n;
843         struct inpcb *inp, **inp_list;
844         inp_gen_t gencnt;
845         struct xinpgen xig;
846
847         /*
848          * The process of preparing the PCB list is too time-consuming and
849          * resource-intensive to repeat twice on every request.
850          */
851         if (req->oldptr == 0) {
852                 n = V_udbinfo.ipi_count;
853                 n += imax(n / 8, 10);
854                 req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
855                 return (0);
856         }
857
858         if (req->newptr != 0)
859                 return (EPERM);
860
861         /*
862          * OK, now we're committed to doing something.
863          */
864         INP_INFO_RLOCK(&V_udbinfo);
865         gencnt = V_udbinfo.ipi_gencnt;
866         n = V_udbinfo.ipi_count;
867         INP_INFO_RUNLOCK(&V_udbinfo);
868
869         error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
870                 + n * sizeof(struct xinpcb));
871         if (error != 0)
872                 return (error);
873
874         xig.xig_len = sizeof xig;
875         xig.xig_count = n;
876         xig.xig_gen = gencnt;
877         xig.xig_sogen = so_gencnt;
878         error = SYSCTL_OUT(req, &xig, sizeof xig);
879         if (error)
880                 return (error);
881
882         inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
883         if (inp_list == NULL)
884                 return (ENOMEM);
885
886         INP_INFO_RLOCK(&V_udbinfo);
887         for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
888              inp = LIST_NEXT(inp, inp_list)) {
889                 INP_WLOCK(inp);
890                 if (inp->inp_gencnt <= gencnt &&
891                     cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
892                         in_pcbref(inp);
893                         inp_list[i++] = inp;
894                 }
895                 INP_WUNLOCK(inp);
896         }
897         INP_INFO_RUNLOCK(&V_udbinfo);
898         n = i;
899
900         error = 0;
901         for (i = 0; i < n; i++) {
902                 inp = inp_list[i];
903                 INP_RLOCK(inp);
904                 if (inp->inp_gencnt <= gencnt) {
905                         struct xinpcb xi;
906
907                         bzero(&xi, sizeof(xi));
908                         xi.xi_len = sizeof xi;
909                         /* XXX should avoid extra copy */
910                         bcopy(inp, &xi.xi_inp, sizeof *inp);
911                         if (inp->inp_socket)
912                                 sotoxsocket(inp->inp_socket, &xi.xi_socket);
913                         xi.xi_inp.inp_gencnt = inp->inp_gencnt;
914                         INP_RUNLOCK(inp);
915                         error = SYSCTL_OUT(req, &xi, sizeof xi);
916                 } else
917                         INP_RUNLOCK(inp);
918         }
919         INP_INFO_WLOCK(&V_udbinfo);
920         for (i = 0; i < n; i++) {
921                 inp = inp_list[i];
922                 INP_RLOCK(inp);
923                 if (!in_pcbrele_rlocked(inp))
924                         INP_RUNLOCK(inp);
925         }
926         INP_INFO_WUNLOCK(&V_udbinfo);
927
928         if (!error) {
929                 /*
930                  * Give the user an updated idea of our state.  If the
931                  * generation differs from what we told her before, she knows
932                  * that something happened while we were processing this
933                  * request, and it might be necessary to retry.
934                  */
935                 INP_INFO_RLOCK(&V_udbinfo);
936                 xig.xig_gen = V_udbinfo.ipi_gencnt;
937                 xig.xig_sogen = so_gencnt;
938                 xig.xig_count = V_udbinfo.ipi_count;
939                 INP_INFO_RUNLOCK(&V_udbinfo);
940                 error = SYSCTL_OUT(req, &xig, sizeof xig);
941         }
942         free(inp_list, M_TEMP);
943         return (error);
944 }
945
946 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
947     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
948     udp_pcblist, "S,xinpcb", "List of active UDP sockets");
949
950 #ifdef INET
951 static int
952 udp_getcred(SYSCTL_HANDLER_ARGS)
953 {
954         struct xucred xuc;
955         struct sockaddr_in addrs[2];
956         struct inpcb *inp;
957         int error;
958
959         error = priv_check(req->td, PRIV_NETINET_GETCRED);
960         if (error)
961                 return (error);
962         error = SYSCTL_IN(req, addrs, sizeof(addrs));
963         if (error)
964                 return (error);
965         inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
966             addrs[0].sin_addr, addrs[0].sin_port,
967             INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
968         if (inp != NULL) {
969                 INP_RLOCK_ASSERT(inp);
970                 if (inp->inp_socket == NULL)
971                         error = ENOENT;
972                 if (error == 0)
973                         error = cr_canseeinpcb(req->td->td_ucred, inp);
974                 if (error == 0)
975                         cru2x(inp->inp_cred, &xuc);
976                 INP_RUNLOCK(inp);
977         } else
978                 error = ENOENT;
979         if (error == 0)
980                 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
981         return (error);
982 }
983
984 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
985     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
986     udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
987 #endif /* INET */
988
989 int
990 udp_ctloutput(struct socket *so, struct sockopt *sopt)
991 {
992         struct inpcb *inp;
993         struct udpcb *up;
994         int isudplite, error, optval;
995
996         error = 0;
997         isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
998         inp = sotoinpcb(so);
999         KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
1000         INP_WLOCK(inp);
1001         if (sopt->sopt_level != so->so_proto->pr_protocol) {
1002 #ifdef INET6
1003                 if (INP_CHECK_SOCKAF(so, AF_INET6)) {
1004                         INP_WUNLOCK(inp);
1005                         error = ip6_ctloutput(so, sopt);
1006                 }
1007 #endif
1008 #if defined(INET) && defined(INET6)
1009                 else
1010 #endif
1011 #ifdef INET
1012                 {
1013                         INP_WUNLOCK(inp);
1014                         error = ip_ctloutput(so, sopt);
1015                 }
1016 #endif
1017                 return (error);
1018         }
1019
1020         switch (sopt->sopt_dir) {
1021         case SOPT_SET:
1022                 switch (sopt->sopt_name) {
1023                 case UDP_ENCAP:
1024                         INP_WUNLOCK(inp);
1025                         error = sooptcopyin(sopt, &optval, sizeof optval,
1026                                             sizeof optval);
1027                         if (error)
1028                                 break;
1029                         inp = sotoinpcb(so);
1030                         KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
1031                         INP_WLOCK(inp);
1032 #ifdef IPSEC_NAT_T
1033                         up = intoudpcb(inp);
1034                         KASSERT(up != NULL, ("%s: up == NULL", __func__));
1035 #endif
1036                         switch (optval) {
1037                         case 0:
1038                                 /* Clear all UDP encap. */
1039 #ifdef IPSEC_NAT_T
1040                                 up->u_flags &= ~UF_ESPINUDP_ALL;
1041 #endif
1042                                 break;
1043 #ifdef IPSEC_NAT_T
1044                         case UDP_ENCAP_ESPINUDP:
1045                         case UDP_ENCAP_ESPINUDP_NON_IKE:
1046                                 up->u_flags &= ~UF_ESPINUDP_ALL;
1047                                 if (optval == UDP_ENCAP_ESPINUDP)
1048                                         up->u_flags |= UF_ESPINUDP;
1049                                 else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE)
1050                                         up->u_flags |= UF_ESPINUDP_NON_IKE;
1051                                 break;
1052 #endif
1053                         default:
1054                                 error = EINVAL;
1055                                 break;
1056                         }
1057                         INP_WUNLOCK(inp);
1058                         break;
1059                 case UDPLITE_SEND_CSCOV:
1060                 case UDPLITE_RECV_CSCOV:
1061                         if (!isudplite) {
1062                                 INP_WUNLOCK(inp);
1063                                 error = ENOPROTOOPT;
1064                                 break;
1065                         }
1066                         INP_WUNLOCK(inp);
1067                         error = sooptcopyin(sopt, &optval, sizeof(optval),
1068                             sizeof(optval));
1069                         if (error != 0)
1070                                 break;
1071                         inp = sotoinpcb(so);
1072                         KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
1073                         INP_WLOCK(inp);
1074                         up = intoudpcb(inp);
1075                         KASSERT(up != NULL, ("%s: up == NULL", __func__));
1076                         if ((optval != 0 && optval < 8) || (optval > 65535)) {
1077                                 INP_WUNLOCK(inp);
1078                                 error = EINVAL;
1079                                 break;
1080                         }
1081                         if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
1082                                 up->u_txcslen = optval;
1083                         else
1084                                 up->u_rxcslen = optval;
1085                         INP_WUNLOCK(inp);
1086                         break;
1087                 default:
1088                         INP_WUNLOCK(inp);
1089                         error = ENOPROTOOPT;
1090                         break;
1091                 }
1092                 break;
1093         case SOPT_GET:
1094                 switch (sopt->sopt_name) {
1095 #ifdef IPSEC_NAT_T
1096                 case UDP_ENCAP:
1097                         up = intoudpcb(inp);
1098                         KASSERT(up != NULL, ("%s: up == NULL", __func__));
1099                         optval = up->u_flags & UF_ESPINUDP_ALL;
1100                         INP_WUNLOCK(inp);
1101                         error = sooptcopyout(sopt, &optval, sizeof optval);
1102                         break;
1103 #endif
1104                 case UDPLITE_SEND_CSCOV:
1105                 case UDPLITE_RECV_CSCOV:
1106                         if (!isudplite) {
1107                                 INP_WUNLOCK(inp);
1108                                 error = ENOPROTOOPT;
1109                                 break;
1110                         }
1111                         up = intoudpcb(inp);
1112                         KASSERT(up != NULL, ("%s: up == NULL", __func__));
1113                         if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
1114                                 optval = up->u_txcslen;
1115                         else
1116                                 optval = up->u_rxcslen;
1117                         INP_WUNLOCK(inp);
1118                         error = sooptcopyout(sopt, &optval, sizeof(optval));
1119                         break;
1120                 default:
1121                         INP_WUNLOCK(inp);
1122                         error = ENOPROTOOPT;
1123                         break;
1124                 }
1125                 break;
1126         }       
1127         return (error);
1128 }
1129
1130 #ifdef INET
1131 #define UH_WLOCKED      2
1132 #define UH_RLOCKED      1
1133 #define UH_UNLOCKED     0
1134 static int
1135 udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
1136     struct mbuf *control, struct thread *td)
1137 {
1138         struct udpiphdr *ui;
1139         int len = m->m_pkthdr.len;
1140         struct in_addr faddr, laddr;
1141         struct cmsghdr *cm;
1142         struct inpcbinfo *pcbinfo;
1143         struct sockaddr_in *sin, src;
1144         int cscov_partial = 0;
1145         int error = 0;
1146         int ipflags;
1147         u_short fport, lport;
1148         int unlock_udbinfo, unlock_inp;
1149         u_char tos;
1150         uint8_t pr;
1151         uint16_t cscov = 0;
1152         uint32_t flowid = 0;
1153         uint8_t flowtype = M_HASHTYPE_NONE;
1154
1155         /*
1156          * udp_output() may need to temporarily bind or connect the current
1157          * inpcb.  As such, we don't know up front whether we will need the
1158          * pcbinfo lock or not.  Do any work to decide what is needed up
1159          * front before acquiring any locks.
1160          */
1161         if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
1162                 if (control)
1163                         m_freem(control);
1164                 m_freem(m);
1165                 return (EMSGSIZE);
1166         }
1167
1168         src.sin_family = 0;
1169         sin = (struct sockaddr_in *)addr;
1170         if (sin == NULL ||
1171             (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
1172                 INP_WLOCK(inp);
1173                 unlock_inp = UH_WLOCKED;
1174         } else {
1175                 INP_RLOCK(inp);
1176                 unlock_inp = UH_RLOCKED;
1177         }
1178         tos = inp->inp_ip_tos;
1179         if (control != NULL) {
1180                 /*
1181                  * XXX: Currently, we assume all the optional information is
1182                  * stored in a single mbuf.
1183                  */
1184                 if (control->m_next) {
1185                         if (unlock_inp == UH_WLOCKED)
1186                                 INP_WUNLOCK(inp);
1187                         else
1188                                 INP_RUNLOCK(inp);
1189                         m_freem(control);
1190                         m_freem(m);
1191                         return (EINVAL);
1192                 }
1193                 for (; control->m_len > 0;
1194                     control->m_data += CMSG_ALIGN(cm->cmsg_len),
1195                     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
1196                         cm = mtod(control, struct cmsghdr *);
1197                         if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
1198                             || cm->cmsg_len > control->m_len) {
1199                                 error = EINVAL;
1200                                 break;
1201                         }
1202                         if (cm->cmsg_level != IPPROTO_IP)
1203                                 continue;
1204
1205                         switch (cm->cmsg_type) {
1206                         case IP_SENDSRCADDR:
1207                                 if (cm->cmsg_len !=
1208                                     CMSG_LEN(sizeof(struct in_addr))) {
1209                                         error = EINVAL;
1210                                         break;
1211                                 }
1212                                 bzero(&src, sizeof(src));
1213                                 src.sin_family = AF_INET;
1214                                 src.sin_len = sizeof(src);
1215                                 src.sin_port = inp->inp_lport;
1216                                 src.sin_addr =
1217                                     *(struct in_addr *)CMSG_DATA(cm);
1218                                 break;
1219
1220                         case IP_TOS:
1221                                 if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) {
1222                                         error = EINVAL;
1223                                         break;
1224                                 }
1225                                 tos = *(u_char *)CMSG_DATA(cm);
1226                                 break;
1227
1228                         case IP_FLOWID:
1229                                 if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
1230                                         error = EINVAL;
1231                                         break;
1232                                 }
1233                                 flowid = *(uint32_t *) CMSG_DATA(cm);
1234                                 break;
1235
1236                         case IP_FLOWTYPE:
1237                                 if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
1238                                         error = EINVAL;
1239                                         break;
1240                                 }
1241                                 flowtype = *(uint32_t *) CMSG_DATA(cm);
1242                                 break;
1243
1244 #ifdef  RSS
1245                         case IP_RSSBUCKETID:
1246                                 if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
1247                                         error = EINVAL;
1248                                         break;
1249                                 }
1250                                 /* This is just a placeholder for now */
1251                                 break;
1252 #endif  /* RSS */
1253                         default:
1254                                 error = ENOPROTOOPT;
1255                                 break;
1256                         }
1257                         if (error)
1258                                 break;
1259                 }
1260                 m_freem(control);
1261         }
1262         if (error) {
1263                 if (unlock_inp == UH_WLOCKED)
1264                         INP_WUNLOCK(inp);
1265                 else
1266                         INP_RUNLOCK(inp);
1267                 m_freem(m);
1268                 return (error);
1269         }
1270
1271         /*
1272          * Depending on whether or not the application has bound or connected
1273          * the socket, we may have to do varying levels of work.  The optimal
1274          * case is for a connected UDP socket, as a global lock isn't
1275          * required at all.
1276          *
1277          * In order to decide which we need, we require stability of the
1278          * inpcb binding, which we ensure by acquiring a read lock on the
1279          * inpcb.  This doesn't strictly follow the lock order, so we play
1280          * the trylock and retry game; note that we may end up with more
1281          * conservative locks than required the second time around, so later
1282          * assertions have to accept that.  Further analysis of the number of
1283          * misses under contention is required.
1284          *
1285          * XXXRW: Check that hash locking update here is correct.
1286          */
1287         pr = inp->inp_socket->so_proto->pr_protocol;
1288         pcbinfo = udp_get_inpcbinfo(pr);
1289         sin = (struct sockaddr_in *)addr;
1290         if (sin != NULL &&
1291             (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
1292                 INP_HASH_WLOCK(pcbinfo);
1293                 unlock_udbinfo = UH_WLOCKED;
1294         } else if ((sin != NULL && (
1295             (sin->sin_addr.s_addr == INADDR_ANY) ||
1296             (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
1297             (inp->inp_laddr.s_addr == INADDR_ANY) ||
1298             (inp->inp_lport == 0))) ||
1299             (src.sin_family == AF_INET)) {
1300                 INP_HASH_RLOCK(pcbinfo);
1301                 unlock_udbinfo = UH_RLOCKED;
1302         } else
1303                 unlock_udbinfo = UH_UNLOCKED;
1304
1305         /*
1306          * If the IP_SENDSRCADDR control message was specified, override the
1307          * source address for this datagram.  Its use is invalidated if the
1308          * address thus specified is incomplete or clobbers other inpcbs.
1309          */
1310         laddr = inp->inp_laddr;
1311         lport = inp->inp_lport;
1312         if (src.sin_family == AF_INET) {
1313                 INP_HASH_LOCK_ASSERT(pcbinfo);
1314                 if ((lport == 0) ||
1315                     (laddr.s_addr == INADDR_ANY &&
1316                      src.sin_addr.s_addr == INADDR_ANY)) {
1317                         error = EINVAL;
1318                         goto release;
1319                 }
1320                 error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
1321                     &laddr.s_addr, &lport, td->td_ucred);
1322                 if (error)
1323                         goto release;
1324         }
1325
1326         /*
1327          * If a UDP socket has been connected, then a local address/port will
1328          * have been selected and bound.
1329          *
1330          * If a UDP socket has not been connected to, then an explicit
1331          * destination address must be used, in which case a local
1332          * address/port may not have been selected and bound.
1333          */
1334         if (sin != NULL) {
1335                 INP_LOCK_ASSERT(inp);
1336                 if (inp->inp_faddr.s_addr != INADDR_ANY) {
1337                         error = EISCONN;
1338                         goto release;
1339                 }
1340
1341                 /*
1342                  * Jail may rewrite the destination address, so let it do
1343                  * that before we use it.
1344                  */
1345                 error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
1346                 if (error)
1347                         goto release;
1348
1349                 /*
1350                  * If a local address or port hasn't yet been selected, or if
1351                  * the destination address needs to be rewritten due to using
1352                  * a special INADDR_ constant, invoke in_pcbconnect_setup()
1353                  * to do the heavy lifting.  Once a port is selected, we
1354                  * commit the binding back to the socket; we also commit the
1355                  * binding of the address if in jail.
1356                  *
1357                  * If we already have a valid binding and we're not
1358                  * requesting a destination address rewrite, use a fast path.
1359                  */
1360                 if (inp->inp_laddr.s_addr == INADDR_ANY ||
1361                     inp->inp_lport == 0 ||
1362                     sin->sin_addr.s_addr == INADDR_ANY ||
1363                     sin->sin_addr.s_addr == INADDR_BROADCAST) {
1364                         INP_HASH_LOCK_ASSERT(pcbinfo);
1365                         error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
1366                             &lport, &faddr.s_addr, &fport, NULL,
1367                             td->td_ucred);
1368                         if (error)
1369                                 goto release;
1370
1371                         /*
1372                          * XXXRW: Why not commit the port if the address is
1373                          * !INADDR_ANY?
1374                          */
1375                         /* Commit the local port if newly assigned. */
1376                         if (inp->inp_laddr.s_addr == INADDR_ANY &&
1377                             inp->inp_lport == 0) {
1378                                 INP_WLOCK_ASSERT(inp);
1379                                 INP_HASH_WLOCK_ASSERT(pcbinfo);
1380                                 /*
1381                                  * Remember addr if jailed, to prevent
1382                                  * rebinding.
1383                                  */
1384                                 if (prison_flag(td->td_ucred, PR_IP4))
1385                                         inp->inp_laddr = laddr;
1386                                 inp->inp_lport = lport;
1387                                 if (in_pcbinshash(inp) != 0) {
1388                                         inp->inp_lport = 0;
1389                                         error = EAGAIN;
1390                                         goto release;
1391                                 }
1392                                 inp->inp_flags |= INP_ANONPORT;
1393                         }
1394                 } else {
1395                         faddr = sin->sin_addr;
1396                         fport = sin->sin_port;
1397                 }
1398         } else {
1399                 INP_LOCK_ASSERT(inp);
1400                 faddr = inp->inp_faddr;
1401                 fport = inp->inp_fport;
1402                 if (faddr.s_addr == INADDR_ANY) {
1403                         error = ENOTCONN;
1404                         goto release;
1405                 }
1406         }
1407
1408         /*
1409          * Calculate data length and get a mbuf for UDP, IP, and possible
1410          * link-layer headers.  Immediate slide the data pointer back forward
1411          * since we won't use that space at this layer.
1412          */
1413         M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
1414         if (m == NULL) {
1415                 error = ENOBUFS;
1416                 goto release;
1417         }
1418         m->m_data += max_linkhdr;
1419         m->m_len -= max_linkhdr;
1420         m->m_pkthdr.len -= max_linkhdr;
1421
1422         /*
1423          * Fill in mbuf with extended UDP header and addresses and length put
1424          * into network format.
1425          */
1426         ui = mtod(m, struct udpiphdr *);
1427         bzero(ui->ui_x1, sizeof(ui->ui_x1));    /* XXX still needed? */
1428         ui->ui_pr = pr;
1429         ui->ui_src = laddr;
1430         ui->ui_dst = faddr;
1431         ui->ui_sport = lport;
1432         ui->ui_dport = fport;
1433         ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
1434         if (pr == IPPROTO_UDPLITE) {
1435                 struct udpcb *up;
1436                 uint16_t plen;
1437
1438                 up = intoudpcb(inp);
1439                 cscov = up->u_txcslen;
1440                 plen = (u_short)len + sizeof(struct udphdr);
1441                 if (cscov >= plen)
1442                         cscov = 0;
1443                 ui->ui_len = htons(plen);
1444                 ui->ui_ulen = htons(cscov);
1445                 /*
1446                  * For UDP-Lite, checksum coverage length of zero means
1447                  * the entire UDPLite packet is covered by the checksum.
1448                  */
1449                 cscov_partial = (cscov == 0) ? 0 : 1;
1450         } else
1451                 ui->ui_v = IPVERSION << 4;
1452
1453         /*
1454          * Set the Don't Fragment bit in the IP header.
1455          */
1456         if (inp->inp_flags & INP_DONTFRAG) {
1457                 struct ip *ip;
1458
1459                 ip = (struct ip *)&ui->ui_i;
1460                 ip->ip_off |= htons(IP_DF);
1461         }
1462
1463         ipflags = 0;
1464         if (inp->inp_socket->so_options & SO_DONTROUTE)
1465                 ipflags |= IP_ROUTETOIF;
1466         if (inp->inp_socket->so_options & SO_BROADCAST)
1467                 ipflags |= IP_ALLOWBROADCAST;
1468         if (inp->inp_flags & INP_ONESBCAST)
1469                 ipflags |= IP_SENDONES;
1470
1471 #ifdef MAC
1472         mac_inpcb_create_mbuf(inp, m);
1473 #endif
1474
1475         /*
1476          * Set up checksum and output datagram.
1477          */
1478         ui->ui_sum = 0;
1479         if (pr == IPPROTO_UDPLITE) {
1480                 if (inp->inp_flags & INP_ONESBCAST)
1481                         faddr.s_addr = INADDR_BROADCAST;
1482                 if (cscov_partial) {
1483                         if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
1484                                 ui->ui_sum = 0xffff;
1485                 } else {
1486                         if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
1487                                 ui->ui_sum = 0xffff;
1488                 }
1489         } else if (V_udp_cksum) {
1490                 if (inp->inp_flags & INP_ONESBCAST)
1491                         faddr.s_addr = INADDR_BROADCAST;
1492                 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
1493                     htons((u_short)len + sizeof(struct udphdr) + pr));
1494                 m->m_pkthdr.csum_flags = CSUM_UDP;
1495                 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
1496         }
1497         ((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
1498         ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;    /* XXX */
1499         ((struct ip *)ui)->ip_tos = tos;                /* XXX */
1500         UDPSTAT_INC(udps_opackets);
1501
1502         /*
1503          * Setup flowid / RSS information for outbound socket.
1504          *
1505          * Once the UDP code decides to set a flowid some other way,
1506          * this allows the flowid to be overridden by userland.
1507          */
1508         if (flowtype != M_HASHTYPE_NONE) {
1509                 m->m_pkthdr.flowid = flowid;
1510                 M_HASHTYPE_SET(m, flowtype);
1511 #ifdef  RSS
1512         } else {
1513                 uint32_t hash_val, hash_type;
1514                 /*
1515                  * Calculate an appropriate RSS hash for UDP and
1516                  * UDP Lite.
1517                  *
1518                  * The called function will take care of figuring out
1519                  * whether a 2-tuple or 4-tuple hash is required based
1520                  * on the currently configured scheme.
1521                  *
1522                  * Later later on connected socket values should be
1523                  * cached in the inpcb and reused, rather than constantly
1524                  * re-calculating it.
1525                  *
1526                  * UDP Lite is a different protocol number and will
1527                  * likely end up being hashed as a 2-tuple until
1528                  * RSS / NICs grow UDP Lite protocol awareness.
1529                  */
1530                 if (rss_proto_software_hash_v4(faddr, laddr, fport, lport,
1531                     pr, &hash_val, &hash_type) == 0) {
1532                         m->m_pkthdr.flowid = hash_val;
1533                         M_HASHTYPE_SET(m, hash_type);
1534                 }
1535 #endif
1536         }
1537
1538 #ifdef  RSS
1539         /*
1540          * Don't override with the inp cached flowid value.
1541          *
1542          * Depending upon the kind of send being done, the inp
1543          * flowid/flowtype values may actually not be appropriate
1544          * for this particular socket send.
1545          *
1546          * We should either leave the flowid at zero (which is what is
1547          * currently done) or set it to some software generated
1548          * hash value based on the packet contents.
1549          */
1550         ipflags |= IP_NODEFAULTFLOWID;
1551 #endif  /* RSS */
1552
1553         if (unlock_udbinfo == UH_WLOCKED)
1554                 INP_HASH_WUNLOCK(pcbinfo);
1555         else if (unlock_udbinfo == UH_RLOCKED)
1556                 INP_HASH_RUNLOCK(pcbinfo);
1557         UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
1558         error = ip_output(m, inp->inp_options,
1559             (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
1560             inp->inp_moptions, inp);
1561         if (unlock_inp == UH_WLOCKED)
1562                 INP_WUNLOCK(inp);
1563         else
1564                 INP_RUNLOCK(inp);
1565         return (error);
1566
1567 release:
1568         if (unlock_udbinfo == UH_WLOCKED) {
1569                 INP_HASH_WUNLOCK(pcbinfo);
1570                 INP_WUNLOCK(inp);
1571         } else if (unlock_udbinfo == UH_RLOCKED) {
1572                 INP_HASH_RUNLOCK(pcbinfo);
1573                 INP_RUNLOCK(inp);
1574         } else
1575                 INP_RUNLOCK(inp);
1576         m_freem(m);
1577         return (error);
1578 }
1579
1580
1581 #if defined(IPSEC) && defined(IPSEC_NAT_T)
1582 /*
1583  * Potentially decap ESP in UDP frame.  Check for an ESP header
1584  * and optional marker; if present, strip the UDP header and
1585  * push the result through IPSec.
1586  *
1587  * Returns mbuf to be processed (potentially re-allocated) or
1588  * NULL if consumed and/or processed.
1589  */
1590 static struct mbuf *
1591 udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
1592 {
1593         size_t minlen, payload, skip, iphlen;
1594         caddr_t data;
1595         struct udpcb *up;
1596         struct m_tag *tag;
1597         struct udphdr *udphdr;
1598         struct ip *ip;
1599
1600         INP_RLOCK_ASSERT(inp);
1601
1602         /* 
1603          * Pull up data so the longest case is contiguous:
1604          *    IP/UDP hdr + non ESP marker + ESP hdr.
1605          */
1606         minlen = off + sizeof(uint64_t) + sizeof(struct esp);
1607         if (minlen > m->m_pkthdr.len)
1608                 minlen = m->m_pkthdr.len;
1609         if ((m = m_pullup(m, minlen)) == NULL) {
1610                 IPSECSTAT_INC(ips_in_inval);
1611                 return (NULL);          /* Bypass caller processing. */
1612         }
1613         data = mtod(m, caddr_t);        /* Points to ip header. */
1614         payload = m->m_len - off;       /* Size of payload. */
1615
1616         if (payload == 1 && data[off] == '\xff')
1617                 return (m);             /* NB: keepalive packet, no decap. */
1618
1619         up = intoudpcb(inp);
1620         KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
1621         KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0,
1622             ("u_flags 0x%x", up->u_flags));
1623
1624         /* 
1625          * Check that the payload is large enough to hold an
1626          * ESP header and compute the amount of data to remove.
1627          *
1628          * NB: the caller has already done a pullup for us.
1629          * XXX can we assume alignment and eliminate bcopys?
1630          */
1631         if (up->u_flags & UF_ESPINUDP_NON_IKE) {
1632                 /*
1633                  * draft-ietf-ipsec-nat-t-ike-0[01].txt and
1634                  * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring
1635                  * possible AH mode non-IKE marker+non-ESP marker
1636                  * from draft-ietf-ipsec-udp-encaps-00.txt.
1637                  */
1638                 uint64_t marker;
1639
1640                 if (payload <= sizeof(uint64_t) + sizeof(struct esp))
1641                         return (m);     /* NB: no decap. */
1642                 bcopy(data + off, &marker, sizeof(uint64_t));
1643                 if (marker != 0)        /* Non-IKE marker. */
1644                         return (m);     /* NB: no decap. */
1645                 skip = sizeof(uint64_t) + sizeof(struct udphdr);
1646         } else {
1647                 uint32_t spi;
1648
1649                 if (payload <= sizeof(struct esp)) {
1650                         IPSECSTAT_INC(ips_in_inval);
1651                         m_freem(m);
1652                         return (NULL);  /* Discard. */
1653                 }
1654                 bcopy(data + off, &spi, sizeof(uint32_t));
1655                 if (spi == 0)           /* Non-ESP marker. */
1656                         return (m);     /* NB: no decap. */
1657                 skip = sizeof(struct udphdr);
1658         }
1659
1660         /*
1661          * Setup a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember
1662          * the UDP ports. This is required if we want to select
1663          * the right SPD for multiple hosts behind same NAT.
1664          *
1665          * NB: ports are maintained in network byte order everywhere
1666          *     in the NAT-T code.
1667          */
1668         tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
1669                 2 * sizeof(uint16_t), M_NOWAIT);
1670         if (tag == NULL) {
1671                 IPSECSTAT_INC(ips_in_nomem);
1672                 m_freem(m);
1673                 return (NULL);          /* Discard. */
1674         }
1675         iphlen = off - sizeof(struct udphdr);
1676         udphdr = (struct udphdr *)(data + iphlen);
1677         ((uint16_t *)(tag + 1))[0] = udphdr->uh_sport;
1678         ((uint16_t *)(tag + 1))[1] = udphdr->uh_dport;
1679         m_tag_prepend(m, tag);
1680
1681         /*
1682          * Remove the UDP header (and possibly the non ESP marker)
1683          * IP header length is iphlen
1684          * Before:
1685          *   <--- off --->
1686          *   +----+------+-----+
1687          *   | IP |  UDP | ESP |
1688          *   +----+------+-----+
1689          *        <-skip->
1690          * After:
1691          *          +----+-----+
1692          *          | IP | ESP |
1693          *          +----+-----+
1694          *   <-skip->
1695          */
1696         ovbcopy(data, data + skip, iphlen);
1697         m_adj(m, skip);
1698
1699         ip = mtod(m, struct ip *);
1700         ip->ip_len = htons(ntohs(ip->ip_len) - skip);
1701         ip->ip_p = IPPROTO_ESP;
1702
1703         /*
1704          * We cannot yet update the cksums so clear any
1705          * h/w cksum flags as they are no longer valid.
1706          */
1707         if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
1708                 m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
1709
1710         (void) ipsec_common_input(m, iphlen, offsetof(struct ip, ip_p),
1711                                 AF_INET, ip->ip_p);
1712         return (NULL);                  /* NB: consumed, bypass processing. */
1713 }
1714 #endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */
1715
1716 static void
1717 udp_abort(struct socket *so)
1718 {
1719         struct inpcb *inp;
1720         struct inpcbinfo *pcbinfo;
1721
1722         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1723         inp = sotoinpcb(so);
1724         KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
1725         INP_WLOCK(inp);
1726         if (inp->inp_faddr.s_addr != INADDR_ANY) {
1727                 INP_HASH_WLOCK(pcbinfo);
1728                 in_pcbdisconnect(inp);
1729                 inp->inp_laddr.s_addr = INADDR_ANY;
1730                 INP_HASH_WUNLOCK(pcbinfo);
1731                 soisdisconnected(so);
1732         }
1733         INP_WUNLOCK(inp);
1734 }
1735
1736 static int
1737 udp_attach(struct socket *so, int proto, struct thread *td)
1738 {
1739         struct inpcb *inp;
1740         struct inpcbinfo *pcbinfo;
1741         int error;
1742
1743         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1744         inp = sotoinpcb(so);
1745         KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
1746         error = soreserve(so, udp_sendspace, udp_recvspace);
1747         if (error)
1748                 return (error);
1749         INP_INFO_WLOCK(pcbinfo);
1750         error = in_pcballoc(so, pcbinfo);
1751         if (error) {
1752                 INP_INFO_WUNLOCK(pcbinfo);
1753                 return (error);
1754         }
1755
1756         inp = sotoinpcb(so);
1757         inp->inp_vflag |= INP_IPV4;
1758         inp->inp_ip_ttl = V_ip_defttl;
1759
1760         error = udp_newudpcb(inp);
1761         if (error) {
1762                 in_pcbdetach(inp);
1763                 in_pcbfree(inp);
1764                 INP_INFO_WUNLOCK(pcbinfo);
1765                 return (error);
1766         }
1767
1768         INP_WUNLOCK(inp);
1769         INP_INFO_WUNLOCK(pcbinfo);
1770         return (0);
1771 }
1772 #endif /* INET */
1773
1774 int
1775 udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx)
1776 {
1777         struct inpcb *inp;
1778         struct udpcb *up;
1779
1780         KASSERT(so->so_type == SOCK_DGRAM,
1781             ("udp_set_kernel_tunneling: !dgram"));
1782         inp = sotoinpcb(so);
1783         KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
1784         INP_WLOCK(inp);
1785         up = intoudpcb(inp);
1786         if ((up->u_tun_func != NULL) ||
1787             (up->u_icmp_func != NULL)) {
1788                 INP_WUNLOCK(inp);
1789                 return (EBUSY);
1790         }
1791         up->u_tun_func = f;
1792         up->u_icmp_func = i;
1793         up->u_tun_ctx = ctx;
1794         INP_WUNLOCK(inp);
1795         return (0);
1796 }
1797
1798 #ifdef INET
1799 static int
1800 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1801 {
1802         struct inpcb *inp;
1803         struct inpcbinfo *pcbinfo;
1804         int error;
1805
1806         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1807         inp = sotoinpcb(so);
1808         KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
1809         INP_WLOCK(inp);
1810         INP_HASH_WLOCK(pcbinfo);
1811         error = in_pcbbind(inp, nam, td->td_ucred);
1812         INP_HASH_WUNLOCK(pcbinfo);
1813         INP_WUNLOCK(inp);
1814         return (error);
1815 }
1816
1817 static void
1818 udp_close(struct socket *so)
1819 {
1820         struct inpcb *inp;
1821         struct inpcbinfo *pcbinfo;
1822
1823         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1824         inp = sotoinpcb(so);
1825         KASSERT(inp != NULL, ("udp_close: inp == NULL"));
1826         INP_WLOCK(inp);
1827         if (inp->inp_faddr.s_addr != INADDR_ANY) {
1828                 INP_HASH_WLOCK(pcbinfo);
1829                 in_pcbdisconnect(inp);
1830                 inp->inp_laddr.s_addr = INADDR_ANY;
1831                 INP_HASH_WUNLOCK(pcbinfo);
1832                 soisdisconnected(so);
1833         }
1834         INP_WUNLOCK(inp);
1835 }
1836
1837 static int
1838 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1839 {
1840         struct inpcb *inp;
1841         struct inpcbinfo *pcbinfo;
1842         struct sockaddr_in *sin;
1843         int error;
1844
1845         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1846         inp = sotoinpcb(so);
1847         KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
1848         INP_WLOCK(inp);
1849         if (inp->inp_faddr.s_addr != INADDR_ANY) {
1850                 INP_WUNLOCK(inp);
1851                 return (EISCONN);
1852         }
1853         sin = (struct sockaddr_in *)nam;
1854         error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
1855         if (error != 0) {
1856                 INP_WUNLOCK(inp);
1857                 return (error);
1858         }
1859         INP_HASH_WLOCK(pcbinfo);
1860         error = in_pcbconnect(inp, nam, td->td_ucred);
1861         INP_HASH_WUNLOCK(pcbinfo);
1862         if (error == 0)
1863                 soisconnected(so);
1864         INP_WUNLOCK(inp);
1865         return (error);
1866 }
1867
1868 static void
1869 udp_detach(struct socket *so)
1870 {
1871         struct inpcb *inp;
1872         struct inpcbinfo *pcbinfo;
1873         struct udpcb *up;
1874
1875         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1876         inp = sotoinpcb(so);
1877         KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
1878         KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
1879             ("udp_detach: not disconnected"));
1880         INP_INFO_WLOCK(pcbinfo);
1881         INP_WLOCK(inp);
1882         up = intoudpcb(inp);
1883         KASSERT(up != NULL, ("%s: up == NULL", __func__));
1884         inp->inp_ppcb = NULL;
1885         in_pcbdetach(inp);
1886         in_pcbfree(inp);
1887         INP_INFO_WUNLOCK(pcbinfo);
1888         udp_discardcb(up);
1889 }
1890
1891 static int
1892 udp_disconnect(struct socket *so)
1893 {
1894         struct inpcb *inp;
1895         struct inpcbinfo *pcbinfo;
1896
1897         pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
1898         inp = sotoinpcb(so);
1899         KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
1900         INP_WLOCK(inp);
1901         if (inp->inp_faddr.s_addr == INADDR_ANY) {
1902                 INP_WUNLOCK(inp);
1903                 return (ENOTCONN);
1904         }
1905         INP_HASH_WLOCK(pcbinfo);
1906         in_pcbdisconnect(inp);
1907         inp->inp_laddr.s_addr = INADDR_ANY;
1908         INP_HASH_WUNLOCK(pcbinfo);
1909         SOCK_LOCK(so);
1910         so->so_state &= ~SS_ISCONNECTED;                /* XXX */
1911         SOCK_UNLOCK(so);
1912         INP_WUNLOCK(inp);
1913         return (0);
1914 }
1915
/*
 * Send entry point for UDP sockets: looks up the socket's inpcb and
 * passes the data, destination address and control mbuf on to
 * udp_output(), returning its result.  'flags' is not used here.
 */
static int
udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
    struct mbuf *control, struct thread *td)
{
	struct inpcb *pcb = sotoinpcb(so);

	KASSERT(pcb != NULL, ("udp_send: inp == NULL"));

	return (udp_output(pcb, m, addr, control, td));
}
1926 #endif /* INET */
1927
/*
 * Shut down the sending side of a UDP socket by calling
 * socantsendmore() with the pcb write-locked.  Always returns 0.
 */
int
udp_shutdown(struct socket *so)
{
	struct inpcb *pcb = sotoinpcb(so);

	KASSERT(pcb != NULL, ("udp_shutdown: inp == NULL"));

	INP_WLOCK(pcb);
	socantsendmore(so);
	INP_WUNLOCK(pcb);

	return (0);
}
1940
1941 #ifdef INET
1942 struct pr_usrreqs udp_usrreqs = {
1943         .pru_abort =            udp_abort,
1944         .pru_attach =           udp_attach,
1945         .pru_bind =             udp_bind,
1946         .pru_connect =          udp_connect,
1947         .pru_control =          in_control,
1948         .pru_detach =           udp_detach,
1949         .pru_disconnect =       udp_disconnect,
1950         .pru_peeraddr =         in_getpeeraddr,
1951         .pru_send =             udp_send,
1952         .pru_soreceive =        soreceive_dgram,
1953         .pru_sosend =           sosend_dgram,
1954         .pru_shutdown =         udp_shutdown,
1955         .pru_sockaddr =         in_getsockaddr,
1956         .pru_sosetlabel =       in_pcbsosetlabel,
1957         .pru_close =            udp_close,
1958 };
1959 #endif /* INET */