]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/udp_usrreq.c
MFC:
[FreeBSD/FreeBSD.git] / sys / netinet / udp_usrreq.c
1 /*-
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      @(#)udp_usrreq.c        8.6 (Berkeley) 5/23/95
30  * $FreeBSD$
31  */
32
33 #include "opt_ipfw.h"
34 #include "opt_ipsec.h"
35 #include "opt_inet6.h"
36 #include "opt_mac.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/domain.h>
41 #include <sys/eventhandler.h>
42 #include <sys/jail.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/mac.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/proc.h>
49 #include <sys/protosw.h>
50 #include <sys/signalvar.h>
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/sx.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56
57 #include <vm/uma.h>
58
59 #include <net/if.h>
60 #include <net/route.h>
61
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_var.h>
66 #include <netinet/ip.h>
67 #ifdef INET6
68 #include <netinet/ip6.h>
69 #endif
70 #include <netinet/ip_icmp.h>
71 #include <netinet/icmp_var.h>
72 #include <netinet/ip_var.h>
73 #ifdef INET6
74 #include <netinet6/ip6_var.h>
75 #endif
76 #include <netinet/udp.h>
77 #include <netinet/udp_var.h>
78
79 #ifdef FAST_IPSEC
80 #include <netipsec/ipsec.h>
81 #endif /*FAST_IPSEC*/
82
83 #ifdef IPSEC
84 #include <netinet6/ipsec.h>
85 #endif /*IPSEC*/
86
87 #include <machine/in_cksum.h>
88
89 /*
90  * UDP protocol implementation.
91  * Per RFC 768, August, 1980.
92  */
93 #ifndef COMPAT_42
94 static int      udpcksum = 1;
95 #else
96 static int      udpcksum = 0;           /* XXX */
97 #endif
98 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
99                 &udpcksum, 0, "");
100
101 int     log_in_vain = 0;
102 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
103     &log_in_vain, 0, "Log all incoming UDP packets");
104
105 static int      blackhole = 0;
106 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
107         &blackhole, 0, "Do not send port unreachables for refused connects");
108
109 static int      strict_mcast_mship = 0;
110 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
111         &strict_mcast_mship, 0, "Only send multicast to member sockets");
112
113 struct  inpcbhead udb;          /* from udp_var.h */
114 #define udb6    udb  /* for KAME src sync over BSD*'s */
115 struct  inpcbinfo udbinfo;
116
117 #ifndef UDBHASHSIZE
118 #define UDBHASHSIZE 16
119 #endif
120
121 struct  udpstat udpstat;        /* from udp_var.h */
122 SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
123     &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
124
125 static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
126                 int off, struct sockaddr_in *udp_in);
127
128 static int udp_detach(struct socket *so);
129 static  int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
130                 struct mbuf *, struct thread *);
131
132 static void
133 udp_zone_change(void *tag)
134 {
135
136         uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
137 }
138
/*
 * Item-initialisation hook handed to uma_zcreate() by udp_init().
 * Sets up the lock embedded in the inpcb stored at mem; size and flags
 * are not consulted.  Always returns 0.
 */
static int
udp_inpcb_init(void *mem, int size, int flags)
{
	struct inpcb *pcb;

	pcb = (struct inpcb *)mem;
	INP_LOCK_INIT(pcb, "inp", "udpinp");
	return (0);
}
147
148 void
149 udp_init()
150 {
151         INP_INFO_LOCK_INIT(&udbinfo, "udp");
152         LIST_INIT(&udb);
153         udbinfo.listhead = &udb;
154         udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
155         udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
156                                         &udbinfo.porthashmask);
157         udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL,
158             NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
159         uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
160         EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
161                 EVENTHANDLER_PRI_ANY);
162 }
163
164 void
165 udp_input(m, off)
166         register struct mbuf *m;
167         int off;
168 {
169         int iphlen = off;
170         register struct ip *ip;
171         register struct udphdr *uh;
172         register struct inpcb *inp;
173         int len;
174         struct ip save_ip;
175         struct sockaddr_in udp_in;
176 #ifdef IPFIREWALL_FORWARD
177         struct m_tag *fwd_tag;
178 #endif
179
180         udpstat.udps_ipackets++;
181
182         /*
183          * Strip IP options, if any; should skip this,
184          * make available to user, and use on returned packets,
185          * but we don't yet have a way to check the checksum
186          * with options still present.
187          */
188         if (iphlen > sizeof (struct ip)) {
189                 ip_stripoptions(m, (struct mbuf *)0);
190                 iphlen = sizeof(struct ip);
191         }
192
193         /*
194          * Get IP and UDP header together in first mbuf.
195          */
196         ip = mtod(m, struct ip *);
197         if (m->m_len < iphlen + sizeof(struct udphdr)) {
198                 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
199                         udpstat.udps_hdrops++;
200                         return;
201                 }
202                 ip = mtod(m, struct ip *);
203         }
204         uh = (struct udphdr *)((caddr_t)ip + iphlen);
205
206         /* destination port of 0 is illegal, based on RFC768. */
207         if (uh->uh_dport == 0)
208                 goto badunlocked;
209
210         /*
211          * Construct sockaddr format source address.
212          * Stuff source address and datagram in user buffer.
213          */
214         bzero(&udp_in, sizeof(udp_in));
215         udp_in.sin_len = sizeof(udp_in);
216         udp_in.sin_family = AF_INET;
217         udp_in.sin_port = uh->uh_sport;
218         udp_in.sin_addr = ip->ip_src;
219
220         /*
221          * Make mbuf data length reflect UDP length.
222          * If not enough data to reflect UDP length, drop.
223          */
224         len = ntohs((u_short)uh->uh_ulen);
225         if (ip->ip_len != len) {
226                 if (len > ip->ip_len || len < sizeof(struct udphdr)) {
227                         udpstat.udps_badlen++;
228                         goto badunlocked;
229                 }
230                 m_adj(m, len - ip->ip_len);
231                 /* ip->ip_len = len; */
232         }
233         /*
234          * Save a copy of the IP header in case we want restore it
235          * for sending an ICMP error message in response.
236          */
237         if (!blackhole)
238                 save_ip = *ip;
239
240         /*
241          * Checksum extended UDP header and data.
242          */
243         if (uh->uh_sum) {
244                 u_short uh_sum;
245                 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
246                         if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
247                                 uh_sum = m->m_pkthdr.csum_data;
248                         else
249                                 uh_sum = in_pseudo(ip->ip_src.s_addr,
250                                     ip->ip_dst.s_addr, htonl((u_short)len +
251                                     m->m_pkthdr.csum_data + IPPROTO_UDP));
252                         uh_sum ^= 0xffff;
253                 } else {
254                         char b[9];
255                         bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
256                         bzero(((struct ipovly *)ip)->ih_x1, 9);
257                         ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
258                         uh_sum = in_cksum(m, len + sizeof (struct ip));
259                         bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
260                 }
261                 if (uh_sum) {
262                         udpstat.udps_badsum++;
263                         m_freem(m);
264                         return;
265                 }
266         } else
267                 udpstat.udps_nosum++;
268
269 #ifdef IPFIREWALL_FORWARD
270         /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
271         fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
272
273         if (fwd_tag != NULL) {
274                 struct sockaddr_in *next_hop;
275
276                 /* Do the hack. */
277                 next_hop = (struct sockaddr_in *)(fwd_tag + 1);
278                 ip->ip_dst = next_hop->sin_addr;
279                 uh->uh_dport = ntohs(next_hop->sin_port);
280                 /* Remove the tag from the packet.  We don't need it anymore. */
281                 m_tag_delete(m, fwd_tag);
282         }
283 #endif
284
285         INP_INFO_RLOCK(&udbinfo);
286
287         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
288             in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
289                 struct inpcb *last;
290                 /*
291                  * Deliver a multicast or broadcast datagram to *all* sockets
292                  * for which the local and remote addresses and ports match
293                  * those of the incoming datagram.  This allows more than
294                  * one process to receive multi/broadcasts on the same port.
295                  * (This really ought to be done for unicast datagrams as
296                  * well, but that would cause problems with existing
297                  * applications that open both address-specific sockets and
298                  * a wildcard socket listening to the same port -- they would
299                  * end up receiving duplicates of every unicast datagram.
300                  * Those applications open the multiple sockets to overcome an
301                  * inadequacy of the UDP socket interface, but for backwards
302                  * compatibility we avoid the problem here rather than
303                  * fixing the interface.  Maybe 4.5BSD will remedy this?)
304                  */
305
306                 /*
307                  * Locate pcb(s) for datagram.
308                  * (Algorithm copied from raw_intr().)
309                  */
310                 last = NULL;
311                 LIST_FOREACH(inp, &udb, inp_list) {
312                         if (inp->inp_lport != uh->uh_dport)
313                                 continue;
314 #ifdef INET6
315                         if ((inp->inp_vflag & INP_IPV4) == 0)
316                                 continue;
317 #endif
318                         if (inp->inp_laddr.s_addr != INADDR_ANY) {
319                                 if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
320                                         continue;
321                         }
322                         if (inp->inp_faddr.s_addr != INADDR_ANY) {
323                                 if (inp->inp_faddr.s_addr !=
324                                     ip->ip_src.s_addr ||
325                                     inp->inp_fport != uh->uh_sport)
326                                         continue;
327                         }
328                         INP_LOCK(inp);
329
330                         /*
331                          * Check multicast packets to make sure they are only
332                          * sent to sockets with multicast memberships for the
333                          * packet's destination address and arrival interface
334                          */
335 #define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)])
336 #define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships)
337                         if (strict_mcast_mship && inp->inp_moptions != NULL) {
338                                 int mship, foundmship = 0;
339
340                                 for (mship = 0; mship < NMSHIPS(inp); mship++) {
341                                         if (MSHIP(inp, mship)->inm_addr.s_addr
342                                             == ip->ip_dst.s_addr &&
343                                             MSHIP(inp, mship)->inm_ifp
344                                             == m->m_pkthdr.rcvif) {
345                                                 foundmship = 1;
346                                                 break;
347                                         }
348                                 }
349                                 if (foundmship == 0) {
350                                         INP_UNLOCK(inp);
351                                         continue;
352                                 }
353                         }
354 #undef NMSHIPS
355 #undef MSHIP
356                         if (last != NULL) {
357                                 struct mbuf *n;
358
359                                 n = m_copy(m, 0, M_COPYALL);
360                                 if (n != NULL)
361                                         udp_append(last, ip, n,
362                                                    iphlen +
363                                                    sizeof(struct udphdr),
364                                                    &udp_in);
365                                 INP_UNLOCK(last);
366                         }
367                         last = inp;
368                         /*
369                          * Don't look for additional matches if this one does
370                          * not have either the SO_REUSEPORT or SO_REUSEADDR
371                          * socket options set.  This heuristic avoids searching
372                          * through all pcbs in the common case of a non-shared
373                          * port.  It * assumes that an application will never
374                          * clear these options after setting them.
375                          */
376                         if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0)
377                                 break;
378                 }
379
380                 if (last == NULL) {
381                         /*
382                          * No matching pcb found; discard datagram.
383                          * (No need to send an ICMP Port Unreachable
384                          * for a broadcast or multicast datgram.)
385                          */
386                         udpstat.udps_noportbcast++;
387                         goto badheadlocked;
388                 }
389                 udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
390                     &udp_in);
391                 INP_UNLOCK(last);
392                 INP_INFO_RUNLOCK(&udbinfo);
393                 return;
394         }
395         /*
396          * Locate pcb for datagram.
397          */
398         inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
399             ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
400         if (inp == NULL) {
401                 if (log_in_vain) {
402                         char buf[4*sizeof "123"];
403
404                         strcpy(buf, inet_ntoa(ip->ip_dst));
405                         log(LOG_INFO,
406                             "Connection attempt to UDP %s:%d from %s:%d\n",
407                             buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
408                             ntohs(uh->uh_sport));
409                 }
410                 udpstat.udps_noport++;
411                 if (m->m_flags & (M_BCAST | M_MCAST)) {
412                         udpstat.udps_noportbcast++;
413                         goto badheadlocked;
414                 }
415                 if (blackhole)
416                         goto badheadlocked;
417                 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
418                         goto badheadlocked;
419                 *ip = save_ip;
420                 ip->ip_len += iphlen;
421                 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
422                 INP_INFO_RUNLOCK(&udbinfo);
423                 return;
424         }
425         INP_LOCK(inp);
426         /* Check the minimum TTL for socket. */
427         if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
428                 goto badheadlocked;
429         udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in);
430         INP_UNLOCK(inp);
431         INP_INFO_RUNLOCK(&udbinfo);
432         return;
433
434 badheadlocked:
435         if (inp)
436                 INP_UNLOCK(inp);
437         INP_INFO_RUNLOCK(&udbinfo);
438 badunlocked:
439         m_freem(m);
440         return;
441 }
442
443 /*
444  * Subroutine of udp_input(), which appends the provided mbuf chain to the
445  * passed pcb/socket.  The caller must provide a sockaddr_in via udp_in that
446  * contains the source address.  If the socket ends up being an IPv6 socket,
447  * udp_append() will convert to a sockaddr_in6 before passing the address
448  * into the socket code.
449  */
450 static void
451 udp_append(last, ip, n, off, udp_in)
452         struct inpcb *last;
453         struct ip *ip;
454         struct mbuf *n;
455         int off;
456         struct sockaddr_in *udp_in;
457 {
458         struct sockaddr *append_sa;
459         struct socket *so;
460         struct mbuf *opts = 0;
461 #ifdef INET6
462         struct sockaddr_in6 udp_in6;
463 #endif
464
465         INP_LOCK_ASSERT(last);
466
467 #if defined(IPSEC) || defined(FAST_IPSEC)
468         /* check AH/ESP integrity. */
469         if (ipsec4_in_reject(n, last)) {
470 #ifdef IPSEC
471                 ipsecstat.in_polvio++;
472 #endif /*IPSEC*/
473                 m_freem(n);
474                 return;
475         }
476 #endif /*IPSEC || FAST_IPSEC*/
477 #ifdef MAC
478         if (mac_check_inpcb_deliver(last, n) != 0) {
479                 m_freem(n);
480                 return;
481         }
482 #endif
483         if (last->inp_flags & INP_CONTROLOPTS ||
484             last->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
485 #ifdef INET6
486                 if (last->inp_vflag & INP_IPV6) {
487                         int savedflags;
488
489                         savedflags = last->inp_flags;
490                         last->inp_flags &= ~INP_UNMAPPABLEOPTS;
491                         ip6_savecontrol(last, n, &opts);
492                         last->inp_flags = savedflags;
493                 } else
494 #endif
495                 ip_savecontrol(last, &opts, ip, n);
496         }
497 #ifdef INET6
498         if (last->inp_vflag & INP_IPV6) {
499                 bzero(&udp_in6, sizeof(udp_in6));
500                 udp_in6.sin6_len = sizeof(udp_in6);
501                 udp_in6.sin6_family = AF_INET6;
502                 in6_sin_2_v4mapsin6(udp_in, &udp_in6);
503                 append_sa = (struct sockaddr *)&udp_in6;
504         } else
505 #endif
506         append_sa = (struct sockaddr *)udp_in;
507         m_adj(n, off);
508
509         so = last->inp_socket;
510         SOCKBUF_LOCK(&so->so_rcv);
511         if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
512                 m_freem(n);
513                 if (opts)
514                         m_freem(opts);
515                 udpstat.udps_fullsock++;
516                 SOCKBUF_UNLOCK(&so->so_rcv);
517         } else
518                 sorwakeup_locked(so);
519 }
520
521 /*
522  * Notify a udp user of an asynchronous error;
523  * just wake up so that he can collect error status.
524  */
525 struct inpcb *
526 udp_notify(inp, errno)
527         register struct inpcb *inp;
528         int errno;
529 {
530         inp->inp_socket->so_error = errno;
531         sorwakeup(inp->inp_socket);
532         sowwakeup(inp->inp_socket);
533         return inp;
534 }
535
536 void
537 udp_ctlinput(cmd, sa, vip)
538         int cmd;
539         struct sockaddr *sa;
540         void *vip;
541 {
542         struct ip *ip = vip;
543         struct udphdr *uh;
544         struct inpcb *(*notify)(struct inpcb *, int) = udp_notify;
545         struct in_addr faddr;
546         struct inpcb *inp;
547
548         faddr = ((struct sockaddr_in *)sa)->sin_addr;
549         if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
550                 return;
551
552         /*
553          * Redirects don't need to be handled up here.
554          */
555         if (PRC_IS_REDIRECT(cmd))
556                 return;
557         /*
558          * Hostdead is ugly because it goes linearly through all PCBs.
559          * XXX: We never get this from ICMP, otherwise it makes an
560          * excellent DoS attack on machines with many connections.
561          */
562         if (cmd == PRC_HOSTDEAD)
563                 ip = 0;
564         else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
565                 return;
566         if (ip) {
567                 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
568                 INP_INFO_RLOCK(&udbinfo);
569                 inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
570                     ip->ip_src, uh->uh_sport, 0, NULL);
571                 if (inp != NULL) {
572                         INP_LOCK(inp);
573                         if (inp->inp_socket != NULL) {
574                                 (*notify)(inp, inetctlerrmap[cmd]);
575                         }
576                         INP_UNLOCK(inp);
577                 }
578                 INP_INFO_RUNLOCK(&udbinfo);
579         } else
580                 in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify);
581 }
582
583 static int
584 udp_pcblist(SYSCTL_HANDLER_ARGS)
585 {
586         int error, i, n;
587         struct inpcb *inp, **inp_list;
588         inp_gen_t gencnt;
589         struct xinpgen xig;
590
591         /*
592          * The process of preparing the TCB list is too time-consuming and
593          * resource-intensive to repeat twice on every request.
594          */
595         if (req->oldptr == 0) {
596                 n = udbinfo.ipi_count;
597                 req->oldidx = 2 * (sizeof xig)
598                         + (n + n/8) * sizeof(struct xinpcb);
599                 return 0;
600         }
601
602         if (req->newptr != 0)
603                 return EPERM;
604
605         /*
606          * OK, now we're committed to doing something.
607          */
608         INP_INFO_RLOCK(&udbinfo);
609         gencnt = udbinfo.ipi_gencnt;
610         n = udbinfo.ipi_count;
611         INP_INFO_RUNLOCK(&udbinfo);
612
613         error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
614                 + n * sizeof(struct xinpcb));
615         if (error != 0)
616                 return (error);
617
618         xig.xig_len = sizeof xig;
619         xig.xig_count = n;
620         xig.xig_gen = gencnt;
621         xig.xig_sogen = so_gencnt;
622         error = SYSCTL_OUT(req, &xig, sizeof xig);
623         if (error)
624                 return error;
625
626         inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
627         if (inp_list == 0)
628                 return ENOMEM;
629
630         INP_INFO_RLOCK(&udbinfo);
631         for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n;
632              inp = LIST_NEXT(inp, inp_list)) {
633                 INP_LOCK(inp);
634                 if (inp->inp_gencnt <= gencnt &&
635                     cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
636                         inp_list[i++] = inp;
637                 INP_UNLOCK(inp);
638         }
639         INP_INFO_RUNLOCK(&udbinfo);
640         n = i;
641
642         error = 0;
643         for (i = 0; i < n; i++) {
644                 inp = inp_list[i];
645                 INP_LOCK(inp);
646                 if (inp->inp_gencnt <= gencnt) {
647                         struct xinpcb xi;
648                         bzero(&xi, sizeof(xi));
649                         xi.xi_len = sizeof xi;
650                         /* XXX should avoid extra copy */
651                         bcopy(inp, &xi.xi_inp, sizeof *inp);
652                         if (inp->inp_socket)
653                                 sotoxsocket(inp->inp_socket, &xi.xi_socket);
654                         xi.xi_inp.inp_gencnt = inp->inp_gencnt;
655                         INP_UNLOCK(inp);
656                         error = SYSCTL_OUT(req, &xi, sizeof xi);
657                 } else
658                         INP_UNLOCK(inp);        
659         }
660         if (!error) {
661                 /*
662                  * Give the user an updated idea of our state.
663                  * If the generation differs from what we told
664                  * her before, she knows that something happened
665                  * while we were processing this request, and it
666                  * might be necessary to retry.
667                  */
668                 INP_INFO_RLOCK(&udbinfo);
669                 xig.xig_gen = udbinfo.ipi_gencnt;
670                 xig.xig_sogen = so_gencnt;
671                 xig.xig_count = udbinfo.ipi_count;
672                 INP_INFO_RUNLOCK(&udbinfo);
673                 error = SYSCTL_OUT(req, &xig, sizeof xig);
674         }
675         free(inp_list, M_TEMP);
676         return error;
677 }
678
679 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
680             udp_pcblist, "S,xinpcb", "List of active UDP sockets");
681
682 static int
683 udp_getcred(SYSCTL_HANDLER_ARGS)
684 {
685         struct xucred xuc;
686         struct sockaddr_in addrs[2];
687         struct inpcb *inp;
688         int error;
689
690         error = suser_cred(req->td->td_ucred, SUSER_ALLOWJAIL);
691         if (error)
692                 return (error);
693         error = SYSCTL_IN(req, addrs, sizeof(addrs));
694         if (error)
695                 return (error);
696         INP_INFO_RLOCK(&udbinfo);
697         inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
698                                 addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
699         if (inp == NULL || inp->inp_socket == NULL) {
700                 error = ENOENT;
701                 goto out;
702         }
703         error = cr_canseesocket(req->td->td_ucred, inp->inp_socket);
704         if (error)
705                 goto out;
706         cru2x(inp->inp_socket->so_cred, &xuc);
707 out:
708         INP_INFO_RUNLOCK(&udbinfo);
709         if (error == 0)
710                 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
711         return (error);
712 }
713
714 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
715     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
716     udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
717
718 static int
719 udp_output(inp, m, addr, control, td)
720         register struct inpcb *inp;
721         struct mbuf *m;
722         struct sockaddr *addr;
723         struct mbuf *control;
724         struct thread *td;
725 {
726         register struct udpiphdr *ui;
727         register int len = m->m_pkthdr.len;
728         struct in_addr faddr, laddr;
729         struct cmsghdr *cm;
730         struct sockaddr_in *sin, src;
731         int error = 0;
732         int ipflags;
733         u_short fport, lport;
734         int unlock_udbinfo;
735
736         /*
737          * udp_output() may need to temporarily bind or connect the current
738          * inpcb.  As such, we don't know up front what inpcb locks we will
739          * need.  Do any work to decide what is needed up front before
740          * acquiring locks.
741          */
742         if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
743                 if (control)
744                         m_freem(control);
745                 m_freem(m);
746                 return EMSGSIZE;
747         }
748
749         src.sin_family = 0;
750         if (control != NULL) {
751                 /*
752                  * XXX: Currently, we assume all the optional information
753                  * is stored in a single mbuf.
754                  */
755                 if (control->m_next) {
756                         m_freem(control);
757                         m_freem(m);
758                         return EINVAL;
759                 }
760                 for (; control->m_len > 0;
761                     control->m_data += CMSG_ALIGN(cm->cmsg_len),
762                     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
763                         cm = mtod(control, struct cmsghdr *);
764                         if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 ||
765                             cm->cmsg_len > control->m_len) {
766                                 error = EINVAL;
767                                 break;
768                         }
769                         if (cm->cmsg_level != IPPROTO_IP)
770                                 continue;
771
772                         switch (cm->cmsg_type) {
773                         case IP_SENDSRCADDR:
774                                 if (cm->cmsg_len !=
775                                     CMSG_LEN(sizeof(struct in_addr))) {
776                                         error = EINVAL;
777                                         break;
778                                 }
779                                 bzero(&src, sizeof(src));
780                                 src.sin_family = AF_INET;
781                                 src.sin_len = sizeof(src);
782                                 src.sin_port = inp->inp_lport;
783                                 src.sin_addr = *(struct in_addr *)CMSG_DATA(cm);
784                                 break;
785                         default:
786                                 error = ENOPROTOOPT;
787                                 break;
788                         }
789                         if (error)
790                                 break;
791                 }
792                 m_freem(control);
793         }
794         if (error) {
795                 m_freem(m);
796                 return error;
797         }
798
799         if (src.sin_family == AF_INET ||
800             addr != NULL) {
801                 INP_INFO_WLOCK(&udbinfo);
802                 unlock_udbinfo = 1;
803         } else
804                 unlock_udbinfo = 0;
805         INP_LOCK(inp);
806
807 #ifdef MAC
808         mac_create_mbuf_from_inpcb(inp, m);
809 #endif
810
811         /*
812          * If the IP_SENDSRCADDR control message was specified, override the
813          * source address for this datagram. Its use is invalidated if the
814          * address thus specified is incomplete or clobbers other inpcbs.
815          */
816         laddr = inp->inp_laddr;
817         lport = inp->inp_lport;
818         if (src.sin_family == AF_INET) {
819                 if ((lport == 0) ||
820                     (laddr.s_addr == INADDR_ANY &&
821                      src.sin_addr.s_addr == INADDR_ANY)) {
822                         error = EINVAL;
823                         goto release;
824                 }
825                 error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
826                     &laddr.s_addr, &lport, td->td_ucred);
827                 if (error)
828                         goto release;
829         }
830
831         if (addr) {
832                 sin = (struct sockaddr_in *)addr;
833                 if (jailed(td->td_ucred))
834                         prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
835                 if (inp->inp_faddr.s_addr != INADDR_ANY) {
836                         error = EISCONN;
837                         goto release;
838                 }
839                 error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport,
840                     &faddr.s_addr, &fport, NULL, td->td_ucred);
841                 if (error)
842                         goto release;
843
844                 /* Commit the local port if newly assigned. */
845                 if (inp->inp_laddr.s_addr == INADDR_ANY &&
846                     inp->inp_lport == 0) {
847                         /*
848                          * Remember addr if jailed, to prevent rebinding.
849                          */
850                         if (jailed(td->td_ucred))
851                                 inp->inp_laddr = laddr;
852                         inp->inp_lport = lport;
853                         if (in_pcbinshash(inp) != 0) {
854                                 inp->inp_lport = 0;
855                                 error = EAGAIN;
856                                 goto release;
857                         }
858                         inp->inp_flags |= INP_ANONPORT;
859                 }
860         } else {
861                 faddr = inp->inp_faddr;
862                 fport = inp->inp_fport;
863                 if (faddr.s_addr == INADDR_ANY) {
864                         error = ENOTCONN;
865                         goto release;
866                 }
867         }
868
869         /*
870          * Calculate data length and get a mbuf for UDP, IP, and possible
871          * link-layer headers.  Immediate slide the data pointer back forward
872          * since we won't use that space at this layer.
873          */
874         M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
875         if (m == NULL) {
876                 error = ENOBUFS;
877                 goto release;
878         }
879         m->m_data += max_linkhdr;
880         m->m_len -= max_linkhdr;
881         m->m_pkthdr.len -= max_linkhdr;
882
883         /*
884          * Fill in mbuf with extended UDP header
885          * and addresses and length put into network format.
886          */
887         ui = mtod(m, struct udpiphdr *);
888         bzero(ui->ui_x1, sizeof(ui->ui_x1));    /* XXX still needed? */
889         ui->ui_pr = IPPROTO_UDP;
890         ui->ui_src = laddr;
891         ui->ui_dst = faddr;
892         ui->ui_sport = lport;
893         ui->ui_dport = fport;
894         ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
895
896         /*
897          * Set the Don't Fragment bit in the IP header.
898          */
899         if (inp->inp_flags & INP_DONTFRAG) {
900                 struct ip *ip;
901                 ip = (struct ip *)&ui->ui_i;
902                 ip->ip_off |= IP_DF;
903         }
904
905         ipflags = 0;
906         if (inp->inp_socket->so_options & SO_DONTROUTE)
907                 ipflags |= IP_ROUTETOIF;
908         if (inp->inp_socket->so_options & SO_BROADCAST)
909                 ipflags |= IP_ALLOWBROADCAST;
910         if (inp->inp_flags & INP_ONESBCAST)
911                 ipflags |= IP_SENDONES;
912
913         /*
914          * Set up checksum and output datagram.
915          */
916         if (udpcksum) {
917                 if (inp->inp_flags & INP_ONESBCAST)
918                         faddr.s_addr = INADDR_BROADCAST;
919                 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
920                     htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
921                 m->m_pkthdr.csum_flags = CSUM_UDP;
922                 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
923         } else {
924                 ui->ui_sum = 0;
925         }
926         ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
927         ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;    /* XXX */
928         ((struct ip *)ui)->ip_tos = inp->inp_ip_tos;    /* XXX */
929         udpstat.udps_opackets++;
930
931         if (unlock_udbinfo)
932                 INP_INFO_WUNLOCK(&udbinfo);
933         error = ip_output(m, inp->inp_options, NULL, ipflags,
934             inp->inp_moptions, inp);
935         INP_UNLOCK(inp);
936         return (error);
937
938 release:
939         INP_UNLOCK(inp);
940         if (unlock_udbinfo)
941                 INP_INFO_WUNLOCK(&udbinfo);
942         m_freem(m);
943         return (error);
944 }
945
946 u_long  udp_sendspace = 9216;           /* really max datagram size */
947                                         /* 40 1K datagrams */
948 SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
949     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
950
951 u_long  udp_recvspace = 40 * (1024 +
952 #ifdef INET6
953                                       sizeof(struct sockaddr_in6)
954 #else
955                                       sizeof(struct sockaddr_in)
956 #endif
957                                       );
958 SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
959     &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
960
961 static int
962 udp_abort(struct socket *so)
963 {
964         struct inpcb *inp;
965
966         INP_INFO_WLOCK(&udbinfo);
967         inp = sotoinpcb(so);
968         if (inp == 0) {
969                 INP_INFO_WUNLOCK(&udbinfo);
970                 return EINVAL;  /* ??? possible? panic instead? */
971         }
972         INP_LOCK(inp);
973         soisdisconnected(so);
974         in_pcbdetach(inp);
975         INP_INFO_WUNLOCK(&udbinfo);
976         return 0;
977 }
978
979 static int
980 udp_attach(struct socket *so, int proto, struct thread *td)
981 {
982         struct inpcb *inp;
983         int error;
984
985         INP_INFO_WLOCK(&udbinfo);
986         inp = sotoinpcb(so);
987         if (inp != 0) {
988                 INP_INFO_WUNLOCK(&udbinfo);
989                 return EINVAL;
990         }
991         error = soreserve(so, udp_sendspace, udp_recvspace);
992         if (error) {
993                 INP_INFO_WUNLOCK(&udbinfo);
994                 return error;
995         }
996         error = in_pcballoc(so, &udbinfo);
997         if (error) {
998                 INP_INFO_WUNLOCK(&udbinfo);
999                 return error;
1000         }
1001
1002         inp = (struct inpcb *)so->so_pcb;
1003         INP_INFO_WUNLOCK(&udbinfo);
1004         inp->inp_vflag |= INP_IPV4;
1005         inp->inp_ip_ttl = ip_defttl;
1006         INP_UNLOCK(inp);
1007         return 0;
1008 }
1009
1010 static int
1011 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1012 {
1013         struct inpcb *inp;
1014         int error;
1015
1016         INP_INFO_WLOCK(&udbinfo);
1017         inp = sotoinpcb(so);
1018         if (inp == 0) {
1019                 INP_INFO_WUNLOCK(&udbinfo);
1020                 return EINVAL;
1021         }
1022         INP_LOCK(inp);
1023         error = in_pcbbind(inp, nam, td->td_ucred);
1024         INP_UNLOCK(inp);
1025         INP_INFO_WUNLOCK(&udbinfo);
1026         return error;
1027 }
1028
1029 static int
1030 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1031 {
1032         struct inpcb *inp;
1033         int error;
1034         struct sockaddr_in *sin;
1035
1036         INP_INFO_WLOCK(&udbinfo);
1037         inp = sotoinpcb(so);
1038         if (inp == 0) {
1039                 INP_INFO_WUNLOCK(&udbinfo);
1040                 return EINVAL;
1041         }
1042         INP_LOCK(inp);
1043         if (inp->inp_faddr.s_addr != INADDR_ANY) {
1044                 INP_UNLOCK(inp);
1045                 INP_INFO_WUNLOCK(&udbinfo);
1046                 return EISCONN;
1047         }
1048         sin = (struct sockaddr_in *)nam;
1049         if (jailed(td->td_ucred))
1050                 prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
1051         error = in_pcbconnect(inp, nam, td->td_ucred);
1052         if (error == 0)
1053                 soisconnected(so);
1054         INP_UNLOCK(inp);
1055         INP_INFO_WUNLOCK(&udbinfo);
1056         return error;
1057 }
1058
1059 static int
1060 udp_detach(struct socket *so)
1061 {
1062         struct inpcb *inp;
1063
1064         INP_INFO_WLOCK(&udbinfo);
1065         inp = sotoinpcb(so);
1066         if (inp == 0) {
1067                 INP_INFO_WUNLOCK(&udbinfo);
1068                 return EINVAL;
1069         }
1070         INP_LOCK(inp);
1071         in_pcbdetach(inp);
1072         INP_INFO_WUNLOCK(&udbinfo);
1073         return 0;
1074 }
1075
1076 static int
1077 udp_disconnect(struct socket *so)
1078 {
1079         struct inpcb *inp;
1080
1081         INP_INFO_WLOCK(&udbinfo);
1082         inp = sotoinpcb(so);
1083         if (inp == 0) {
1084                 INP_INFO_WUNLOCK(&udbinfo);
1085                 return EINVAL;
1086         }
1087         INP_LOCK(inp);
1088         if (inp->inp_faddr.s_addr == INADDR_ANY) {
1089                 INP_INFO_WUNLOCK(&udbinfo);
1090                 INP_UNLOCK(inp);
1091                 return ENOTCONN;
1092         }
1093
1094         in_pcbdisconnect(inp);
1095         inp->inp_laddr.s_addr = INADDR_ANY;
1096         INP_UNLOCK(inp);
1097         INP_INFO_WUNLOCK(&udbinfo);
1098         so->so_state &= ~SS_ISCONNECTED;                /* XXX */
1099         return 0;
1100 }
1101
/*
 * pru_send handler: look up the socket's inpcb and hand the datagram,
 * optional destination address and optional control mbuf to udp_output().
 *
 * The flags argument is not used.  Unlike the other handlers in this file,
 * the pcb is not checked for being absent before it is passed on.
 */
static int
udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
    struct mbuf *control, struct thread *td)
{

        return (udp_output(sotoinpcb(so), m, addr, control, td));
}
1111
1112 int
1113 udp_shutdown(struct socket *so)
1114 {
1115         struct inpcb *inp;
1116
1117         INP_INFO_RLOCK(&udbinfo);
1118         inp = sotoinpcb(so);
1119         if (inp == 0) {
1120                 INP_INFO_RUNLOCK(&udbinfo);
1121                 return EINVAL;
1122         }
1123         INP_LOCK(inp);
1124         INP_INFO_RUNLOCK(&udbinfo);
1125         socantsendmore(so);
1126         INP_UNLOCK(inp);
1127         return 0;
1128 }
1129
1130 /*
1131  * This is the wrapper function for in_setsockaddr.  We just pass down
1132  * the pcbinfo for in_setsockaddr to lock.  We don't want to do the locking
1133  * here because in_setsockaddr will call malloc and might block.
1134  */
1135 static int
1136 udp_sockaddr(struct socket *so, struct sockaddr **nam)
1137 {
1138         return (in_setsockaddr(so, nam, &udbinfo));
1139 }
1140
1141 /*
1142  * This is the wrapper function for in_setpeeraddr.  We just pass down
1143  * the pcbinfo for in_setpeeraddr to lock.
1144  */
1145 static int
1146 udp_peeraddr(struct socket *so, struct sockaddr **nam)
1147 {
1148         return (in_setpeeraddr(so, nam, &udbinfo));
1149 }
1150
1151 struct pr_usrreqs udp_usrreqs = {
1152         .pru_abort =            udp_abort,
1153         .pru_attach =           udp_attach,
1154         .pru_bind =             udp_bind,
1155         .pru_connect =          udp_connect,
1156         .pru_control =          in_control,
1157         .pru_detach =           udp_detach,
1158         .pru_disconnect =       udp_disconnect,
1159         .pru_peeraddr =         udp_peeraddr,
1160         .pru_send =             udp_send,
1161         .pru_shutdown =         udp_shutdown,
1162         .pru_sockaddr =         udp_sockaddr,
1163         .pru_sosetlabel =       in_pcbsosetlabel
1164 };