]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/udp_usrreq.c
This commit was generated by cvs2svn to compensate for changes in r167612,
[FreeBSD/FreeBSD.git] / sys / netinet / udp_usrreq.c
1 /*-
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3  *      The Regents of the University of California.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 4. Neither the name of the University nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  *      @(#)udp_usrreq.c        8.6 (Berkeley) 5/23/95
31  * $FreeBSD$
32  */
33
34 #include "opt_ipfw.h"
35 #include "opt_ipsec.h"
36 #include "opt_inet6.h"
37 #include "opt_mac.h"
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/domain.h>
42 #include <sys/eventhandler.h>
43 #include <sys/jail.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mbuf.h>
48 #include <sys/priv.h>
49 #include <sys/proc.h>
50 #include <sys/protosw.h>
51 #include <sys/signalvar.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/sx.h>
55 #include <sys/sysctl.h>
56 #include <sys/syslog.h>
57
58 #include <vm/uma.h>
59
60 #include <net/if.h>
61 #include <net/route.h>
62
63 #include <netinet/in.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/in_pcb.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip.h>
68 #ifdef INET6
69 #include <netinet/ip6.h>
70 #endif
71 #include <netinet/ip_icmp.h>
72 #include <netinet/icmp_var.h>
73 #include <netinet/ip_var.h>
74 #include <netinet/ip_options.h>
75 #ifdef INET6
76 #include <netinet6/ip6_var.h>
77 #endif
78 #include <netinet/udp.h>
79 #include <netinet/udp_var.h>
80
81 #ifdef FAST_IPSEC
82 #include <netipsec/ipsec.h>
83 #endif
84
85 #ifdef IPSEC
86 #include <netinet6/ipsec.h>
87 #endif
88
89 #include <machine/in_cksum.h>
90
91 #include <security/mac/mac_framework.h>
92
93 /*
94  * UDP protocol implementation.
95  * Per RFC 768, August, 1980.
96  */
97
98 /*
99  * BSD 4.2 defaulted the udp checksum to be off.  Turning off udp checksums
100  * removes the only data integrity mechanism for packets and malformed
101  * packets that would otherwise be discarded by bad checksums may cause
102  * problems (especially for NFS data blocks).
103  */
104 static int      udpcksum = 1;
105 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udpcksum,
106     0, "");
107
/*
 * Despite the sysctl description below, udp_input() only logs unicast
 * datagrams for which no matching pcb was found ("Connection attempt to
 * UDP ..."), not every incoming packet.
 */
108 int     udp_log_in_vain = 0;
109 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
110     &udp_log_in_vain, 0, "Log all incoming UDP packets");
111
/*
 * When non-zero, udp_input() silently drops unicast datagrams that match no
 * pcb instead of answering with an ICMP port-unreachable.
 */
112 static int      blackhole = 0;
113 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, &blackhole, 0,
114     "Do not send port unreachables for refused connects");
115
/*
 * When non-zero, the multicast/broadcast delivery loop in udp_input() skips
 * any socket that has multicast options but no membership matching the
 * datagram's destination address and receive interface.
 */
116 static int      strict_mcast_mship = 0;
117 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
118     &strict_mcast_mship, 0, "Only send multicast to member sockets");
119
/*
 * Global list of UDP pcbs and the pcbinfo describing it (lock, hash tables,
 * allocation zone); both are set up in udp_init().
 */
120 struct  inpcbhead udb;          /* from udp_var.h */
121 struct  inpcbinfo udbinfo;
122
/*
 * Size handed to hashinit() for both the pcb hash and the port hash in
 * udp_init(); may be overridden by defining UDBHASHSIZE beforehand.
 */
123 #ifndef UDBHASHSIZE
124 #define UDBHASHSIZE 16
125 #endif
126
/* UDP counters, exported read-write as net.inet.udp.stats. */
127 struct  udpstat udpstat;        /* from udp_var.h */
128 SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW, &udpstat,
129     udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
130
/* Forward declarations of file-local helpers. */
131 static void     udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
132                     int off, struct sockaddr_in *udp_in);
133
134 static void     udp_detach(struct socket *so);
135 static int      udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
136                     struct mbuf *, struct thread *);
133
134 static void     udp_detach(struct socket *so);
135 static int      udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
136                     struct mbuf *, struct thread *);
137
/*
 * Handler for the maxsockets_change event (registered in udp_init()):
 * re-applies the current maxsockets value as the limit of the UDP inpcb
 * zone.  The tag argument is unused.
 */
138 static void
139 udp_zone_change(void *tag)
140 {
141
142         uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
143 }
144
/*
 * Item callback handed to uma_zcreate() in udp_init(): initializes the inpcb
 * lock of the item at mem.  size and flags are unused; always returns 0.
 */
145 static int
146 udp_inpcb_init(void *mem, int size, int flags)
147 {
148         struct inpcb *inp = mem;
149
150         INP_LOCK_INIT(inp, "inp", "udpinp");
151         return (0);
152 }
153
/*
 * UDP protocol initialization: sets up the pcbinfo lock, the global pcb
 * list, both hash tables and the inpcb UMA zone, and registers
 * udp_zone_change() so the zone limit follows later changes to maxsockets.
 */
154 void
155 udp_init()
156 {
157         INP_INFO_LOCK_INIT(&udbinfo, "udp");
158         LIST_INIT(&udb);
159         udbinfo.listhead = &udb;
160         udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
161         udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
162             &udbinfo.porthashmask);
            /*
             * NOTE(review): UMA_ZONE_NOFREE presumably keeps inpcb memory
             * type-stable so pointers held across an unlock (as in
             * udp_pcblist()) never reference freed pages -- confirm.
             */
163         udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL,
164             NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
165         uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
166         EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
167                 EVENTHANDLER_PRI_ANY);
168 }
169
/*
 * Input path for an IPv4 UDP datagram.
 *
 * m    packet mbuf chain whose data starts at the IP header.  Every exit
 *      path hands m to udp_append() or icmp_error(), or frees it (the
 *      m_pullup() failure path returns with m == 0).
 * off  length of the IP header, including any options.
 *
 * After header, length and checksum validation the datagram is delivered
 * either to every matching pcb (multicast/broadcast destination) or to the
 * single pcb returned by in_pcblookup_hash().  A unicast datagram matching
 * no pcb triggers an ICMP port-unreachable unless the mbuf is flagged
 * M_BCAST/M_MCAST, "blackhole" is set, or badport_bandlim() refuses.
 */
170 void
171 udp_input(struct mbuf *m, int off)
172 {
173         int iphlen = off;
174         struct ip *ip;
175         struct udphdr *uh;
176         struct inpcb *inp;
177         int len;
178         struct ip save_ip;
179         struct sockaddr_in udp_in;
180 #ifdef IPFIREWALL_FORWARD
181         struct m_tag *fwd_tag;
182 #endif
183
184         udpstat.udps_ipackets++;
185
186         /*
187          * Strip IP options, if any; should skip this, make available to
188          * user, and use on returned packets, but we don't yet have a way to
189          * check the checksum with options still present.
190          */
191         if (iphlen > sizeof (struct ip)) {
192                 ip_stripoptions(m, (struct mbuf *)0);
193                 iphlen = sizeof(struct ip);
194         }
195
196         /*
197          * Get IP and UDP header together in first mbuf.
198          */
199         ip = mtod(m, struct ip *);
200         if (m->m_len < iphlen + sizeof(struct udphdr)) {
201                 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
202                         udpstat.udps_hdrops++;
203                         return;
204                 }
205                 ip = mtod(m, struct ip *);
206         }
207         uh = (struct udphdr *)((caddr_t)ip + iphlen);
208
209         /*
210          * Destination port of 0 is illegal, based on RFC768.
211          */
212         if (uh->uh_dport == 0)
213                 goto badunlocked;
214
215         /*
216          * Construct sockaddr format source address.  Stuff source address
217          * and datagram in user buffer.
218          */
219         bzero(&udp_in, sizeof(udp_in));
220         udp_in.sin_len = sizeof(udp_in);
221         udp_in.sin_family = AF_INET;
222         udp_in.sin_port = uh->uh_sport;
223         udp_in.sin_addr = ip->ip_src;
224
225         /*
226          * Make mbuf data length reflect UDP length.
227          * If not enough data to reflect UDP length, drop.
228          */
229         len = ntohs((u_short)uh->uh_ulen);
230         if (ip->ip_len != len) {
231                 if (len > ip->ip_len || len < sizeof(struct udphdr)) {
232                         udpstat.udps_badlen++;
233                         goto badunlocked;
234                 }
235                 m_adj(m, len - ip->ip_len);
236                 /* ip->ip_len = len; */
237         }
238
239         /*
240          * Save a copy of the IP header in case we want to restore it for
241          * sending an ICMP error.  Done unconditionally: "blackhole" is a
242          * live sysctl and may be cleared before save_ip is read below.
243          */
244         save_ip = *ip;
245
246         /*
247          * Checksum extended UDP header and data.
248          */
249         if (uh->uh_sum) {
250                 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
251                         if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
252                                 uh->uh_sum = m->m_pkthdr.csum_data;
253                         else
254                                 uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
255                                     ip->ip_dst.s_addr, htonl((u_short)len +
256                                     m->m_pkthdr.csum_data + IPPROTO_UDP));
257                         uh->uh_sum ^= 0xffff;
258                 } else {
                            /*
                             * Software checksum: temporarily overlay the IP
                             * header with a pseudo-header (ih_x1 zeroed,
                             * ih_len set), sum, then restore the saved bytes.
                             */
259                         char b[9];
260                         bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
261                         bzero(((struct ipovly *)ip)->ih_x1, 9);
262                         ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
263                         uh->uh_sum = in_cksum(m, len + sizeof (struct ip));
264                         bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
265                 }
266                 if (uh->uh_sum) {
267                         udpstat.udps_badsum++;
268                         m_freem(m);
269                         return;
270                 }
271         } else
272                 udpstat.udps_nosum++;
273
274 #ifdef IPFIREWALL_FORWARD
275         /*
276          * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
277          */
278         fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
279         if (fwd_tag != NULL) {
280                 struct sockaddr_in *next_hop;
281
282                 /*
283                  * Do the hack.
284                  */
285                 next_hop = (struct sockaddr_in *)(fwd_tag + 1);
286                 ip->ip_dst = next_hop->sin_addr;
287                 uh->uh_dport = ntohs(next_hop->sin_port);
288
289                 /*
290                  * Remove the tag from the packet.  We don't need it anymore.
291                  */
292                 m_tag_delete(m, fwd_tag);
293         }
294 #endif
295
296         INP_INFO_RLOCK(&udbinfo);
297
298         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
299             in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
300                 struct inpcb *last;
301
302                 /*
303                  * Deliver a multicast or broadcast datagram to *all* sockets
304                  * for which the local and remote addresses and ports match
305                  * those of the incoming datagram.  This allows more than one
306                  * process to receive multi/broadcasts on the same port.
307                  * (This really ought to be done for unicast datagrams as
308                  * well, but that would cause problems with existing
309                  * applications that open both address-specific sockets and a
310                  * wildcard socket listening to the same port -- they would
311                  * end up receiving duplicates of every unicast datagram.
312                  * Those applications open the multiple sockets to overcome
313                  * an inadequacy of the UDP socket interface, but for
314                  * backwards compatibility we avoid the problem here rather
315                  * than fixing the interface.  Maybe 4.5BSD will remedy
316                  * this?)
317                  */
318                 last = NULL;
319                 LIST_FOREACH(inp, &udb, inp_list) {
320                         if (inp->inp_lport != uh->uh_dport)
321                                 continue;
322 #ifdef INET6
323                         if ((inp->inp_vflag & INP_IPV4) == 0)
324                                 continue;
325 #endif
326                         if (inp->inp_laddr.s_addr != INADDR_ANY) {
327                                 if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
328                                         continue;
329                         }
330                         if (inp->inp_faddr.s_addr != INADDR_ANY) {
331                                 if (inp->inp_faddr.s_addr !=
332                                     ip->ip_src.s_addr ||
333                                     inp->inp_fport != uh->uh_sport)
334                                         continue;
335                         }
336
337                         /*
338                          * Check multicast packets to make sure they are only
339                          * sent to sockets with multicast memberships for the
340                          * packet's destination address and arrival interface
341                          */
342 #define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)])
343 #define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships)
344                         INP_LOCK(inp);
345                         if (strict_mcast_mship && inp->inp_moptions != NULL) {
346                                 int mship, foundmship = 0;
347
348                                 for (mship = 0; mship < NMSHIPS(inp);
349                                     mship++) {
350                                         if (MSHIP(inp, mship)->inm_addr.s_addr
351                                             == ip->ip_dst.s_addr &&
352                                             MSHIP(inp, mship)->inm_ifp
353                                             == m->m_pkthdr.rcvif) {
354                                                 foundmship = 1;
355                                                 break;
356                                         }
357                                 }
358                                 if (foundmship == 0) {
359                                         INP_UNLOCK(inp);
360                                         continue;
361                                 }
362                         }
363 #undef NMSHIPS
364 #undef MSHIP
                            /*
                             * Delivery lags one match behind: the previous
                             * match gets a copy, so the final match can be
                             * given the original chain after the loop.
                             */
365                         if (last != NULL) {
366                                 struct mbuf *n;
367
368                                 n = m_copy(m, 0, M_COPYALL);
369                                 if (n != NULL)
370                                         udp_append(last, ip, n, iphlen +
371                                             sizeof(struct udphdr), &udp_in);
372                                 INP_UNLOCK(last);
373                         }
374                         last = inp;
375                         /*
376                          * Don't look for additional matches if this one does
377                          * not have either the SO_REUSEPORT or SO_REUSEADDR
378                          * socket options set.  This heuristic avoids
379                          * searching through all pcbs in the common case of a
380                          * non-shared port.  It assumes that an application
381                          * will never clear these options after setting them.
382                          */
383                         if ((last->inp_socket->so_options &
384                             (SO_REUSEPORT|SO_REUSEADDR)) == 0)
385                                 break;
386                 }
387
388                 if (last == NULL) {
389                         /*
390                          * No matching pcb found; discard datagram.  (No need
391                          * to send an ICMP Port Unreachable for a broadcast
392                          * or multicast datagram.)
393                          */
394                         udpstat.udps_noportbcast++;
395                         goto badheadlocked;
396                 }
397                 udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
398                     &udp_in);
399                 INP_UNLOCK(last);
400                 INP_INFO_RUNLOCK(&udbinfo);
401                 return;
402         }
403
404         /*
405          * Locate pcb for datagram.
406          */
407         inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
408             ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
409         if (inp == NULL) {
410                 if (udp_log_in_vain) {
411                         char buf[4*sizeof "123"];
412
413                         strcpy(buf, inet_ntoa(ip->ip_dst));
414                         log(LOG_INFO,
415                             "Connection attempt to UDP %s:%d from %s:%d\n",
416                             buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
417                             ntohs(uh->uh_sport));
418                 }
419                 udpstat.udps_noport++;
420                 if (m->m_flags & (M_BCAST | M_MCAST)) {
421                         udpstat.udps_noportbcast++;
422                         goto badheadlocked;
423                 }
424                 if (blackhole)
425                         goto badheadlocked;
426                 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
427                         goto badheadlocked;
428                 *ip = save_ip;
429                 ip->ip_len += iphlen;
430                 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
431                 INP_INFO_RUNLOCK(&udbinfo);
432                 return;
433         }
434
435         /*
436          * Check the minimum TTL for socket.
437          */
438         INP_LOCK(inp);
439         if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
440                 goto badheadlocked;
441         udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in);
442         INP_UNLOCK(inp);
443         INP_INFO_RUNLOCK(&udbinfo);
444         return;
445
            /*
             * Reached with the pcbinfo read lock held.  inp is either NULL
             * (no pcb matched) or a pcb this function locked (minimum-TTL
             * rejection), hence the conditional unlock.
             */
446 badheadlocked:
447         if (inp)
448                 INP_UNLOCK(inp);
449         INP_INFO_RUNLOCK(&udbinfo);
450 badunlocked:
451         m_freem(m);
452 }
453
454 /*
455  * Subroutine of udp_input(), which appends the provided mbuf chain to the
456  * passed pcb/socket.  The caller must provide a sockaddr_in via udp_in that
457  * contains the source address.  If the socket ends up being an IPv6 socket,
458  * udp_append() will convert to a sockaddr_in6 before passing the address
459  * into the socket code.
     *
     * inp     destination pcb; must be locked by the caller (asserted).
     * ip      IP header of the datagram, used when saving control options.
     * n       datagram mbuf chain; always consumed -- either queued on the
     *         socket's receive buffer or freed.
     * off     number of leading bytes trimmed from n before queuing (both
     *         udp_input() call sites pass IP header + UDP header length).
     * udp_in  source address reported to the receiver.
460  */
461 static void
462 udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
463     struct sockaddr_in *udp_in)
464 {
465         struct sockaddr *append_sa;
466         struct socket *so;
467         struct mbuf *opts = 0;
468 #ifdef INET6
469         struct sockaddr_in6 udp_in6;
470 #endif
471
472         INP_LOCK_ASSERT(inp);
473
474 #if defined(IPSEC) || defined(FAST_IPSEC)
475         /* check AH/ESP integrity. */
476         if (ipsec4_in_reject(n, inp)) {
477 #ifdef IPSEC
478                 ipsecstat.in_polvio++;
479 #endif
480                 m_freem(n);
481                 return;
482         }
483 #endif /*IPSEC || FAST_IPSEC*/
484 #ifdef MAC
            /* Drop the datagram if the MAC policy refuses delivery. */
485         if (mac_check_inpcb_deliver(inp, n) != 0) {
486                 m_freem(n);
487                 return;
488         }
489 #endif
            /*
             * Build control data (opts) only when the pcb asks for control
             * options or the socket wants timestamps.
             */
490         if (inp->inp_flags & INP_CONTROLOPTS ||
491             inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
492 #ifdef INET6
493                 if (inp->inp_vflag & INP_IPV6) {
494                         int savedflags;
495
                            /*
                             * INP_UNMAPPABLEOPTS is masked off only for the
                             * duration of ip6_savecontrol(); the original
                             * flags are restored right after.
                             */
496                         savedflags = inp->inp_flags;
497                         inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
498                         ip6_savecontrol(inp, n, &opts);
499                         inp->inp_flags = savedflags;
500                 } else
501 #endif
502                         ip_savecontrol(inp, &opts, ip, n);
503         }
504 #ifdef INET6
505         if (inp->inp_vflag & INP_IPV6) {
506                 bzero(&udp_in6, sizeof(udp_in6));
507                 udp_in6.sin6_len = sizeof(udp_in6);
508                 udp_in6.sin6_family = AF_INET6;
509                 in6_sin_2_v4mapsin6(udp_in, &udp_in6);
510                 append_sa = (struct sockaddr *)&udp_in6;
511         } else
512 #endif
513                 append_sa = (struct sockaddr *)udp_in;
514         m_adj(n, off);
515
516         so = inp->inp_socket;
517         SOCKBUF_LOCK(&so->so_rcv);
518         if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
                    /* Append refused: free data and control, count the drop. */
519                 m_freem(n);
520                 if (opts)
521                         m_freem(opts);
522                 udpstat.udps_fullsock++;
523                 SOCKBUF_UNLOCK(&so->so_rcv);
524         } else
                    /*
                     * NOTE(review): no explicit unlock on this path, so
                     * sorwakeup_locked() presumably releases the receive
                     * buffer lock -- confirm.
                     */
525                 sorwakeup_locked(so);
526 }
527
528 /*
529  * Notify a udp user of an asynchronous error; just wake up so that they can
530  * collect error status.
     *
     * Stores errno in the socket's so_error, wakes both readers and writers,
     * and returns inp unchanged.  inp->inp_socket is dereferenced without a
     * NULL check; udp_ctlinput() tests it before its direct call.
531  */
532 struct inpcb *
533 udp_notify(struct inpcb *inp, int errno)
534 {
535
536         inp->inp_socket->so_error = errno;
537         sorwakeup(inp->inp_socket);
538         sowwakeup(inp->inp_socket);
539         return (inp);
540 }
541
/*
 * Control-input handler for UDP.
 *
 * cmd  PRC_* command code.
 * sa   address the command refers to; only AF_INET addresses other than
 *      INADDR_ANY are acted upon.
 * vip  if non-NULL, the IP header (followed by the UDP header) of the
 *      datagram the command is about.
 *
 * Redirects, out-of-range commands and commands without an inetctlerrmap[]
 * entry are ignored.  With a header, only the pcb matching that datagram is
 * notified through udp_notify(); without one, or for PRC_HOSTDEAD, the
 * address is handed to in_pcbnotifyall().
 */
542 void
543 udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
544 {
545         struct ip *ip = vip;
546         struct udphdr *uh;
547         struct inpcb *(*notify)(struct inpcb *, int) = udp_notify;
548         struct in_addr faddr;
549         struct inpcb *inp;
550
            /*
             * NOTE(review): sin_addr is read through the sockaddr_in cast
             * before sa_family is verified on the next line.
             */
551         faddr = ((struct sockaddr_in *)sa)->sin_addr;
552         if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
553                 return;
554
555         /*
556          * Redirects don't need to be handled up here.
557          */
558         if (PRC_IS_REDIRECT(cmd))
559                 return;
560
561         /*
562          * Hostdead is ugly because it goes linearly through all PCBs.
563          *
564          * XXX: We never get this from ICMP, otherwise it makes an excellent
565          * DoS attack on machines with many connections.
566          */
567         if (cmd == PRC_HOSTDEAD)
568                 ip = 0;
569         else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
570                 return;
571         if (ip) {
                    /*
                     * Look up the exact pcb (wildcard argument is 0): faddr
                     * and the header's destination port go in the foreign
                     * slots, the header's source address and port in the
                     * local slots.
                     */
572                 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
573                 INP_INFO_RLOCK(&udbinfo);
574                 inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
575                     ip->ip_src, uh->uh_sport, 0, NULL);
576                 if (inp != NULL) {
577                         INP_LOCK(inp);
578                         if (inp->inp_socket != NULL) {
579                                 (*notify)(inp, inetctlerrmap[cmd]);
580                         }
581                         INP_UNLOCK(inp);
582                 }
583                 INP_INFO_RUNLOCK(&udbinfo);
584         } else
585                 in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify);
586 }
587
/*
 * Sysctl handler for net.inet.udp.pcblist.  Exports a snapshot of the UDP
 * pcb list as a leading struct xinpgen, one struct xinpcb per pcb the
 * caller may see, and a trailing struct xinpgen with the then-current
 * generation and count.
 *
 * A request without an old buffer only receives a size estimate (with n/8
 * slack); a request carrying a new value is rejected with EPERM.  Returns 0
 * on success or an errno value; copy-out stops at the first failure.
 */
588 static int
589 udp_pcblist(SYSCTL_HANDLER_ARGS)
590 {
591         int error, i, n;
592         struct inpcb *inp, **inp_list;
593         inp_gen_t gencnt;
594         struct xinpgen xig;
595
596         /*
597          * The process of preparing the PCB list is too time-consuming and
598          * resource-intensive to repeat twice on every request.
599          */
600         if (req->oldptr == 0) {
601                 n = udbinfo.ipi_count;
602                 req->oldidx = 2 * (sizeof xig)
603                         + (n + n/8) * sizeof(struct xinpcb);
604                 return (0);
605         }
606
607         if (req->newptr != 0)
608                 return (EPERM);
609
610         /*
611          * OK, now we're committed to doing something.
612          */
613         INP_INFO_RLOCK(&udbinfo);
614         gencnt = udbinfo.ipi_gencnt;
615         n = udbinfo.ipi_count;
616         INP_INFO_RUNLOCK(&udbinfo);
617
618         error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
619                 + n * sizeof(struct xinpcb));
620         if (error != 0)
621                 return (error);
622
623         xig.xig_len = sizeof xig;
624         xig.xig_count = n;
625         xig.xig_gen = gencnt;
626         xig.xig_sogen = so_gencnt;
627         error = SYSCTL_OUT(req, &xig, sizeof xig);
628         if (error)
629                 return (error);
630
            /*
             * NOTE(review): with M_WAITOK the NULL check below is presumably
             * never true -- confirm against malloc(9).
             */
631         inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
632         if (inp_list == 0)
633                 return (ENOMEM);
634
            /*
             * Pass 1, under the pcbinfo lock: collect up to n pcbs that are
             * no newer than the sampled generation and visible to the caller.
             */
635         INP_INFO_RLOCK(&udbinfo);
636         for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n;
637              inp = LIST_NEXT(inp, inp_list)) {
638                 INP_LOCK(inp);
639                 if (inp->inp_gencnt <= gencnt &&
640                     cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
641                         inp_list[i++] = inp;
642                 INP_UNLOCK(inp);
643         }
644         INP_INFO_RUNLOCK(&udbinfo);
645         n = i;
646
            /*
             * Pass 2: copy each pcb out.  The generation is rechecked under
             * the pcb lock, and SYSCTL_OUT() runs after that lock is dropped.
             * The loop stops at the first copy-out error so a later success
             * cannot overwrite it.
             */
647         error = 0;
648         for (i = 0; error == 0 && i < n; i++) {
649                 inp = inp_list[i];
650                 INP_LOCK(inp);
651                 if (inp->inp_gencnt <= gencnt) {
652                         struct xinpcb xi;
653                         bzero(&xi, sizeof(xi));
654                         xi.xi_len = sizeof xi;
655                         /* XXX should avoid extra copy */
656                         bcopy(inp, &xi.xi_inp, sizeof *inp);
657                         if (inp->inp_socket)
658                                 sotoxsocket(inp->inp_socket, &xi.xi_socket);
659                         xi.xi_inp.inp_gencnt = inp->inp_gencnt;
660                         INP_UNLOCK(inp);
661                         error = SYSCTL_OUT(req, &xi, sizeof xi);
662                 } else
663                         INP_UNLOCK(inp);
664         }
665         if (!error) {
666                 /*
667                  * Give the user an updated idea of our state.  If the
668                  * generation differs from what we told her before, she knows
669                  * that something happened while we were processing this
670                  * request, and it might be necessary to retry.
671                  */
672                 INP_INFO_RLOCK(&udbinfo);
673                 xig.xig_gen = udbinfo.ipi_gencnt;
674                 xig.xig_sogen = so_gencnt;
675                 xig.xig_count = udbinfo.ipi_count;
676                 INP_INFO_RUNLOCK(&udbinfo);
677                 error = SYSCTL_OUT(req, &xig, sizeof xig);
678         }
679         free(inp_list, M_TEMP);
680         return (error);
681 }
682
683 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
684     udp_pcblist, "S,xinpcb", "List of active UDP sockets");
685
/*
 * Sysctl handler for net.inet.udp.getcred: returns the credentials of the
 * socket owning a given UDP address pair.
 *
 * The caller writes two struct sockaddr_in values; addrs[0] is looked up as
 * the local side and addrs[1] as the foreign side (wildcard matching
 * enabled).  Requires PRIV_NETINET_GETCRED.  Returns ENOENT if no pcb with
 * a socket matches, the cr_canseesocket() error if the caller may not see
 * it, otherwise the result of copying out the struct xucred.
 */
686 static int
687 udp_getcred(SYSCTL_HANDLER_ARGS)
688 {
689         struct xucred xuc;
690         struct sockaddr_in addrs[2];
691         struct inpcb *inp;
692         int error;
693
694         error = priv_check_cred(req->td->td_ucred, PRIV_NETINET_GETCRED,
695             SUSER_ALLOWJAIL);
696         if (error)
697                 return (error);
698         error = SYSCTL_IN(req, addrs, sizeof(addrs));
699         if (error)
700                 return (error);
701         INP_INFO_RLOCK(&udbinfo);
702         inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
703                                 addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
704         if (inp == NULL || inp->inp_socket == NULL) {
705                 error = ENOENT;
706                 goto out;
707         }
708         error = cr_canseesocket(req->td->td_ucred, inp->inp_socket);
709         if (error)
710                 goto out;
            /* Snapshot the credential into xuc while the pcbinfo lock is held. */
711         cru2x(inp->inp_socket->so_cred, &xuc);
712 out:
713         INP_INFO_RUNLOCK(&udbinfo);
            /* Copy out only after the lock is released, and only on success. */
714         if (error == 0)
715                 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
716         return (error);
717 }
718
719 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
720     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
721     udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
722
723 static int
724 udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
725     struct mbuf *control, struct thread *td)
726 {
727         struct udpiphdr *ui;
728         int len = m->m_pkthdr.len;
729         struct in_addr faddr, laddr;
730         struct cmsghdr *cm;
731         struct sockaddr_in *sin, src;
732         int error = 0;
733         int ipflags;
734         u_short fport, lport;
735         int unlock_udbinfo;
736
737         /*
738          * udp_output() may need to temporarily bind or connect the current
739          * inpcb.  As such, we don't know up front what inpcb locks we will
740          * need.  Do any work to decide what is needed up front before
741          * acquiring locks.
742          */
743         if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
744                 if (control)
745                         m_freem(control);
746                 m_freem(m);
747                 return (EMSGSIZE);
748         }
749
750         src.sin_family = 0;
751         if (control != NULL) {
752                 /*
753                  * XXX: Currently, we assume all the optional information is
754                  * stored in a single mbuf.
755                  */
756                 if (control->m_next) {
757                         m_freem(control);
758                         m_freem(m);
759                         return (EINVAL);
760                 }
761                 for (; control->m_len > 0;
762                     control->m_data += CMSG_ALIGN(cm->cmsg_len),
763                     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
764                         cm = mtod(control, struct cmsghdr *);
765                         if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 ||
766                             cm->cmsg_len > control->m_len) {
767                                 error = EINVAL;
768                                 break;
769                         }
770                         if (cm->cmsg_level != IPPROTO_IP)
771                                 continue;
772
773                         switch (cm->cmsg_type) {
774                         case IP_SENDSRCADDR:
775                                 if (cm->cmsg_len !=
776                                     CMSG_LEN(sizeof(struct in_addr))) {
777                                         error = EINVAL;
778                                         break;
779                                 }
780                                 bzero(&src, sizeof(src));
781                                 src.sin_family = AF_INET;
782                                 src.sin_len = sizeof(src);
783                                 src.sin_port = inp->inp_lport;
784                                 src.sin_addr = *(struct in_addr *)CMSG_DATA(cm);
785                                 break;
786                         default:
787                                 error = ENOPROTOOPT;
788                                 break;
789                         }
790                         if (error)
791                                 break;
792                 }
793                 m_freem(control);
794         }
795         if (error) {
796                 m_freem(m);
797                 return (error);
798         }
799
800         if (src.sin_family == AF_INET || addr != NULL) {
801                 INP_INFO_WLOCK(&udbinfo);
802                 unlock_udbinfo = 1;
803         } else
804                 unlock_udbinfo = 0;
805         INP_LOCK(inp);
806
807 #ifdef MAC
808         mac_create_mbuf_from_inpcb(inp, m);
809 #endif
810
811         /*
812          * If the IP_SENDSRCADDR control message was specified, override the
813          * source address for this datagram. Its use is invalidated if the
814          * address thus specified is incomplete or clobbers other inpcbs.
815          */
816         laddr = inp->inp_laddr;
817         lport = inp->inp_lport;
818         if (src.sin_family == AF_INET) {
819                 if ((lport == 0) ||
820                     (laddr.s_addr == INADDR_ANY &&
821                      src.sin_addr.s_addr == INADDR_ANY)) {
822                         error = EINVAL;
823                         goto release;
824                 }
825                 error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
826                     &laddr.s_addr, &lport, td->td_ucred);
827                 if (error)
828                         goto release;
829         }
830
831         if (addr) {
832                 sin = (struct sockaddr_in *)addr;
833                 if (jailed(td->td_ucred))
834                         prison_remote_ip(td->td_ucred, 0,
835                             &sin->sin_addr.s_addr);
836                 if (inp->inp_faddr.s_addr != INADDR_ANY) {
837                         error = EISCONN;
838                         goto release;
839                 }
840                 error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport,
841                     &faddr.s_addr, &fport, NULL, td->td_ucred);
842                 if (error)
843                         goto release;
844
845                 /* Commit the local port if newly assigned. */
846                 if (inp->inp_laddr.s_addr == INADDR_ANY &&
847                     inp->inp_lport == 0) {
848                         /*
849                          * Remember addr if jailed, to prevent rebinding.
850                          */
851                         if (jailed(td->td_ucred))
852                                 inp->inp_laddr = laddr;
853                         inp->inp_lport = lport;
854                         if (in_pcbinshash(inp) != 0) {
855                                 inp->inp_lport = 0;
856                                 error = EAGAIN;
857                                 goto release;
858                         }
859                         inp->inp_flags |= INP_ANONPORT;
860                 }
861         } else {
862                 faddr = inp->inp_faddr;
863                 fport = inp->inp_fport;
864                 if (faddr.s_addr == INADDR_ANY) {
865                         error = ENOTCONN;
866                         goto release;
867                 }
868         }
869
870         /*
871          * Calculate data length and get a mbuf for UDP, IP, and possible
872          * link-layer headers.  Immediate slide the data pointer back forward
873          * since we won't use that space at this layer.
874          */
875         M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
876         if (m == NULL) {
877                 error = ENOBUFS;
878                 goto release;
879         }
880         m->m_data += max_linkhdr;
881         m->m_len -= max_linkhdr;
882         m->m_pkthdr.len -= max_linkhdr;
883
884         /*
885          * Fill in mbuf with extended UDP header and addresses and length put
886          * into network format.
887          */
888         ui = mtod(m, struct udpiphdr *);
889         bzero(ui->ui_x1, sizeof(ui->ui_x1));    /* XXX still needed? */
890         ui->ui_pr = IPPROTO_UDP;
891         ui->ui_src = laddr;
892         ui->ui_dst = faddr;
893         ui->ui_sport = lport;
894         ui->ui_dport = fport;
895         ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
896
897         /*
898          * Set the Don't Fragment bit in the IP header.
899          */
900         if (inp->inp_flags & INP_DONTFRAG) {
901                 struct ip *ip;
902
903                 ip = (struct ip *)&ui->ui_i;
904                 ip->ip_off |= IP_DF;
905         }
906
907         ipflags = 0;
908         if (inp->inp_socket->so_options & SO_DONTROUTE)
909                 ipflags |= IP_ROUTETOIF;
910         if (inp->inp_socket->so_options & SO_BROADCAST)
911                 ipflags |= IP_ALLOWBROADCAST;
912         if (inp->inp_flags & INP_ONESBCAST)
913                 ipflags |= IP_SENDONES;
914
915         /*
916          * Set up checksum and output datagram.
917          */
918         if (udpcksum) {
919                 if (inp->inp_flags & INP_ONESBCAST)
920                         faddr.s_addr = INADDR_BROADCAST;
921                 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
922                     htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
923                 m->m_pkthdr.csum_flags = CSUM_UDP;
924                 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
925         } else
926                 ui->ui_sum = 0;
927         ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
928         ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;    /* XXX */
929         ((struct ip *)ui)->ip_tos = inp->inp_ip_tos;    /* XXX */
930         udpstat.udps_opackets++;
931
932         if (unlock_udbinfo)
933                 INP_INFO_WUNLOCK(&udbinfo);
934         error = ip_output(m, inp->inp_options, NULL, ipflags,
935             inp->inp_moptions, inp);
936         INP_UNLOCK(inp);
937         return (error);
938
939 release:
940         INP_UNLOCK(inp);
941         if (unlock_udbinfo)
942                 INP_INFO_WUNLOCK(&udbinfo);
943         m_freem(m);
944         return (error);
945 }
946
947 u_long  udp_sendspace = 9216;           /* really max datagram size */
948                                         /* 40 1K datagrams */
949 SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
950     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
951
952 u_long  udp_recvspace = 40 * (1024 +
953 #ifdef INET6
954                                       sizeof(struct sockaddr_in6)
955 #else
956                                       sizeof(struct sockaddr_in)
957 #endif
958                                       );
959 SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
960     &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
961
962 static void
963 udp_abort(struct socket *so)
964 {
965         struct inpcb *inp;
966
967         inp = sotoinpcb(so);
968         KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
969         INP_INFO_WLOCK(&udbinfo);
970         INP_LOCK(inp);
971         if (inp->inp_faddr.s_addr != INADDR_ANY) {
972                 in_pcbdisconnect(inp);
973                 inp->inp_laddr.s_addr = INADDR_ANY;
974                 soisdisconnected(so);
975         }
976         INP_UNLOCK(inp);
977         INP_INFO_WUNLOCK(&udbinfo);
978 }
979
980 static int
981 udp_attach(struct socket *so, int proto, struct thread *td)
982 {
983         struct inpcb *inp;
984         int error;
985
986         inp = sotoinpcb(so);
987         KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
988         error = soreserve(so, udp_sendspace, udp_recvspace);
989         if (error)
990                 return (error);
991         INP_INFO_WLOCK(&udbinfo);
992         error = in_pcballoc(so, &udbinfo);
993         if (error) {
994                 INP_INFO_WUNLOCK(&udbinfo);
995                 return (error);
996         }
997
998         inp = (struct inpcb *)so->so_pcb;
999         INP_INFO_WUNLOCK(&udbinfo);
1000         inp->inp_vflag |= INP_IPV4;
1001         inp->inp_ip_ttl = ip_defttl;
1002         INP_UNLOCK(inp);
1003         return (0);
1004 }
1005
1006 static int
1007 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1008 {
1009         struct inpcb *inp;
1010         int error;
1011
1012         inp = sotoinpcb(so);
1013         KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
1014         INP_INFO_WLOCK(&udbinfo);
1015         INP_LOCK(inp);
1016         error = in_pcbbind(inp, nam, td->td_ucred);
1017         INP_UNLOCK(inp);
1018         INP_INFO_WUNLOCK(&udbinfo);
1019         return (error);
1020 }
1021
1022 static void
1023 udp_close(struct socket *so)
1024 {
1025         struct inpcb *inp;
1026
1027         inp = sotoinpcb(so);
1028         KASSERT(inp != NULL, ("udp_close: inp == NULL"));
1029         INP_INFO_WLOCK(&udbinfo);
1030         INP_LOCK(inp);
1031         if (inp->inp_faddr.s_addr != INADDR_ANY) {
1032                 in_pcbdisconnect(inp);
1033                 inp->inp_laddr.s_addr = INADDR_ANY;
1034                 soisdisconnected(so);
1035         }
1036         INP_UNLOCK(inp);
1037         INP_INFO_WUNLOCK(&udbinfo);
1038 }
1039
1040 static int
1041 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1042 {
1043         struct inpcb *inp;
1044         int error;
1045         struct sockaddr_in *sin;
1046
1047         inp = sotoinpcb(so);
1048         KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
1049         INP_INFO_WLOCK(&udbinfo);
1050         INP_LOCK(inp);
1051         if (inp->inp_faddr.s_addr != INADDR_ANY) {
1052                 INP_UNLOCK(inp);
1053                 INP_INFO_WUNLOCK(&udbinfo);
1054                 return (EISCONN);
1055         }
1056         sin = (struct sockaddr_in *)nam;
1057         if (jailed(td->td_ucred))
1058                 prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
1059         error = in_pcbconnect(inp, nam, td->td_ucred);
1060         if (error == 0)
1061                 soisconnected(so);
1062         INP_UNLOCK(inp);
1063         INP_INFO_WUNLOCK(&udbinfo);
1064         return (error);
1065 }
1066
1067 static void
1068 udp_detach(struct socket *so)
1069 {
1070         struct inpcb *inp;
1071
1072         inp = sotoinpcb(so);
1073         KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
1074         KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
1075             ("udp_detach: not disconnected"));
1076         INP_INFO_WLOCK(&udbinfo);
1077         INP_LOCK(inp);
1078         in_pcbdetach(inp);
1079         in_pcbfree(inp);
1080         INP_INFO_WUNLOCK(&udbinfo);
1081 }
1082
1083 static int
1084 udp_disconnect(struct socket *so)
1085 {
1086         struct inpcb *inp;
1087
1088         inp = sotoinpcb(so);
1089         KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
1090         INP_INFO_WLOCK(&udbinfo);
1091         INP_LOCK(inp);
1092         if (inp->inp_faddr.s_addr == INADDR_ANY) {
1093                 INP_INFO_WUNLOCK(&udbinfo);
1094                 INP_UNLOCK(inp);
1095                 return (ENOTCONN);
1096         }
1097
1098         in_pcbdisconnect(inp);
1099         inp->inp_laddr.s_addr = INADDR_ANY;
1100         SOCK_LOCK(so);
1101         so->so_state &= ~SS_ISCONNECTED;                /* XXX */
1102         SOCK_UNLOCK(so);
1103         INP_UNLOCK(inp);
1104         INP_INFO_WUNLOCK(&udbinfo);
1105         return (0);
1106 }
1107
1108 static int
1109 udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1110     struct mbuf *control, struct thread *td)
1111 {
1112         struct inpcb *inp;
1113
1114         inp = sotoinpcb(so);
1115         KASSERT(inp != NULL, ("udp_send: inp == NULL"));
1116         return (udp_output(inp, m, addr, control, td));
1117 }
1118
1119 int
1120 udp_shutdown(struct socket *so)
1121 {
1122         struct inpcb *inp;
1123
1124         inp = sotoinpcb(so);
1125         KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
1126         INP_LOCK(inp);
1127         socantsendmore(so);
1128         INP_UNLOCK(inp);
1129         return (0);
1130 }
1131
1132 /*
1133  * This is the wrapper function for in_setsockaddr.  We just pass down the
1134  * pcbinfo for in_setsockaddr() to lock.  We don't want to do the locking
1135  * here because in_setsockaddr() will call malloc and might block.
1136  */
1137 static int
1138 udp_sockaddr(struct socket *so, struct sockaddr **nam)
1139 {
1140
1141         return (in_setsockaddr(so, nam, &udbinfo));
1142 }
1143
1144 /*
1145  * This is the wrapper function for in_setpeeraddr().  We just pass down the
1146  * pcbinfo for in_setpeeraddr() to lock.
1147  */
1148 static int
1149 udp_peeraddr(struct socket *so, struct sockaddr **nam)
1150 {
1151
1152         return (in_setpeeraddr(so, nam, &udbinfo));
1153 }
1154
1155 struct pr_usrreqs udp_usrreqs = {
1156         .pru_abort =            udp_abort,
1157         .pru_attach =           udp_attach,
1158         .pru_bind =             udp_bind,
1159         .pru_connect =          udp_connect,
1160         .pru_control =          in_control,
1161         .pru_detach =           udp_detach,
1162         .pru_disconnect =       udp_disconnect,
1163         .pru_peeraddr =         udp_peeraddr,
1164         .pru_send =             udp_send,
1165         .pru_sosend =           sosend_dgram,
1166         .pru_shutdown =         udp_shutdown,
1167         .pru_sockaddr =         udp_sockaddr,
1168         .pru_sosetlabel =       in_pcbsosetlabel,
1169         .pru_close =            udp_close,
1170 };