]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/udp_usrreq.c
Some whitespace nits and remove a few casts.
[FreeBSD/FreeBSD.git] / sys / netinet / udp_usrreq.c
1 /*-
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      @(#)udp_usrreq.c        8.6 (Berkeley) 5/23/95
30  * $FreeBSD$
31  */
32
33 #include "opt_ipfw.h"
34 #include "opt_ipsec.h"
35 #include "opt_inet6.h"
36 #include "opt_mac.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/domain.h>
41 #include <sys/eventhandler.h>
42 #include <sys/jail.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/priv.h>
48 #include <sys/proc.h>
49 #include <sys/protosw.h>
50 #include <sys/signalvar.h>
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/sx.h>
54 #include <sys/sysctl.h>
55 #include <sys/syslog.h>
56
57 #include <vm/uma.h>
58
59 #include <net/if.h>
60 #include <net/route.h>
61
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_var.h>
66 #include <netinet/ip.h>
67 #ifdef INET6
68 #include <netinet/ip6.h>
69 #endif
70 #include <netinet/ip_icmp.h>
71 #include <netinet/icmp_var.h>
72 #include <netinet/ip_var.h>
73 #include <netinet/ip_options.h>
74 #ifdef INET6
75 #include <netinet6/ip6_var.h>
76 #endif
77 #include <netinet/udp.h>
78 #include <netinet/udp_var.h>
79
80 #ifdef FAST_IPSEC
81 #include <netipsec/ipsec.h>
82 #endif /*FAST_IPSEC*/
83
84 #ifdef IPSEC
85 #include <netinet6/ipsec.h>
86 #endif /*IPSEC*/
87
88 #include <machine/in_cksum.h>
89
90 #include <security/mac/mac_framework.h>
91
92 /*
93  * UDP protocol implementation.
94  * Per RFC 768, August, 1980.
95  */
96 #ifndef COMPAT_42
97 static int      udpcksum = 1;
98 #else
99 static int      udpcksum = 0;           /* XXX */
100 #endif
101 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
102                 &udpcksum, 0, "");
103
104 int     log_in_vain = 0;
105 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
106     &log_in_vain, 0, "Log all incoming UDP packets");
107
108 static int      blackhole = 0;
109 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
110         &blackhole, 0, "Do not send port unreachables for refused connects");
111
112 static int      strict_mcast_mship = 0;
113 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
114         &strict_mcast_mship, 0, "Only send multicast to member sockets");
115
116 struct  inpcbhead udb;          /* from udp_var.h */
117 #define udb6    udb  /* for KAME src sync over BSD*'s */
118 struct  inpcbinfo udbinfo;
119
120 #ifndef UDBHASHSIZE
121 #define UDBHASHSIZE 16
122 #endif
123
124 struct  udpstat udpstat;        /* from udp_var.h */
125 SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
126     &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
127
128 static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
129                 int off, struct sockaddr_in *udp_in);
130
131 static void udp_detach(struct socket *so);
132 static  int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
133                 struct mbuf *, struct thread *);
134
135 static void
136 udp_zone_change(void *tag)
137 {
138
139         uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
140 }
141
/*
 * Item-init callback passed to uma_zcreate() for the UDP inpcb zone:
 * initialise the per-inpcb lock of the item at mem.  size and flags are
 * not examined.  Always returns 0.
 */
static int
udp_inpcb_init(void *mem, int size, int flags)
{
	struct inpcb *pcb;

	pcb = mem;
	INP_LOCK_INIT(pcb, "inp", "udpinp");

	return (0);
}
150
151 void
152 udp_init()
153 {
154         INP_INFO_LOCK_INIT(&udbinfo, "udp");
155         LIST_INIT(&udb);
156         udbinfo.listhead = &udb;
157         udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
158         udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
159                                         &udbinfo.porthashmask);
160         udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL,
161             NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
162         uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
163         EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
164                 EVENTHANDLER_PRI_ANY);
165 }
166
167 void
168 udp_input(m, off)
169         register struct mbuf *m;
170         int off;
171 {
172         int iphlen = off;
173         register struct ip *ip;
174         register struct udphdr *uh;
175         register struct inpcb *inp;
176         int len;
177         struct ip save_ip;
178         struct sockaddr_in udp_in;
179 #ifdef IPFIREWALL_FORWARD
180         struct m_tag *fwd_tag;
181 #endif
182
183         udpstat.udps_ipackets++;
184
185         /*
186          * Strip IP options, if any; should skip this,
187          * make available to user, and use on returned packets,
188          * but we don't yet have a way to check the checksum
189          * with options still present.
190          */
191         if (iphlen > sizeof (struct ip)) {
192                 ip_stripoptions(m, (struct mbuf *)0);
193                 iphlen = sizeof(struct ip);
194         }
195
196         /*
197          * Get IP and UDP header together in first mbuf.
198          */
199         ip = mtod(m, struct ip *);
200         if (m->m_len < iphlen + sizeof(struct udphdr)) {
201                 if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
202                         udpstat.udps_hdrops++;
203                         return;
204                 }
205                 ip = mtod(m, struct ip *);
206         }
207         uh = (struct udphdr *)((caddr_t)ip + iphlen);
208
209         /* destination port of 0 is illegal, based on RFC768. */
210         if (uh->uh_dport == 0)
211                 goto badunlocked;
212
213         /*
214          * Construct sockaddr format source address.
215          * Stuff source address and datagram in user buffer.
216          */
217         bzero(&udp_in, sizeof(udp_in));
218         udp_in.sin_len = sizeof(udp_in);
219         udp_in.sin_family = AF_INET;
220         udp_in.sin_port = uh->uh_sport;
221         udp_in.sin_addr = ip->ip_src;
222
223         /*
224          * Make mbuf data length reflect UDP length.
225          * If not enough data to reflect UDP length, drop.
226          */
227         len = ntohs((u_short)uh->uh_ulen);
228         if (ip->ip_len != len) {
229                 if (len > ip->ip_len || len < sizeof(struct udphdr)) {
230                         udpstat.udps_badlen++;
231                         goto badunlocked;
232                 }
233                 m_adj(m, len - ip->ip_len);
234                 /* ip->ip_len = len; */
235         }
236         /*
237          * Save a copy of the IP header in case we want restore it
238          * for sending an ICMP error message in response.
239          */
240         if (!blackhole)
241                 save_ip = *ip;
242
243         /*
244          * Checksum extended UDP header and data.
245          */
246         if (uh->uh_sum) {
247                 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
248                         if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
249                                 uh->uh_sum = m->m_pkthdr.csum_data;
250                         else
251                                 uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
252                                     ip->ip_dst.s_addr, htonl((u_short)len +
253                                     m->m_pkthdr.csum_data + IPPROTO_UDP));
254                         uh->uh_sum ^= 0xffff;
255                 } else {
256                         char b[9];
257                         bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
258                         bzero(((struct ipovly *)ip)->ih_x1, 9);
259                         ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
260                         uh->uh_sum = in_cksum(m, len + sizeof (struct ip));
261                         bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
262                 }
263                 if (uh->uh_sum) {
264                         udpstat.udps_badsum++;
265                         m_freem(m);
266                         return;
267                 }
268         } else
269                 udpstat.udps_nosum++;
270
271 #ifdef IPFIREWALL_FORWARD
272         /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
273         fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
274
275         if (fwd_tag != NULL) {
276                 struct sockaddr_in *next_hop;
277
278                 /* Do the hack. */
279                 next_hop = (struct sockaddr_in *)(fwd_tag + 1);
280                 ip->ip_dst = next_hop->sin_addr;
281                 uh->uh_dport = ntohs(next_hop->sin_port);
282                 /* Remove the tag from the packet.  We don't need it anymore. */
283                 m_tag_delete(m, fwd_tag);
284         }
285 #endif
286
287         INP_INFO_RLOCK(&udbinfo);
288
289         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
290             in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
291                 struct inpcb *last;
292                 /*
293                  * Deliver a multicast or broadcast datagram to *all* sockets
294                  * for which the local and remote addresses and ports match
295                  * those of the incoming datagram.  This allows more than
296                  * one process to receive multi/broadcasts on the same port.
297                  * (This really ought to be done for unicast datagrams as
298                  * well, but that would cause problems with existing
299                  * applications that open both address-specific sockets and
300                  * a wildcard socket listening to the same port -- they would
301                  * end up receiving duplicates of every unicast datagram.
302                  * Those applications open the multiple sockets to overcome an
303                  * inadequacy of the UDP socket interface, but for backwards
304                  * compatibility we avoid the problem here rather than
305                  * fixing the interface.  Maybe 4.5BSD will remedy this?)
306                  */
307
308                 /*
309                  * Locate pcb(s) for datagram.
310                  * (Algorithm copied from raw_intr().)
311                  */
312                 last = NULL;
313                 LIST_FOREACH(inp, &udb, inp_list) {
314                         if (inp->inp_lport != uh->uh_dport)
315                                 continue;
316 #ifdef INET6
317                         if ((inp->inp_vflag & INP_IPV4) == 0)
318                                 continue;
319 #endif
320                         if (inp->inp_laddr.s_addr != INADDR_ANY) {
321                                 if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
322                                         continue;
323                         }
324                         if (inp->inp_faddr.s_addr != INADDR_ANY) {
325                                 if (inp->inp_faddr.s_addr !=
326                                     ip->ip_src.s_addr ||
327                                     inp->inp_fport != uh->uh_sport)
328                                         continue;
329                         }
330                         INP_LOCK(inp);
331
332                         /*
333                          * Check multicast packets to make sure they are only
334                          * sent to sockets with multicast memberships for the
335                          * packet's destination address and arrival interface
336                          */
337 #define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)])
338 #define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships)
339                         if (strict_mcast_mship && inp->inp_moptions != NULL) {
340                                 int mship, foundmship = 0;
341
342                                 for (mship = 0; mship < NMSHIPS(inp); mship++) {
343                                         if (MSHIP(inp, mship)->inm_addr.s_addr
344                                             == ip->ip_dst.s_addr &&
345                                             MSHIP(inp, mship)->inm_ifp
346                                             == m->m_pkthdr.rcvif) {
347                                                 foundmship = 1;
348                                                 break;
349                                         }
350                                 }
351                                 if (foundmship == 0) {
352                                         INP_UNLOCK(inp);
353                                         continue;
354                                 }
355                         }
356 #undef NMSHIPS
357 #undef MSHIP
358                         if (last != NULL) {
359                                 struct mbuf *n;
360
361                                 n = m_copy(m, 0, M_COPYALL);
362                                 if (n != NULL)
363                                         udp_append(last, ip, n,
364                                                    iphlen +
365                                                    sizeof(struct udphdr),
366                                                    &udp_in);
367                                 INP_UNLOCK(last);
368                         }
369                         last = inp;
370                         /*
371                          * Don't look for additional matches if this one does
372                          * not have either the SO_REUSEPORT or SO_REUSEADDR
373                          * socket options set.  This heuristic avoids searching
374                          * through all pcbs in the common case of a non-shared
375                          * port.  It assumes that an application will never
376                          * clear these options after setting them.
377                          */
378                         if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0)
379                                 break;
380                 }
381
382                 if (last == NULL) {
383                         /*
384                          * No matching pcb found; discard datagram.
385                          * (No need to send an ICMP Port Unreachable
386                          * for a broadcast or multicast datgram.)
387                          */
388                         udpstat.udps_noportbcast++;
389                         goto badheadlocked;
390                 }
391                 udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
392                     &udp_in);
393                 INP_UNLOCK(last);
394                 INP_INFO_RUNLOCK(&udbinfo);
395                 return;
396         }
397         /*
398          * Locate pcb for datagram.
399          */
400         inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
401             ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
402         if (inp == NULL) {
403                 if (log_in_vain) {
404                         char buf[4*sizeof "123"];
405
406                         strcpy(buf, inet_ntoa(ip->ip_dst));
407                         log(LOG_INFO,
408                             "Connection attempt to UDP %s:%d from %s:%d\n",
409                             buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
410                             ntohs(uh->uh_sport));
411                 }
412                 udpstat.udps_noport++;
413                 if (m->m_flags & (M_BCAST | M_MCAST)) {
414                         udpstat.udps_noportbcast++;
415                         goto badheadlocked;
416                 }
417                 if (blackhole)
418                         goto badheadlocked;
419                 if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
420                         goto badheadlocked;
421                 *ip = save_ip;
422                 ip->ip_len += iphlen;
423                 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
424                 INP_INFO_RUNLOCK(&udbinfo);
425                 return;
426         }
427         INP_LOCK(inp);
428         /* Check the minimum TTL for socket. */
429         if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
430                 goto badheadlocked;
431         udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in);
432         INP_UNLOCK(inp);
433         INP_INFO_RUNLOCK(&udbinfo);
434         return;
435
436 badheadlocked:
437         if (inp)
438                 INP_UNLOCK(inp);
439         INP_INFO_RUNLOCK(&udbinfo);
440 badunlocked:
441         m_freem(m);
442         return;
443 }
444
445 /*
446  * Subroutine of udp_input(), which appends the provided mbuf chain to the
447  * passed pcb/socket.  The caller must provide a sockaddr_in via udp_in that
448  * contains the source address.  If the socket ends up being an IPv6 socket,
449  * udp_append() will convert to a sockaddr_in6 before passing the address
450  * into the socket code.
451  */
452 static void
453 udp_append(inp, ip, n, off, udp_in)
454         struct inpcb *inp;
455         struct ip *ip;
456         struct mbuf *n;
457         int off;
458         struct sockaddr_in *udp_in;
459 {
460         struct sockaddr *append_sa;
461         struct socket *so;
462         struct mbuf *opts = 0;
463 #ifdef INET6
464         struct sockaddr_in6 udp_in6;
465 #endif
466
467         INP_LOCK_ASSERT(inp);
468
469 #if defined(IPSEC) || defined(FAST_IPSEC)
470         /* check AH/ESP integrity. */
471         if (ipsec4_in_reject(n, inp)) {
472 #ifdef IPSEC
473                 ipsecstat.in_polvio++;
474 #endif /*IPSEC*/
475                 m_freem(n);
476                 return;
477         }
478 #endif /*IPSEC || FAST_IPSEC*/
479 #ifdef MAC
480         if (mac_check_inpcb_deliver(inp, n) != 0) {
481                 m_freem(n);
482                 return;
483         }
484 #endif
485         if (inp->inp_flags & INP_CONTROLOPTS ||
486             inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
487 #ifdef INET6
488                 if (inp->inp_vflag & INP_IPV6) {
489                         int savedflags;
490
491                         savedflags = inp->inp_flags;
492                         inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
493                         ip6_savecontrol(inp, n, &opts);
494                         inp->inp_flags = savedflags;
495                 } else
496 #endif
497                 ip_savecontrol(inp, &opts, ip, n);
498         }
499 #ifdef INET6
500         if (inp->inp_vflag & INP_IPV6) {
501                 bzero(&udp_in6, sizeof(udp_in6));
502                 udp_in6.sin6_len = sizeof(udp_in6);
503                 udp_in6.sin6_family = AF_INET6;
504                 in6_sin_2_v4mapsin6(udp_in, &udp_in6);
505                 append_sa = (struct sockaddr *)&udp_in6;
506         } else
507 #endif
508         append_sa = (struct sockaddr *)udp_in;
509         m_adj(n, off);
510
511         so = inp->inp_socket;
512         SOCKBUF_LOCK(&so->so_rcv);
513         if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
514                 m_freem(n);
515                 if (opts)
516                         m_freem(opts);
517                 udpstat.udps_fullsock++;
518                 SOCKBUF_UNLOCK(&so->so_rcv);
519         } else
520                 sorwakeup_locked(so);
521 }
522
523 /*
524  * Notify a udp user of an asynchronous error;
525  * just wake up so that he can collect error status.
526  */
527 struct inpcb *
528 udp_notify(inp, errno)
529         register struct inpcb *inp;
530         int errno;
531 {
532         inp->inp_socket->so_error = errno;
533         sorwakeup(inp->inp_socket);
534         sowwakeup(inp->inp_socket);
535         return inp;
536 }
537
538 void
539 udp_ctlinput(cmd, sa, vip)
540         int cmd;
541         struct sockaddr *sa;
542         void *vip;
543 {
544         struct ip *ip = vip;
545         struct udphdr *uh;
546         struct inpcb *(*notify)(struct inpcb *, int) = udp_notify;
547         struct in_addr faddr;
548         struct inpcb *inp;
549
550         faddr = ((struct sockaddr_in *)sa)->sin_addr;
551         if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
552                 return;
553
554         /*
555          * Redirects don't need to be handled up here.
556          */
557         if (PRC_IS_REDIRECT(cmd))
558                 return;
559         /*
560          * Hostdead is ugly because it goes linearly through all PCBs.
561          * XXX: We never get this from ICMP, otherwise it makes an
562          * excellent DoS attack on machines with many connections.
563          */
564         if (cmd == PRC_HOSTDEAD)
565                 ip = 0;
566         else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
567                 return;
568         if (ip) {
569                 uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
570                 INP_INFO_RLOCK(&udbinfo);
571                 inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
572                     ip->ip_src, uh->uh_sport, 0, NULL);
573                 if (inp != NULL) {
574                         INP_LOCK(inp);
575                         if (inp->inp_socket != NULL) {
576                                 (*notify)(inp, inetctlerrmap[cmd]);
577                         }
578                         INP_UNLOCK(inp);
579                 }
580                 INP_INFO_RUNLOCK(&udbinfo);
581         } else
582                 in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify);
583 }
584
585 static int
586 udp_pcblist(SYSCTL_HANDLER_ARGS)
587 {
588         int error, i, n;
589         struct inpcb *inp, **inp_list;
590         inp_gen_t gencnt;
591         struct xinpgen xig;
592
593         /*
594          * The process of preparing the TCB list is too time-consuming and
595          * resource-intensive to repeat twice on every request.
596          */
597         if (req->oldptr == 0) {
598                 n = udbinfo.ipi_count;
599                 req->oldidx = 2 * (sizeof xig)
600                         + (n + n/8) * sizeof(struct xinpcb);
601                 return 0;
602         }
603
604         if (req->newptr != 0)
605                 return EPERM;
606
607         /*
608          * OK, now we're committed to doing something.
609          */
610         INP_INFO_RLOCK(&udbinfo);
611         gencnt = udbinfo.ipi_gencnt;
612         n = udbinfo.ipi_count;
613         INP_INFO_RUNLOCK(&udbinfo);
614
615         error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
616                 + n * sizeof(struct xinpcb));
617         if (error != 0)
618                 return (error);
619
620         xig.xig_len = sizeof xig;
621         xig.xig_count = n;
622         xig.xig_gen = gencnt;
623         xig.xig_sogen = so_gencnt;
624         error = SYSCTL_OUT(req, &xig, sizeof xig);
625         if (error)
626                 return error;
627
628         inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
629         if (inp_list == 0)
630                 return ENOMEM;
631
632         INP_INFO_RLOCK(&udbinfo);
633         for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n;
634              inp = LIST_NEXT(inp, inp_list)) {
635                 INP_LOCK(inp);
636                 if (inp->inp_gencnt <= gencnt &&
637                     cr_canseesocket(req->td->td_ucred, inp->inp_socket) == 0)
638                         inp_list[i++] = inp;
639                 INP_UNLOCK(inp);
640         }
641         INP_INFO_RUNLOCK(&udbinfo);
642         n = i;
643
644         error = 0;
645         for (i = 0; i < n; i++) {
646                 inp = inp_list[i];
647                 INP_LOCK(inp);
648                 if (inp->inp_gencnt <= gencnt) {
649                         struct xinpcb xi;
650                         bzero(&xi, sizeof(xi));
651                         xi.xi_len = sizeof xi;
652                         /* XXX should avoid extra copy */
653                         bcopy(inp, &xi.xi_inp, sizeof *inp);
654                         if (inp->inp_socket)
655                                 sotoxsocket(inp->inp_socket, &xi.xi_socket);
656                         xi.xi_inp.inp_gencnt = inp->inp_gencnt;
657                         INP_UNLOCK(inp);
658                         error = SYSCTL_OUT(req, &xi, sizeof xi);
659                 } else
660                         INP_UNLOCK(inp);        
661         }
662         if (!error) {
663                 /*
664                  * Give the user an updated idea of our state.
665                  * If the generation differs from what we told
666                  * her before, she knows that something happened
667                  * while we were processing this request, and it
668                  * might be necessary to retry.
669                  */
670                 INP_INFO_RLOCK(&udbinfo);
671                 xig.xig_gen = udbinfo.ipi_gencnt;
672                 xig.xig_sogen = so_gencnt;
673                 xig.xig_count = udbinfo.ipi_count;
674                 INP_INFO_RUNLOCK(&udbinfo);
675                 error = SYSCTL_OUT(req, &xig, sizeof xig);
676         }
677         free(inp_list, M_TEMP);
678         return error;
679 }
680
681 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
682             udp_pcblist, "S,xinpcb", "List of active UDP sockets");
683
684 static int
685 udp_getcred(SYSCTL_HANDLER_ARGS)
686 {
687         struct xucred xuc;
688         struct sockaddr_in addrs[2];
689         struct inpcb *inp;
690         int error;
691
692         error = priv_check_cred(req->td->td_ucred, PRIV_NETINET_GETCRED,
693             SUSER_ALLOWJAIL);
694         if (error)
695                 return (error);
696         error = SYSCTL_IN(req, addrs, sizeof(addrs));
697         if (error)
698                 return (error);
699         INP_INFO_RLOCK(&udbinfo);
700         inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
701                                 addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
702         if (inp == NULL || inp->inp_socket == NULL) {
703                 error = ENOENT;
704                 goto out;
705         }
706         error = cr_canseesocket(req->td->td_ucred, inp->inp_socket);
707         if (error)
708                 goto out;
709         cru2x(inp->inp_socket->so_cred, &xuc);
710 out:
711         INP_INFO_RUNLOCK(&udbinfo);
712         if (error == 0)
713                 error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
714         return (error);
715 }
716
717 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
718     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
719     udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
720
721 static int
722 udp_output(inp, m, addr, control, td)
723         register struct inpcb *inp;
724         struct mbuf *m;
725         struct sockaddr *addr;
726         struct mbuf *control;
727         struct thread *td;
728 {
729         register struct udpiphdr *ui;
730         register int len = m->m_pkthdr.len;
731         struct in_addr faddr, laddr;
732         struct cmsghdr *cm;
733         struct sockaddr_in *sin, src;
734         int error = 0;
735         int ipflags;
736         u_short fport, lport;
737         int unlock_udbinfo;
738
739         /*
740          * udp_output() may need to temporarily bind or connect the current
741          * inpcb.  As such, we don't know up front what inpcb locks we will
742          * need.  Do any work to decide what is needed up front before
743          * acquiring locks.
744          */
745         if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
746                 if (control)
747                         m_freem(control);
748                 m_freem(m);
749                 return EMSGSIZE;
750         }
751
752         src.sin_addr.s_addr = INADDR_ANY;
753         if (control != NULL) {
754                 /*
755                  * XXX: Currently, we assume all the optional information
756                  * is stored in a single mbuf.
757                  */
758                 if (control->m_next) {
759                         m_freem(control);
760                         m_freem(m);
761                         return EINVAL;
762                 }
763                 for (; control->m_len > 0;
764                     control->m_data += CMSG_ALIGN(cm->cmsg_len),
765                     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
766                         cm = mtod(control, struct cmsghdr *);
767                         if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 ||
768                             cm->cmsg_len > control->m_len) {
769                                 error = EINVAL;
770                                 break;
771                         }
772                         if (cm->cmsg_level != IPPROTO_IP)
773                                 continue;
774
775                         switch (cm->cmsg_type) {
776                         case IP_SENDSRCADDR:
777                                 if (cm->cmsg_len !=
778                                     CMSG_LEN(sizeof(struct in_addr))) {
779                                         error = EINVAL;
780                                         break;
781                                 }
782                                 bzero(&src, sizeof(src));
783                                 src.sin_family = AF_INET;
784                                 src.sin_len = sizeof(src);
785                                 src.sin_port = inp->inp_lport;
786                                 src.sin_addr = *(struct in_addr *)CMSG_DATA(cm);
787                                 break;
788                         default:
789                                 error = ENOPROTOOPT;
790                                 break;
791                         }
792                         if (error)
793                                 break;
794                 }
795                 m_freem(control);
796         }
797         if (error) {
798                 m_freem(m);
799                 return error;
800         }
801
802         if (src.sin_addr.s_addr != INADDR_ANY ||
803             addr != NULL) {
804                 INP_INFO_WLOCK(&udbinfo);
805                 unlock_udbinfo = 1;
806         } else
807                 unlock_udbinfo = 0;
808         INP_LOCK(inp);
809
810 #ifdef MAC
811         mac_create_mbuf_from_inpcb(inp, m);
812 #endif
813
814         laddr = inp->inp_laddr;
815         lport = inp->inp_lport;
816         if (src.sin_addr.s_addr != INADDR_ANY) {
817                 if (lport == 0) {
818                         error = EINVAL;
819                         goto release;
820                 }
821                 error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
822                     &laddr.s_addr, &lport, td->td_ucred);
823                 if (error)
824                         goto release;
825         }
826
827         if (addr) {
828                 sin = (struct sockaddr_in *)addr;
829                 if (jailed(td->td_ucred))
830                         prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
831                 if (inp->inp_faddr.s_addr != INADDR_ANY) {
832                         error = EISCONN;
833                         goto release;
834                 }
835                 error = in_pcbconnect_setup(inp, addr, &laddr.s_addr, &lport,
836                     &faddr.s_addr, &fport, NULL, td->td_ucred);
837                 if (error)
838                         goto release;
839
840                 /* Commit the local port if newly assigned. */
841                 if (inp->inp_laddr.s_addr == INADDR_ANY &&
842                     inp->inp_lport == 0) {
843                         /*
844                          * Remember addr if jailed, to prevent rebinding.
845                          */
846                         if (jailed(td->td_ucred))
847                                 inp->inp_laddr = laddr;
848                         inp->inp_lport = lport;
849                         if (in_pcbinshash(inp) != 0) {
850                                 inp->inp_lport = 0;
851                                 error = EAGAIN;
852                                 goto release;
853                         }
854                         inp->inp_flags |= INP_ANONPORT;
855                 }
856         } else {
857                 faddr = inp->inp_faddr;
858                 fport = inp->inp_fport;
859                 if (faddr.s_addr == INADDR_ANY) {
860                         error = ENOTCONN;
861                         goto release;
862                 }
863         }
864
865         /*
866          * Calculate data length and get a mbuf for UDP, IP, and possible
867          * link-layer headers.  Immediate slide the data pointer back forward
868          * since we won't use that space at this layer.
869          */
870         M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_DONTWAIT);
871         if (m == NULL) {
872                 error = ENOBUFS;
873                 goto release;
874         }
875         m->m_data += max_linkhdr;
876         m->m_len -= max_linkhdr;
877         m->m_pkthdr.len -= max_linkhdr;
878
879         /*
880          * Fill in mbuf with extended UDP header
881          * and addresses and length put into network format.
882          */
883         ui = mtod(m, struct udpiphdr *);
884         bzero(ui->ui_x1, sizeof(ui->ui_x1));    /* XXX still needed? */
885         ui->ui_pr = IPPROTO_UDP;
886         ui->ui_src = laddr;
887         ui->ui_dst = faddr;
888         ui->ui_sport = lport;
889         ui->ui_dport = fport;
890         ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
891
892         /*
893          * Set the Don't Fragment bit in the IP header.
894          */
895         if (inp->inp_flags & INP_DONTFRAG) {
896                 struct ip *ip;
897                 ip = (struct ip *)&ui->ui_i;
898                 ip->ip_off |= IP_DF;
899         }
900
901         ipflags = 0;
902         if (inp->inp_socket->so_options & SO_DONTROUTE)
903                 ipflags |= IP_ROUTETOIF;
904         if (inp->inp_socket->so_options & SO_BROADCAST)
905                 ipflags |= IP_ALLOWBROADCAST;
906         if (inp->inp_flags & INP_ONESBCAST)
907                 ipflags |= IP_SENDONES;
908
909         /*
910          * Set up checksum and output datagram.
911          */
912         if (udpcksum) {
913                 if (inp->inp_flags & INP_ONESBCAST)
914                         faddr.s_addr = INADDR_BROADCAST;
915                 ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
916                     htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
917                 m->m_pkthdr.csum_flags = CSUM_UDP;
918                 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
919         } else {
920                 ui->ui_sum = 0;
921         }
922         ((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
923         ((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;    /* XXX */
924         ((struct ip *)ui)->ip_tos = inp->inp_ip_tos;    /* XXX */
925         udpstat.udps_opackets++;
926
927         if (unlock_udbinfo)
928                 INP_INFO_WUNLOCK(&udbinfo);
929         error = ip_output(m, inp->inp_options, NULL, ipflags,
930             inp->inp_moptions, inp);
931         INP_UNLOCK(inp);
932         return (error);
933
934 release:
935         INP_UNLOCK(inp);
936         if (unlock_udbinfo)
937                 INP_INFO_WUNLOCK(&udbinfo);
938         m_freem(m);
939         return (error);
940 }
941
942 u_long  udp_sendspace = 9216;           /* really max datagram size */
943                                         /* 40 1K datagrams */
944 SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
945     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
946
947 u_long  udp_recvspace = 40 * (1024 +
948 #ifdef INET6
949                                       sizeof(struct sockaddr_in6)
950 #else
951                                       sizeof(struct sockaddr_in)
952 #endif
953                                       );
954 SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
955     &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
956
957 static void
958 udp_abort(struct socket *so)
959 {
960         struct inpcb *inp;
961
962         inp = sotoinpcb(so);
963         KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
964         INP_INFO_WLOCK(&udbinfo);
965         INP_LOCK(inp);
966         if (inp->inp_faddr.s_addr != INADDR_ANY) {
967                 in_pcbdisconnect(inp);
968                 inp->inp_laddr.s_addr = INADDR_ANY;
969                 soisdisconnected(so);
970         }
971         INP_UNLOCK(inp);
972         INP_INFO_WUNLOCK(&udbinfo);
973 }
974
975 static int
976 udp_attach(struct socket *so, int proto, struct thread *td)
977 {
978         struct inpcb *inp;
979         int error;
980
981         inp = sotoinpcb(so);
982         KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
983         error = soreserve(so, udp_sendspace, udp_recvspace);
984         if (error)
985                 return error;
986         INP_INFO_WLOCK(&udbinfo);
987         error = in_pcballoc(so, &udbinfo);
988         if (error) {
989                 INP_INFO_WUNLOCK(&udbinfo);
990                 return error;
991         }
992
993         inp = (struct inpcb *)so->so_pcb;
994         INP_INFO_WUNLOCK(&udbinfo);
995         inp->inp_vflag |= INP_IPV4;
996         inp->inp_ip_ttl = ip_defttl;
997         INP_UNLOCK(inp);
998         return 0;
999 }
1000
1001 static int
1002 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
1003 {
1004         struct inpcb *inp;
1005         int error;
1006
1007         inp = sotoinpcb(so);
1008         KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
1009         INP_INFO_WLOCK(&udbinfo);
1010         INP_LOCK(inp);
1011         error = in_pcbbind(inp, nam, td->td_ucred);
1012         INP_UNLOCK(inp);
1013         INP_INFO_WUNLOCK(&udbinfo);
1014         return error;
1015 }
1016
1017 static void
1018 udp_close(struct socket *so)
1019 {
1020         struct inpcb *inp;
1021
1022         inp = sotoinpcb(so);
1023         KASSERT(inp != NULL, ("udp_close: inp == NULL"));
1024         INP_INFO_WLOCK(&udbinfo);
1025         INP_LOCK(inp);
1026         if (inp->inp_faddr.s_addr != INADDR_ANY) {
1027                 in_pcbdisconnect(inp);
1028                 inp->inp_laddr.s_addr = INADDR_ANY;
1029                 soisdisconnected(so);
1030         }
1031         INP_UNLOCK(inp);
1032         INP_INFO_WUNLOCK(&udbinfo);
1033 }
1034
1035 static int
1036 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1037 {
1038         struct inpcb *inp;
1039         int error;
1040         struct sockaddr_in *sin;
1041
1042         inp = sotoinpcb(so);
1043         KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
1044         INP_INFO_WLOCK(&udbinfo);
1045         INP_LOCK(inp);
1046         if (inp->inp_faddr.s_addr != INADDR_ANY) {
1047                 INP_UNLOCK(inp);
1048                 INP_INFO_WUNLOCK(&udbinfo);
1049                 return EISCONN;
1050         }
1051         sin = (struct sockaddr_in *)nam;
1052         if (jailed(td->td_ucred))
1053                 prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
1054         error = in_pcbconnect(inp, nam, td->td_ucred);
1055         if (error == 0)
1056                 soisconnected(so);
1057         INP_UNLOCK(inp);
1058         INP_INFO_WUNLOCK(&udbinfo);
1059         return error;
1060 }
1061
1062 static void
1063 udp_detach(struct socket *so)
1064 {
1065         struct inpcb *inp;
1066
1067         inp = sotoinpcb(so);
1068         KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
1069         KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
1070             ("udp_detach: not disconnected"));
1071         INP_INFO_WLOCK(&udbinfo);
1072         INP_LOCK(inp);
1073         in_pcbdetach(inp);
1074         in_pcbfree(inp);
1075         INP_INFO_WUNLOCK(&udbinfo);
1076 }
1077
1078 static int
1079 udp_disconnect(struct socket *so)
1080 {
1081         struct inpcb *inp;
1082
1083         inp = sotoinpcb(so);
1084         KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
1085         INP_INFO_WLOCK(&udbinfo);
1086         INP_LOCK(inp);
1087         if (inp->inp_faddr.s_addr == INADDR_ANY) {
1088                 INP_INFO_WUNLOCK(&udbinfo);
1089                 INP_UNLOCK(inp);
1090                 return ENOTCONN;
1091         }
1092
1093         in_pcbdisconnect(inp);
1094         inp->inp_laddr.s_addr = INADDR_ANY;
1095         SOCK_LOCK(so);
1096         so->so_state &= ~SS_ISCONNECTED;                /* XXX */
1097         SOCK_UNLOCK(so);
1098         INP_UNLOCK(inp);
1099         INP_INFO_WUNLOCK(&udbinfo);
1100         return 0;
1101 }
1102
1103 static int
1104 udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1105             struct mbuf *control, struct thread *td)
1106 {
1107         struct inpcb *inp;
1108
1109         inp = sotoinpcb(so);
1110         KASSERT(inp != NULL, ("udp_send: inp == NULL"));
1111         return udp_output(inp, m, addr, control, td);
1112 }
1113
1114 int
1115 udp_shutdown(struct socket *so)
1116 {
1117         struct inpcb *inp;
1118
1119         inp = sotoinpcb(so);
1120         KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
1121         INP_LOCK(inp);
1122         socantsendmore(so);
1123         INP_UNLOCK(inp);
1124         return 0;
1125 }
1126
1127 /*
1128  * This is the wrapper function for in_setsockaddr.  We just pass down
1129  * the pcbinfo for in_setsockaddr to lock.  We don't want to do the locking
1130  * here because in_setsockaddr will call malloc and might block.
1131  */
1132 static int
1133 udp_sockaddr(struct socket *so, struct sockaddr **nam)
1134 {
1135         return (in_setsockaddr(so, nam, &udbinfo));
1136 }
1137
1138 /*
1139  * This is the wrapper function for in_setpeeraddr.  We just pass down
1140  * the pcbinfo for in_setpeeraddr to lock.
1141  */
1142 static int
1143 udp_peeraddr(struct socket *so, struct sockaddr **nam)
1144 {
1145         return (in_setpeeraddr(so, nam, &udbinfo));
1146 }
1147
1148 struct pr_usrreqs udp_usrreqs = {
1149         .pru_abort =            udp_abort,
1150         .pru_attach =           udp_attach,
1151         .pru_bind =             udp_bind,
1152         .pru_connect =          udp_connect,
1153         .pru_control =          in_control,
1154         .pru_detach =           udp_detach,
1155         .pru_disconnect =       udp_disconnect,
1156         .pru_peeraddr =         udp_peeraddr,
1157         .pru_send =             udp_send,
1158         .pru_sosend =           sosend_dgram,
1159         .pru_shutdown =         udp_shutdown,
1160         .pru_sockaddr =         udp_sockaddr,
1161         .pru_sosetlabel =       in_pcbsosetlabel,
1162         .pru_close =            udp_close,
1163 };