]> CyberLeo.Net >> Repos - FreeBSD/releng/10.2.git/blob - contrib/unbound/services/listen_dnsport.c
- Copy stable/10@285827 to releng/10.2 in preparation for 10.2-RC1
[FreeBSD/releng/10.2.git] / contrib / unbound / services / listen_dnsport.c
1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include "services/listen_dnsport.h"
47 #include "services/outside_network.h"
48 #include "util/netevent.h"
49 #include "util/log.h"
50 #include "util/config_file.h"
51 #include "util/net_help.h"
52 #include "ldns/sbuffer.h"
53
54 #ifdef HAVE_NETDB_H
55 #include <netdb.h>
56 #endif
57 #include <fcntl.h>
58
59 #ifdef HAVE_SYS_UN_H
60 #include <sys/un.h>
61 #endif
62
63 /** number of queued TCP connections for listen() */
64 #define TCP_BACKLOG 256 
65
66 /**
67  * Debug print of the getaddrinfo returned address.
68  * @param addr: the address returned.
69  */
70 static void
71 verbose_print_addr(struct addrinfo *addr)
72 {
73         if(verbosity >= VERB_ALGO) {
74                 char buf[100];
75                 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
76 #ifdef INET6
77                 if(addr->ai_family == AF_INET6)
78                         sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
79                                 sin6_addr;
80 #endif /* INET6 */
81                 if(inet_ntop(addr->ai_family, sinaddr, buf,
82                         (socklen_t)sizeof(buf)) == 0) {
83                         (void)strlcpy(buf, "(null)", sizeof(buf));
84                 }
85                 buf[sizeof(buf)-1] = 0;
86                 verbose(VERB_ALGO, "creating %s%s socket %s %d", 
87                         addr->ai_socktype==SOCK_DGRAM?"udp":
88                         addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
89                         addr->ai_family==AF_INET?"4":
90                         addr->ai_family==AF_INET6?"6":
91                         "_otherfam", buf, 
92                         ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
93         }
94 }
95
96 int
97 create_udp_sock(int family, int socktype, struct sockaddr* addr,
98         socklen_t addrlen, int v6only, int* inuse, int* noproto,
99         int rcv, int snd, int listen, int* reuseport)
100 {
101         int s;
102 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)
103         int on=1;
104 #endif
105 #ifdef IPV6_MTU
106         int mtu = IPV6_MIN_MTU;
107 #endif
108 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
109         (void)rcv;
110 #endif
111 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
112         (void)snd;
113 #endif
114 #ifndef IPV6_V6ONLY
115         (void)v6only;
116 #endif
117         if((s = socket(family, socktype, 0)) == -1) {
118                 *inuse = 0;
119 #ifndef USE_WINSOCK
120                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
121                         *noproto = 1;
122                         return -1;
123                 }
124                 log_err("can't create socket: %s", strerror(errno));
125 #else
126                 if(WSAGetLastError() == WSAEAFNOSUPPORT || 
127                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
128                         *noproto = 1;
129                         return -1;
130                 }
131                 log_err("can't create socket: %s", 
132                         wsa_strerror(WSAGetLastError()));
133 #endif
134                 *noproto = 0;
135                 return -1;
136         }
137         if(listen) {
138 #ifdef SO_REUSEADDR
139                 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
140                         (socklen_t)sizeof(on)) < 0) {
141 #ifndef USE_WINSOCK
142                         log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
143                                 strerror(errno));
144                         if(errno != ENOSYS) {
145                                 close(s);
146                                 *noproto = 0;
147                                 *inuse = 0;
148                                 return -1;
149                         }
150 #else
151                         log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
152                                 wsa_strerror(WSAGetLastError()));
153                         closesocket(s);
154                         *noproto = 0;
155                         *inuse = 0;
156                         return -1;
157 #endif
158                 }
159 #endif /* SO_REUSEADDR */
160 #ifdef SO_REUSEPORT
161                 /* try to set SO_REUSEPORT so that incoming
162                  * queries are distributed evenly among the receiving threads.
163                  * Each thread must have its own socket bound to the same port,
164                  * with SO_REUSEPORT set on each socket.
165                  */
166                 if (reuseport && *reuseport &&
167                     setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
168                         (socklen_t)sizeof(on)) < 0) {
169 #ifdef ENOPROTOOPT
170                         if(errno != ENOPROTOOPT || verbosity >= 3)
171                                 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
172                                         strerror(errno));
173 #endif
174                         /* this option is not essential, we can continue */
175                         *reuseport = 0;
176                 }
177 #else
178                 (void)reuseport;
179 #endif /* defined(SO_REUSEPORT) */
180         }
181         if(rcv) {
182 #ifdef SO_RCVBUF
183                 int got;
184                 socklen_t slen = (socklen_t)sizeof(got);
185 #  ifdef SO_RCVBUFFORCE
186                 /* Linux specific: try to use root permission to override
187                  * system limits on rcvbuf. The limit is stored in 
188                  * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
189                 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 
190                         (socklen_t)sizeof(rcv)) < 0) {
191                         if(errno != EPERM) {
192 #    ifndef USE_WINSOCK
193                                 log_err("setsockopt(..., SO_RCVBUFFORCE, "
194                                         "...) failed: %s", strerror(errno));
195                                 close(s);
196 #    else
197                                 log_err("setsockopt(..., SO_RCVBUFFORCE, "
198                                         "...) failed: %s", 
199                                         wsa_strerror(WSAGetLastError()));
200                                 closesocket(s);
201 #    endif
202                                 *noproto = 0;
203                                 *inuse = 0;
204                                 return -1;
205                         }
206 #  endif /* SO_RCVBUFFORCE */
207                         if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 
208                                 (socklen_t)sizeof(rcv)) < 0) {
209 #  ifndef USE_WINSOCK
210                                 log_err("setsockopt(..., SO_RCVBUF, "
211                                         "...) failed: %s", strerror(errno));
212                                 close(s);
213 #  else
214                                 log_err("setsockopt(..., SO_RCVBUF, "
215                                         "...) failed: %s", 
216                                         wsa_strerror(WSAGetLastError()));
217                                 closesocket(s);
218 #  endif
219                                 *noproto = 0;
220                                 *inuse = 0;
221                                 return -1;
222                         }
223                         /* check if we got the right thing or if system
224                          * reduced to some system max.  Warn if so */
225                         if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 
226                                 &slen) >= 0 && got < rcv/2) {
227                                 log_warn("so-rcvbuf %u was not granted. "
228                                         "Got %u. To fix: start with "
229                                         "root permissions(linux) or sysctl "
230                                         "bigger net.core.rmem_max(linux) or "
231                                         "kern.ipc.maxsockbuf(bsd) values.",
232                                         (unsigned)rcv, (unsigned)got);
233                         }
234 #  ifdef SO_RCVBUFFORCE
235                 }
236 #  endif
237 #endif /* SO_RCVBUF */
238         }
239         /* first do RCVBUF as the receive buffer is more important */
240         if(snd) {
241 #ifdef SO_SNDBUF
242                 int got;
243                 socklen_t slen = (socklen_t)sizeof(got);
244 #  ifdef SO_SNDBUFFORCE
245                 /* Linux specific: try to use root permission to override
246                  * system limits on sndbuf. The limit is stored in 
247                  * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
248                 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 
249                         (socklen_t)sizeof(snd)) < 0) {
250                         if(errno != EPERM) {
251 #    ifndef USE_WINSOCK
252                                 log_err("setsockopt(..., SO_SNDBUFFORCE, "
253                                         "...) failed: %s", strerror(errno));
254                                 close(s);
255 #    else
256                                 log_err("setsockopt(..., SO_SNDBUFFORCE, "
257                                         "...) failed: %s", 
258                                         wsa_strerror(WSAGetLastError()));
259                                 closesocket(s);
260 #    endif
261                                 *noproto = 0;
262                                 *inuse = 0;
263                                 return -1;
264                         }
265 #  endif /* SO_SNDBUFFORCE */
266                         if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 
267                                 (socklen_t)sizeof(snd)) < 0) {
268 #  ifndef USE_WINSOCK
269                                 log_err("setsockopt(..., SO_SNDBUF, "
270                                         "...) failed: %s", strerror(errno));
271                                 close(s);
272 #  else
273                                 log_err("setsockopt(..., SO_SNDBUF, "
274                                         "...) failed: %s", 
275                                         wsa_strerror(WSAGetLastError()));
276                                 closesocket(s);
277 #  endif
278                                 *noproto = 0;
279                                 *inuse = 0;
280                                 return -1;
281                         }
282                         /* check if we got the right thing or if system
283                          * reduced to some system max.  Warn if so */
284                         if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 
285                                 &slen) >= 0 && got < snd/2) {
286                                 log_warn("so-sndbuf %u was not granted. "
287                                         "Got %u. To fix: start with "
288                                         "root permissions(linux) or sysctl "
289                                         "bigger net.core.wmem_max(linux) or "
290                                         "kern.ipc.maxsockbuf(bsd) values.",
291                                         (unsigned)snd, (unsigned)got);
292                         }
293 #  ifdef SO_SNDBUFFORCE
294                 }
295 #  endif
296 #endif /* SO_SNDBUF */
297         }
298         if(family == AF_INET6) {
299 # if defined(IPV6_V6ONLY)
300                 if(v6only) {
301                         int val=(v6only==2)?0:1;
302                         if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
303                                 (void*)&val, (socklen_t)sizeof(val)) < 0) {
304 #ifndef USE_WINSOCK
305                                 log_err("setsockopt(..., IPV6_V6ONLY"
306                                         ", ...) failed: %s", strerror(errno));
307                                 close(s);
308 #else
309                                 log_err("setsockopt(..., IPV6_V6ONLY"
310                                         ", ...) failed: %s", 
311                                         wsa_strerror(WSAGetLastError()));
312                                 closesocket(s);
313 #endif
314                                 *noproto = 0;
315                                 *inuse = 0;
316                                 return -1;
317                         }
318                 }
319 # endif
320 # if defined(IPV6_USE_MIN_MTU)
321                 /*
322                  * There is no fragmentation of IPv6 datagrams
323                  * during forwarding in the network. Therefore
324                  * we do not send UDP datagrams larger than
325                  * the minimum IPv6 MTU of 1280 octets. The
326                  * EDNS0 message length can be larger if the
327                  * network stack supports IPV6_USE_MIN_MTU.
328                  */
329                 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
330                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
331 #  ifndef USE_WINSOCK
332                         log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
333                                 "...) failed: %s", strerror(errno));
334                         close(s);
335 #  else
336                         log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
337                                 "...) failed: %s", 
338                                 wsa_strerror(WSAGetLastError()));
339                         closesocket(s);
340 #  endif
341                         *noproto = 0;
342                         *inuse = 0;
343                         return -1;
344                 }
345 # elif defined(IPV6_MTU)
346                 /*
347                  * On Linux, to send no larger than 1280, the PMTUD is
348                  * disabled by default for datagrams anyway, so we set
349                  * the MTU to use.
350                  */
351                 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
352                         (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
353 #  ifndef USE_WINSOCK
354                         log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 
355                                 strerror(errno));
356                         close(s);
357 #  else
358                         log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 
359                                 wsa_strerror(WSAGetLastError()));
360                         closesocket(s);
361 #  endif
362                         *noproto = 0;
363                         *inuse = 0;
364                         return -1;
365                 }
366 # endif /* IPv6 MTU */
367         } else if(family == AF_INET) {
368 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
369 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
370  * PMTU information is not accepted, but fragmentation is allowed
371  * if and only if the packet size exceeds the outgoing interface MTU
372  * (and also uses the interface mtu to determine the size of the packets).
373  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
374  * FreeBSD already has same semantics without setting the option. */
375                 int omit_set = 0;
376                 int action;
377 #   if defined(IP_PMTUDISC_OMIT)
378                 action = IP_PMTUDISC_OMIT;
379                 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 
380                         &action, (socklen_t)sizeof(action)) < 0) {
381
382                         if (errno != EINVAL) {
383                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
384                                         strerror(errno));
385
386 #    ifndef USE_WINSOCK
387                                 close(s);
388 #    else
389                                 closesocket(s);
390 #    endif
391                                 *noproto = 0;
392                                 *inuse = 0;
393                                 return -1;
394                         }
395                 }
396                 else
397                 {
398                     omit_set = 1;
399                 }
400 #   endif
401                 if (omit_set == 0) {
402                         action = IP_PMTUDISC_DONT;
403                         if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
404                                 &action, (socklen_t)sizeof(action)) < 0) {
405                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
406                                         strerror(errno));
407 #    ifndef USE_WINSOCK
408                                 close(s);
409 #    else
410                                 closesocket(s);
411 #    endif
412                                 *noproto = 0;
413                                 *inuse = 0;
414                                 return -1;
415                         }
416                 }
417 #  elif defined(IP_DONTFRAG)
418                 int off = 0;
419                 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 
420                         &off, (socklen_t)sizeof(off)) < 0) {
421                         log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
422                                 strerror(errno));
423 #    ifndef USE_WINSOCK
424                         close(s);
425 #    else
426                         closesocket(s);
427 #    endif
428                         *noproto = 0;
429                         *inuse = 0;
430                         return -1;
431                 }
432 #  endif /* IPv4 MTU */
433         }
434         if(bind(s, (struct sockaddr*)addr, addrlen) != 0) {
435                 *noproto = 0;
436                 *inuse = 0;
437 #ifndef USE_WINSOCK
438 #ifdef EADDRINUSE
439                 *inuse = (errno == EADDRINUSE);
440                 /* detect freebsd jail with no ipv6 permission */
441                 if(family==AF_INET6 && errno==EINVAL)
442                         *noproto = 1;
443                 else if(errno != EADDRINUSE) {
444                         log_err_addr("can't bind socket", strerror(errno),
445                                 (struct sockaddr_storage*)addr, addrlen);
446                 }
447 #endif /* EADDRINUSE */
448                 close(s);
449 #else /* USE_WINSOCK */
450                 if(WSAGetLastError() != WSAEADDRINUSE &&
451                         WSAGetLastError() != WSAEADDRNOTAVAIL) {
452                         log_err_addr("can't bind socket", 
453                                 wsa_strerror(WSAGetLastError()),
454                                 (struct sockaddr_storage*)addr, addrlen);
455                 }
456                 closesocket(s);
457 #endif
458                 return -1;
459         }
460         if(!fd_set_nonblock(s)) {
461                 *noproto = 0;
462                 *inuse = 0;
463 #ifndef USE_WINSOCK
464                 close(s);
465 #else
466                 closesocket(s);
467 #endif
468                 return -1;
469         }
470         return s;
471 }
472
473 int
474 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
475         int* reuseport)
476 {
477         int s;
478 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY)
479         int on = 1;
480 #endif /* SO_REUSEADDR || IPV6_V6ONLY */
481         verbose_print_addr(addr);
482         *noproto = 0;
483         if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
484 #ifndef USE_WINSOCK
485                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
486                         *noproto = 1;
487                         return -1;
488                 }
489                 log_err("can't create socket: %s", strerror(errno));
490 #else
491                 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
492                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
493                         *noproto = 1;
494                         return -1;
495                 }
496                 log_err("can't create socket: %s", 
497                         wsa_strerror(WSAGetLastError()));
498 #endif
499                 return -1;
500         }
501 #ifdef SO_REUSEADDR
502         if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
503                 (socklen_t)sizeof(on)) < 0) {
504 #ifndef USE_WINSOCK
505                 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
506                         strerror(errno));
507                 close(s);
508 #else
509                 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
510                         wsa_strerror(WSAGetLastError()));
511                 closesocket(s);
512 #endif
513                 return -1;
514         }
515 #endif /* SO_REUSEADDR */
516 #ifdef SO_REUSEPORT
517         /* try to set SO_REUSEPORT so that incoming
518          * connections are distributed evenly among the receiving threads.
519          * Each thread must have its own socket bound to the same port,
520          * with SO_REUSEPORT set on each socket.
521          */
522         if (reuseport && *reuseport &&
523                 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
524                 (socklen_t)sizeof(on)) < 0) {
525 #ifdef ENOPROTOOPT
526                 if(errno != ENOPROTOOPT || verbosity >= 3)
527                         log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
528                                 strerror(errno));
529 #endif
530                 /* this option is not essential, we can continue */
531                 *reuseport = 0;
532         }
533 #else
534         (void)reuseport;
535 #endif /* defined(SO_REUSEPORT) */
536 #if defined(IPV6_V6ONLY)
537         if(addr->ai_family == AF_INET6 && v6only) {
538                 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
539                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
540 #ifndef USE_WINSOCK
541                         log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
542                                 strerror(errno));
543                         close(s);
544 #else
545                         log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
546                                 wsa_strerror(WSAGetLastError()));
547                         closesocket(s);
548 #endif
549                         return -1;
550                 }
551         }
552 #else
553         (void)v6only;
554 #endif /* IPV6_V6ONLY */
555         if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
556 #ifndef USE_WINSOCK
557                 /* detect freebsd jail with no ipv6 permission */
558                 if(addr->ai_family==AF_INET6 && errno==EINVAL)
559                         *noproto = 1;
560                 else {
561                         log_err_addr("can't bind socket", strerror(errno),
562                                 (struct sockaddr_storage*)addr->ai_addr,
563                                 addr->ai_addrlen);
564                 }
565                 close(s);
566 #else
567                 log_err_addr("can't bind socket", 
568                         wsa_strerror(WSAGetLastError()),
569                         (struct sockaddr_storage*)addr->ai_addr,
570                         addr->ai_addrlen);
571                 closesocket(s);
572 #endif
573                 return -1;
574         }
575         if(!fd_set_nonblock(s)) {
576 #ifndef USE_WINSOCK
577                 close(s);
578 #else
579                 closesocket(s);
580 #endif
581                 return -1;
582         }
583         if(listen(s, TCP_BACKLOG) == -1) {
584 #ifndef USE_WINSOCK
585                 log_err("can't listen: %s", strerror(errno));
586                 close(s);
587 #else
588                 log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
589                 closesocket(s);
590 #endif
591                 return -1;
592         }
593         return s;
594 }
595
/**
 * Create a listening unix domain (AF_LOCAL) stream socket at a path.
 * An existing filesystem entry at the path is unlinked first. The socket
 * is bound, made nonblocking and put in the listening state.
 * Without HAVE_SYS_UN_H this logs an error and fails.
 * @param path: filesystem path for the socket. It is copied into sun_path
 *	with strlcpy, so a path longer than sun_path is truncated there.
 * @param noproto: set to 1 only when local sockets are not compiled in;
 *	otherwise left untouched.
 * @return the listening socket descriptor, or -1 on failure. On failure
 *	the socket that was created is closed again.
 */
int
create_local_accept_sock(const char *path, int* noproto)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;

	(void)noproto; /*unused*/
	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (socklen_t)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	/* from here on every failure must release the descriptor */
	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	return s;

err:
	/* errors were logged above, so errno changes here are harmless */
	close(s);
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
651
652
/**
 * Resolve an interface name and port with getaddrinfo and create a
 * socket for the first result.
 * @param stype: socket type; SOCK_DGRAM creates a UDP socket, any other
 *	value creates a TCP accept socket. Stored into hints->ai_socktype.
 * @param ifname: node name for getaddrinfo, may be NULL.
 * @param port: service string for getaddrinfo.
 * @param hints: getaddrinfo hints; ai_socktype is overwritten.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: set to 0 on entry; set to 1 when creation failed because
 *	the protocol is unavailable and hints ask for AF_INET6.
 * @param rcv: receive buffer size for UDP sockets (cast to int).
 * @param snd: send buffer size for UDP sockets (cast to int).
 * @param reuseport: passed on to the socket creation routine.
 * @return socket descriptor or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport)
{
	struct addrinfo* found = NULL;
	int gai, fd, inuse, noproto;

	hints->ai_socktype = stype;
	*noip6 = 0;

	gai = getaddrinfo(ifname, port, hints, &found);
	if(gai != 0 || found == NULL) {
		const char* syserr = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* the real cause is in errno for this result code */
		if(gai == EAI_SYSTEM)
			syserr = strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s",
			ifname?ifname:"default", port, gai_strerror(gai),
			syserr);
		return -1;
	}

	if(stype == SOCK_DGRAM) {
		verbose_print_addr(found);
		fd = create_udp_sock(found->ai_family, found->ai_socktype,
			(struct sockaddr*)found->ai_addr, found->ai_addrlen,
			v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
			reuseport);
		if(fd == -1) {
			if(inuse)
				log_err("bind: address already in use");
			else if(noproto && hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	} else {
		fd = create_tcp_accept_sock(found, v6only, &noproto,
			reuseport);
		if(fd == -1 && noproto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	}

	freeaddrinfo(found);
	return fd;
}
702
/**
 * Make a socket, honouring an optional "ifspec@port" form of the
 * interface name. When ifname contains an '@', the text before it is
 * used as the interface and the text after it replaces the port
 * argument. The remaining arguments are handed to make_sock unchanged.
 * @return socket descriptor or -1; on a too long interface or port part
 *	an error is logged and *noip6 is set to 0.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport)
{
	char portbuf[16];
	char ifbuf[128];
	size_t iflen, portlen;
	const char* at = strchr(ifname, '@');

	if(!at) {
		/* plain interface name, use the given port */
		return make_sock(stype, ifname, port, hints, v6only, noip6,
			rcv, snd, reuseport);
	}

	/* override port with ifspec@port */
	iflen = (size_t)(at - ifname);
	if(iflen >= sizeof(ifbuf)) {
		log_err("ifname too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	portlen = strlen(at+1);
	if(portlen >= sizeof(portbuf)) {
		log_err("portnumber too long: %s", ifname);
		*noip6 = 0;
		return -1;
	}
	/* both lengths were checked against the buffers above */
	memcpy(ifbuf, ifname, iflen);
	ifbuf[iflen] = 0;
	memcpy(portbuf, at+1, portlen);
	portbuf[portlen] = 0;
	return make_sock(stype, ifbuf, portbuf, hints, v6only, noip6,
		rcv, snd, reuseport);
}
734
735 /**
736  * Add port to open ports list.
737  * @param list: list head. changed.
738  * @param s: fd.
739  * @param ftype: if fd is UDP.
740  * @return false on failure. list in unchanged then.
741  */
742 static int
743 port_insert(struct listen_port** list, int s, enum listen_type ftype)
744 {
745         struct listen_port* item = (struct listen_port*)malloc(
746                 sizeof(struct listen_port));
747         if(!item)
748                 return 0;
749         item->next = *list;
750         item->fd = s;
751         item->ftype = ftype;
752         *list = item;
753         return 1;
754 }
755
/**
 * Ask the kernel to deliver packet info (destination address) with
 * datagrams received on the socket.
 * Which socket option is used is selected at compile time from the
 * options the platform defines.
 * @param s: socket fd.
 * @param family: AF_INET6 or AF_INET; other families are left untouched.
 * @return false if setsockopt fails or no usable option exists for the
 *      family, true otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
        int on = 1;
#else
        (void)s;
#endif
        switch(family) {
        case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
                if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
                        (void*)&on, (socklen_t)sizeof(on)) < 0) {
                        log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
                                strerror(errno));
                        return 0;
                }
#elif defined(IPV6_PKTINFO)
                if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
                        (void*)&on, (socklen_t)sizeof(on)) < 0) {
                        log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
                                strerror(errno));
                        return 0;
                }
#else
                log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
                        "disable interface-automatic in config");
                return 0;
#endif /* IPv6 packet info option */
                break;

        case AF_INET:
#if defined(IP_PKTINFO)
                if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
                        (void*)&on, (socklen_t)sizeof(on)) < 0) {
                        log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
                                strerror(errno));
                        return 0;
                }
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
                if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
                        (void*)&on, (socklen_t)sizeof(on)) < 0) {
                        log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
                                strerror(errno));
                        return 0;
                }
#else
                log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
                        "interface-automatic in config");
                return 0;
#endif /* IPv4 packet info option */
                break;

        default:
                /* nothing to configure for other address families */
                break;
        }
        return 1;
}
810
811 /**
812  * Helper for ports_open. Creates one interface (or NULL for default).
813  * @param ifname: The interface ip address.
814  * @param do_auto: use automatic interface detection.
815  *      If enabled, then ifname must be the wildcard name.
816  * @param do_udp: if udp should be used.
817  * @param do_tcp: if udp should be used.
818  * @param hints: for getaddrinfo. family and flags have to be set by caller.
819  * @param port: Port number to use (as string).
820  * @param list: list of open ports, appended to, changed to point to list head.
821  * @param rcv: receive buffer size for UDP
822  * @param snd: send buffer size for UDP
823  * @param ssl_port: ssl service port number
824  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
825  *      set to false on exit if reuseport failed due to no kernel support.
826  * @return: returns false on error.
827  */
828 static int
829 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, 
830         struct addrinfo *hints, const char* port, struct listen_port** list,
831         size_t rcv, size_t snd, int ssl_port, int* reuseport)
832 {
833         int s, noip6=0;
834         if(!do_udp && !do_tcp)
835                 return 0;
836         if(do_auto) {
837                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
838                         &noip6, rcv, snd, reuseport)) == -1) {
839                         if(noip6) {
840                                 log_warn("IPv6 protocol not available");
841                                 return 1;
842                         }
843                         return 0;
844                 }
845                 /* getting source addr packet info is highly non-portable */
846                 if(!set_recvpktinfo(s, hints->ai_family)) {
847 #ifndef USE_WINSOCK
848                         close(s);
849 #else
850                         closesocket(s);
851 #endif
852                         return 0;
853                 }
854                 if(!port_insert(list, s, listen_type_udpancil)) {
855 #ifndef USE_WINSOCK
856                         close(s);
857 #else
858                         closesocket(s);
859 #endif
860                         return 0;
861                 }
862         } else if(do_udp) {
863                 /* regular udp socket */
864                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
865                         &noip6, rcv, snd, reuseport)) == -1) {
866                         if(noip6) {
867                                 log_warn("IPv6 protocol not available");
868                                 return 1;
869                         }
870                         return 0;
871                 }
872                 if(!port_insert(list, s, listen_type_udp)) {
873 #ifndef USE_WINSOCK
874                         close(s);
875 #else
876                         closesocket(s);
877 #endif
878                         return 0;
879                 }
880         }
881         if(do_tcp) {
882                 int is_ssl = ((strchr(ifname, '@') && 
883                         atoi(strchr(ifname, '@')+1) == ssl_port) ||
884                         (!strchr(ifname, '@') && atoi(port) == ssl_port));
885                 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, 
886                         &noip6, 0, 0, reuseport)) == -1) {
887                         if(noip6) {
888                                 /*log_warn("IPv6 protocol not available");*/
889                                 return 1;
890                         }
891                         return 0;
892                 }
893                 if(is_ssl)
894                         verbose(VERB_ALGO, "setup TCP for SSL service");
895                 if(!port_insert(list, s, is_ssl?listen_type_ssl:
896                         listen_type_tcp)) {
897 #ifndef USE_WINSOCK
898                         close(s);
899 #else
900                         closesocket(s);
901 #endif
902                         return 0;
903                 }
904         }
905         return 1;
906 }
907
908 /** 
909  * Add items to commpoint list in front.
910  * @param c: commpoint to add.
911  * @param front: listen struct.
912  * @return: false on failure.
913  */
914 static int
915 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
916 {
917         struct listen_list* item = (struct listen_list*)malloc(
918                 sizeof(struct listen_list));
919         if(!item)
920                 return 0;
921         item->com = c;
922         item->next = front->cps;
923         front->cps = item;
924         return 1;
925 }
926
927 struct listen_dnsport* 
928 listen_create(struct comm_base* base, struct listen_port* ports,
929         size_t bufsize, int tcp_accept_count, void* sslctx,
930         struct dt_env* dtenv, comm_point_callback_t* cb, void *cb_arg)
931 {
932         struct listen_dnsport* front = (struct listen_dnsport*)
933                 malloc(sizeof(struct listen_dnsport));
934         if(!front)
935                 return NULL;
936         front->cps = NULL;
937         front->udp_buff = sldns_buffer_new(bufsize);
938         if(!front->udp_buff) {
939                 free(front);
940                 return NULL;
941         }
942
943         /* create comm points as needed */
944         while(ports) {
945                 struct comm_point* cp = NULL;
946                 if(ports->ftype == listen_type_udp) 
947                         cp = comm_point_create_udp(base, ports->fd, 
948                                 front->udp_buff, cb, cb_arg);
949                 else if(ports->ftype == listen_type_tcp)
950                         cp = comm_point_create_tcp(base, ports->fd, 
951                                 tcp_accept_count, bufsize, cb, cb_arg);
952                 else if(ports->ftype == listen_type_ssl) {
953                         cp = comm_point_create_tcp(base, ports->fd, 
954                                 tcp_accept_count, bufsize, cb, cb_arg);
955                         cp->ssl = sslctx;
956                 } else if(ports->ftype == listen_type_udpancil) 
957                         cp = comm_point_create_udp_ancil(base, ports->fd, 
958                                 front->udp_buff, cb, cb_arg);
959                 if(!cp) {
960                         log_err("can't create commpoint");      
961                         listen_delete(front);
962                         return NULL;
963                 }
964                 cp->dtenv = dtenv;
965                 cp->do_not_close = 1;
966                 if(!listen_cp_insert(cp, front)) {
967                         log_err("malloc failed");
968                         comm_point_delete(cp);
969                         listen_delete(front);
970                         return NULL;
971                 }
972                 ports = ports->next;
973         }
974         if(!front->cps) {
975                 log_err("Could not open sockets to accept queries.");
976                 listen_delete(front);
977                 return NULL;
978         }
979
980         return front;
981 }
982
983 void
984 listen_list_delete(struct listen_list* list)
985 {
986         struct listen_list *p = list, *pn;
987         while(p) {
988                 pn = p->next;
989                 comm_point_delete(p->com);
990                 free(p);
991                 p = pn;
992         }
993 }
994
995 void 
996 listen_delete(struct listen_dnsport* front)
997 {
998         if(!front) 
999                 return;
1000         listen_list_delete(front->cps);
1001         sldns_buffer_free(front->udp_buff);
1002         free(front);
1003 }
1004
1005 struct listen_port* 
1006 listening_ports_open(struct config_file* cfg, int* reuseport)
1007 {
1008         struct listen_port* list = NULL;
1009         struct addrinfo hints;
1010         int i, do_ip4, do_ip6;
1011         int do_tcp, do_auto;
1012         char portbuf[32];
1013         snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1014         do_ip4 = cfg->do_ip4;
1015         do_ip6 = cfg->do_ip6;
1016         do_tcp = cfg->do_tcp;
1017         do_auto = cfg->if_automatic && cfg->do_udp;
1018         if(cfg->incoming_num_tcp == 0)
1019                 do_tcp = 0;
1020
1021         /* getaddrinfo */
1022         memset(&hints, 0, sizeof(hints));
1023         hints.ai_flags = AI_PASSIVE;
1024         /* no name lookups on our listening ports */
1025         if(cfg->num_ifs > 0)
1026                 hints.ai_flags |= AI_NUMERICHOST;
1027         hints.ai_family = AF_UNSPEC;
1028 #ifndef INET6
1029         do_ip6 = 0;
1030 #endif
1031         if(!do_ip4 && !do_ip6) {
1032                 return NULL;
1033         }
1034         /* create ip4 and ip6 ports so that return addresses are nice. */
1035         if(do_auto || cfg->num_ifs == 0) {
1036                 if(do_ip6) {
1037                         hints.ai_family = AF_INET6;
1038                         if(!ports_create_if(do_auto?"::0":"::1", 
1039                                 do_auto, cfg->do_udp, do_tcp, 
1040                                 &hints, portbuf, &list,
1041                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1042                                 cfg->ssl_port, reuseport)) {
1043                                 listening_ports_free(list);
1044                                 return NULL;
1045                         }
1046                 }
1047                 if(do_ip4) {
1048                         hints.ai_family = AF_INET;
1049                         if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", 
1050                                 do_auto, cfg->do_udp, do_tcp, 
1051                                 &hints, portbuf, &list,
1052                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1053                                 cfg->ssl_port, reuseport)) {
1054                                 listening_ports_free(list);
1055                                 return NULL;
1056                         }
1057                 }
1058         } else for(i = 0; i<cfg->num_ifs; i++) {
1059                 if(str_is_ip6(cfg->ifs[i])) {
1060                         if(!do_ip6)
1061                                 continue;
1062                         hints.ai_family = AF_INET6;
1063                         if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, 
1064                                 do_tcp, &hints, portbuf, &list, 
1065                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1066                                 cfg->ssl_port, reuseport)) {
1067                                 listening_ports_free(list);
1068                                 return NULL;
1069                         }
1070                 } else {
1071                         if(!do_ip4)
1072                                 continue;
1073                         hints.ai_family = AF_INET;
1074                         if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, 
1075                                 do_tcp, &hints, portbuf, &list, 
1076                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1077                                 cfg->ssl_port, reuseport)) {
1078                                 listening_ports_free(list);
1079                                 return NULL;
1080                         }
1081                 }
1082         }
1083         return list;
1084 }
1085
1086 void listening_ports_free(struct listen_port* list)
1087 {
1088         struct listen_port* nx;
1089         while(list) {
1090                 nx = list->next;
1091                 if(list->fd != -1) {
1092 #ifndef USE_WINSOCK
1093                         close(list->fd);
1094 #else
1095                         closesocket(list->fd);
1096 #endif
1097                 }
1098                 free(list);
1099                 list = nx;
1100         }
1101 }
1102
1103 size_t listen_get_mem(struct listen_dnsport* listen)
1104 {
1105         size_t s = sizeof(*listen) + sizeof(*listen->base) + 
1106                 sizeof(*listen->udp_buff) + 
1107                 sldns_buffer_capacity(listen->udp_buff);
1108         struct listen_list* p;
1109         for(p = listen->cps; p; p = p->next) {
1110                 s += sizeof(*p);
1111                 s += comm_point_get_mem(p->com);
1112         }
1113         return s;
1114 }
1115
1116 void listen_stop_accept(struct listen_dnsport* listen)
1117 {
1118         /* do not stop the ones that have no tcp_free list
1119          * (they have already stopped listening) */
1120         struct listen_list* p;
1121         for(p=listen->cps; p; p=p->next) {
1122                 if(p->com->type == comm_tcp_accept &&
1123                         p->com->tcp_free != NULL) {
1124                         comm_point_stop_listening(p->com);
1125                 }
1126         }
1127 }
1128
1129 void listen_start_accept(struct listen_dnsport* listen)
1130 {
1131         /* do not start the ones that have no tcp_free list, it is no
1132          * use to listen to them because they have no free tcp handlers */
1133         struct listen_list* p;
1134         for(p=listen->cps; p; p=p->next) {
1135                 if(p->com->type == comm_tcp_accept &&
1136                         p->com->tcp_free != NULL) {
1137                         comm_point_start_listening(p->com, -1, -1);
1138                 }
1139         }
1140 }
1141