]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/services/listen_dnsport.c
MFV r304732.
[FreeBSD/FreeBSD.git] / contrib / unbound / services / listen_dnsport.c
1 /*
2  * services/listen_dnsport.c - listen on port 53 for incoming DNS queries.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file has functions to get queries from clients.
40  */
41 #include "config.h"
42 #ifdef HAVE_SYS_TYPES_H
43 #  include <sys/types.h>
44 #endif
45 #include <sys/time.h>
46 #include "services/listen_dnsport.h"
47 #include "services/outside_network.h"
48 #include "util/netevent.h"
49 #include "util/log.h"
50 #include "util/config_file.h"
51 #include "util/net_help.h"
52 #include "sldns/sbuffer.h"
53
54 #ifdef HAVE_NETDB_H
55 #include <netdb.h>
56 #endif
57 #include <fcntl.h>
58
59 #ifdef HAVE_SYS_UN_H
60 #include <sys/un.h>
61 #endif
62
63 /** number of queued TCP connections for listen() */
64 #define TCP_BACKLOG 256 
65
66 /**
67  * Debug print of the getaddrinfo returned address.
68  * @param addr: the address returned.
69  */
70 static void
71 verbose_print_addr(struct addrinfo *addr)
72 {
73         if(verbosity >= VERB_ALGO) {
74                 char buf[100];
75                 void* sinaddr = &((struct sockaddr_in*)addr->ai_addr)->sin_addr;
76 #ifdef INET6
77                 if(addr->ai_family == AF_INET6)
78                         sinaddr = &((struct sockaddr_in6*)addr->ai_addr)->
79                                 sin6_addr;
80 #endif /* INET6 */
81                 if(inet_ntop(addr->ai_family, sinaddr, buf,
82                         (socklen_t)sizeof(buf)) == 0) {
83                         (void)strlcpy(buf, "(null)", sizeof(buf));
84                 }
85                 buf[sizeof(buf)-1] = 0;
86                 verbose(VERB_ALGO, "creating %s%s socket %s %d", 
87                         addr->ai_socktype==SOCK_DGRAM?"udp":
88                         addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
89                         addr->ai_family==AF_INET?"4":
90                         addr->ai_family==AF_INET6?"6":
91                         "_otherfam", buf, 
92                         ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
93         }
94 }
95
96 int
97 create_udp_sock(int family, int socktype, struct sockaddr* addr,
98         socklen_t addrlen, int v6only, int* inuse, int* noproto,
99         int rcv, int snd, int listen, int* reuseport, int transparent)
100 {
101         int s;
102 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU)  || defined(IP_TRANSPARENT) || defined(IP_BINDANY)
103         int on=1;
104 #endif
105 #ifdef IPV6_MTU
106         int mtu = IPV6_MIN_MTU;
107 #endif
108 #if !defined(SO_RCVBUFFORCE) && !defined(SO_RCVBUF)
109         (void)rcv;
110 #endif
111 #if !defined(SO_SNDBUFFORCE) && !defined(SO_SNDBUF)
112         (void)snd;
113 #endif
114 #ifndef IPV6_V6ONLY
115         (void)v6only;
116 #endif
117 #if !defined(IP_TRANSPARENT) && !defined(IP_BINDANY)
118         (void)transparent;
119 #endif
120         if((s = socket(family, socktype, 0)) == -1) {
121                 *inuse = 0;
122 #ifndef USE_WINSOCK
123                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
124                         *noproto = 1;
125                         return -1;
126                 }
127                 log_err("can't create socket: %s", strerror(errno));
128 #else
129                 if(WSAGetLastError() == WSAEAFNOSUPPORT || 
130                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
131                         *noproto = 1;
132                         return -1;
133                 }
134                 log_err("can't create socket: %s", 
135                         wsa_strerror(WSAGetLastError()));
136 #endif
137                 *noproto = 0;
138                 return -1;
139         }
140         if(listen) {
141 #ifdef SO_REUSEADDR
142                 if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
143                         (socklen_t)sizeof(on)) < 0) {
144 #ifndef USE_WINSOCK
145                         log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
146                                 strerror(errno));
147                         if(errno != ENOSYS) {
148                                 close(s);
149                                 *noproto = 0;
150                                 *inuse = 0;
151                                 return -1;
152                         }
153 #else
154                         log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
155                                 wsa_strerror(WSAGetLastError()));
156                         closesocket(s);
157                         *noproto = 0;
158                         *inuse = 0;
159                         return -1;
160 #endif
161                 }
162 #endif /* SO_REUSEADDR */
163 #ifdef SO_REUSEPORT
164                 /* try to set SO_REUSEPORT so that incoming
165                  * queries are distributed evenly among the receiving threads.
166                  * Each thread must have its own socket bound to the same port,
167                  * with SO_REUSEPORT set on each socket.
168                  */
169                 if (reuseport && *reuseport &&
170                     setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
171                         (socklen_t)sizeof(on)) < 0) {
172 #ifdef ENOPROTOOPT
173                         if(errno != ENOPROTOOPT || verbosity >= 3)
174                                 log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
175                                         strerror(errno));
176 #endif
177                         /* this option is not essential, we can continue */
178                         *reuseport = 0;
179                 }
180 #else
181                 (void)reuseport;
182 #endif /* defined(SO_REUSEPORT) */
183 #ifdef IP_TRANSPARENT
184                 if (transparent &&
185                     setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
186                     (socklen_t)sizeof(on)) < 0) {
187                         log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
188                         strerror(errno));
189                 }
190 #elif defined(IP_BINDANY)
191                 if (transparent &&
192                     setsockopt(s, (family==AF_INET6? IPPROTO_IPV6:IPPROTO_IP),
193                     IP_BINDANY, (void*)&on, (socklen_t)sizeof(on)) < 0) {
194                         log_warn("setsockopt(.. IP_BINDANY ..) failed: %s",
195                         strerror(errno));
196                 }
197 #endif /* IP_TRANSPARENT || IP_BINDANY */
198         }
199         if(rcv) {
200 #ifdef SO_RCVBUF
201                 int got;
202                 socklen_t slen = (socklen_t)sizeof(got);
203 #  ifdef SO_RCVBUFFORCE
204                 /* Linux specific: try to use root permission to override
205                  * system limits on rcvbuf. The limit is stored in 
206                  * /proc/sys/net/core/rmem_max or sysctl net.core.rmem_max */
207                 if(setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, (void*)&rcv, 
208                         (socklen_t)sizeof(rcv)) < 0) {
209                         if(errno != EPERM) {
210 #    ifndef USE_WINSOCK
211                                 log_err("setsockopt(..., SO_RCVBUFFORCE, "
212                                         "...) failed: %s", strerror(errno));
213                                 close(s);
214 #    else
215                                 log_err("setsockopt(..., SO_RCVBUFFORCE, "
216                                         "...) failed: %s", 
217                                         wsa_strerror(WSAGetLastError()));
218                                 closesocket(s);
219 #    endif
220                                 *noproto = 0;
221                                 *inuse = 0;
222                                 return -1;
223                         }
224 #  endif /* SO_RCVBUFFORCE */
225                         if(setsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&rcv, 
226                                 (socklen_t)sizeof(rcv)) < 0) {
227 #  ifndef USE_WINSOCK
228                                 log_err("setsockopt(..., SO_RCVBUF, "
229                                         "...) failed: %s", strerror(errno));
230                                 close(s);
231 #  else
232                                 log_err("setsockopt(..., SO_RCVBUF, "
233                                         "...) failed: %s", 
234                                         wsa_strerror(WSAGetLastError()));
235                                 closesocket(s);
236 #  endif
237                                 *noproto = 0;
238                                 *inuse = 0;
239                                 return -1;
240                         }
241                         /* check if we got the right thing or if system
242                          * reduced to some system max.  Warn if so */
243                         if(getsockopt(s, SOL_SOCKET, SO_RCVBUF, (void*)&got, 
244                                 &slen) >= 0 && got < rcv/2) {
245                                 log_warn("so-rcvbuf %u was not granted. "
246                                         "Got %u. To fix: start with "
247                                         "root permissions(linux) or sysctl "
248                                         "bigger net.core.rmem_max(linux) or "
249                                         "kern.ipc.maxsockbuf(bsd) values.",
250                                         (unsigned)rcv, (unsigned)got);
251                         }
252 #  ifdef SO_RCVBUFFORCE
253                 }
254 #  endif
255 #endif /* SO_RCVBUF */
256         }
257         /* first do RCVBUF as the receive buffer is more important */
258         if(snd) {
259 #ifdef SO_SNDBUF
260                 int got;
261                 socklen_t slen = (socklen_t)sizeof(got);
262 #  ifdef SO_SNDBUFFORCE
263                 /* Linux specific: try to use root permission to override
264                  * system limits on sndbuf. The limit is stored in 
265                  * /proc/sys/net/core/wmem_max or sysctl net.core.wmem_max */
266                 if(setsockopt(s, SOL_SOCKET, SO_SNDBUFFORCE, (void*)&snd, 
267                         (socklen_t)sizeof(snd)) < 0) {
268                         if(errno != EPERM) {
269 #    ifndef USE_WINSOCK
270                                 log_err("setsockopt(..., SO_SNDBUFFORCE, "
271                                         "...) failed: %s", strerror(errno));
272                                 close(s);
273 #    else
274                                 log_err("setsockopt(..., SO_SNDBUFFORCE, "
275                                         "...) failed: %s", 
276                                         wsa_strerror(WSAGetLastError()));
277                                 closesocket(s);
278 #    endif
279                                 *noproto = 0;
280                                 *inuse = 0;
281                                 return -1;
282                         }
283 #  endif /* SO_SNDBUFFORCE */
284                         if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&snd, 
285                                 (socklen_t)sizeof(snd)) < 0) {
286 #  ifndef USE_WINSOCK
287                                 log_err("setsockopt(..., SO_SNDBUF, "
288                                         "...) failed: %s", strerror(errno));
289                                 close(s);
290 #  else
291                                 log_err("setsockopt(..., SO_SNDBUF, "
292                                         "...) failed: %s", 
293                                         wsa_strerror(WSAGetLastError()));
294                                 closesocket(s);
295 #  endif
296                                 *noproto = 0;
297                                 *inuse = 0;
298                                 return -1;
299                         }
300                         /* check if we got the right thing or if system
301                          * reduced to some system max.  Warn if so */
302                         if(getsockopt(s, SOL_SOCKET, SO_SNDBUF, (void*)&got, 
303                                 &slen) >= 0 && got < snd/2) {
304                                 log_warn("so-sndbuf %u was not granted. "
305                                         "Got %u. To fix: start with "
306                                         "root permissions(linux) or sysctl "
307                                         "bigger net.core.wmem_max(linux) or "
308                                         "kern.ipc.maxsockbuf(bsd) values.",
309                                         (unsigned)snd, (unsigned)got);
310                         }
311 #  ifdef SO_SNDBUFFORCE
312                 }
313 #  endif
314 #endif /* SO_SNDBUF */
315         }
316         if(family == AF_INET6) {
317 # if defined(IPV6_V6ONLY)
318                 if(v6only) {
319                         int val=(v6only==2)?0:1;
320                         if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
321                                 (void*)&val, (socklen_t)sizeof(val)) < 0) {
322 #ifndef USE_WINSOCK
323                                 log_err("setsockopt(..., IPV6_V6ONLY"
324                                         ", ...) failed: %s", strerror(errno));
325                                 close(s);
326 #else
327                                 log_err("setsockopt(..., IPV6_V6ONLY"
328                                         ", ...) failed: %s", 
329                                         wsa_strerror(WSAGetLastError()));
330                                 closesocket(s);
331 #endif
332                                 *noproto = 0;
333                                 *inuse = 0;
334                                 return -1;
335                         }
336                 }
337 # endif
338 # if defined(IPV6_USE_MIN_MTU)
339                 /*
340                  * There is no fragmentation of IPv6 datagrams
341                  * during forwarding in the network. Therefore
342                  * we do not send UDP datagrams larger than
343                  * the minimum IPv6 MTU of 1280 octets. The
344                  * EDNS0 message length can be larger if the
345                  * network stack supports IPV6_USE_MIN_MTU.
346                  */
347                 if (setsockopt(s, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
348                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
349 #  ifndef USE_WINSOCK
350                         log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
351                                 "...) failed: %s", strerror(errno));
352                         close(s);
353 #  else
354                         log_err("setsockopt(..., IPV6_USE_MIN_MTU, "
355                                 "...) failed: %s", 
356                                 wsa_strerror(WSAGetLastError()));
357                         closesocket(s);
358 #  endif
359                         *noproto = 0;
360                         *inuse = 0;
361                         return -1;
362                 }
363 # elif defined(IPV6_MTU)
364                 /*
365                  * On Linux, to send no larger than 1280, the PMTUD is
366                  * disabled by default for datagrams anyway, so we set
367                  * the MTU to use.
368                  */
369                 if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU,
370                         (void*)&mtu, (socklen_t)sizeof(mtu)) < 0) {
371 #  ifndef USE_WINSOCK
372                         log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 
373                                 strerror(errno));
374                         close(s);
375 #  else
376                         log_err("setsockopt(..., IPV6_MTU, ...) failed: %s", 
377                                 wsa_strerror(WSAGetLastError()));
378                         closesocket(s);
379 #  endif
380                         *noproto = 0;
381                         *inuse = 0;
382                         return -1;
383                 }
384 # endif /* IPv6 MTU */
385         } else if(family == AF_INET) {
386 #  if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT)
387 /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that
388  * PMTU information is not accepted, but fragmentation is allowed
389  * if and only if the packet size exceeds the outgoing interface MTU
390  * (and also uses the interface mtu to determine the size of the packets).
391  * So there won't be any EMSGSIZE error.  Against DNS fragmentation attacks.
392  * FreeBSD already has same semantics without setting the option. */
393                 int omit_set = 0;
394                 int action;
395 #   if defined(IP_PMTUDISC_OMIT)
396                 action = IP_PMTUDISC_OMIT;
397                 if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER, 
398                         &action, (socklen_t)sizeof(action)) < 0) {
399
400                         if (errno != EINVAL) {
401                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s",
402                                         strerror(errno));
403
404 #    ifndef USE_WINSOCK
405                                 close(s);
406 #    else
407                                 closesocket(s);
408 #    endif
409                                 *noproto = 0;
410                                 *inuse = 0;
411                                 return -1;
412                         }
413                 }
414                 else
415                 {
416                     omit_set = 1;
417                 }
418 #   endif
419                 if (omit_set == 0) {
420                         action = IP_PMTUDISC_DONT;
421                         if (setsockopt(s, IPPROTO_IP, IP_MTU_DISCOVER,
422                                 &action, (socklen_t)sizeof(action)) < 0) {
423                                 log_err("setsockopt(..., IP_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s",
424                                         strerror(errno));
425 #    ifndef USE_WINSOCK
426                                 close(s);
427 #    else
428                                 closesocket(s);
429 #    endif
430                                 *noproto = 0;
431                                 *inuse = 0;
432                                 return -1;
433                         }
434                 }
435 #  elif defined(IP_DONTFRAG)
436                 int off = 0;
437                 if (setsockopt(s, IPPROTO_IP, IP_DONTFRAG, 
438                         &off, (socklen_t)sizeof(off)) < 0) {
439                         log_err("setsockopt(..., IP_DONTFRAG, ...) failed: %s",
440                                 strerror(errno));
441 #    ifndef USE_WINSOCK
442                         close(s);
443 #    else
444                         closesocket(s);
445 #    endif
446                         *noproto = 0;
447                         *inuse = 0;
448                         return -1;
449                 }
450 #  endif /* IPv4 MTU */
451         }
452         if(bind(s, (struct sockaddr*)addr, addrlen) != 0) {
453                 *noproto = 0;
454                 *inuse = 0;
455 #ifndef USE_WINSOCK
456 #ifdef EADDRINUSE
457                 *inuse = (errno == EADDRINUSE);
458                 /* detect freebsd jail with no ipv6 permission */
459                 if(family==AF_INET6 && errno==EINVAL)
460                         *noproto = 1;
461                 else if(errno != EADDRINUSE) {
462                         log_err_addr("can't bind socket", strerror(errno),
463                                 (struct sockaddr_storage*)addr, addrlen);
464                 }
465 #endif /* EADDRINUSE */
466                 close(s);
467 #else /* USE_WINSOCK */
468                 if(WSAGetLastError() != WSAEADDRINUSE &&
469                         WSAGetLastError() != WSAEADDRNOTAVAIL) {
470                         log_err_addr("can't bind socket", 
471                                 wsa_strerror(WSAGetLastError()),
472                                 (struct sockaddr_storage*)addr, addrlen);
473                 }
474                 closesocket(s);
475 #endif
476                 return -1;
477         }
478         if(!fd_set_nonblock(s)) {
479                 *noproto = 0;
480                 *inuse = 0;
481 #ifndef USE_WINSOCK
482                 close(s);
483 #else
484                 closesocket(s);
485 #endif
486                 return -1;
487         }
488         return s;
489 }
490
491 int
492 create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
493         int* reuseport, int transparent, int mss)
494 {
495         int s;
496 #if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) || defined(IP_TRANSPARENT)
497         int on = 1;
498 #endif
499 #ifndef IP_TRANSPARENT
500         (void)transparent;
501 #endif
502         verbose_print_addr(addr);
503         *noproto = 0;
504         if((s = socket(addr->ai_family, addr->ai_socktype, 0)) == -1) {
505 #ifndef USE_WINSOCK
506                 if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
507                         *noproto = 1;
508                         return -1;
509                 }
510                 log_err("can't create socket: %s", strerror(errno));
511 #else
512                 if(WSAGetLastError() == WSAEAFNOSUPPORT ||
513                         WSAGetLastError() == WSAEPROTONOSUPPORT) {
514                         *noproto = 1;
515                         return -1;
516                 }
517                 log_err("can't create socket: %s", 
518                         wsa_strerror(WSAGetLastError()));
519 #endif
520                 return -1;
521         }
522         if (mss > 0) {
523 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
524                 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG, (void*)&mss,
525                         (socklen_t)sizeof(mss)) < 0) {
526                         #ifndef USE_WINSOCK
527                         log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
528                                 strerror(errno));
529                         #else
530                         log_err(" setsockopt(.. TCP_MAXSEG ..) failed: %s",
531                                 wsa_strerror(WSAGetLastError()));
532                         #endif
533                 } else {
534                         verbose(VERB_ALGO,
535                                 " tcp socket mss set to %d", mss);
536                 }
537 #else
538                 log_warn(" setsockopt(TCP_MAXSEG) unsupported");
539 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
540         }
541 #ifdef SO_REUSEADDR
542         if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on, 
543                 (socklen_t)sizeof(on)) < 0) {
544 #ifndef USE_WINSOCK
545                 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
546                         strerror(errno));
547                 close(s);
548 #else
549                 log_err("setsockopt(.. SO_REUSEADDR ..) failed: %s",
550                         wsa_strerror(WSAGetLastError()));
551                 closesocket(s);
552 #endif
553                 return -1;
554         }
555 #endif /* SO_REUSEADDR */
556 #ifdef SO_REUSEPORT
557         /* try to set SO_REUSEPORT so that incoming
558          * connections are distributed evenly among the receiving threads.
559          * Each thread must have its own socket bound to the same port,
560          * with SO_REUSEPORT set on each socket.
561          */
562         if (reuseport && *reuseport &&
563                 setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on,
564                 (socklen_t)sizeof(on)) < 0) {
565 #ifdef ENOPROTOOPT
566                 if(errno != ENOPROTOOPT || verbosity >= 3)
567                         log_warn("setsockopt(.. SO_REUSEPORT ..) failed: %s",
568                                 strerror(errno));
569 #endif
570                 /* this option is not essential, we can continue */
571                 *reuseport = 0;
572         }
573 #else
574         (void)reuseport;
575 #endif /* defined(SO_REUSEPORT) */
576 #if defined(IPV6_V6ONLY)
577         if(addr->ai_family == AF_INET6 && v6only) {
578                 if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, 
579                         (void*)&on, (socklen_t)sizeof(on)) < 0) {
580 #ifndef USE_WINSOCK
581                         log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
582                                 strerror(errno));
583                         close(s);
584 #else
585                         log_err("setsockopt(..., IPV6_V6ONLY, ...) failed: %s",
586                                 wsa_strerror(WSAGetLastError()));
587                         closesocket(s);
588 #endif
589                         return -1;
590                 }
591         }
592 #else
593         (void)v6only;
594 #endif /* IPV6_V6ONLY */
595 #ifdef IP_TRANSPARENT
596         if (transparent &&
597             setsockopt(s, IPPROTO_IP, IP_TRANSPARENT, (void*)&on,
598             (socklen_t)sizeof(on)) < 0) {
599                 log_warn("setsockopt(.. IP_TRANSPARENT ..) failed: %s",
600                         strerror(errno));
601         }
602 #endif /* IP_TRANSPARENT */
603         if(bind(s, addr->ai_addr, addr->ai_addrlen) != 0) {
604 #ifndef USE_WINSOCK
605                 /* detect freebsd jail with no ipv6 permission */
606                 if(addr->ai_family==AF_INET6 && errno==EINVAL)
607                         *noproto = 1;
608                 else {
609                         log_err_addr("can't bind socket", strerror(errno),
610                                 (struct sockaddr_storage*)addr->ai_addr,
611                                 addr->ai_addrlen);
612                 }
613                 close(s);
614 #else
615                 log_err_addr("can't bind socket", 
616                         wsa_strerror(WSAGetLastError()),
617                         (struct sockaddr_storage*)addr->ai_addr,
618                         addr->ai_addrlen);
619                 closesocket(s);
620 #endif
621                 return -1;
622         }
623         if(!fd_set_nonblock(s)) {
624 #ifndef USE_WINSOCK
625                 close(s);
626 #else
627                 closesocket(s);
628 #endif
629                 return -1;
630         }
631         if(listen(s, TCP_BACKLOG) == -1) {
632 #ifndef USE_WINSOCK
633                 log_err("can't listen: %s", strerror(errno));
634                 close(s);
635 #else
636                 log_err("can't listen: %s", wsa_strerror(WSAGetLastError()));
637                 closesocket(s);
638 #endif
639                 return -1;
640         }
641         return s;
642 }
643
/**
 * Create a non-blocking, listening unix domain stream socket bound to
 * the given filesystem path. An existing file at that path is unlinked
 * first.
 * @param path: filesystem path of the socket; copied into sun_path with
 *	strlcpy, so it is cut off at the size of that field.
 * @param noproto: set to 1 only when the build has no unix socket
 *	support (no sys/un.h); otherwise not written.
 * @return the socket descriptor, or -1 on failure. The descriptor is
 *	closed again on every failure path.
 */
int
create_local_accept_sock(const char *path, int* noproto)
{
#ifdef HAVE_SYS_UN_H
	int s;
	struct sockaddr_un usock;

	verbose(VERB_ALGO, "creating unix socket %s", path);
#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
	/* this member exists on BSDs, not Linux */
	usock.sun_len = (socklen_t)sizeof(usock);
#endif
	usock.sun_family = AF_LOCAL;
	/* length is 92-108, 104 on FreeBSD */
	(void)strlcpy(usock.sun_path, path, sizeof(usock.sun_path));

	if ((s = socket(AF_LOCAL, SOCK_STREAM, 0)) == -1) {
		log_err("Cannot create local socket %s (%s)",
			path, strerror(errno));
		return -1;
	}

	/* from here on every failure must release s */
	if (unlink(path) && errno != ENOENT) {
		/* The socket already exists and cannot be removed */
		log_err("Cannot remove old local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (bind(s, (struct sockaddr *)&usock,
		(socklen_t)sizeof(struct sockaddr_un)) == -1) {
		log_err("Cannot bind local socket %s (%s)",
			path, strerror(errno));
		goto err;
	}

	if (!fd_set_nonblock(s)) {
		log_err("Cannot set non-blocking mode");
		goto err;
	}

	if (listen(s, TCP_BACKLOG) == -1) {
		log_err("can't listen: %s", strerror(errno));
		goto err;
	}

	(void)noproto; /*unused*/
	return s;

err:
#ifndef USE_WINSOCK
	close(s);
#else
	closesocket(s);
#endif
	return -1;
#else
	(void)path;
	log_err("Local sockets are not supported");
	*noproto = 1;
	return -1;
#endif
}
699
700
/**
 * Resolve an interface/port pair with getaddrinfo and open a socket on
 * the first result: a UDP socket for SOCK_DGRAM, a listening TCP socket
 * for any other type.
 * @param stype: socket type; also stored in hints->ai_socktype.
 * @param ifname: node name for getaddrinfo, may be NULL.
 * @param port: service name for getaddrinfo.
 * @param hints: getaddrinfo hints, modified.
 * @param v6only: passed on to the socket creation routine.
 * @param noip6: cleared first; set to 1 if the hints ask for AF_INET6 and
 *	the failure looks like missing IPv6 support.
 * @param rcv: receive buffer size for UDP sockets.
 * @param snd: send buffer size for UDP sockets.
 * @param reuseport: passed on to the socket creation routine.
 * @param transparent: passed on to the socket creation routine.
 * @param tcp_mss: maximum segment size for TCP sockets.
 * @return the socket descriptor, or -1 on failure.
 */
static int
make_sock(int stype, const char* ifname, const char* port, 
	struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
	int* reuseport, int transparent, int tcp_mss)
{
	struct addrinfo *ai = NULL;
	int gai, fd, addr_inuse, no_proto;

	hints->ai_socktype = stype;
	*noip6 = 0;
	gai = getaddrinfo(ifname, port, hints, &ai);
	if(gai != 0 || !ai) {
		const char* sysmsg = "";
#ifdef USE_WINSOCK
		if(gai == EAI_NONAME && hints->ai_family == AF_INET6){
			*noip6 = 1; /* 'Host not found' for IP6 on winXP */
			return -1;
		}
#endif
#ifdef EAI_SYSTEM
		/* only EAI_SYSTEM has a meaningful errno to add */
		if(gai == EAI_SYSTEM)
			sysmsg = strerror(errno);
#endif
		log_err("node %s:%s getaddrinfo: %s %s", 
			ifname?ifname:"default", port, gai_strerror(gai),
			sysmsg);
		return -1;
	}

	if(stype != SOCK_DGRAM) {
		fd = create_tcp_accept_sock(ai, v6only, &no_proto, reuseport,
			transparent, tcp_mss);
		if(fd == -1 && no_proto && hints->ai_family == AF_INET6)
			*noip6 = 1;
	} else {
		verbose_print_addr(ai);
		fd = create_udp_sock(ai->ai_family, ai->ai_socktype,
			(struct sockaddr*)ai->ai_addr, ai->ai_addrlen,
			v6only, &addr_inuse, &no_proto, (int)rcv, (int)snd, 1,
			reuseport, transparent);
		if(fd == -1) {
			/* the flags are only meaningful after a failure */
			if(addr_inuse)
				log_err("bind: address already in use");
			else if(no_proto && hints->ai_family == AF_INET6)
				*noip6 = 1;
		}
	}
	freeaddrinfo(ai);
	return fd;
}
751
/**
 * Make socket, but first see if ifname contains port override info.
 * An ifname of the form ifspec@port is split at the first '@': the part
 * before it is used as the interface and the part after it replaces the
 * port argument. Without an '@' the arguments are passed on unchanged.
 * The remaining parameters are handed to make_sock as is.
 * @return the result of make_sock, or -1 (with *noip6 set to 0) if the
 *      interface part or the port part does not fit the local buffers.
 */
static int
make_sock_port(int stype, const char* ifname, const char* port,
        struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
        int* reuseport, int transparent, int tcp_mss)
{
        char portpart[16];
        char ifpart[128];
        size_t iflen;
        char* at = strchr(ifname, '@');

        if(!at) {
                /* no override present */
                return make_sock(stype, ifname, port, hints, v6only, noip6,
                        rcv, snd, reuseport, transparent, tcp_mss);
        }

        /* override port with ifspec@port */
        iflen = (size_t)(at-ifname);
        if(iflen >= sizeof(ifpart)) {
                log_err("ifname too long: %s", ifname);
                *noip6 = 0;
                return -1;
        }
        if(strlen(at+1) >= sizeof(portpart)) {
                log_err("portnumber too long: %s", ifname);
                *noip6 = 0;
                return -1;
        }
        /* copy, then cut the interface part off at the '@' position */
        (void)strlcpy(ifpart, ifname, sizeof(ifpart));
        ifpart[iflen] = 0;
        (void)strlcpy(portpart, at+1, sizeof(portpart));
        portpart[strlen(at+1)] = 0;
        return make_sock(stype, ifpart, portpart, hints, v6only, noip6,
                rcv, snd, reuseport, transparent, tcp_mss);
}
783
784 /**
785  * Add port to open ports list.
786  * @param list: list head. changed.
787  * @param s: fd.
788  * @param ftype: if fd is UDP.
789  * @return false on failure. list in unchanged then.
790  */
791 static int
792 port_insert(struct listen_port** list, int s, enum listen_type ftype)
793 {
794         struct listen_port* item = (struct listen_port*)malloc(
795                 sizeof(struct listen_port));
796         if(!item)
797                 return 0;
798         item->next = *list;
799         item->fd = s;
800         item->ftype = ftype;
801         *list = item;
802         return 1;
803 }
804
/**
 * Set fd to receive source address packet info.
 * Which socket option is used is decided at compile time from the
 * option macros that are defined; if none is available for the family
 * an error is logged.
 * @param s: socket fd.
 * @param family: AF_INET6 or AF_INET. Other values are not touched.
 * @return 0 if setsockopt failed or no usable option exists for the
 *      family, 1 otherwise.
 */
static int
set_recvpktinfo(int s, int family)
{
#if defined(IPV6_RECVPKTINFO) || defined(IPV6_PKTINFO) || (defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)) || defined(IP_PKTINFO)
        int on = 1;
#else
        (void)s;
#endif
        switch(family) {
        case AF_INET6:
#if defined(IPV6_RECVPKTINFO)
                if(setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO,
                        (void*)&on, (socklen_t)sizeof(on)) < 0) {
                        log_err("setsockopt(..., IPV6_RECVPKTINFO, ...) failed: %s",
                                strerror(errno));
                        return 0;
                }
                break;
#elif defined(IPV6_PKTINFO)
                if(setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO,
                        (void*)&on, (socklen_t)sizeof(on)) < 0) {
                        log_err("setsockopt(..., IPV6_PKTINFO, ...) failed: %s",
                                strerror(errno));
                        return 0;
                }
                break;
#else
                log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please "
                        "disable interface-automatic in config");
                return 0;
#endif /* IPV6_RECVPKTINFO or IPV6_PKTINFO */

        case AF_INET:
#if defined(IP_PKTINFO)
                if(setsockopt(s, IPPROTO_IP, IP_PKTINFO,
                        (void*)&on, (socklen_t)sizeof(on)) < 0) {
                        log_err("setsockopt(..., IP_PKTINFO, ...) failed: %s",
                                strerror(errno));
                        return 0;
                }
                break;
#elif defined(IP_RECVDSTADDR) && defined(IP_SENDSRCADDR)
                if(setsockopt(s, IPPROTO_IP, IP_RECVDSTADDR,
                        (void*)&on, (socklen_t)sizeof(on)) < 0) {
                        log_err("setsockopt(..., IP_RECVDSTADDR, ...) failed: %s",
                                strerror(errno));
                        return 0;
                }
                break;
#else
                log_err("no IP_SENDSRCADDR or IP_PKTINFO option, please disable "
                        "interface-automatic in config");
                return 0;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */

        default:
                /* other families: nothing to set */
                break;
        }
        return 1;
}
859
860 /**
861  * Helper for ports_open. Creates one interface (or NULL for default).
862  * @param ifname: The interface ip address.
863  * @param do_auto: use automatic interface detection.
864  *      If enabled, then ifname must be the wildcard name.
865  * @param do_udp: if udp should be used.
866  * @param do_tcp: if udp should be used.
867  * @param hints: for getaddrinfo. family and flags have to be set by caller.
868  * @param port: Port number to use (as string).
869  * @param list: list of open ports, appended to, changed to point to list head.
870  * @param rcv: receive buffer size for UDP
871  * @param snd: send buffer size for UDP
872  * @param ssl_port: ssl service port number
873  * @param reuseport: try to set SO_REUSEPORT if nonNULL and true.
874  *      set to false on exit if reuseport failed due to no kernel support.
875  * @param transparent: set IP_TRANSPARENT socket option.
876  * @param tcp_mss: maximum segment size of tcp socket. default if zero.
877  * @return: returns false on error.
878  */
879 static int
880 ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, 
881         struct addrinfo *hints, const char* port, struct listen_port** list,
882         size_t rcv, size_t snd, int ssl_port, int* reuseport, int transparent,
883         int tcp_mss)
884 {
885         int s, noip6=0;
886         if(!do_udp && !do_tcp)
887                 return 0;
888         if(do_auto) {
889                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
890                         &noip6, rcv, snd, reuseport, transparent,
891                         tcp_mss)) == -1) {
892                         if(noip6) {
893                                 log_warn("IPv6 protocol not available");
894                                 return 1;
895                         }
896                         return 0;
897                 }
898                 /* getting source addr packet info is highly non-portable */
899                 if(!set_recvpktinfo(s, hints->ai_family)) {
900 #ifndef USE_WINSOCK
901                         close(s);
902 #else
903                         closesocket(s);
904 #endif
905                         return 0;
906                 }
907                 if(!port_insert(list, s, listen_type_udpancil)) {
908 #ifndef USE_WINSOCK
909                         close(s);
910 #else
911                         closesocket(s);
912 #endif
913                         return 0;
914                 }
915         } else if(do_udp) {
916                 /* regular udp socket */
917                 if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, 
918                         &noip6, rcv, snd, reuseport, transparent,
919                         tcp_mss)) == -1) {
920                         if(noip6) {
921                                 log_warn("IPv6 protocol not available");
922                                 return 1;
923                         }
924                         return 0;
925                 }
926                 if(!port_insert(list, s, listen_type_udp)) {
927 #ifndef USE_WINSOCK
928                         close(s);
929 #else
930                         closesocket(s);
931 #endif
932                         return 0;
933                 }
934         }
935         if(do_tcp) {
936                 int is_ssl = ((strchr(ifname, '@') && 
937                         atoi(strchr(ifname, '@')+1) == ssl_port) ||
938                         (!strchr(ifname, '@') && atoi(port) == ssl_port));
939                 if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, 
940                         &noip6, 0, 0, reuseport, transparent, tcp_mss)) == -1) {
941                         if(noip6) {
942                                 /*log_warn("IPv6 protocol not available");*/
943                                 return 1;
944                         }
945                         return 0;
946                 }
947                 if(is_ssl)
948                         verbose(VERB_ALGO, "setup TCP for SSL service");
949                 if(!port_insert(list, s, is_ssl?listen_type_ssl:
950                         listen_type_tcp)) {
951 #ifndef USE_WINSOCK
952                         close(s);
953 #else
954                         closesocket(s);
955 #endif
956                         return 0;
957                 }
958         }
959         return 1;
960 }
961
962 /** 
963  * Add items to commpoint list in front.
964  * @param c: commpoint to add.
965  * @param front: listen struct.
966  * @return: false on failure.
967  */
968 static int
969 listen_cp_insert(struct comm_point* c, struct listen_dnsport* front)
970 {
971         struct listen_list* item = (struct listen_list*)malloc(
972                 sizeof(struct listen_list));
973         if(!item)
974                 return 0;
975         item->com = c;
976         item->next = front->cps;
977         front->cps = item;
978         return 1;
979 }
980
981 struct listen_dnsport* 
982 listen_create(struct comm_base* base, struct listen_port* ports,
983         size_t bufsize, int tcp_accept_count, void* sslctx,
984         struct dt_env* dtenv, comm_point_callback_t* cb, void *cb_arg)
985 {
986         struct listen_dnsport* front = (struct listen_dnsport*)
987                 malloc(sizeof(struct listen_dnsport));
988         if(!front)
989                 return NULL;
990         front->cps = NULL;
991         front->udp_buff = sldns_buffer_new(bufsize);
992         if(!front->udp_buff) {
993                 free(front);
994                 return NULL;
995         }
996
997         /* create comm points as needed */
998         while(ports) {
999                 struct comm_point* cp = NULL;
1000                 if(ports->ftype == listen_type_udp) 
1001                         cp = comm_point_create_udp(base, ports->fd, 
1002                                 front->udp_buff, cb, cb_arg);
1003                 else if(ports->ftype == listen_type_tcp)
1004                         cp = comm_point_create_tcp(base, ports->fd, 
1005                                 tcp_accept_count, bufsize, cb, cb_arg);
1006                 else if(ports->ftype == listen_type_ssl) {
1007                         cp = comm_point_create_tcp(base, ports->fd, 
1008                                 tcp_accept_count, bufsize, cb, cb_arg);
1009                         cp->ssl = sslctx;
1010                 } else if(ports->ftype == listen_type_udpancil) 
1011                         cp = comm_point_create_udp_ancil(base, ports->fd, 
1012                                 front->udp_buff, cb, cb_arg);
1013                 if(!cp) {
1014                         log_err("can't create commpoint");      
1015                         listen_delete(front);
1016                         return NULL;
1017                 }
1018                 cp->dtenv = dtenv;
1019                 cp->do_not_close = 1;
1020                 if(!listen_cp_insert(cp, front)) {
1021                         log_err("malloc failed");
1022                         comm_point_delete(cp);
1023                         listen_delete(front);
1024                         return NULL;
1025                 }
1026                 ports = ports->next;
1027         }
1028         if(!front->cps) {
1029                 log_err("Could not open sockets to accept queries.");
1030                 listen_delete(front);
1031                 return NULL;
1032         }
1033
1034         return front;
1035 }
1036
1037 void
1038 listen_list_delete(struct listen_list* list)
1039 {
1040         struct listen_list *p = list, *pn;
1041         while(p) {
1042                 pn = p->next;
1043                 comm_point_delete(p->com);
1044                 free(p);
1045                 p = pn;
1046         }
1047 }
1048
1049 void 
1050 listen_delete(struct listen_dnsport* front)
1051 {
1052         if(!front) 
1053                 return;
1054         listen_list_delete(front->cps);
1055         sldns_buffer_free(front->udp_buff);
1056         free(front);
1057 }
1058
1059 struct listen_port* 
1060 listening_ports_open(struct config_file* cfg, int* reuseport)
1061 {
1062         struct listen_port* list = NULL;
1063         struct addrinfo hints;
1064         int i, do_ip4, do_ip6;
1065         int do_tcp, do_auto;
1066         char portbuf[32];
1067         snprintf(portbuf, sizeof(portbuf), "%d", cfg->port);
1068         do_ip4 = cfg->do_ip4;
1069         do_ip6 = cfg->do_ip6;
1070         do_tcp = cfg->do_tcp;
1071         do_auto = cfg->if_automatic && cfg->do_udp;
1072         if(cfg->incoming_num_tcp == 0)
1073                 do_tcp = 0;
1074
1075         /* getaddrinfo */
1076         memset(&hints, 0, sizeof(hints));
1077         hints.ai_flags = AI_PASSIVE;
1078         /* no name lookups on our listening ports */
1079         if(cfg->num_ifs > 0)
1080                 hints.ai_flags |= AI_NUMERICHOST;
1081         hints.ai_family = AF_UNSPEC;
1082 #ifndef INET6
1083         do_ip6 = 0;
1084 #endif
1085         if(!do_ip4 && !do_ip6) {
1086                 return NULL;
1087         }
1088         /* create ip4 and ip6 ports so that return addresses are nice. */
1089         if(do_auto || cfg->num_ifs == 0) {
1090                 if(do_ip6) {
1091                         hints.ai_family = AF_INET6;
1092                         if(!ports_create_if(do_auto?"::0":"::1", 
1093                                 do_auto, cfg->do_udp, do_tcp, 
1094                                 &hints, portbuf, &list,
1095                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1096                                 cfg->ssl_port, reuseport,
1097                                 cfg->ip_transparent,
1098                                 cfg->tcp_mss)) {
1099                                 listening_ports_free(list);
1100                                 return NULL;
1101                         }
1102                 }
1103                 if(do_ip4) {
1104                         hints.ai_family = AF_INET;
1105                         if(!ports_create_if(do_auto?"0.0.0.0":"127.0.0.1", 
1106                                 do_auto, cfg->do_udp, do_tcp, 
1107                                 &hints, portbuf, &list,
1108                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1109                                 cfg->ssl_port, reuseport,
1110                                 cfg->ip_transparent,
1111                                 cfg->tcp_mss)) {
1112                                 listening_ports_free(list);
1113                                 return NULL;
1114                         }
1115                 }
1116         } else for(i = 0; i<cfg->num_ifs; i++) {
1117                 if(str_is_ip6(cfg->ifs[i])) {
1118                         if(!do_ip6)
1119                                 continue;
1120                         hints.ai_family = AF_INET6;
1121                         if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, 
1122                                 do_tcp, &hints, portbuf, &list, 
1123                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1124                                 cfg->ssl_port, reuseport,
1125                                 cfg->ip_transparent,
1126                                 cfg->tcp_mss)) {
1127                                 listening_ports_free(list);
1128                                 return NULL;
1129                         }
1130                 } else {
1131                         if(!do_ip4)
1132                                 continue;
1133                         hints.ai_family = AF_INET;
1134                         if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, 
1135                                 do_tcp, &hints, portbuf, &list, 
1136                                 cfg->so_rcvbuf, cfg->so_sndbuf,
1137                                 cfg->ssl_port, reuseport,
1138                                 cfg->ip_transparent,
1139                                 cfg->tcp_mss)) {
1140                                 listening_ports_free(list);
1141                                 return NULL;
1142                         }
1143                 }
1144         }
1145         return list;
1146 }
1147
1148 void listening_ports_free(struct listen_port* list)
1149 {
1150         struct listen_port* nx;
1151         while(list) {
1152                 nx = list->next;
1153                 if(list->fd != -1) {
1154 #ifndef USE_WINSOCK
1155                         close(list->fd);
1156 #else
1157                         closesocket(list->fd);
1158 #endif
1159                 }
1160                 free(list);
1161                 list = nx;
1162         }
1163 }
1164
1165 size_t listen_get_mem(struct listen_dnsport* listen)
1166 {
1167         size_t s = sizeof(*listen) + sizeof(*listen->base) + 
1168                 sizeof(*listen->udp_buff) + 
1169                 sldns_buffer_capacity(listen->udp_buff);
1170         struct listen_list* p;
1171         for(p = listen->cps; p; p = p->next) {
1172                 s += sizeof(*p);
1173                 s += comm_point_get_mem(p->com);
1174         }
1175         return s;
1176 }
1177
1178 void listen_stop_accept(struct listen_dnsport* listen)
1179 {
1180         /* do not stop the ones that have no tcp_free list
1181          * (they have already stopped listening) */
1182         struct listen_list* p;
1183         for(p=listen->cps; p; p=p->next) {
1184                 if(p->com->type == comm_tcp_accept &&
1185                         p->com->tcp_free != NULL) {
1186                         comm_point_stop_listening(p->com);
1187                 }
1188         }
1189 }
1190
1191 void listen_start_accept(struct listen_dnsport* listen)
1192 {
1193         /* do not start the ones that have no tcp_free list, it is no
1194          * use to listen to them because they have no free tcp handlers */
1195         struct listen_list* p;
1196         for(p=listen->cps; p; p=p->next) {
1197                 if(p->com->type == comm_tcp_accept &&
1198                         p->com->tcp_free != NULL) {
1199                         comm_point_start_listening(p->com, -1, -1);
1200                 }
1201         }
1202 }
1203