]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/util/netevent.c
Import libcbor at 'contrib/libcbor/'
[FreeBSD/FreeBSD.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/ub_event.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/tcp_conn_limit.h"
47 #include "util/fptr_wlist.h"
48 #include "sldns/pkthdr.h"
49 #include "sldns/sbuffer.h"
50 #include "sldns/str2wire.h"
51 #include "dnstap/dnstap.h"
52 #include "dnscrypt/dnscrypt.h"
53 #include "services/listen_dnsport.h"
54 #ifdef HAVE_SYS_TYPES_H
55 #include <sys/types.h>
56 #endif
57 #ifdef HAVE_SYS_SOCKET_H
58 #include <sys/socket.h>
59 #endif
60 #ifdef HAVE_NETDB_H
61 #include <netdb.h>
62 #endif
63
64 #ifdef HAVE_OPENSSL_SSL_H
65 #include <openssl/ssl.h>
66 #endif
67 #ifdef HAVE_OPENSSL_ERR_H
68 #include <openssl/err.h>
69 #endif
70
71 /* -------- Start of local definitions -------- */
/**
 * If CMSG_ALIGN is not defined on this platform, a workaround.
 * Tried in order: the __CMSG_ALIGN spelling, the _CMSG_DATA_ALIGN
 * spelling, and lastly rounding up to a multiple of sizeof(long).
 * Each branch tests for exactly the macro that it forwards to, so the
 * alias is never defined in terms of a macro that does not exist.
 */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif
82
/** fallback when the platform has no CMSG_LEN: the aligned size of the
 * control message header plus the length of the data that follows it */
#ifndef CMSG_LEN
#  define CMSG_LEN(datalen) (CMSG_ALIGN(sizeof(struct cmsghdr))+(datalen))
#endif
87
/** fallback when the platform has no CMSG_SPACE: the aligned data length
 * plus the aligned header size; the header alignment macro
 * _CMSG_HDR_ALIGN is used when it exists, CMSG_ALIGN otherwise */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(datalen) (CMSG_ALIGN(datalen)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(datalen) (CMSG_ALIGN(datalen)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
96
/** The TCP writing query timeout in milliseconds (120 seconds) */
#define TCP_QUERY_TIMEOUT (120*1000)
/** Floor, in msec, for the TCP timeout that is really applied; it holds
 * regardless of the (possibly smaller) timeout that is advertised */
#define TCP_QUERY_TIMEOUT_MINIMUM 200
102
#ifdef NONBLOCKING_IS_BROKEN
/** with NONBLOCKING_IS_BROKEN only one UDP read is performed per read
 * indication from select */
#define NUM_UDP_PER_SELECT 1
#else
/** number of UDP reads to perform per read indication from select */
#define NUM_UDP_PER_SELECT 100
#endif
109
/**
 * Per-event bookkeeping: pairs the ub_event with the comm base it
 * belongs to. May become part of other structures (list, tree).
 */
struct internal_event {
	/** comm base that this event is registered with */
	struct comm_base *base;
	/** the underlying ub_event */
	struct ub_event *ev;
};
120
/**
 * Internal state behind a comm_base; every thread has its own, so that
 * every thread has its own events.
 */
struct internal_base {
	/** the ub_event event_base */
	struct ub_event_base *base;
	/** current time in seconds; the seconds time pointer points here */
	time_t secs;
	/** current time as a timeval */
	struct timeval now;
	/** event used for the slow_accept timeout */
	struct ub_event *slow_accept;
	/** nonzero while slow_accept is enabled */
	int slow_accept_enabled;
};
136
137 /**
138  * Internal timer structure, to store timer event in.
139  */
140 struct internal_timer {
141         /** the super struct from which derived */
142         struct comm_timer super;
143         /** the comm base */
144         struct comm_base* base;
145         /** ub_event event type */
146         struct ub_event* ev;
147         /** is timer enabled */
148         uint8_t enabled;
149 };
150
/**
 * Internal signal structure; one node of a singly linked list of
 * signal events.
 */
struct internal_signal {
	/** the underlying ub_event */
	struct ub_event *ev;
	/** next entry in the signal list */
	struct internal_signal *next;
};
160
161 /** create a tcp handler with a parent */
162 static struct comm_point* comm_point_create_tcp_handler(
163         struct comm_base *base, struct comm_point* parent, size_t bufsize,
164         struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
165         void* callback_arg, struct unbound_socket* socket);
166
167 /* -------- End of local definitions -------- */
168
169 struct comm_base* 
170 comm_base_create(int sigs)
171 {
172         struct comm_base* b = (struct comm_base*)calloc(1,
173                 sizeof(struct comm_base));
174         const char *evnm="event", *evsys="", *evmethod="";
175
176         if(!b)
177                 return NULL;
178         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
179         if(!b->eb) {
180                 free(b);
181                 return NULL;
182         }
183         b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
184         if(!b->eb->base) {
185                 free(b->eb);
186                 free(b);
187                 return NULL;
188         }
189         ub_comm_base_now(b);
190         ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
191         verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod);
192         return b;
193 }
194
195 struct comm_base*
196 comm_base_create_event(struct ub_event_base* base)
197 {
198         struct comm_base* b = (struct comm_base*)calloc(1,
199                 sizeof(struct comm_base));
200         if(!b)
201                 return NULL;
202         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
203         if(!b->eb) {
204                 free(b);
205                 return NULL;
206         }
207         b->eb->base = base;
208         ub_comm_base_now(b);
209         return b;
210 }
211
212 void 
213 comm_base_delete(struct comm_base* b)
214 {
215         if(!b)
216                 return;
217         if(b->eb->slow_accept_enabled) {
218                 if(ub_event_del(b->eb->slow_accept) != 0) {
219                         log_err("could not event_del slow_accept");
220                 }
221                 ub_event_free(b->eb->slow_accept);
222         }
223         ub_event_base_free(b->eb->base);
224         b->eb->base = NULL;
225         free(b->eb);
226         free(b);
227 }
228
229 void 
230 comm_base_delete_no_base(struct comm_base* b)
231 {
232         if(!b)
233                 return;
234         if(b->eb->slow_accept_enabled) {
235                 if(ub_event_del(b->eb->slow_accept) != 0) {
236                         log_err("could not event_del slow_accept");
237                 }
238                 ub_event_free(b->eb->slow_accept);
239         }
240         b->eb->base = NULL;
241         free(b->eb);
242         free(b);
243 }
244
245 void 
246 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
247 {
248         *tt = &b->eb->secs;
249         *tv = &b->eb->now;
250 }
251
252 void 
253 comm_base_dispatch(struct comm_base* b)
254 {
255         int retval;
256         retval = ub_event_base_dispatch(b->eb->base);
257         if(retval < 0) {
258                 fatal_exit("event_dispatch returned error %d, "
259                         "errno is %s", retval, strerror(errno));
260         }
261 }
262
263 void comm_base_exit(struct comm_base* b)
264 {
265         if(ub_event_base_loopexit(b->eb->base) != 0) {
266                 log_err("Could not loopexit");
267         }
268 }
269
270 void comm_base_set_slow_accept_handlers(struct comm_base* b,
271         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
272 {
273         b->stop_accept = stop_acc;
274         b->start_accept = start_acc;
275         b->cb_arg = arg;
276 }
277
278 struct ub_event_base* comm_base_internal(struct comm_base* b)
279 {
280         return b->eb->base;
281 }
282
283 /** see if errno for udp has to be logged or not uses globals */
284 static int
285 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
286 {
287         /* do not log transient errors (unless high verbosity) */
288 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
289         switch(errno) {
290 #  ifdef ENETUNREACH
291                 case ENETUNREACH:
292 #  endif
293 #  ifdef EHOSTDOWN
294                 case EHOSTDOWN:
295 #  endif
296 #  ifdef EHOSTUNREACH
297                 case EHOSTUNREACH:
298 #  endif
299 #  ifdef ENETDOWN
300                 case ENETDOWN:
301 #  endif
302                 case EPERM:
303                         if(verbosity < VERB_ALGO)
304                                 return 0;
305                 default:
306                         break;
307         }
308 #endif
309         /* permission denied is gotten for every send if the
310          * network is disconnected (on some OS), squelch it */
311         if( ((errno == EPERM)
312 #  ifdef EADDRNOTAVAIL
313                 /* 'Cannot assign requested address' also when disconnected */
314                 || (errno == EADDRNOTAVAIL)
315 #  endif
316                 ) && verbosity < VERB_ALGO)
317                 return 0;
318 #  ifdef EADDRINUSE
319         /* If SO_REUSEADDR is set, we could try to connect to the same server
320          * from the same source port twice. */
321         if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
322                 return 0;
323 #  endif
324         /* squelch errors where people deploy AAAA ::ffff:bla for
325          * authority servers, which we try for intranets. */
326         if(errno == EINVAL && addr_is_ip4mapped(
327                 (struct sockaddr_storage*)addr, addrlen) &&
328                 verbosity < VERB_DETAIL)
329                 return 0;
330         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
331          * but a dns cache does not need it. */
332         if(errno == EACCES && addr_is_broadcast(
333                 (struct sockaddr_storage*)addr, addrlen) &&
334                 verbosity < VERB_DETAIL)
335                 return 0;
336         return 1;
337 }
338
/** see if the errno of a failed tcp connect has to be logged; the
 * decision is delegated to the filter used for udp sends */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);

	return needs_log;
}
343
344 /* send a UDP reply */
345 int
346 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
347         struct sockaddr* addr, socklen_t addrlen, int is_connected)
348 {
349         ssize_t sent;
350         log_assert(c->fd != -1);
351 #ifdef UNBOUND_DEBUG
352         if(sldns_buffer_remaining(packet) == 0)
353                 log_err("error: send empty UDP packet");
354 #endif
355         log_assert(addr && addrlen > 0);
356         if(!is_connected) {
357                 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
358                         sldns_buffer_remaining(packet), 0,
359                         addr, addrlen);
360         } else {
361                 sent = send(c->fd, (void*)sldns_buffer_begin(packet),
362                         sldns_buffer_remaining(packet), 0);
363         }
364         if(sent == -1) {
365                 /* try again and block, waiting for IO to complete,
366                  * we want to send the answer, and we will wait for
367                  * the ethernet interface buffer to have space. */
368 #ifndef USE_WINSOCK
369                 if(errno == EAGAIN || 
370 #  ifdef EWOULDBLOCK
371                         errno == EWOULDBLOCK ||
372 #  endif
373                         errno == ENOBUFS) {
374 #else
375                 if(WSAGetLastError() == WSAEINPROGRESS ||
376                         WSAGetLastError() == WSAENOBUFS ||
377                         WSAGetLastError() == WSAEWOULDBLOCK) {
378 #endif
379                         int e;
380                         fd_set_block(c->fd);
381                         if (!is_connected) {
382                                 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
383                                         sldns_buffer_remaining(packet), 0,
384                                         addr, addrlen);
385                         } else {
386                                 sent = send(c->fd, (void*)sldns_buffer_begin(packet),
387                                         sldns_buffer_remaining(packet), 0);
388                         }
389                         e = errno;
390                         fd_set_nonblock(c->fd);
391                         errno = e;
392                 }
393         }
394         if(sent == -1) {
395                 if(!udp_send_errno_needs_log(addr, addrlen))
396                         return 0;
397                 if (!is_connected) {
398                         verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno));
399                 } else {
400                         verbose(VERB_OPS, "send failed: %s", sock_strerror(errno));
401                 }
402                 if(addr)
403                         log_addr(VERB_OPS, "remote address is",
404                                 (struct sockaddr_storage*)addr, addrlen);
405                 return 0;
406         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
407                 log_err("sent %d in place of %d bytes", 
408                         (int)sent, (int)sldns_buffer_remaining(packet));
409                 return 0;
410         }
411         return 1;
412 }
413
414 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
415 /** print debug ancillary info */
416 static void p_ancil(const char* str, struct comm_reply* r)
417 {
418         if(r->srctype != 4 && r->srctype != 6) {
419                 log_info("%s: unknown srctype %d", str, r->srctype);
420                 return;
421         }
422
423         if(r->srctype == 6) {
424 #ifdef IPV6_PKTINFO
425                 char buf[1024];
426                 if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 
427                         buf, (socklen_t)sizeof(buf)) == 0) {
428                         (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf));
429                 }
430                 buf[sizeof(buf)-1]=0;
431                 log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex);
432 #endif
433         } else if(r->srctype == 4) {
434 #ifdef IP_PKTINFO
435                 char buf1[1024], buf2[1024];
436                 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 
437                         buf1, (socklen_t)sizeof(buf1)) == 0) {
438                         (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
439                 }
440                 buf1[sizeof(buf1)-1]=0;
441 #ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
442                 if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 
443                         buf2, (socklen_t)sizeof(buf2)) == 0) {
444                         (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2));
445                 }
446                 buf2[sizeof(buf2)-1]=0;
447 #else
448                 buf2[0]=0;
449 #endif
450                 log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
451                         buf1, buf2);
452 #elif defined(IP_RECVDSTADDR)
453                 char buf1[1024];
454                 if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 
455                         buf1, (socklen_t)sizeof(buf1)) == 0) {
456                         (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
457                 }
458                 buf1[sizeof(buf1)-1]=0;
459                 log_info("%s: %s", str, buf1);
460 #endif /* IP_PKTINFO or PI_RECVDSTDADDR */
461         }
462 }
463 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
464
465 /** send a UDP reply over specified interface*/
466 static int
467 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
468         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
469 {
470 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
471         ssize_t sent;
472         struct msghdr msg;
473         struct iovec iov[1];
474         union {
475                 struct cmsghdr hdr;
476                 char buf[256];
477         } control;
478 #ifndef S_SPLINT_S
479         struct cmsghdr *cmsg;
480 #endif /* S_SPLINT_S */
481
482         log_assert(c->fd != -1);
483 #ifdef UNBOUND_DEBUG
484         if(sldns_buffer_remaining(packet) == 0)
485                 log_err("error: send empty UDP packet");
486 #endif
487         log_assert(addr && addrlen > 0);
488
489         msg.msg_name = addr;
490         msg.msg_namelen = addrlen;
491         iov[0].iov_base = sldns_buffer_begin(packet);
492         iov[0].iov_len = sldns_buffer_remaining(packet);
493         msg.msg_iov = iov;
494         msg.msg_iovlen = 1;
495         msg.msg_control = control.buf;
496 #ifndef S_SPLINT_S
497         msg.msg_controllen = sizeof(control.buf);
498 #endif /* S_SPLINT_S */
499         msg.msg_flags = 0;
500
501 #ifndef S_SPLINT_S
502         cmsg = CMSG_FIRSTHDR(&msg);
503         if(r->srctype == 4) {
504 #ifdef IP_PKTINFO
505                 void* cmsg_data;
506                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
507                 log_assert(msg.msg_controllen <= sizeof(control.buf));
508                 cmsg->cmsg_level = IPPROTO_IP;
509                 cmsg->cmsg_type = IP_PKTINFO;
510                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
511                         sizeof(struct in_pktinfo));
512                 /* unset the ifindex to not bypass the routing tables */
513                 cmsg_data = CMSG_DATA(cmsg);
514                 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
515                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
516 #elif defined(IP_SENDSRCADDR)
517                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
518                 log_assert(msg.msg_controllen <= sizeof(control.buf));
519                 cmsg->cmsg_level = IPPROTO_IP;
520                 cmsg->cmsg_type = IP_SENDSRCADDR;
521                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
522                         sizeof(struct in_addr));
523                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
524 #else
525                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
526                 msg.msg_control = NULL;
527 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
528         } else if(r->srctype == 6) {
529                 void* cmsg_data;
530                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
531                 log_assert(msg.msg_controllen <= sizeof(control.buf));
532                 cmsg->cmsg_level = IPPROTO_IPV6;
533                 cmsg->cmsg_type = IPV6_PKTINFO;
534                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
535                         sizeof(struct in6_pktinfo));
536                 /* unset the ifindex to not bypass the routing tables */
537                 cmsg_data = CMSG_DATA(cmsg);
538                 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
539                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
540         } else {
541                 /* try to pass all 0 to use default route */
542                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
543                 log_assert(msg.msg_controllen <= sizeof(control.buf));
544                 cmsg->cmsg_level = IPPROTO_IPV6;
545                 cmsg->cmsg_type = IPV6_PKTINFO;
546                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
547                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
548         }
549 #endif /* S_SPLINT_S */
550         if(verbosity >= VERB_ALGO)
551                 p_ancil("send_udp over interface", r);
552         sent = sendmsg(c->fd, &msg, 0);
553         if(sent == -1) {
554                 /* try again and block, waiting for IO to complete,
555                  * we want to send the answer, and we will wait for
556                  * the ethernet interface buffer to have space. */
557 #ifndef USE_WINSOCK
558                 if(errno == EAGAIN || 
559 #  ifdef EWOULDBLOCK
560                         errno == EWOULDBLOCK ||
561 #  endif
562                         errno == ENOBUFS) {
563 #else
564                 if(WSAGetLastError() == WSAEINPROGRESS ||
565                         WSAGetLastError() == WSAENOBUFS ||
566                         WSAGetLastError() == WSAEWOULDBLOCK) {
567 #endif
568                         int e;
569                         fd_set_block(c->fd);
570                         sent = sendmsg(c->fd, &msg, 0);
571                         e = errno;
572                         fd_set_nonblock(c->fd);
573                         errno = e;
574                 }
575         }
576         if(sent == -1) {
577                 if(!udp_send_errno_needs_log(addr, addrlen))
578                         return 0;
579                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
580                 log_addr(VERB_OPS, "remote address is", 
581                         (struct sockaddr_storage*)addr, addrlen);
582 #ifdef __NetBSD__
583                 /* netbsd 7 has IP_PKTINFO for recv but not send */
584                 if(errno == EINVAL && r->srctype == 4)
585                         log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
586                                 "Please disable interface-automatic");
587 #endif
588                 return 0;
589         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
590                 log_err("sent %d in place of %d bytes", 
591                         (int)sent, (int)sldns_buffer_remaining(packet));
592                 return 0;
593         }
594         return 1;
595 #else
596         (void)c;
597         (void)packet;
598         (void)addr;
599         (void)addrlen;
600         (void)r;
601         log_err("sendmsg: IPV6_PKTINFO not supported");
602         return 0;
603 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
604 }
605
606 /** return true is UDP receive error needs to be logged */
607 static int udp_recv_needs_log(int err)
608 {
609         switch(err) {
610         case EACCES: /* some hosts send ICMP 'Permission Denied' */
611 #ifndef USE_WINSOCK
612         case ECONNREFUSED:
613 #  ifdef ENETUNREACH
614         case ENETUNREACH:
615 #  endif
616 #  ifdef EHOSTDOWN
617         case EHOSTDOWN:
618 #  endif
619 #  ifdef EHOSTUNREACH
620         case EHOSTUNREACH:
621 #  endif
622 #  ifdef ENETDOWN
623         case ENETDOWN:
624 #  endif
625 #else /* USE_WINSOCK */
626         case WSAECONNREFUSED:
627         case WSAENETUNREACH:
628         case WSAEHOSTDOWN:
629         case WSAEHOSTUNREACH:
630         case WSAENETDOWN:
631 #endif
632                 if(verbosity >= VERB_ALGO)
633                         return 1;
634                 return 0;
635         default:
636                 break;
637         }
638         return 1;
639 }
640
/**
 * Event callback for a UDP commpoint that receives with ancillary data.
 * On a read event up to NUM_UDP_PER_SELECT datagrams are received with
 * recvmsg. For every datagram the packet info of the control messages
 * (if any) is stored in the reply, the commpoint callback is called and,
 * if that returns nonzero, the contents of the commpoint buffer are sent
 * back at once over the same interface.
 * Without AF_INET6, IPV6_PKTINFO and recvmsg support this is a fatal
 * error.
 * @param fd: the socket that has the event.
 * @param event: event flags; only UB_EV_READ is acted upon.
 * @param arg: the comm_point.
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* union with a cmsghdr, so that the buffer is aligned for one */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&UB_EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	ub_comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			/* EAGAIN and EINTR are never logged, they end the
			 * reads for this event like any other failure */
			if(errno != EAGAIN && errno != EINTR
				&& udp_recv_needs_log(errno)) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		/* srctype stays 0 if no packet info is found below */
		rep.srctype = 0;
#ifndef S_SPLINT_S
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		/* stop if the commpoint was closed (fd is -1) or has been
		 * reused with another fd; the next recvmsg would otherwise
		 * read from an fd that is no longer that of this commpoint */
		if(!rep.c || rep.c->fd != fd)
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
739
740 void 
741 comm_point_udp_callback(int fd, short event, void* arg)
742 {
743         struct comm_reply rep;
744         ssize_t rcv;
745         int i;
746         struct sldns_buffer *buffer;
747
748         rep.c = (struct comm_point*)arg;
749         log_assert(rep.c->type == comm_udp);
750
751         if(!(event&UB_EV_READ))
752                 return;
753         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
754         ub_comm_base_now(rep.c->ev->base);
755         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
756                 sldns_buffer_clear(rep.c->buffer);
757                 rep.addrlen = (socklen_t)sizeof(rep.addr);
758                 log_assert(fd != -1);
759                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
760                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
761                         sldns_buffer_remaining(rep.c->buffer), 0, 
762                         (struct sockaddr*)&rep.addr, &rep.addrlen);
763                 if(rcv == -1) {
764 #ifndef USE_WINSOCK
765                         if(errno != EAGAIN && errno != EINTR
766                                 && udp_recv_needs_log(errno))
767                                 log_err("recvfrom %d failed: %s", 
768                                         fd, strerror(errno));
769 #else
770                         if(WSAGetLastError() != WSAEINPROGRESS &&
771                                 WSAGetLastError() != WSAECONNRESET &&
772                                 WSAGetLastError()!= WSAEWOULDBLOCK &&
773                                 udp_recv_needs_log(WSAGetLastError()))
774                                 log_err("recvfrom failed: %s",
775                                         wsa_strerror(WSAGetLastError()));
776 #endif
777                         return;
778                 }
779                 sldns_buffer_skip(rep.c->buffer, rcv);
780                 sldns_buffer_flip(rep.c->buffer);
781                 rep.srctype = 0;
782                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
783                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
784                         /* send back immediate reply */
785 #ifdef USE_DNSCRYPT
786                         buffer = rep.c->dnscrypt_buffer;
787 #else
788                         buffer = rep.c->buffer;
789 #endif
790                         (void)comm_point_send_udp_msg(rep.c, buffer,
791                                 (struct sockaddr*)&rep.addr, rep.addrlen, 0);
792                 }
793                 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
794                 another UDP port. Note rep.c cannot be reused with TCP fd. */
795                         break;
796         }
797 }
798
799 int adjusted_tcp_timeout(struct comm_point* c)
800 {
801         if(c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM)
802                 return TCP_QUERY_TIMEOUT_MINIMUM;
803         return c->tcp_timeout_msec;
804 }
805
806 /** Use a new tcp handler for new query fd, set to read query */
807 static void
808 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 
809 {
810         int handler_usage;
811         log_assert(c->type == comm_tcp || c->type == comm_http);
812         log_assert(c->fd == -1);
813         sldns_buffer_clear(c->buffer);
814 #ifdef USE_DNSCRYPT
815         if (c->dnscrypt)
816                 sldns_buffer_clear(c->dnscrypt_buffer);
817 #endif
818         c->tcp_is_reading = 1;
819         c->tcp_byte_count = 0;
820         /* if more than half the tcp handlers are in use, use a shorter
821          * timeout for this TCP connection, we need to make space for
822          * other connections to be able to get attention */
823         /* If > 50% TCP handler structures in use, set timeout to 1/100th
824          *      configured value.
825          * If > 65%TCP handler structures in use, set to 1/500th configured
826          *      value.
827          * If > 80% TCP handler structures in use, set to 0.
828          *
829          * If the timeout to use falls below 200 milliseconds, an actual
830          * timeout of 200ms is used.
831          */
832         handler_usage = (cur * 100) / max;
833         if(handler_usage > 50 && handler_usage <= 65)
834                 c->tcp_timeout_msec /= 100;
835         else if (handler_usage > 65 && handler_usage <= 80)
836                 c->tcp_timeout_msec /= 500;
837         else if (handler_usage > 80)
838                 c->tcp_timeout_msec = 0;
839         comm_point_start_listening(c, fd, adjusted_tcp_timeout(c));
840 }
841
842 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
843         short ATTR_UNUSED(event), void* arg)
844 {
845         struct comm_base* b = (struct comm_base*)arg;
846         /* timeout for the slow accept, re-enable accepts again */
847         if(b->start_accept) {
848                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
849                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
850                 (*b->start_accept)(b->cb_arg);
851                 b->eb->slow_accept_enabled = 0;
852         }
853 }
854
855 int comm_point_perform_accept(struct comm_point* c,
856         struct sockaddr_storage* addr, socklen_t* addrlen)
857 {
858         int new_fd;
859         *addrlen = (socklen_t)sizeof(*addr);
860 #ifndef HAVE_ACCEPT4
861         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
862 #else
863         /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
864         new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
865 #endif
866         if(new_fd == -1) {
867 #ifndef USE_WINSOCK
868                 /* EINTR is signal interrupt. others are closed connection. */
869                 if(     errno == EINTR || errno == EAGAIN
870 #ifdef EWOULDBLOCK
871                         || errno == EWOULDBLOCK 
872 #endif
873 #ifdef ECONNABORTED
874                         || errno == ECONNABORTED 
875 #endif
876 #ifdef EPROTO
877                         || errno == EPROTO
878 #endif /* EPROTO */
879                         )
880                         return -1;
881 #if defined(ENFILE) && defined(EMFILE)
882                 if(errno == ENFILE || errno == EMFILE) {
883                         /* out of file descriptors, likely outside of our
884                          * control. stop accept() calls for some time */
885                         if(c->ev->base->stop_accept) {
886                                 struct comm_base* b = c->ev->base;
887                                 struct timeval tv;
888                                 verbose(VERB_ALGO, "out of file descriptors: "
889                                         "slow accept");
890                                 b->eb->slow_accept_enabled = 1;
891                                 fptr_ok(fptr_whitelist_stop_accept(
892                                         b->stop_accept));
893                                 (*b->stop_accept)(b->cb_arg);
894                                 /* set timeout, no mallocs */
895                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
896                                 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
897                                 b->eb->slow_accept = ub_event_new(b->eb->base,
898                                         -1, UB_EV_TIMEOUT,
899                                         comm_base_handle_slow_accept, b);
900                                 if(b->eb->slow_accept == NULL) {
901                                         /* we do not want to log here, because
902                                          * that would spam the logfiles.
903                                          * error: "event_base_set failed." */
904                                 }
905                                 else if(ub_event_add(b->eb->slow_accept, &tv)
906                                         != 0) {
907                                         /* we do not want to log here,
908                                          * error: "event_add failed." */
909                                 }
910                         }
911                         return -1;
912                 }
913 #endif
914 #else /* USE_WINSOCK */
915                 if(WSAGetLastError() == WSAEINPROGRESS ||
916                         WSAGetLastError() == WSAECONNRESET)
917                         return -1;
918                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
919                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
920                         return -1;
921                 }
922 #endif
923                 log_err_addr("accept failed", sock_strerror(errno), addr,
924                         *addrlen);
925                 return -1;
926         }
927         if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
928                 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
929                 if(!tcl_new_connection(c->tcl_addr)) {
930                         if(verbosity >= 3)
931                                 log_err_addr("accept rejected",
932                                 "connection limit exceeded", addr, *addrlen);
933                         close(new_fd);
934                         return -1;
935                 }
936         }
937 #ifndef HAVE_ACCEPT4
938         fd_set_nonblock(new_fd);
939 #endif
940         return new_fd;
941 }
942
943 #ifdef USE_WINSOCK
944 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
945         int ATTR_UNUSED(argi), long argl, long retvalue)
946 {
947         int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
948         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
949                 (oper&BIO_CB_RETURN)?"return":"before",
950                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
951                 wsa_err==WSAEWOULDBLOCK?"wsawb":"");
952         /* on windows, check if previous operation caused EWOULDBLOCK */
953         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
954                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
955                 if(wsa_err == WSAEWOULDBLOCK)
956                         ub_winsock_tcp_wouldblock((struct ub_event*)
957                                 BIO_get_callback_arg(b), UB_EV_READ);
958         }
959         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
960                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
961                 if(wsa_err == WSAEWOULDBLOCK)
962                         ub_winsock_tcp_wouldblock((struct ub_event*)
963                                 BIO_get_callback_arg(b), UB_EV_WRITE);
964         }
965         /* return original return value */
966         return retvalue;
967 }
968
969 /** set win bio callbacks for nonblocking operations */
970 void
971 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
972 {
973         SSL* ssl = (SSL*)thessl;
974         /* set them both just in case, but usually they are the same BIO */
975         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
976         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
977         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
978         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
979 }
980 #endif
981
982 #ifdef HAVE_NGHTTP2
983 /** Create http2 session server.  Per connection, after TCP accepted.*/
984 static int http2_session_server_create(struct http2_session* h2_session)
985 {
986         log_assert(h2_session->callbacks);
987         h2_session->is_drop = 0;
988         if(nghttp2_session_server_new(&h2_session->session,
989                         h2_session->callbacks,
990                 h2_session) == NGHTTP2_ERR_NOMEM) {
991                 log_err("failed to create nghttp2 session server");
992                 return 0;
993         }
994
995         return 1;
996 }
997
998 /** Submit http2 setting to session. Once per session. */
999 static int http2_submit_settings(struct http2_session* h2_session)
1000 {
1001         int ret;
1002         nghttp2_settings_entry settings[1] = {
1003                 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS,
1004                  h2_session->c->http2_max_streams}};
1005
1006         ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE,
1007                 settings, 1);
1008         if(ret) {
1009                 verbose(VERB_QUERY, "http2: submit_settings failed, "
1010                         "error: %s", nghttp2_strerror(ret));
1011                 return 0;
1012         }
1013         return 1;
1014 }
1015 #endif /* HAVE_NGHTTP2 */
1016
1017
1018 void 
1019 comm_point_tcp_accept_callback(int fd, short event, void* arg)
1020 {
1021         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
1022         int new_fd;
1023         log_assert(c->type == comm_tcp_accept);
1024         if(!(event & UB_EV_READ)) {
1025                 log_info("ignoring tcp accept event %d", (int)event);
1026                 return;
1027         }
1028         ub_comm_base_now(c->ev->base);
1029         /* find free tcp handler. */
1030         if(!c->tcp_free) {
1031                 log_warn("accepted too many tcp, connections full");
1032                 return;
1033         }
1034         /* accept incoming connection. */
1035         c_hdl = c->tcp_free;
1036         /* clear leftover flags from previous use, and then set the
1037          * correct event base for the event structure for libevent */
1038         ub_event_free(c_hdl->ev->ev);
1039         if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) ||
1040                 c_hdl->type == comm_local || c_hdl->type == comm_raw)
1041                 c_hdl->tcp_do_toggle_rw = 0;
1042         else    c_hdl->tcp_do_toggle_rw = 1;
1043
1044         if(c_hdl->type == comm_http) {
1045 #ifdef HAVE_NGHTTP2
1046                 if(!c_hdl->h2_session ||
1047                         !http2_session_server_create(c_hdl->h2_session)) {
1048                         log_warn("failed to create nghttp2");
1049                         return;
1050                 }
1051                 if(!c_hdl->h2_session ||
1052                         !http2_submit_settings(c_hdl->h2_session)) {
1053                         log_warn("failed to submit http2 settings");
1054                         return;
1055                 }
1056                 if(!c->ssl) {
1057                         c_hdl->tcp_do_toggle_rw = 0;
1058                         c_hdl->use_h2 = 1;
1059                 }
1060 #endif
1061                 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
1062                         UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
1063                         comm_point_http_handle_callback, c_hdl);
1064         } else {
1065                 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
1066                         UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
1067                         comm_point_tcp_handle_callback, c_hdl);
1068         }
1069         if(!c_hdl->ev->ev) {
1070                 log_warn("could not ub_event_new, dropped tcp");
1071                 return;
1072         }
1073         log_assert(fd != -1);
1074         (void)fd;
1075         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
1076                 &c_hdl->repinfo.addrlen);
1077         if(new_fd == -1)
1078                 return;
1079         if(c->ssl) {
1080                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
1081                 if(!c_hdl->ssl) {
1082                         c_hdl->fd = new_fd;
1083                         comm_point_close(c_hdl);
1084                         return;
1085                 }
1086                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
1087 #ifdef USE_WINSOCK
1088                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
1089 #endif
1090         }
1091
1092         /* grab the tcp handler buffers */
1093         c->cur_tcp_count++;
1094         c->tcp_free = c_hdl->tcp_free;
1095         if(!c->tcp_free) {
1096                 /* stop accepting incoming queries for now. */
1097                 comm_point_stop_listening(c);
1098         }
1099         setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
1100 }
1101
1102 /** Make tcp handler free for next assignment */
1103 static void
1104 reclaim_tcp_handler(struct comm_point* c)
1105 {
1106         log_assert(c->type == comm_tcp);
1107         if(c->ssl) {
1108 #ifdef HAVE_SSL
1109                 SSL_shutdown(c->ssl);
1110                 SSL_free(c->ssl);
1111                 c->ssl = NULL;
1112 #endif
1113         }
1114         comm_point_close(c);
1115         if(c->tcp_parent) {
1116                 c->tcp_parent->cur_tcp_count--;
1117                 c->tcp_free = c->tcp_parent->tcp_free;
1118                 c->tcp_parent->tcp_free = c;
1119                 if(!c->tcp_free) {
1120                         /* re-enable listening on accept socket */
1121                         comm_point_start_listening(c->tcp_parent, -1, -1);
1122                 }
1123         }
1124         c->tcp_more_read_again = NULL;
1125         c->tcp_more_write_again = NULL;
1126 }
1127
1128 /** do the callback when writing is done */
1129 static void
1130 tcp_callback_writer(struct comm_point* c)
1131 {
1132         log_assert(c->type == comm_tcp);
1133         if(!c->tcp_write_and_read) {
1134                 sldns_buffer_clear(c->buffer);
1135                 c->tcp_byte_count = 0;
1136         }
1137         if(c->tcp_do_toggle_rw)
1138                 c->tcp_is_reading = 1;
1139         /* switch from listening(write) to listening(read) */
1140         if(c->tcp_req_info) {
1141                 tcp_req_info_handle_writedone(c->tcp_req_info);
1142         } else {
1143                 comm_point_stop_listening(c);
1144                 if(c->tcp_write_and_read) {
1145                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1146                         if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN,
1147                                 &c->repinfo) ) {
1148                                 comm_point_start_listening(c, -1,
1149                                         adjusted_tcp_timeout(c));
1150                         }
1151                 } else {
1152                         comm_point_start_listening(c, -1,
1153                                         adjusted_tcp_timeout(c));
1154                 }
1155         }
1156 }
1157
1158 /** do the callback when reading is done */
1159 static void
1160 tcp_callback_reader(struct comm_point* c)
1161 {
1162         log_assert(c->type == comm_tcp || c->type == comm_local);
1163         sldns_buffer_flip(c->buffer);
1164         if(c->tcp_do_toggle_rw)
1165                 c->tcp_is_reading = 0;
1166         c->tcp_byte_count = 0;
1167         if(c->tcp_req_info) {
1168                 tcp_req_info_handle_readdone(c->tcp_req_info);
1169         } else {
1170                 if(c->type == comm_tcp)
1171                         comm_point_stop_listening(c);
1172                 fptr_ok(fptr_whitelist_comm_point(c->callback));
1173                 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1174                         comm_point_start_listening(c, -1,
1175                                         adjusted_tcp_timeout(c));
1176                 }
1177         }
1178 }
1179
#ifdef HAVE_SSL
/**
 * See if an ssl handshake error has to be squelched from the logs.
 * @param err: the error code, as from ERR_get_error().
 * @return 1 if verbosity is below VERB_QUERY and err is one of the
 *      listed handshake errors, 0 otherwise.
 */
int
squelch_err_ssl_handshake(unsigned long err)
{
        /* only squelch on low verbosity */
        if(verbosity >= VERB_QUERY)
                return 0;
        /* The match is very specific, on library, function and reason.
         * Filtering on ERR_GET_REASON() (the third element in ERR_PACK)
         * alone would also be possible. */
        if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST))
                return 1;
        if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST))
                return 1;
        if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER))
                return 1;
        if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE))
                return 1;
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
        if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER))
                return 1;
#endif
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
        if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL))
                return 1;
        if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL))
                return 1;
#  ifdef SSL_R_VERSION_TOO_LOW
        if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW))
                return 1;
#  endif
#endif
        return 0;
}
#endif /* HAVE_SSL */
1208
/**
 * Continue the ssl handshake on a connection.
 * If the shake state records that an SSL read or write was waiting for
 * the other direction (hs_read, hs_write), only the listening direction
 * is restored. Otherwise SSL_do_handshake is called; when it wants more
 * reading or writing the comm point listens for that. Once the handshake
 * completes, the peer is checked if SSL_VERIFY_PEER is set, http2 is
 * selected for comm_http if ALPN negotiated "h2", and the comm point
 * listens in the direction given by tcp_is_reading.
 * @param c: the comm point with the ssl connection.
 * @return 0 if the connection is closed or failed, 1 to keep going
 *      (the handshake is done when ssl_shake_state is comm_ssl_shake_none).
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
        int r;
        switch(c->ssl_shake_state) {
        case comm_ssl_shake_hs_read:
                /* read condition satisfied back to writing */
                comm_point_listen_for_rw(c, 0, 1);
                c->ssl_shake_state = comm_ssl_shake_none;
                return 1;
        case comm_ssl_shake_hs_write:
                /* write condition satisfied, back to reading */
                comm_point_listen_for_rw(c, 1, 0);
                c->ssl_shake_state = comm_ssl_shake_none;
                return 1;
        default:
                break;
        }

        ERR_clear_error();
        r = SSL_do_handshake(c->ssl);
        if(r != 1) {
                unsigned long err;
                int want = SSL_get_error(c->ssl, r);
                if(want == SSL_ERROR_WANT_READ) {
                        /* only change the event if not reading already */
                        if(c->ssl_shake_state != comm_ssl_shake_read) {
                                c->ssl_shake_state = comm_ssl_shake_read;
                                comm_point_listen_for_rw(c, 1, 0);
                        }
                        return 1;
                }
                if(want == SSL_ERROR_WANT_WRITE) {
                        /* only change the event if not writing already */
                        if(c->ssl_shake_state != comm_ssl_shake_write) {
                                c->ssl_shake_state = comm_ssl_shake_write;
                                comm_point_listen_for_rw(c, 0, 1);
                        }
                        return 1;
                }
                if(r == 0)
                        return 0; /* closed */
                if(want == SSL_ERROR_SYSCALL) {
                        /* SYSCALL and errno==0 means closed uncleanly */
#ifdef EPIPE
                        if(errno == EPIPE && verbosity < 2)
                                return 0; /* silence 'broken pipe' */
#endif
#ifdef ECONNRESET
                        if(errno == ECONNRESET && verbosity < 2)
                                return 0; /* silence reset by peer */
#endif
                        if(errno != 0)
                                log_err("SSL_handshake syscall: %s",
                                        strerror(errno));
                        return 0;
                }
                err = ERR_get_error();
                if(!squelch_err_ssl_handshake(err)) {
                        log_crypto_err_code("ssl handshake failed", err);
                        log_addr(VERB_OPS, "ssl handshake failed", &c->repinfo.addr,
                                c->repinfo.addrlen);
                }
                return 0;
        }

        /* this is where peer verification could take place */
        if(!(SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
                /* unauthenticated, the verify peer flag was not set
                 * in c->ssl when the ssl object was created from ssl_ctx */
                log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr,
                        c->repinfo.addrlen);
        } else {
                int verified = (SSL_get_verify_result(c->ssl) == X509_V_OK);
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
                X509* x = SSL_get1_peer_certificate(c->ssl);
#else
                X509* x = SSL_get_peer_certificate(c->ssl);
#endif
                if(!verified) {
                        /* log the certificate that was rejected, if any */
                        if(x) {
                                log_cert(VERB_ALGO, "peer certificate", x);
                                X509_free(x);
                        }
                        log_addr(VERB_ALGO, "SSL connection failed: "
                                "failed to authenticate",
                                &c->repinfo.addr, c->repinfo.addrlen);
                        return 0;
                }
                if(!x) {
                        log_addr(VERB_ALGO, "SSL connection failed: "
                                "no certificate",
                                &c->repinfo.addr, c->repinfo.addrlen);
                        return 0;
                }
                log_cert(VERB_ALGO, "peer certificate", x);
#ifdef HAVE_SSL_GET0_PEERNAME
                if(SSL_get0_peername(c->ssl)) {
                        char buf[255];
                        snprintf(buf, sizeof(buf), "SSL connection "
                                "to %s authenticated",
                                SSL_get0_peername(c->ssl));
                        log_addr(VERB_ALGO, buf, &c->repinfo.addr,
                                c->repinfo.addrlen);
                } else
#endif
                {
                        log_addr(VERB_ALGO, "SSL connection "
                                "authenticated", &c->repinfo.addr,
                                c->repinfo.addrlen);
                }
                X509_free(x);
        }

#ifdef HAVE_SSL_GET0_ALPN_SELECTED
        /* check if http2 use is negotiated */
        if(c->type == comm_http && c->h2_session) {
                const unsigned char *alpn;
                unsigned int alpnlen = 0;
                SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen);
                if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) {
                        /* connection upgraded to HTTP2 */
                        c->tcp_do_toggle_rw = 0;
                        c->use_h2 = 1;
                }
        }
#endif

        /* setup listen rw correctly */
        if(!c->tcp_is_reading)
                comm_point_listen_for_rw(c, 0, 1);
        else if(c->ssl_shake_state != comm_ssl_shake_read)
                comm_point_listen_for_rw(c, 1, 0);
        c->ssl_shake_state = comm_ssl_shake_none;
        return 1;
}
#endif /* HAVE_SSL */
1350
#ifdef HAVE_SSL
/**
 * Handle an SSL_read that returned r <= 0 for ssl_handle_read.
 * @param c: the comm point with the ssl connection.
 * @param r: the return value of SSL_read.
 * @return the value ssl_handle_read has to return: 1 when the read is
 *      retried later, 0 (or the result of
 *      tcp_req_info_handle_read_close) when the connection is done.
 */
static int
ssl_handle_read_err(struct comm_point* c, int r)
{
        int want = SSL_get_error(c->ssl, r);
        switch(want) {
        case SSL_ERROR_ZERO_RETURN:
                if(c->tcp_req_info)
                        return tcp_req_info_handle_read_close(c->tcp_req_info);
                return 0; /* shutdown, closed */
        case SSL_ERROR_WANT_READ:
#ifdef USE_WINSOCK
                ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
#endif
                return 1; /* read more later */
        case SSL_ERROR_WANT_WRITE:
                /* ssl needs to write first; ssl_handshake switches back
                 * to reading once the write event has happened */
                c->ssl_shake_state = comm_ssl_shake_hs_write;
                comm_point_listen_for_rw(c, 0, 1);
                return 1;
        case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
                if(errno == ECONNRESET && verbosity < 2)
                        return 0; /* silence reset by peer */
#endif
                if(errno != 0)
                        log_err("SSL_read syscall: %s",
                                strerror(errno));
                return 0;
        default:
                break;
        }
        log_crypto_err("could not SSL_read");
        return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Finishes the handshake if it is still in progress, then reads the two
 * byte length followed by the message of that length into c->buffer,
 * and calls tcp_callback_reader once the message is complete.
 * @param c: the comm point with the ssl connection.
 * @return 0 if the connection is closed or failed, 1 otherwise. Without
 *      HAVE_SSL always 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
        int r;
        if(c->ssl_shake_state != comm_ssl_shake_none) {
                if(!ssl_handshake(c))
                        return 0;
                /* handshake not finished yet, wait for the next event */
                if(c->ssl_shake_state != comm_ssl_shake_none)
                        return 1;
        }
        if(c->tcp_byte_count < sizeof(uint16_t)) {
                size_t pktlen;
                /* read length bytes */
                ERR_clear_error();
                r = SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
                        c->tcp_byte_count), (int)(sizeof(uint16_t) -
                        c->tcp_byte_count));
                if(r <= 0)
                        return ssl_handle_read_err(c, r);
                c->tcp_byte_count += r;
                if(c->tcp_byte_count < sizeof(uint16_t))
                        return 1;
                pktlen = sldns_buffer_read_u16_at(c->buffer, 0);
                if(pktlen > sldns_buffer_capacity(c->buffer)) {
                        verbose(VERB_QUERY, "ssl: dropped larger than buffer");
                        return 0;
                }
                sldns_buffer_set_limit(c->buffer, pktlen);
                if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
                        verbose(VERB_QUERY, "ssl: dropped bogus too short.");
                        return 0;
                }
                sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
                verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
                        (int)sldns_buffer_limit(c->buffer));
        }
        if(sldns_buffer_remaining(c->buffer) > 0) {
                /* read (more of) the message itself */
                ERR_clear_error();
                r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
                        (int)sldns_buffer_remaining(c->buffer));
                if(r <= 0)
                        return ssl_handle_read_err(c, r);
                sldns_buffer_skip(c->buffer, (ssize_t)r);
        }
        if(sldns_buffer_remaining(c->buffer) <= 0) {
                /* the whole message has arrived */
                tcp_callback_reader(c);
        }
        return 1;
#else
        (void)c;
        return 0;
#endif /* HAVE_SSL */
}
1457
1458 /**
 * ssl write callback on TCP.
 * Sends the 2-byte network-order length prefix followed by the message
 * through SSL_write, possibly spread over several calls.
 * If c->tcp_write_and_read is set, the message is c->tcp_write_pkt
 * (c->tcp_write_pkt_len bytes) and progress is kept in
 * c->tcp_write_byte_count. Otherwise the message is in c->buffer and
 * progress is kept in c->tcp_byte_count and the buffer position.
 * tcp_callback_writer is called once the whole message has been written.
 * @param c: comm point with the ssl connection.
 * @return 0 on error or when the connection is closed, 1 otherwise
 *	(finished, or wait for the next event). Without HAVE_SSL always 0.
 */
1459 static int
1460 ssl_handle_write(struct comm_point* c)
1461 {
1462 #ifdef HAVE_SSL
1463         int r;
             /* a handshake is still pending: continue it before writing data */
1464         if(c->ssl_shake_state != comm_ssl_shake_none) {
1465                 if(!ssl_handshake(c))
1466                         return 0;
                     /* handshake made progress but is not finished yet */
1467                 if(c->ssl_shake_state != comm_ssl_shake_none)
1468                         return 1;
1469         }
1470         /* ignore return, if fails we may simply block */
1471         (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
             /* first stage: the 2-byte length prefix is not completely sent yet */
1472         if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
1473                 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer));
1474                 ERR_clear_error();
1475                 if(c->tcp_write_and_read) {
1476                         if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) {
1477                                 /* combine the tcp length and the query for
1478                                  * write, this emulates writev */
1479                                 uint8_t buf[LDNS_RR_BUF_SIZE];
1480                                 memmove(buf, &len, sizeof(uint16_t));
1481                                 memmove(buf+sizeof(uint16_t),
1482                                         c->tcp_write_pkt,
1483                                         c->tcp_write_pkt_len);
                                     /* skip the length bytes that an earlier call
                                      * already wrote */
1484                                 r = SSL_write(c->ssl,
1485                                         (void*)(buf+c->tcp_write_byte_count),
1486                                         c->tcp_write_pkt_len + 2 -
1487                                         c->tcp_write_byte_count);
1488                         } else {
                                     /* length plus packet does not fit in the local
                                      * buffer: write only the remaining length bytes */
1489                                 r = SSL_write(c->ssl,
1490                                         (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
1491                                         (int)(sizeof(uint16_t)-c->tcp_write_byte_count));
1492                         }
1493                 } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
1494                         LDNS_RR_BUF_SIZE) {
1495                         /* combine the tcp length and the query for write,
1496                          * this emulates writev */
1497                         uint8_t buf[LDNS_RR_BUF_SIZE];
1498                         memmove(buf, &len, sizeof(uint16_t));
1499                         memmove(buf+sizeof(uint16_t),
1500                                 sldns_buffer_current(c->buffer),
1501                                 sldns_buffer_remaining(c->buffer));
1502                         r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
1503                                 (int)(sizeof(uint16_t)+
1504                                 sldns_buffer_remaining(c->buffer)
1505                                 - c->tcp_byte_count));
1506                 } else {
                             /* too large to combine: write only the remaining
                              * length bytes */
1507                         r = SSL_write(c->ssl,
1508                                 (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1509                                 (int)(sizeof(uint16_t)-c->tcp_byte_count));
1510                 }
1511                 if(r <= 0) {
1512                         int want = SSL_get_error(c->ssl, r);
1513                         if(want == SSL_ERROR_ZERO_RETURN) {
1514                                 return 0; /* closed */
1515                         } else if(want == SSL_ERROR_WANT_READ) {
                                     /* with this state set, ssl_handle_it passes the
                                      * next read event to this write routine */
1516                                 c->ssl_shake_state = comm_ssl_shake_hs_read;
1517                                 comm_point_listen_for_rw(c, 1, 0);
1518                                 return 1; /* wait for read condition */
1519                         } else if(want == SSL_ERROR_WANT_WRITE) {
1520 #ifdef USE_WINSOCK
1521                                 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1522 #endif
1523                                 return 1; /* write more later */
1524                         } else if(want == SSL_ERROR_SYSCALL) {
1525 #ifdef EPIPE
1526                                 if(errno == EPIPE && verbosity < 2)
1527                                         return 0; /* silence 'broken pipe' */
1528 #endif
1529                                 if(errno != 0)
1530                                         log_err("SSL_write syscall: %s",
1531                                                 strerror(errno));
1532                                 return 0;
1533                         }
1534                         log_crypto_err("could not SSL_write");
1535                         return 0;
1536                 }
                     /* r bytes were written, add them to the progress counter */
1537                 if(c->tcp_write_and_read) {
1538                         c->tcp_write_byte_count += r;
1539                         if(c->tcp_write_byte_count < sizeof(uint16_t))
1540                                 return 1;
1541                 } else {
1542                         c->tcp_byte_count += r;
1543                         if(c->tcp_byte_count < sizeof(uint16_t))
1544                                 return 1;
                             /* the counter includes the 2 length bytes; what is
                              * beyond them is message data that was sent */
1545                         sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1546                                 sizeof(uint16_t));
1547                 }
                     /* length and complete message are written */
1548                 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
1549                         tcp_callback_writer(c);
1550                         return 1;
1551                 }
1552         }
             /* second stage: the length prefix is sent, write remaining data */
1553         log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0);
1554         log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
1555         ERR_clear_error();
1556         if(c->tcp_write_and_read) {
                     /* tcp_write_byte_count also counts the 2 length bytes, so
                      * subtract 2 to get the offset into the packet */
1557                 r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
1558                         (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count));
1559         } else {
1560                 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
1561                         (int)sldns_buffer_remaining(c->buffer));
1562         }
             /* same error handling as for the first stage above */
1563         if(r <= 0) {
1564                 int want = SSL_get_error(c->ssl, r);
1565                 if(want == SSL_ERROR_ZERO_RETURN) {
1566                         return 0; /* closed */
1567                 } else if(want == SSL_ERROR_WANT_READ) {
1568                         c->ssl_shake_state = comm_ssl_shake_hs_read;
1569                         comm_point_listen_for_rw(c, 1, 0);
1570                         return 1; /* wait for read condition */
1571                 } else if(want == SSL_ERROR_WANT_WRITE) {
1572 #ifdef USE_WINSOCK
1573                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1574 #endif
1575                         return 1; /* write more later */
1576                 } else if(want == SSL_ERROR_SYSCALL) {
1577 #ifdef EPIPE
1578                         if(errno == EPIPE && verbosity < 2)
1579                                 return 0; /* silence 'broken pipe' */
1580 #endif
1581                         if(errno != 0)
1582                                 log_err("SSL_write syscall: %s",
1583                                         strerror(errno));
1584                         return 0;
1585                 }
1586                 log_crypto_err("could not SSL_write");
1587                 return 0;
1588         }
1589         if(c->tcp_write_and_read) {
1590                 c->tcp_write_byte_count += r;
1591         } else {
1592                 sldns_buffer_skip(c->buffer, (ssize_t)r);
1593         }
1594
             /* signal completion once nothing is left to write */
1595         if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
1596                 tcp_callback_writer(c);
1597         }
1598         return 1;
1599 #else
             /* no SSL support compiled in */
1600         (void)c;
1601         return 0;
1602 #endif /* HAVE_SSL */
1603 }
1604
1605 /** handle ssl tcp connection with dns contents */
1606 static int
1607 ssl_handle_it(struct comm_point* c, int is_write)
1608 {
1609         /* handle case where renegotiation wants read during write call
1610          * or write during read calls */
1611         if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write)
1612                 return ssl_handle_read(c);
1613         else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read)
1614                 return ssl_handle_write(c);
1615         /* handle read events for read operation and write events for a
1616          * write operation */
1617         else if(!is_write)
1618                 return ssl_handle_read(c);
1619         return ssl_handle_write(c);
1620 }
1621
1622 /** Handle tcp reading callback. 
 * Reads the 2-byte length prefix and after that the message itself into
 * c->buffer with recv(). If c->ssl is set the work is handed to
 * ssl_handle_it instead. When the buffer has no bytes remaining,
 * tcp_callback_reader is called.
1623  * @param fd: file descriptor of socket.
1624  * @param c: comm point to read from into buffer.
1625  * @param short_ok: if true, very short packets are OK (for comm_local).
1626  * @return: 0 on error 
 *	(also when recv returns 0 and there is no tcp_req_info, or when the
 *	comm point is neither reading nor in write-and-read mode);
 *	1 otherwise, including when the read has to be retried later.
1627  */
1628 static int
1629 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1630 {
1631         ssize_t r;
1632         log_assert(c->type == comm_tcp || c->type == comm_local);
1633         if(c->ssl)
1634                 return ssl_handle_it(c, 0);
1635         if(!c->tcp_is_reading && !c->tcp_write_and_read)
1636                 return 0;
1637
1638         log_assert(fd != -1);
1639         if(c->tcp_byte_count < sizeof(uint16_t)) {
1640                 /* read length bytes */
1641                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1642                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1643                 if(r == 0) {
                             /* recv returned 0; with a tcp_req_info its close
                              * handler decides the return value */
1644                         if(c->tcp_req_info)
1645                                 return tcp_req_info_handle_read_close(c->tcp_req_info);
1646                         return 0;
1647                 } else if(r == -1) {
1648 #ifndef USE_WINSOCK
                             /* nothing read for now, try again on a later event */
1649                         if(errno == EINTR || errno == EAGAIN)
1650                                 return 1;
                             /* the errors below are not logged unless verbosity
                              * is 2 or higher */
1651 #ifdef ECONNRESET
1652                         if(errno == ECONNRESET && verbosity < 2)
1653                                 return 0; /* silence reset by peer */
1654 #endif
1655 #ifdef ECONNREFUSED
1656                         if(errno == ECONNREFUSED && verbosity < 2)
1657                                 return 0; /* silence connection refused */
1658 #endif
1659 #ifdef ENETUNREACH
1660                         if(errno == ENETUNREACH && verbosity < 2)
1661                                 return 0; /* silence it */
1662 #endif
1663 #ifdef EHOSTDOWN
1664                         if(errno == EHOSTDOWN && verbosity < 2)
1665                                 return 0; /* silence it */
1666 #endif
1667 #ifdef EHOSTUNREACH
1668                         if(errno == EHOSTUNREACH && verbosity < 2)
1669                                 return 0; /* silence it */
1670 #endif
1671 #ifdef ENETDOWN
1672                         if(errno == ENETDOWN && verbosity < 2)
1673                                 return 0; /* silence it */
1674 #endif
1675 #ifdef EACCES
1676                         if(errno == EACCES && verbosity < 2)
1677                                 return 0; /* silence it */
1678 #endif
1679 #ifdef ENOTCONN
                             /* always logged, with a hint about TCP Fast Open */
1680                         if(errno == ENOTCONN) {
1681                                 log_err_addr("read (in tcp s) failed and this could be because TCP Fast Open is enabled [--disable-tfo-client --disable-tfo-server] but does not work", sock_strerror(errno),
1682                                         &c->repinfo.addr, c->repinfo.addrlen);
1683                                 return 0;
1684                         }
1685 #endif
1686 #else /* USE_WINSOCK */
1687                         if(WSAGetLastError() == WSAECONNREFUSED && verbosity < 2)
1688                                 return 0;
1689                         if(WSAGetLastError() == WSAEHOSTDOWN && verbosity < 2)
1690                                 return 0;
1691                         if(WSAGetLastError() == WSAEHOSTUNREACH && verbosity < 2)
1692                                 return 0;
1693                         if(WSAGetLastError() == WSAENETDOWN && verbosity < 2)
1694                                 return 0;
1695                         if(WSAGetLastError() == WSAENETUNREACH && verbosity < 2)
1696                                 return 0;
1697                         if(WSAGetLastError() == WSAECONNRESET)
1698                                 return 0;
1699                         if(WSAGetLastError() == WSAEINPROGRESS)
1700                                 return 1;
1701                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1702                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1703                                         UB_EV_READ);
1704                                 return 1;
1705                         }
1706 #endif
                             /* any other error is logged with the peer address */
1707                         log_err_addr("read (in tcp s)", sock_strerror(errno),
1708                                 &c->repinfo.addr, c->repinfo.addrlen);
1709                         return 0;
1710                 } 
1711                 c->tcp_byte_count += r;
                     /* length prefix still incomplete, wait for more data */
1712                 if(c->tcp_byte_count != sizeof(uint16_t))
1713                         return 1;
                     /* the announced length must fit in the buffer */
1714                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1715                         sldns_buffer_capacity(c->buffer)) {
1716                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1717                         return 0;
1718                 }
                     /* the buffer limit becomes the announced message length */
1719                 sldns_buffer_set_limit(c->buffer, 
1720                         sldns_buffer_read_u16_at(c->buffer, 0));
1721                 if(!short_ok && 
1722                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1723                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1724                         return 0;
1725                 }
1726                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1727                         (int)sldns_buffer_limit(c->buffer));
1728         }
1729
             /* read (more of) the message itself */
1730         if(sldns_buffer_remaining(c->buffer) == 0)
1731                 log_err("in comm_point_tcp_handle_read buffer_remaining is not > 0 as expected, continuing with (harmless) 0 length recv");
1732         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1733                 sldns_buffer_remaining(c->buffer), 0);
1734         if(r == 0) {
1735                 if(c->tcp_req_info)
1736                         return tcp_req_info_handle_read_close(c->tcp_req_info);
1737                 return 0;
1738         } else if(r == -1) {
1739 #ifndef USE_WINSOCK
1740                 if(errno == EINTR || errno == EAGAIN)
1741                         return 1;
1742 #else /* USE_WINSOCK */
1743                 if(WSAGetLastError() == WSAECONNRESET)
1744                         return 0;
1745                 if(WSAGetLastError() == WSAEINPROGRESS)
1746                         return 1;
1747                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1748                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1749                         return 1;
1750                 }
1751 #endif
1752                 log_err_addr("read (in tcp r)", sock_strerror(errno),
1753                         &c->repinfo.addr, c->repinfo.addrlen);
1754                 return 0;
1755         }
             /* advance the buffer position over the bytes just received */
1756         sldns_buffer_skip(c->buffer, r);
             /* message complete: hand it to the reader callback */
1757         if(sldns_buffer_remaining(c->buffer) <= 0) {
1758                 tcp_callback_reader(c);
1759         }
1760         return 1;
1761 }
1762
1763 /** 
1764  * Handle tcp writing callback. 
 * Writes the 2-byte network-order length prefix and then the message.
 * If c->tcp_write_and_read is set, the message is c->tcp_write_pkt and
 * progress is kept in c->tcp_write_byte_count; otherwise the message is
 * in the buffer (c->dnscrypt_buffer with USE_DNSCRYPT, else c->buffer)
 * and progress is kept in c->tcp_byte_count and the buffer position.
 * If c->ssl is set, the writing itself is handed to ssl_handle_it.
 * tcp_callback_writer is called once the whole message is written.
1765  * @param fd: file descriptor of socket.
1766  * @param c: comm point to write buffer out of.
1767  * @return: 0 on error
 *	(some errors are not logged when verbosity is below 2);
 *	1 otherwise, including when the write has to continue later.
1768  */
1769 static int
1770 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1771 {
1772         ssize_t r;
1773         struct sldns_buffer *buffer;
1774         log_assert(c->type == comm_tcp);
1775 #ifdef USE_DNSCRYPT
1776         buffer = c->dnscrypt_buffer;
1777 #else
1778         buffer = c->buffer;
1779 #endif
             /* a plain comm point that is in reading state has nothing to write */
1780         if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
1781                 return 0;
1782         log_assert(fd != -1);
             /* nothing written yet: look at the result of the connect first */
1783         if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
1784                 /* check for pending error from nonblocking connect */
1785                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1786                 int error = 0;
1787                 socklen_t len = (socklen_t)sizeof(error);
1788                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1789                         &len) < 0){
1790 #ifndef USE_WINSOCK
1791                         error = errno; /* on solaris errno is error */
1792 #else /* USE_WINSOCK */
1793                         error = WSAGetLastError();
1794 #endif
1795                 }
                     /* both preprocessor branches below end inside an open
                      * 'else if(error != 0) {' block; the 'return 0; }' after
                      * the #endif closes it for both */
1796 #ifndef USE_WINSOCK
1797 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1798                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1799                         return 1; /* try again later */
1800                 else
1801 #endif
1802                 if(error != 0 && verbosity < 2)
1803                         return 0; /* silence lots of chatter in the logs */
1804                 else if(error != 0) {
1805                         log_err_addr("tcp connect", strerror(error),
1806                                 &c->repinfo.addr, c->repinfo.addrlen);
1807 #else /* USE_WINSOCK */
1808                 /* examine error */
1809                 if(error == WSAEINPROGRESS)
1810                         return 1;
1811                 else if(error == WSAEWOULDBLOCK) {
1812                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1813                         return 1;
1814                 } else if(error != 0 && verbosity < 2)
1815                         return 0;
1816                 else if(error != 0) {
1817                         log_err_addr("tcp connect", wsa_strerror(error),
1818                                 &c->repinfo.addr, c->repinfo.addrlen);
1819 #endif /* USE_WINSOCK */
1820                         return 0;
1821                 }
1822         }
             /* ssl connections do the actual writing in the ssl routines */
1823         if(c->ssl)
1824                 return ssl_handle_it(c, 1);
1825
1826 #ifdef USE_MSG_FASTOPEN
1827         /* Only try this on first use of a connection that uses tfo, 
1828            otherwise fall through to normal write */
1829         /* Also, TFO support on WINDOWS not implemented at the moment */
1830         if(c->tcp_do_fastopen == 1) {
1831                 /* this form of sendmsg() does both a connect() and send() so need to
1832                    look for various flavours of error*/
1833                 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
1834                 struct msghdr msg;
1835                 struct iovec iov[2];
                     /* cleared here, so the fastopen path is tried only once */
1836                 c->tcp_do_fastopen = 0;
1837                 memset(&msg, 0, sizeof(msg));
                     /* iov[0] is the unsent part of the length prefix,
                      * iov[1] is the whole message */
1838                 if(c->tcp_write_and_read) {
1839                         iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
1840                         iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
1841                         iov[1].iov_base = c->tcp_write_pkt;
1842                         iov[1].iov_len = c->tcp_write_pkt_len;
1843                 } else {
1844                         iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1845                         iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1846                         iov[1].iov_base = sldns_buffer_begin(buffer);
1847                         iov[1].iov_len = sldns_buffer_limit(buffer);
1848                 }
1849                 log_assert(iov[0].iov_len > 0);
1850                 msg.msg_name = &c->repinfo.addr;
1851                 msg.msg_namelen = c->repinfo.addrlen;
1852                 msg.msg_iov = iov;
1853                 msg.msg_iovlen = 2;
1854                 r = sendmsg(fd, &msg, MSG_FASTOPEN);
1855                 if (r == -1) {
1856 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1857                         /* Handshake is underway, maybe because no TFO cookie available.
1858                            Come back to write the message*/
1859                         if(errno == EINPROGRESS || errno == EWOULDBLOCK)
1860                                 return 1;
1861 #endif
1862                         if(errno == EINTR || errno == EAGAIN)
1863                                 return 1;
1864                         /* Not handling EISCONN here as shouldn't ever hit that case.*/
1865                         if(errno != EPIPE && errno != 0 && verbosity < 2)
1866                                 return 0; /* silence lots of chatter in the logs */
1867                         if(errno != EPIPE && errno != 0) {
1868                                 log_err_addr("tcp sendmsg", strerror(errno),
1869                                         &c->repinfo.addr, c->repinfo.addrlen);
1870                                 return 0;
1871                         }
1872                         /* fallthrough to nonFASTOPEN
1873                          * (MSG_FASTOPEN on Linux 3 produces EPIPE)
1874                          * we need to perform connect() */
1875                         if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) {
1876 #ifdef EINPROGRESS
1877                                 if(errno == EINPROGRESS)
1878                                         return 1; /* wait until connect done*/
1879 #endif
1880 #ifdef USE_WINSOCK
1881                                 if(WSAGetLastError() == WSAEINPROGRESS ||
1882                                         WSAGetLastError() == WSAEWOULDBLOCK)
1883                                         return 1; /* wait until connect done*/
1884 #endif
1885                                 if(tcp_connect_errno_needs_log(
1886                                         (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) {
1887                                         log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
1888                                                 strerror(errno), &c->repinfo.addr, c->repinfo.addrlen);
1889                                 }
1890                                 return 0;
1891                         }
                             /* connect() succeeded right away: continue with the
                              * normal write code below */
1892
1893                 } else {
                             /* sendmsg wrote r bytes, account for them */
1894                         if(c->tcp_write_and_read) {
1895                                 c->tcp_write_byte_count += r;
1896                                 if(c->tcp_write_byte_count < sizeof(uint16_t))
1897                                         return 1;
1898                         } else {
1899                                 c->tcp_byte_count += r;
1900                                 if(c->tcp_byte_count < sizeof(uint16_t))
1901                                         return 1;
                                     /* the counter includes the 2 length bytes */
1902                                 sldns_buffer_set_position(buffer, c->tcp_byte_count -
1903                                         sizeof(uint16_t));
1904                         }
1905                         if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
1906                                 tcp_callback_writer(c);
1907                                 return 1;
1908                         }
1909                 }
1910         }
1911 #endif /* USE_MSG_FASTOPEN */
1912
             /* first stage: the 2-byte length prefix is not completely sent yet */
1913         if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
1914                 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
1915 #ifdef HAVE_WRITEV
                     /* with writev the length prefix and the message go out in
                      * one call */
1916                 struct iovec iov[2];
1917                 if(c->tcp_write_and_read) {
1918                         iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
1919                         iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
1920                         iov[1].iov_base = c->tcp_write_pkt;
1921                         iov[1].iov_len = c->tcp_write_pkt_len;
1922                 } else {
1923                         iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1924                         iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1925                         iov[1].iov_base = sldns_buffer_begin(buffer);
1926                         iov[1].iov_len = sldns_buffer_limit(buffer);
1927                 }
1928                 log_assert(iov[0].iov_len > 0);
1929                 r = writev(fd, iov, 2);
1930 #else /* HAVE_WRITEV */
                     /* without writev only the remaining length bytes are sent
                      * here; the message follows in the second stage */
1931                 if(c->tcp_write_and_read) {
1932                         r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
1933                                 sizeof(uint16_t)-c->tcp_write_byte_count, 0);
1934                 } else {
1935                         r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1936                                 sizeof(uint16_t)-c->tcp_byte_count, 0);
1937                 }
1938 #endif /* HAVE_WRITEV */
1939                 if(r == -1) {
1940 #ifndef USE_WINSOCK
1941 #  ifdef EPIPE
1942                         if(errno == EPIPE && verbosity < 2)
1943                                 return 0; /* silence 'broken pipe' */
1944   #endif
1945                         if(errno == EINTR || errno == EAGAIN)
1946                                 return 1;
1947 #ifdef ECONNRESET
1948                         if(errno == ECONNRESET && verbosity < 2)
1949                                 return 0; /* silence reset by peer */
1950 #endif
1951 #  ifdef HAVE_WRITEV
1952                         log_err_addr("tcp writev", strerror(errno),
1953                                 &c->repinfo.addr, c->repinfo.addrlen);
1954 #  else /* HAVE_WRITEV */
1955                         log_err_addr("tcp send s", strerror(errno),
1956                                 &c->repinfo.addr, c->repinfo.addrlen);
1957 #  endif /* HAVE_WRITEV */
1958 #else
1959                         if(WSAGetLastError() == WSAENOTCONN)
1960                                 return 1;
1961                         if(WSAGetLastError() == WSAEINPROGRESS)
1962                                 return 1;
1963                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1964                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1965                                         UB_EV_WRITE);
1966                                 return 1; 
1967                         }
1968                         if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
1969                                 return 0; /* silence reset by peer */
1970                         log_err_addr("tcp send s",
1971                                 wsa_strerror(WSAGetLastError()),
1972                                 &c->repinfo.addr, c->repinfo.addrlen);
1973 #endif
                             /* reached after logging in either branch above */
1974                         return 0;
1975                 }
                     /* r bytes were written, account for them */
1976                 if(c->tcp_write_and_read) {
1977                         c->tcp_write_byte_count += r;
1978                         if(c->tcp_write_byte_count < sizeof(uint16_t))
1979                                 return 1;
1980                 } else {
1981                         c->tcp_byte_count += r;
1982                         if(c->tcp_byte_count < sizeof(uint16_t))
1983                                 return 1;
                             /* the counter includes the 2 length bytes; what is
                              * beyond them is message data that was sent */
1984                         sldns_buffer_set_position(buffer, c->tcp_byte_count -
1985                                 sizeof(uint16_t));
1986                 }
1987                 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
1988                         tcp_callback_writer(c);
1989                         return 1;
1990                 }
1991         }
             /* second stage: the length prefix is sent, write remaining data */
1992         log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
1993         log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
1994         if(c->tcp_write_and_read) {
                     /* tcp_write_byte_count also counts the 2 length bytes, so
                      * subtract 2 to get the offset into the packet */
1995                 r = send(fd, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
1996                         c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
1997         } else {
1998                 r = send(fd, (void*)sldns_buffer_current(buffer),
1999                         sldns_buffer_remaining(buffer), 0);
2000         }
2001         if(r == -1) {
2002 #ifndef USE_WINSOCK
2003                 if(errno == EINTR || errno == EAGAIN)
2004                         return 1;
2005 #ifdef ECONNRESET
2006                 if(errno == ECONNRESET && verbosity < 2)
2007                         return 0; /* silence reset by peer */
2008 #endif
2009 #else
2010                 if(WSAGetLastError() == WSAEINPROGRESS)
2011                         return 1;
2012                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2013                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2014                         return 1; 
2015                 }
2016                 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
2017                         return 0; /* silence reset by peer */
2018 #endif
2019                 log_err_addr("tcp send r", sock_strerror(errno),
2020                         &c->repinfo.addr, c->repinfo.addrlen);
2021                 return 0;
2022         }
2023         if(c->tcp_write_and_read) {
2024                 c->tcp_write_byte_count += r;
2025         } else {
2026                 sldns_buffer_skip(buffer, r);
2027         }
2028
             /* signal completion once nothing is left to write */
2029         if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
2030                 tcp_callback_writer(c);
2031         }
2032         
2033         return 1;
2034 }
2035
2036 /** read again to drain buffers when there could be more to read */
2037 static void
2038 tcp_req_info_read_again(int fd, struct comm_point* c)
2039 {
2040         while(c->tcp_req_info->read_again) {
2041                 int r;
2042                 c->tcp_req_info->read_again = 0;
2043                 if(c->tcp_is_reading)
2044                         r = comm_point_tcp_handle_read(fd, c, 0);
2045                 else    r = comm_point_tcp_handle_write(fd, c);
2046                 if(!r) {
2047                         reclaim_tcp_handler(c);
2048                         if(!c->tcp_do_close) {
2049                                 fptr_ok(fptr_whitelist_comm_point(
2050                                         c->callback));
2051                                 (void)(*c->callback)(c, c->cb_arg, 
2052                                         NETEVENT_CLOSED, NULL);
2053                         }
2054                         return;
2055                 }
2056         }
2057 }
2058
2059 /** read again to drain buffers when there could be more to read */
2060 static void
2061 tcp_more_read_again(int fd, struct comm_point* c)
2062 {
2063         /* if the packet is done, but another one could be waiting on
2064          * the connection, the callback signals this, and we try again */
2065         /* this continues until the read routines get EAGAIN or so,
2066          * and thus does not call the callback, and the bool is 0 */
2067         int* moreread = c->tcp_more_read_again;
2068         while(moreread && *moreread) {
2069                 *moreread = 0;
2070                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
2071                         reclaim_tcp_handler(c);
2072                         if(!c->tcp_do_close) {
2073                                 fptr_ok(fptr_whitelist_comm_point(
2074                                         c->callback));
2075                                 (void)(*c->callback)(c, c->cb_arg,
2076                                         NETEVENT_CLOSED, NULL);
2077                         }
2078                         return;
2079                 }
2080         }
2081 }
2082
2083 /** write again to fill up when there could be more to write */
2084 static void
2085 tcp_more_write_again(int fd, struct comm_point* c)
2086 {
2087         /* if the packet is done, but another is waiting to be written,
2088          * the callback signals it and we try again. */
2089         /* this continues until the write routines get EAGAIN or so,
2090          * and thus does not call the callback, and the bool is 0 */
2091         int* morewrite = c->tcp_more_write_again;
2092         while(morewrite && *morewrite) {
2093                 *morewrite = 0;
2094                 if(!comm_point_tcp_handle_write(fd, c)) {
2095                         reclaim_tcp_handler(c);
2096                         if(!c->tcp_do_close) {
2097                                 fptr_ok(fptr_whitelist_comm_point(
2098                                         c->callback));
2099                                 (void)(*c->callback)(c, c->cb_arg,
2100                                         NETEVENT_CLOSED, NULL);
2101                         }
2102                         return;
2103                 }
2104         }
2105 }
2106
2107 void 
2108 comm_point_tcp_handle_callback(int fd, short event, void* arg)
2109 {
2110         struct comm_point* c = (struct comm_point*)arg;
2111         log_assert(c->type == comm_tcp);
2112         ub_comm_base_now(c->ev->base);
2113
2114 #ifdef USE_DNSCRYPT
2115         /* Initialize if this is a dnscrypt socket */
2116         if(c->tcp_parent) {
2117                 c->dnscrypt = c->tcp_parent->dnscrypt;
2118         }
2119         if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
2120                 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
2121                 if(!c->dnscrypt_buffer) {
2122                         log_err("Could not allocate dnscrypt buffer");
2123                         reclaim_tcp_handler(c);
2124                         if(!c->tcp_do_close) {
2125                                 fptr_ok(fptr_whitelist_comm_point(
2126                                         c->callback));
2127                                 (void)(*c->callback)(c, c->cb_arg,
2128                                         NETEVENT_CLOSED, NULL);
2129                         }
2130                         return;
2131                 }
2132         }
2133 #endif
2134
2135         if(event&UB_EV_TIMEOUT) {
2136                 verbose(VERB_QUERY, "tcp took too long, dropped");
2137                 reclaim_tcp_handler(c);
2138                 if(!c->tcp_do_close) {
2139                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2140                         (void)(*c->callback)(c, c->cb_arg,
2141                                 NETEVENT_TIMEOUT, NULL);
2142                 }
2143                 return;
2144         }
2145         if(event&UB_EV_READ
2146 #ifdef USE_MSG_FASTOPEN
2147                 && !(c->tcp_do_fastopen && (event&UB_EV_WRITE))
2148 #endif
2149                 ) {
2150                 int has_tcpq = (c->tcp_req_info != NULL);
2151                 int* moreread = c->tcp_more_read_again;
2152                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
2153                         reclaim_tcp_handler(c);
2154                         if(!c->tcp_do_close) {
2155                                 fptr_ok(fptr_whitelist_comm_point(
2156                                         c->callback));
2157                                 (void)(*c->callback)(c, c->cb_arg,
2158                                         NETEVENT_CLOSED, NULL);
2159                         }
2160                         return;
2161                 }
2162                 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again)
2163                         tcp_req_info_read_again(fd, c);
2164                 if(moreread && *moreread)
2165                         tcp_more_read_again(fd, c);
2166                 return;
2167         }
2168         if(event&UB_EV_WRITE) {
2169                 int has_tcpq = (c->tcp_req_info != NULL);
2170                 int* morewrite = c->tcp_more_write_again;
2171                 if(!comm_point_tcp_handle_write(fd, c)) {
2172                         reclaim_tcp_handler(c);
2173                         if(!c->tcp_do_close) {
2174                                 fptr_ok(fptr_whitelist_comm_point(
2175                                         c->callback));
2176                                 (void)(*c->callback)(c, c->cb_arg,
2177                                         NETEVENT_CLOSED, NULL);
2178                         }
2179                         return;
2180                 }
2181                 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again)
2182                         tcp_req_info_read_again(fd, c);
2183                 if(morewrite && *morewrite)
2184                         tcp_more_write_again(fd, c);
2185                 return;
2186         }
2187         log_err("Ignored event %d for tcphdl.", event);
2188 }
2189
2190 /** Make http handler free for next assignment */
2191 static void
2192 reclaim_http_handler(struct comm_point* c)
2193 {
2194         log_assert(c->type == comm_http);
2195         if(c->ssl) {
2196 #ifdef HAVE_SSL
2197                 SSL_shutdown(c->ssl);
2198                 SSL_free(c->ssl);
2199                 c->ssl = NULL;
2200 #endif
2201         }
2202         comm_point_close(c);
2203         if(c->tcp_parent) {
2204                 c->tcp_parent->cur_tcp_count--;
2205                 c->tcp_free = c->tcp_parent->tcp_free;
2206                 c->tcp_parent->tcp_free = c;
2207                 if(!c->tcp_free) {
2208                         /* re-enable listening on accept socket */
2209                         comm_point_start_listening(c->tcp_parent, -1, -1);
2210                 }
2211         }
2212 }
2213
/**
 * Read more data for http (with ssl) into c->buffer at its current
 * position, and advance the position by the number of bytes read.
 * @param c: comm point with an SSL connection.
 * @return 0 when the connection is closed or failed, 1 when data was read
 *	or the read has to be retried later. Always 0 without HAVE_SSL.
 */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r > 0) {
		verbose(VERB_ALGO, "ssl http read more skip to %d + %d",
			(int)sldns_buffer_position(c->buffer), (int)r);
		sldns_buffer_skip(c->buffer, (ssize_t)r);
		return 1;
	}
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* SSL wants to write first; wait for writability */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
2256
2257 /** read more data for http */
2258 static int
2259 http_read_more(int fd, struct comm_point* c)
2260 {
2261         ssize_t r;
2262         log_assert(sldns_buffer_remaining(c->buffer) > 0);
2263         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
2264                 sldns_buffer_remaining(c->buffer), 0);
2265         if(r == 0) {
2266                 return 0;
2267         } else if(r == -1) {
2268 #ifndef USE_WINSOCK
2269                 if(errno == EINTR || errno == EAGAIN)
2270                         return 1;
2271 #else /* USE_WINSOCK */
2272                 if(WSAGetLastError() == WSAECONNRESET)
2273                         return 0;
2274                 if(WSAGetLastError() == WSAEINPROGRESS)
2275                         return 1;
2276                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2277                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
2278                         return 1;
2279                 }
2280 #endif
2281                 log_err_addr("read (in http r)", sock_strerror(errno),
2282                         &c->repinfo.addr, c->repinfo.addrlen);
2283                 return 0;
2284         }
2285         verbose(VERB_ALGO, "http read more skip to %d + %d",
2286                 (int)sldns_buffer_position(c->buffer), (int)r);
2287         sldns_buffer_skip(c->buffer, r);
2288         return 1;
2289 }
2290
2291 /** return true if http header has been read (one line complete) */
2292 static int
2293 http_header_done(sldns_buffer* buf)
2294 {
2295         size_t i;
2296         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
2297                 /* there was a \r before the \n, but we ignore that */
2298                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
2299                         return 1;
2300         }
2301         return 0;
2302 }
2303
2304 /** return character string into buffer for header line, moves buffer
2305  * past that line and puts zero terminator into linefeed-newline */
2306 static char*
2307 http_header_line(sldns_buffer* buf)
2308 {
2309         char* result = (char*)sldns_buffer_current(buf);
2310         size_t i;
2311         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
2312                 /* terminate the string on the \r */
2313                 if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
2314                         sldns_buffer_write_u8_at(buf, i, 0);
2315                 /* terminate on the \n and skip past the it and done */
2316                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
2317                         sldns_buffer_write_u8_at(buf, i, 0);
2318                         sldns_buffer_set_position(buf, i+1);
2319                         return result;
2320                 }
2321         }
2322         return NULL;
2323 }
2324
2325 /** move unread buffer to start and clear rest for putting the rest into it */
2326 static void
2327 http_moveover_buffer(sldns_buffer* buf)
2328 {
2329         size_t pos = sldns_buffer_position(buf);
2330         size_t len = sldns_buffer_remaining(buf);
2331         sldns_buffer_clear(buf);
2332         memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
2333         sldns_buffer_set_position(buf, len);
2334 }
2335
2336 /** a http header is complete, process it */
2337 static int
2338 http_process_initial_header(struct comm_point* c)
2339 {
2340         char* line = http_header_line(c->buffer);
2341         if(!line) return 1;
2342         verbose(VERB_ALGO, "http header: %s", line);
2343         if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
2344                 /* check returncode */
2345                 if(line[9] != '2') {
2346                         verbose(VERB_ALGO, "http bad status %s", line+9);
2347                         return 0;
2348                 }
2349         } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
2350                 if(!c->http_is_chunked)
2351                         c->tcp_byte_count = (size_t)atoi(line+16);
2352         } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
2353                 c->tcp_byte_count = 0;
2354                 c->http_is_chunked = 1;
2355         } else if(line[0] == 0) {
2356                 /* end of initial headers */
2357                 c->http_in_headers = 0;
2358                 if(c->http_is_chunked)
2359                         c->http_in_chunk_headers = 1;
2360                 /* remove header text from front of buffer
2361                  * the buffer is going to be used to return the data segment
2362                  * itself and we don't want the header to get returned
2363                  * prepended with it */
2364                 http_moveover_buffer(c->buffer);
2365                 sldns_buffer_flip(c->buffer);
2366                 return 1;
2367         }
2368         /* ignore other headers */
2369         return 1;
2370 }
2371
2372 /** a chunk header is complete, process it, return 0=fail, 1=continue next
2373  * header line, 2=done with chunked transfer*/
2374 static int
2375 http_process_chunk_header(struct comm_point* c)
2376 {
2377         char* line = http_header_line(c->buffer);
2378         if(!line) return 1;
2379         if(c->http_in_chunk_headers == 3) {
2380                 verbose(VERB_ALGO, "http chunk trailer: %s", line);
2381                 /* are we done ? */
2382                 if(line[0] == 0 && c->tcp_byte_count == 0) {
2383                         /* callback of http reader when NETEVENT_DONE,
2384                          * end of data, with no data in buffer */
2385                         sldns_buffer_set_position(c->buffer, 0);
2386                         sldns_buffer_set_limit(c->buffer, 0);
2387                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2388                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
2389                         /* return that we are done */
2390                         return 2;
2391                 }
2392                 if(line[0] == 0) {
2393                         /* continue with header of the next chunk */
2394                         c->http_in_chunk_headers = 1;
2395                         /* remove header text from front of buffer */
2396                         http_moveover_buffer(c->buffer);
2397                         sldns_buffer_flip(c->buffer);
2398                         return 1;
2399                 }
2400                 /* ignore further trail headers */
2401                 return 1;
2402         }
2403         verbose(VERB_ALGO, "http chunk header: %s", line);
2404         if(c->http_in_chunk_headers == 1) {
2405                 /* read chunked start line */
2406                 char* end = NULL;
2407                 c->tcp_byte_count = (size_t)strtol(line, &end, 16);
2408                 if(end == line)
2409                         return 0;
2410                 c->http_in_chunk_headers = 0;
2411                 /* remove header text from front of buffer */
2412                 http_moveover_buffer(c->buffer);
2413                 sldns_buffer_flip(c->buffer);
2414                 if(c->tcp_byte_count == 0) {
2415                         /* done with chunks, process chunk_trailer lines */
2416                         c->http_in_chunk_headers = 3;
2417                 }
2418                 return 1;
2419         }
2420         /* ignore other headers */
2421         return 1;
2422 }
2423
2424 /** handle nonchunked data segment, 0=fail, 1=wait */
2425 static int
2426 http_nonchunk_segment(struct comm_point* c)
2427 {
2428         /* c->buffer at position..limit has new data we read in.
2429          * the buffer itself is full of nonchunked data.
2430          * we are looking to read tcp_byte_count more data
2431          * and then the transfer is done. */
2432         size_t remainbufferlen;
2433         size_t got_now = sldns_buffer_limit(c->buffer);
2434         if(c->tcp_byte_count <= got_now) {
2435                 /* done, this is the last data fragment */
2436                 c->http_stored = 0;
2437                 sldns_buffer_set_position(c->buffer, 0);
2438                 fptr_ok(fptr_whitelist_comm_point(c->callback));
2439                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
2440                 return 1;
2441         }
2442         /* if we have the buffer space,
2443          * read more data collected into the buffer */
2444         remainbufferlen = sldns_buffer_capacity(c->buffer) -
2445                 sldns_buffer_limit(c->buffer);
2446         if(remainbufferlen+got_now >= c->tcp_byte_count ||
2447                 remainbufferlen >= (c->ssl?16384:2048)) {
2448                 size_t total = sldns_buffer_limit(c->buffer);
2449                 sldns_buffer_clear(c->buffer);
2450                 sldns_buffer_set_position(c->buffer, total);
2451                 c->http_stored = total;
2452                 /* return and wait to read more */
2453                 return 1;
2454         }
2455         /* call callback with this data amount, then
2456          * wait for more */
2457         c->tcp_byte_count -= got_now;
2458         c->http_stored = 0;
2459         sldns_buffer_set_position(c->buffer, 0);
2460         fptr_ok(fptr_whitelist_comm_point(c->callback));
2461         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2462         /* c->callback has to buffer_clear(c->buffer). */
2463         /* return and wait to read more */
2464         return 1;
2465 }
2466
2467 /** handle chunked data segment, return 0=fail, 1=wait, 2=process more */
2468 static int
2469 http_chunked_segment(struct comm_point* c)
2470 {
2471         /* the c->buffer has from position..limit new data we read. */
2472         /* the current chunk has length tcp_byte_count.
2473          * once we read that read more chunk headers.
2474          */
2475         size_t remainbufferlen;
2476         size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
2477         verbose(VERB_ALGO, "http_chunked_segment: got now %d, tcpbytcount %d, http_stored %d, buffer pos %d, buffer limit %d", (int)got_now, (int)c->tcp_byte_count, (int)c->http_stored, (int)sldns_buffer_position(c->buffer), (int)sldns_buffer_limit(c->buffer));
2478         if(c->tcp_byte_count <= got_now) {
2479                 /* the chunk has completed (with perhaps some extra data
2480                  * from next chunk header and next chunk) */
2481                 /* save too much info into temp buffer */
2482                 size_t fraglen;
2483                 struct comm_reply repinfo;
2484                 c->http_stored = 0;
2485                 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
2486                 sldns_buffer_clear(c->http_temp);
2487                 sldns_buffer_write(c->http_temp,
2488                         sldns_buffer_current(c->buffer),
2489                         sldns_buffer_remaining(c->buffer));
2490                 sldns_buffer_flip(c->http_temp);
2491
2492                 /* callback with this fragment */
2493                 fraglen = sldns_buffer_position(c->buffer);
2494                 sldns_buffer_set_position(c->buffer, 0);
2495                 sldns_buffer_set_limit(c->buffer, fraglen);
2496                 repinfo = c->repinfo;
2497                 fptr_ok(fptr_whitelist_comm_point(c->callback));
2498                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
2499                 /* c->callback has to buffer_clear(). */
2500
2501                 /* is commpoint deleted? */
2502                 if(!repinfo.c) {
2503                         return 1;
2504                 }
2505                 /* copy waiting info */
2506                 sldns_buffer_clear(c->buffer);
2507                 sldns_buffer_write(c->buffer,
2508                         sldns_buffer_begin(c->http_temp),
2509                         sldns_buffer_remaining(c->http_temp));
2510                 sldns_buffer_flip(c->buffer);
2511                 /* process end of chunk trailer header lines, until
2512                  * an empty line */
2513                 c->http_in_chunk_headers = 3;
2514                 /* process more data in buffer (if any) */
2515                 return 2;
2516         }
2517         c->tcp_byte_count -= got_now;
2518
2519         /* if we have the buffer space,
2520          * read more data collected into the buffer */
2521         remainbufferlen = sldns_buffer_capacity(c->buffer) -
2522                 sldns_buffer_limit(c->buffer);
2523         if(remainbufferlen >= c->tcp_byte_count ||
2524                 remainbufferlen >= 2048) {
2525                 size_t total = sldns_buffer_limit(c->buffer);
2526                 sldns_buffer_clear(c->buffer);
2527                 sldns_buffer_set_position(c->buffer, total);
2528                 c->http_stored = total;
2529                 /* return and wait to read more */
2530                 return 1;
2531         }
2532         
2533         /* callback of http reader for a new part of the data */
2534         c->http_stored = 0;
2535         sldns_buffer_set_position(c->buffer, 0);
2536         fptr_ok(fptr_whitelist_comm_point(c->callback));
2537         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2538         /* c->callback has to buffer_clear(c->buffer). */
2539         /* return and wait to read more */
2540         return 1;
2541 }
2542
2543 #ifdef HAVE_NGHTTP2
2544 /** Create new http2 session. Called when creating handling comm point. */
2545 static struct http2_session* http2_session_create(struct comm_point* c)
2546 {
2547         struct http2_session* session = calloc(1, sizeof(*session));
2548         if(!session) {
2549                 log_err("malloc failure while creating http2 session");
2550                 return NULL;
2551         }
2552         session->c = c;
2553
2554         return session;
2555 }
2556 #endif
2557
/**
 * Delete http2 session. After closing connection or on error.
 * Releases the nghttp2 callbacks object (when set) and the session
 * structure itself. Does nothing when built without HAVE_NGHTTP2.
 */
static void http2_session_delete(struct http2_session* h2_session)
{
#ifndef HAVE_NGHTTP2
	(void)h2_session;
#else
	if(h2_session->callbacks != NULL)
		nghttp2_session_callbacks_del(h2_session->callbacks);
	free(h2_session);
#endif
}
2569
2570 #ifdef HAVE_NGHTTP2
2571 struct http2_stream* http2_stream_create(int32_t stream_id)
2572 {
2573         struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream));
2574         if(!h2_stream) {
2575                 log_err("malloc failure while creating http2 stream");
2576                 return NULL;
2577         }
2578         h2_stream->stream_id = stream_id;
2579         return h2_stream;
2580 }
2581
2582 /** Delete http2 stream. After session delete or stream close callback */
2583 static void http2_stream_delete(struct http2_session* h2_session,
2584         struct http2_stream* h2_stream)
2585 {
2586         if(h2_stream->mesh_state) {
2587                 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state,
2588                         h2_session->c);
2589                 h2_stream->mesh_state = NULL;
2590         }
2591         http2_req_stream_clear(h2_stream);
2592         free(h2_stream);
2593 }
2594 #endif
2595
2596 void http2_stream_add_meshstate(struct http2_stream* h2_stream,
2597         struct mesh_area* mesh, struct mesh_state* m)
2598 {
2599         h2_stream->mesh = mesh;
2600         h2_stream->mesh_state = m;
2601 }
2602
/**
 * Delete http2 session server. After closing connection.
 * Deletes the nghttp2 session, deletes every stream on the session's
 * stream list, and resets the drop flags and the comm point's h2_stream
 * pointer. Does nothing when built without HAVE_NGHTTP2.
 */
static void http2_session_server_delete(struct http2_session* h2_session)
{
#ifdef HAVE_NGHTTP2
	struct http2_stream* cur;
	nghttp2_session_del(h2_session->session); /* NULL input is fine */
	h2_session->session = NULL;
	cur = h2_session->first_stream;
	while(cur) {
		/* fetch the successor before the stream is freed */
		struct http2_stream* following = cur->next;
		http2_stream_delete(h2_session, cur);
		cur = following;
	}
	h2_session->first_stream = NULL;
	h2_session->is_drop = 0;
	h2_session->postpone_drop = 0;
	h2_session->c->h2_stream = NULL;
#endif
	(void)h2_session;
}
2622
2623 #ifdef HAVE_NGHTTP2
2624 void http2_session_add_stream(struct http2_session* h2_session,
2625         struct http2_stream* h2_stream)
2626 {
2627         if(h2_session->first_stream)
2628                 h2_session->first_stream->prev = h2_stream;
2629         h2_stream->next = h2_session->first_stream;
2630         h2_session->first_stream = h2_stream;
2631 }
2632
2633 /** remove stream from session linked list. After stream close callback or
2634  * closing connection */
2635 static void http2_session_remove_stream(struct http2_session* h2_session,
2636         struct http2_stream* h2_stream)
2637 {
2638         if(h2_stream->prev)
2639                 h2_stream->prev->next = h2_stream->next;
2640         else
2641                 h2_session->first_stream = h2_stream->next;
2642         if(h2_stream->next)
2643                 h2_stream->next->prev = h2_stream->prev;
2644
2645 }
2646
2647 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session),
2648         int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg)
2649 {
2650         struct http2_stream* h2_stream;
2651         struct http2_session* h2_session = (struct http2_session*)cb_arg;
2652         if(!(h2_stream = nghttp2_session_get_stream_user_data(
2653                 h2_session->session, stream_id))) {
2654                 return 0;
2655         }
2656         http2_session_remove_stream(h2_session, h2_stream);
2657         http2_stream_delete(h2_session, h2_stream);
2658         return 0;
2659 }
2660
2661 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf,
2662         size_t len, int ATTR_UNUSED(flags), void* cb_arg)
2663 {
2664         struct http2_session* h2_session = (struct http2_session*)cb_arg;
2665         ssize_t ret;
2666
2667         log_assert(h2_session->c->type == comm_http);
2668         log_assert(h2_session->c->h2_session);
2669
2670 #ifdef HAVE_SSL
2671         if(h2_session->c->ssl) {
2672                 int r;
2673                 ERR_clear_error();
2674                 r = SSL_read(h2_session->c->ssl, buf, len);
2675                 if(r <= 0) {
2676                         int want = SSL_get_error(h2_session->c->ssl, r);
2677                         if(want == SSL_ERROR_ZERO_RETURN) {
2678                                 return NGHTTP2_ERR_EOF;
2679                         } else if(want == SSL_ERROR_WANT_READ) {
2680                                 return NGHTTP2_ERR_WOULDBLOCK;
2681                         } else if(want == SSL_ERROR_WANT_WRITE) {
2682                                 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write;
2683                                 comm_point_listen_for_rw(h2_session->c, 0, 1);
2684                                 return NGHTTP2_ERR_WOULDBLOCK;
2685                         } else if(want == SSL_ERROR_SYSCALL) {
2686 #ifdef ECONNRESET
2687                                 if(errno == ECONNRESET && verbosity < 2)
2688                                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2689 #endif
2690                                 if(errno != 0)
2691                                         log_err("SSL_read syscall: %s",
2692                                                 strerror(errno));
2693                                 return NGHTTP2_ERR_CALLBACK_FAILURE;
2694                         }
2695                         log_crypto_err("could not SSL_read");
2696                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2697                 }
2698                 return r;
2699         }
2700 #endif /* HAVE_SSL */
2701
2702         ret = recv(h2_session->c->fd, buf, len, 0);
2703         if(ret == 0) {
2704                 return NGHTTP2_ERR_EOF;
2705         } else if(ret < 0) {
2706 #ifndef USE_WINSOCK
2707                 if(errno == EINTR || errno == EAGAIN)
2708                         return NGHTTP2_ERR_WOULDBLOCK;
2709 #ifdef ECONNRESET
2710                 if(errno == ECONNRESET && verbosity < 2)
2711                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2712 #endif
2713                 log_err_addr("could not http2 recv: %s", strerror(errno),
2714                         &h2_session->c->repinfo.addr,
2715                         h2_session->c->repinfo.addrlen);
2716 #else /* USE_WINSOCK */
2717                 if(WSAGetLastError() == WSAECONNRESET)
2718                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2719                 if(WSAGetLastError() == WSAEINPROGRESS)
2720                         return NGHTTP2_ERR_WOULDBLOCK;
2721                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2722                         ub_winsock_tcp_wouldblock(h2_session->c->ev->ev,
2723                                 UB_EV_READ);
2724                         return NGHTTP2_ERR_WOULDBLOCK;
2725                 }
2726                 log_err_addr("could not http2 recv: %s",
2727                         wsa_strerror(WSAGetLastError()),
2728                         &h2_session->c->repinfo.addr,
2729                         h2_session->c->repinfo.addrlen);
2730 #endif
2731                 return NGHTTP2_ERR_CALLBACK_FAILURE;
2732         }
2733         return ret;
2734 }
2735 #endif /* HAVE_NGHTTP2 */
2736
/**
 * Handle http2 read.
 * Lets nghttp2 receive on the session. If nghttp2 then wants to write,
 * the comm point is switched from reading to writing and re-registered
 * with the adjusted tcp timeout.
 * @param c: comm point with a http2 session.
 * @return 0 when the receive failed or the session wants neither write
 *	nor read (connection can be closed), otherwise 1. Always 0 without
 *	HAVE_NGHTTP2.
 */
static int
comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
	int ret;
	log_assert(c->h2_session);

	/* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */
	ret = nghttp2_session_recv(c->h2_session->session);
	if(ret != 0) {
		/* EOF and callback failure are not logged here */
		if(!(ret == NGHTTP2_ERR_EOF ||
			ret == NGHTTP2_ERR_CALLBACK_FAILURE)) {
			char a[256];
			addr_to_str(&c->repinfo.addr, c->repinfo.addrlen,
				a, sizeof(a));
			verbose(VERB_QUERY, "http2: session_recv from %s failed, "
				"error: %s", a, nghttp2_strerror(ret));
		}
		return 0;
	}
	if(nghttp2_session_want_write(c->h2_session->session)) {
		/* switch over to writing */
		c->tcp_is_reading = 0;
		comm_point_stop_listening(c);
		comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
		return 1;
	}
	if(!nghttp2_session_want_read(c->h2_session->session))
		return 0; /* connection can be closed */
	return 1;
#else
	(void)c;
	return 0;
#endif
}
2770
2771 /**
2772  * Handle http reading callback.
2773  * @param fd: file descriptor of socket.
2774  * @param c: comm point to read from into buffer.
2775  * @return: 0 on error
2776  */
2777 static int
2778 comm_point_http_handle_read(int fd, struct comm_point* c)
2779 {
2780         log_assert(c->type == comm_http);
2781         log_assert(fd != -1);
2782
2783         /* if we are in ssl handshake, handle SSL handshake */
2784 #ifdef HAVE_SSL
2785         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2786                 if(!ssl_handshake(c))
2787                         return 0;
2788                 if(c->ssl_shake_state != comm_ssl_shake_none)
2789                         return 1;
2790         }
2791 #endif /* HAVE_SSL */
2792
2793         if(!c->tcp_is_reading)
2794                 return 1;
2795
2796         if(c->use_h2) {
2797                 return comm_point_http2_handle_read(fd, c);
2798         }
2799
2800         /* http version is <= http/1.1 */
2801
2802         if(c->http_min_version >= http_version_2) {
2803                 /* HTTP/2 failed, not allowed to use lower version. */
2804                 return 0;
2805         }
2806
2807         /* read more data */
2808         if(c->ssl) {
2809                 if(!ssl_http_read_more(c))
2810                         return 0;
2811         } else {
2812                 if(!http_read_more(fd, c))
2813                         return 0;
2814         }
2815
2816         if(c->http_stored >= sldns_buffer_position(c->buffer)) {
2817                 /* read did not work but we wanted more data, there is
2818                  * no bytes to process now. */
2819                 return 1;
2820         }
2821         sldns_buffer_flip(c->buffer);
2822         /* if we are partway in a segment of data, position us at the point
2823          * where we left off previously */
2824         if(c->http_stored < sldns_buffer_limit(c->buffer))
2825                 sldns_buffer_set_position(c->buffer, c->http_stored);
2826         else    sldns_buffer_set_position(c->buffer, sldns_buffer_limit(c->buffer));
2827
2828         while(sldns_buffer_remaining(c->buffer) > 0) {
2829                 /* Handle HTTP/1.x data */
2830                 /* if we are reading headers, read more headers */
2831                 if(c->http_in_headers || c->http_in_chunk_headers) {
2832                         /* if header is done, process the header */
2833                         if(!http_header_done(c->buffer)) {
2834                                 /* copy remaining data to front of buffer
2835                                  * and set rest for writing into it */
2836                                 http_moveover_buffer(c->buffer);
2837                                 /* return and wait to read more */
2838                                 return 1;
2839                         }
2840                         if(!c->http_in_chunk_headers) {
2841                                 /* process initial headers */
2842                                 if(!http_process_initial_header(c))
2843                                         return 0;
2844                         } else {
2845                                 /* process chunk headers */
2846                                 int r = http_process_chunk_header(c);
2847                                 if(r == 0) return 0;
2848                                 if(r == 2) return 1; /* done */
2849                                 /* r == 1, continue */
2850                         }
2851                         /* see if we have more to process */
2852                         continue;
2853                 }
2854
2855                 if(!c->http_is_chunked) {
2856                         /* if we are reading nonchunks, process that*/
2857                         return http_nonchunk_segment(c);
2858                 } else {
2859                         /* if we are reading chunks, read the chunk */
2860                         int r = http_chunked_segment(c);
2861                         if(r == 0) return 0;
2862                         if(r == 1) return 1;
2863                         continue;
2864                 }
2865         }
2866         /* broke out of the loop; could not process header instead need
2867          * to read more */
2868         /* moveover any remaining data and read more data */
2869         http_moveover_buffer(c->buffer);
2870         /* return and wait to read more */
2871         return 1;
2872 }
2873
2874 /** check pending connect for http */
2875 static int
2876 http_check_connect(int fd, struct comm_point* c)
2877 {
2878         /* check for pending error from nonblocking connect */
2879         /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
2880         int error = 0;
2881         socklen_t len = (socklen_t)sizeof(error);
2882         if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
2883                 &len) < 0){
2884 #ifndef USE_WINSOCK
2885                 error = errno; /* on solaris errno is error */
2886 #else /* USE_WINSOCK */
2887                 error = WSAGetLastError();
2888 #endif
2889         }
2890 #ifndef USE_WINSOCK
2891 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
2892         if(error == EINPROGRESS || error == EWOULDBLOCK)
2893                 return 1; /* try again later */
2894         else
2895 #endif
2896         if(error != 0 && verbosity < 2)
2897                 return 0; /* silence lots of chatter in the logs */
2898         else if(error != 0) {
2899                 log_err_addr("http connect", strerror(error),
2900                         &c->repinfo.addr, c->repinfo.addrlen);
2901 #else /* USE_WINSOCK */
2902         /* examine error */
2903         if(error == WSAEINPROGRESS)
2904                 return 1;
2905         else if(error == WSAEWOULDBLOCK) {
2906                 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2907                 return 1;
2908         } else if(error != 0 && verbosity < 2)
2909                 return 0;
2910         else if(error != 0) {
2911                 log_err_addr("http connect", wsa_strerror(error),
2912                         &c->repinfo.addr, c->repinfo.addrlen);
2913 #endif /* USE_WINSOCK */
2914                 return 0;
2915         }
2916         /* keep on processing this socket */
2917         return 2;
2918 }
2919
/**
 * Write more of the comm point buffer over the SSL connection, for http.
 * Written bytes are skipped in the buffer.
 * @param c: comm point with ssl and a buffer that has bytes remaining.
 * @return 0 on error or closed connection (also when compiled without SSL),
 *	1 if bytes were written or the write has to be retried later.
 */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int written;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	written = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(written > 0) {
		/* success, advance past what has been sent */
		sldns_buffer_skip(c->buffer, (ssize_t)written);
		return 1;
	}

	switch(SSL_get_error(c->ssl, written)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		/* SSL needs to read first; switch the event to reading */
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	case SSL_ERROR_WANT_WRITE:
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
2960
2961 /** write more data for http */
2962 static int
2963 http_write_more(int fd, struct comm_point* c)
2964 {
2965         ssize_t r;
2966         log_assert(sldns_buffer_remaining(c->buffer) > 0);
2967         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
2968                 sldns_buffer_remaining(c->buffer), 0);
2969         if(r == -1) {
2970 #ifndef USE_WINSOCK
2971                 if(errno == EINTR || errno == EAGAIN)
2972                         return 1;
2973 #else
2974                 if(WSAGetLastError() == WSAEINPROGRESS)
2975                         return 1;
2976                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2977                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2978                         return 1; 
2979                 }
2980 #endif
2981                 log_err_addr("http send r", sock_strerror(errno),
2982                         &c->repinfo.addr, c->repinfo.addrlen);
2983                 return 0;
2984         }
2985         sldns_buffer_skip(c->buffer, r);
2986         return 1;
2987 }
2988
#ifdef HAVE_NGHTTP2
/**
 * nghttp2 send callback: write http2 session data to the connection.
 * Uses SSL_write if the comm point has ssl, otherwise send() on the socket.
 * @param session: unused.
 * @param buf: data to write.
 * @param len: number of bytes in buf.
 * @param flags: unused.
 * @param cb_arg: the struct http2_session for this connection.
 * @return number of bytes written, NGHTTP2_ERR_WOULDBLOCK if the write has
 *	to be retried later, or NGHTTP2_ERR_CALLBACK_FAILURE on error or
 *	closed connection.
 */
ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf,
	size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
	ssize_t ret;
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	log_assert(h2_session->c->type == comm_http);
	log_assert(h2_session->c->h2_session);

#ifdef HAVE_SSL
	if(h2_session->c->ssl) {
		int r;
		ERR_clear_error();
		/* SSL_write takes an int length, make the narrowing explicit */
		r = SSL_write(h2_session->c->ssl, buf, (int)len);
		if(r <= 0) {
			int want = SSL_get_error(h2_session->c->ssl, r);
			if(want == SSL_ERROR_ZERO_RETURN) {
				return NGHTTP2_ERR_CALLBACK_FAILURE;
			} else if(want == SSL_ERROR_WANT_READ) {
				/* SSL needs to read first; listen for read */
				h2_session->c->ssl_shake_state = comm_ssl_shake_hs_read;
				comm_point_listen_for_rw(h2_session->c, 1, 0);
				return NGHTTP2_ERR_WOULDBLOCK;
			} else if(want == SSL_ERROR_WANT_WRITE) {
				return NGHTTP2_ERR_WOULDBLOCK;
			} else if(want == SSL_ERROR_SYSCALL) {
#ifdef EPIPE
				/* silence 'broken pipe' */
				if(errno == EPIPE && verbosity < 2)
					return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
				if(errno != 0)
					log_err("SSL_write syscall: %s",
						strerror(errno));
				return NGHTTP2_ERR_CALLBACK_FAILURE;
			}
			log_crypto_err("could not SSL_write");
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		}
		return r;
	}
#endif /* HAVE_SSL */

	ret = send(h2_session->c->fd, buf, len, 0);
	if(ret == 0) {
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	} else if(ret < 0) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return NGHTTP2_ERR_WOULDBLOCK;
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
		/* the first argument is a plain label (as in the other
		 * log_err_addr calls in this file), the error text is
		 * passed separately; so no format specifier in it */
		log_err_addr("could not http2 write", strerror(errno),
			&h2_session->c->repinfo.addr,
			h2_session->c->repinfo.addrlen);
#else /* USE_WINSOCK */
		if(WSAGetLastError() == WSAENOTCONN)
			return NGHTTP2_ERR_WOULDBLOCK;
		if(WSAGetLastError() == WSAEINPROGRESS)
			return NGHTTP2_ERR_WOULDBLOCK;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(h2_session->c->ev->ev,
				UB_EV_WRITE);
			return NGHTTP2_ERR_WOULDBLOCK;
		}
		if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		/* plain label, the error text is passed separately */
		log_err_addr("could not http2 write",
			wsa_strerror(WSAGetLastError()),
			&h2_session->c->repinfo.addr,
			h2_session->c->repinfo.addrlen);
#endif
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}
	return ret;
}
#endif /* HAVE_NGHTTP2 */
3070
/**
 * Handle http2 writing: let nghttp2 send its pending session data.
 * If the session wants to read afterwards, the comm point is switched to
 * reading.
 * @param fd: unused.
 * @param c: comm point with an h2_session.
 * @return 0 on error, or if the session wants neither read nor write so
 *	that the connection can be closed (also when compiled without
 *	nghttp2); 1 otherwise.
 */
static int
comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
	int rv;
	log_assert(c->h2_session);

	rv = nghttp2_session_send(c->h2_session->session);
	if(rv != 0) {
		verbose(VERB_QUERY, "http2: session_send failed, "
			"error: %s", nghttp2_strerror(rv));
		return 0;
	}

	if(!nghttp2_session_want_read(c->h2_session->session)) {
		/* nothing to read; if also nothing to write the
		 * connection can be closed */
		if(!nghttp2_session_want_write(c->h2_session->session))
			return 0;
		return 1;
	}

	/* switch over to reading */
	c->tcp_is_reading = 1;
	comm_point_stop_listening(c);
	comm_point_start_listening(c, -1, adjusted_tcp_timeout(c));
	return 1;
#else
	(void)c;
	return 0;
#endif
}
3098
3099 /** 
3100  * Handle http writing callback. 
3101  * @param fd: file descriptor of socket.
3102  * @param c: comm point to write buffer out of.
3103  * @return: 0 on error
3104  */
3105 static int
3106 comm_point_http_handle_write(int fd, struct comm_point* c)
3107 {
3108         log_assert(c->type == comm_http);
3109         log_assert(fd != -1);
3110
3111         /* check pending connect errors, if that fails, we wait for more,
3112          * or we can continue to write contents */
3113         if(c->tcp_check_nb_connect) {
3114                 int r = http_check_connect(fd, c);
3115                 if(r == 0) return 0;
3116                 if(r == 1) return 1;
3117                 c->tcp_check_nb_connect = 0;
3118         }
3119         /* if we are in ssl handshake, handle SSL handshake */
3120 #ifdef HAVE_SSL
3121         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
3122                 if(!ssl_handshake(c))
3123                         return 0;
3124                 if(c->ssl_shake_state != comm_ssl_shake_none)
3125                         return 1;
3126         }
3127 #endif /* HAVE_SSL */
3128         if(c->tcp_is_reading)
3129                 return 1;
3130
3131         if(c->use_h2) {
3132                 return comm_point_http2_handle_write(fd, c);
3133         }
3134
3135         /* http version is <= http/1.1 */
3136
3137         if(c->http_min_version >= http_version_2) {
3138                 /* HTTP/2 failed, not allowed to use lower version. */
3139                 return 0;
3140         }
3141
3142         /* if we are writing, write more */
3143         if(c->ssl) {
3144                 if(!ssl_http_write_more(c))
3145                         return 0;
3146         } else {
3147                 if(!http_write_more(fd, c))
3148                         return 0;
3149         }
3150
3151         /* we write a single buffer contents, that can contain
3152          * the http request, and then flip to read the results */
3153         /* see if write is done */
3154         if(sldns_buffer_remaining(c->buffer) == 0) {
3155                 sldns_buffer_clear(c->buffer);
3156                 if(c->tcp_do_toggle_rw)
3157                         c->tcp_is_reading = 1;
3158                 c->tcp_byte_count = 0;
3159                 /* switch from listening(write) to listening(read) */
3160                 comm_point_stop_listening(c);
3161                 comm_point_start_listening(c, -1, -1);
3162         }
3163         return 1;
3164 }
3165
3166 void 
3167 comm_point_http_handle_callback(int fd, short event, void* arg)
3168 {
3169         struct comm_point* c = (struct comm_point*)arg;
3170         log_assert(c->type == comm_http);
3171         ub_comm_base_now(c->ev->base);
3172
3173         if(event&UB_EV_TIMEOUT) {
3174                 verbose(VERB_QUERY, "http took too long, dropped");
3175                 reclaim_http_handler(c);
3176                 if(!c->tcp_do_close) {
3177                         fptr_ok(fptr_whitelist_comm_point(c->callback));
3178                         (void)(*c->callback)(c, c->cb_arg,
3179                                 NETEVENT_TIMEOUT, NULL);
3180                 }
3181                 return;
3182         }
3183         if(event&UB_EV_READ) {
3184                 if(!comm_point_http_handle_read(fd, c)) {
3185                         reclaim_http_handler(c);
3186                         if(!c->tcp_do_close) {
3187                                 fptr_ok(fptr_whitelist_comm_point(
3188                                         c->callback));
3189                                 (void)(*c->callback)(c, c->cb_arg,
3190                                         NETEVENT_CLOSED, NULL);
3191                         }
3192                 }
3193                 return;
3194         }
3195         if(event&UB_EV_WRITE) {
3196                 if(!comm_point_http_handle_write(fd, c)) {
3197                         reclaim_http_handler(c);
3198                         if(!c->tcp_do_close) {
3199                                 fptr_ok(fptr_whitelist_comm_point(
3200                                         c->callback));
3201                                 (void)(*c->callback)(c, c->cb_arg,
3202                                         NETEVENT_CLOSED, NULL);
3203                         }
3204                 }
3205                 return;
3206         }
3207         log_err("Ignored event %d for httphdl.", event);
3208 }
3209
3210 void comm_point_local_handle_callback(int fd, short event, void* arg)
3211 {
3212         struct comm_point* c = (struct comm_point*)arg;
3213         log_assert(c->type == comm_local);
3214         ub_comm_base_now(c->ev->base);
3215
3216         if(event&UB_EV_READ) {
3217                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
3218                         fptr_ok(fptr_whitelist_comm_point(c->callback));
3219                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
3220                                 NULL);
3221                 }
3222                 return;
3223         }
3224         log_err("Ignored event %d for localhdl.", event);
3225 }
3226
3227 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
3228         short event, void* arg)
3229 {
3230         struct comm_point* c = (struct comm_point*)arg;
3231         int err = NETEVENT_NOERROR;
3232         log_assert(c->type == comm_raw);
3233         ub_comm_base_now(c->ev->base);
3234         
3235         if(event&UB_EV_TIMEOUT)
3236                 err = NETEVENT_TIMEOUT;
3237         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
3238         (void)(*c->callback)(c, c->cb_arg, err, NULL);
3239 }
3240
3241 struct comm_point* 
3242 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
3243         comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
3244 {
3245         struct comm_point* c = (struct comm_point*)calloc(1,
3246                 sizeof(struct comm_point));
3247         short evbits;
3248         if(!c)
3249                 return NULL;
3250         c->ev = (struct internal_event*)calloc(1,
3251                 sizeof(struct internal_event));
3252         if(!c->ev) {
3253                 free(c);
3254                 return NULL;
3255         }
3256         c->ev->base = base;
3257         c->fd = fd;
3258         c->buffer = buffer;
3259         c->timeout = NULL;
3260         c->tcp_is_reading = 0;
3261         c->tcp_byte_count = 0;
3262         c->tcp_parent = NULL;
3263         c->max_tcp_count = 0;
3264         c->cur_tcp_count = 0;
3265         c->tcp_handlers = NULL;
3266         c->tcp_free = NULL;
3267         c->type = comm_udp;
3268         c->tcp_do_close = 0;
3269         c->do_not_close = 0;
3270         c->tcp_do_toggle_rw = 0;
3271         c->tcp_check_nb_connect = 0;
3272 #ifdef USE_MSG_FASTOPEN
3273         c->tcp_do_fastopen = 0;
3274 #endif
3275 #ifdef USE_DNSCRYPT
3276         c->dnscrypt = 0;
3277         c->dnscrypt_buffer = buffer;
3278 #endif
3279         c->inuse = 0;
3280         c->callback = callback;
3281         c->cb_arg = callback_arg;
3282         c->socket = socket;
3283         evbits = UB_EV_READ | UB_EV_PERSIST;
3284         /* ub_event stuff */
3285         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3286                 comm_point_udp_callback, c);
3287         if(c->ev->ev == NULL) {
3288                 log_err("could not baseset udp event");
3289                 comm_point_delete(c);
3290                 return NULL;
3291         }
3292         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
3293                 log_err("could not add udp event");
3294                 comm_point_delete(c);
3295                 return NULL;
3296         }
3297         c->event_added = 1;
3298         return c;
3299 }
3300
3301 struct comm_point* 
3302 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
3303         sldns_buffer* buffer, 
3304         comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
3305 {
3306         struct comm_point* c = (struct comm_point*)calloc(1,
3307                 sizeof(struct comm_point));
3308         short evbits;
3309         if(!c)
3310                 return NULL;
3311         c->ev = (struct internal_event*)calloc(1,
3312                 sizeof(struct internal_event));
3313         if(!c->ev) {
3314                 free(c);
3315                 return NULL;
3316         }
3317         c->ev->base = base;
3318         c->fd = fd;
3319         c->buffer = buffer;
3320         c->timeout = NULL;
3321         c->tcp_is_reading = 0;
3322         c->tcp_byte_count = 0;
3323         c->tcp_parent = NULL;
3324         c->max_tcp_count = 0;
3325         c->cur_tcp_count = 0;
3326         c->tcp_handlers = NULL;
3327         c->tcp_free = NULL;
3328         c->type = comm_udp;
3329         c->tcp_do_close = 0;
3330         c->do_not_close = 0;
3331 #ifdef USE_DNSCRYPT
3332         c->dnscrypt = 0;
3333         c->dnscrypt_buffer = buffer;
3334 #endif
3335         c->inuse = 0;
3336         c->tcp_do_toggle_rw = 0;
3337         c->tcp_check_nb_connect = 0;
3338 #ifdef USE_MSG_FASTOPEN
3339         c->tcp_do_fastopen = 0;
3340 #endif
3341         c->callback = callback;
3342         c->cb_arg = callback_arg;
3343         c->socket = socket;
3344         evbits = UB_EV_READ | UB_EV_PERSIST;
3345         /* ub_event stuff */
3346         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3347                 comm_point_udp_ancil_callback, c);
3348         if(c->ev->ev == NULL) {
3349                 log_err("could not baseset udp event");
3350                 comm_point_delete(c);
3351                 return NULL;
3352         }
3353         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
3354                 log_err("could not add udp event");
3355                 comm_point_delete(c);
3356                 return NULL;
3357         }
3358         c->event_added = 1;
3359         return c;
3360 }
3361
3362 static struct comm_point* 
3363 comm_point_create_tcp_handler(struct comm_base *base, 
3364         struct comm_point* parent, size_t bufsize,
3365         struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
3366         void* callback_arg, struct unbound_socket* socket)
3367 {
3368         struct comm_point* c = (struct comm_point*)calloc(1,
3369                 sizeof(struct comm_point));
3370         short evbits;
3371         if(!c)
3372                 return NULL;
3373         c->ev = (struct internal_event*)calloc(1,
3374                 sizeof(struct internal_event));
3375         if(!c->ev) {
3376                 free(c);
3377                 return NULL;
3378         }
3379         c->ev->base = base;
3380         c->fd = -1;
3381         c->buffer = sldns_buffer_new(bufsize);
3382         if(!c->buffer) {
3383                 free(c->ev);
3384                 free(c);
3385                 return NULL;
3386         }
3387         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
3388         if(!c->timeout) {
3389                 sldns_buffer_free(c->buffer);
3390                 free(c->ev);
3391                 free(c);
3392                 return NULL;
3393         }
3394         c->tcp_is_reading = 0;
3395         c->tcp_byte_count = 0;
3396         c->tcp_parent = parent;
3397         c->tcp_timeout_msec = parent->tcp_timeout_msec;
3398         c->tcp_conn_limit = parent->tcp_conn_limit;
3399         c->tcl_addr = NULL;
3400         c->tcp_keepalive = 0;
3401         c->max_tcp_count = 0;
3402         c->cur_tcp_count = 0;
3403         c->tcp_handlers = NULL;
3404         c->tcp_free = NULL;
3405         c->type = comm_tcp;
3406         c->tcp_do_close = 0;
3407         c->do_not_close = 0;
3408         c->tcp_do_toggle_rw = 1;
3409         c->tcp_check_nb_connect = 0;
3410 #ifdef USE_MSG_FASTOPEN
3411         c->tcp_do_fastopen = 0;
3412 #endif
3413 #ifdef USE_DNSCRYPT
3414         c->dnscrypt = 0;
3415         /* We don't know just yet if this is a dnscrypt channel. Allocation
3416          * will be done when handling the callback. */
3417         c->dnscrypt_buffer = c->buffer;
3418 #endif
3419         c->repinfo.c = c;
3420         c->callback = callback;
3421         c->cb_arg = callback_arg;
3422         c->socket = socket;
3423         if(spoolbuf) {
3424                 c->tcp_req_info = tcp_req_info_create(spoolbuf);
3425                 if(!c->tcp_req_info) {
3426                         log_err("could not create tcp commpoint");
3427                         sldns_buffer_free(c->buffer);
3428                         free(c->timeout);
3429                         free(c->ev);
3430                         free(c);
3431                         return NULL;
3432                 }
3433                 c->tcp_req_info->cp = c;
3434                 c->tcp_do_close = 1;
3435                 c->tcp_do_toggle_rw = 0;
3436         }
3437         /* add to parent free list */
3438         c->tcp_free = parent->tcp_free;
3439         parent->tcp_free = c;
3440         /* ub_event stuff */
3441         evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
3442         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3443                 comm_point_tcp_handle_callback, c);
3444         if(c->ev->ev == NULL)
3445         {
3446                 log_err("could not basetset tcphdl event");
3447                 parent->tcp_free = c->tcp_free;
3448                 tcp_req_info_delete(c->tcp_req_info);
3449                 sldns_buffer_free(c->buffer);
3450                 free(c->timeout);
3451                 free(c->ev);
3452                 free(c);
3453                 return NULL;
3454         }
3455         return c;
3456 }
3457
3458 static struct comm_point* 
3459 comm_point_create_http_handler(struct comm_base *base, 
3460         struct comm_point* parent, size_t bufsize, int harden_large_queries,
3461         uint32_t http_max_streams, char* http_endpoint,
3462         comm_point_callback_type* callback, void* callback_arg,
3463         struct unbound_socket* socket)
3464 {
3465         struct comm_point* c = (struct comm_point*)calloc(1,
3466                 sizeof(struct comm_point));
3467         short evbits;
3468         if(!c)
3469                 return NULL;
3470         c->ev = (struct internal_event*)calloc(1,
3471                 sizeof(struct internal_event));
3472         if(!c->ev) {
3473                 free(c);
3474                 return NULL;
3475         }
3476         c->ev->base = base;
3477         c->fd = -1;
3478         c->buffer = sldns_buffer_new(bufsize);
3479         if(!c->buffer) {
3480                 free(c->ev);
3481                 free(c);
3482                 return NULL;
3483         }
3484         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
3485         if(!c->timeout) {
3486                 sldns_buffer_free(c->buffer);
3487                 free(c->ev);
3488                 free(c);
3489                 return NULL;
3490         }
3491         c->tcp_is_reading = 0;
3492         c->tcp_byte_count = 0;
3493         c->tcp_parent = parent;
3494         c->tcp_timeout_msec = parent->tcp_timeout_msec;
3495         c->tcp_conn_limit = parent->tcp_conn_limit;
3496         c->tcl_addr = NULL;
3497         c->tcp_keepalive = 0;
3498         c->max_tcp_count = 0;
3499         c->cur_tcp_count = 0;
3500         c->tcp_handlers = NULL;
3501         c->tcp_free = NULL;
3502         c->type = comm_http;
3503         c->tcp_do_close = 1;
3504         c->do_not_close = 0;
3505         c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */
3506         c->tcp_check_nb_connect = 0;
3507 #ifdef USE_MSG_FASTOPEN
3508         c->tcp_do_fastopen = 0;
3509 #endif
3510 #ifdef USE_DNSCRYPT
3511         c->dnscrypt = 0;
3512         c->dnscrypt_buffer = NULL;
3513 #endif
3514         c->repinfo.c = c;
3515         c->callback = callback;
3516         c->cb_arg = callback_arg;
3517         c->socket = socket;
3518
3519         c->http_min_version = http_version_2;
3520         c->http2_stream_max_qbuffer_size = bufsize;
3521         if(harden_large_queries && bufsize > 512)
3522                 c->http2_stream_max_qbuffer_size = 512;
3523         c->http2_max_streams = http_max_streams;
3524         if(!(c->http_endpoint = strdup(http_endpoint))) {
3525                 log_err("could not strdup http_endpoint");
3526                 sldns_buffer_free(c->buffer);
3527                 free(c->timeout);
3528                 free(c->ev);
3529                 free(c);
3530                 return NULL;
3531         }
3532         c->use_h2 = 0;
3533 #ifdef HAVE_NGHTTP2
3534         if(!(c->h2_session = http2_session_create(c))) {
3535                 log_err("could not create http2 session");
3536                 free(c->http_endpoint);
3537                 sldns_buffer_free(c->buffer);
3538                 free(c->timeout);
3539                 free(c->ev);
3540                 free(c);
3541                 return NULL;
3542         }
3543         if(!(c->h2_session->callbacks = http2_req_callbacks_create())) {
3544                 log_err("could not create http2 callbacks");
3545                 http2_session_delete(c->h2_session);
3546                 free(c->http_endpoint);
3547                 sldns_buffer_free(c->buffer);
3548                 free(c->timeout);
3549                 free(c->ev);
3550                 free(c);
3551                 return NULL;
3552         }
3553 #endif
3554         
3555         /* add to parent free list */
3556         c->tcp_free = parent->tcp_free;
3557         parent->tcp_free = c;
3558         /* ub_event stuff */
3559         evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
3560         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3561                 comm_point_http_handle_callback, c);
3562         if(c->ev->ev == NULL)
3563         {
3564                 log_err("could not set http handler event");
3565                 parent->tcp_free = c->tcp_free;
3566                 http2_session_delete(c->h2_session);
3567                 sldns_buffer_free(c->buffer);
3568                 free(c->timeout);
3569                 free(c->ev);
3570                 free(c);
3571                 return NULL;
3572         }
3573         return c;
3574 }
3575
3576 struct comm_point* 
3577 comm_point_create_tcp(struct comm_base *base, int fd, int num,
3578         int idle_timeout, int harden_large_queries,
3579         uint32_t http_max_streams, char* http_endpoint,
3580         struct tcl_list* tcp_conn_limit, size_t bufsize,
3581         struct sldns_buffer* spoolbuf, enum listen_type port_type,
3582         comm_point_callback_type* callback, void* callback_arg, struct unbound_socket* socket)
3583 {
3584         struct comm_point* c = (struct comm_point*)calloc(1,
3585                 sizeof(struct comm_point));
3586         short evbits;
3587         int i;
3588         /* first allocate the TCP accept listener */
3589         if(!c)
3590                 return NULL;
3591         c->ev = (struct internal_event*)calloc(1,
3592                 sizeof(struct internal_event));
3593         if(!c->ev) {
3594                 free(c);
3595                 return NULL;
3596         }
3597         c->ev->base = base;
3598         c->fd = fd;
3599         c->buffer = NULL;
3600         c->timeout = NULL;
3601         c->tcp_is_reading = 0;
3602         c->tcp_byte_count = 0;
3603         c->tcp_timeout_msec = idle_timeout;
3604         c->tcp_conn_limit = tcp_conn_limit;
3605         c->tcl_addr = NULL;
3606         c->tcp_keepalive = 0;
3607         c->tcp_parent = NULL;
3608         c->max_tcp_count = num;
3609         c->cur_tcp_count = 0;
3610         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
3611                 sizeof(struct comm_point*));
3612         if(!c->tcp_handlers) {
3613                 free(c->ev);
3614                 free(c);
3615                 return NULL;
3616         }
3617         c->tcp_free = NULL;
3618         c->type = comm_tcp_accept;
3619         c->tcp_do_close = 0;
3620         c->do_not_close = 0;
3621         c->tcp_do_toggle_rw = 0;
3622         c->tcp_check_nb_connect = 0;
3623 #ifdef USE_MSG_FASTOPEN
3624         c->tcp_do_fastopen = 0;
3625 #endif
3626 #ifdef USE_DNSCRYPT
3627         c->dnscrypt = 0;
3628         c->dnscrypt_buffer = NULL;
3629 #endif
3630         c->callback = NULL;
3631         c->cb_arg = NULL;
3632         c->socket = socket;
3633         evbits = UB_EV_READ | UB_EV_PERSIST;
3634         /* ub_event stuff */
3635         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3636                 comm_point_tcp_accept_callback, c);
3637         if(c->ev->ev == NULL) {
3638                 log_err("could not baseset tcpacc event");
3639                 comm_point_delete(c);
3640                 return NULL;
3641         }
3642         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
3643                 log_err("could not add tcpacc event");
3644                 comm_point_delete(c);
3645                 return NULL;
3646         }
3647         c->event_added = 1;
3648         /* now prealloc the handlers */
3649         for(i=0; i<num; i++) {
3650                 if(port_type == listen_type_tcp ||
3651                         port_type == listen_type_ssl ||
3652                         port_type == listen_type_tcp_dnscrypt) {
3653                         c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
3654                                 c, bufsize, spoolbuf, callback, callback_arg, socket);
3655                 } else if(port_type == listen_type_http) {
3656                         c->tcp_handlers[i] = comm_point_create_http_handler(
3657                                 base, c, bufsize, harden_large_queries,
3658                                 http_max_streams, http_endpoint,
3659                                 callback, callback_arg, socket);
3660                 }
3661                 else {
3662                         log_err("could not create tcp handler, unknown listen "
3663                                 "type");
3664                         return NULL;
3665                 }
3666                 if(!c->tcp_handlers[i]) {
3667                         comm_point_delete(c);
3668                         return NULL;
3669                 }
3670         }
3671         
3672         return c;
3673 }
3674
3675 struct comm_point* 
3676 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
3677         comm_point_callback_type* callback, void* callback_arg)
3678 {
3679         struct comm_point* c = (struct comm_point*)calloc(1,
3680                 sizeof(struct comm_point));
3681         short evbits;
3682         if(!c)
3683                 return NULL;
3684         c->ev = (struct internal_event*)calloc(1,
3685                 sizeof(struct internal_event));
3686         if(!c->ev) {
3687                 free(c);
3688                 return NULL;
3689         }
3690         c->ev->base = base;
3691         c->fd = -1;
3692         c->buffer = sldns_buffer_new(bufsize);
3693         if(!c->buffer) {
3694                 free(c->ev);
3695                 free(c);
3696                 return NULL;
3697         }
3698         c->timeout = NULL;
3699         c->tcp_is_reading = 0;
3700         c->tcp_byte_count = 0;
3701         c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
3702         c->tcp_conn_limit = NULL;
3703         c->tcl_addr = NULL;
3704         c->tcp_keepalive = 0;
3705         c->tcp_parent = NULL;
3706         c->max_tcp_count = 0;
3707         c->cur_tcp_count = 0;
3708         c->tcp_handlers = NULL;
3709         c->tcp_free = NULL;
3710         c->type = comm_tcp;
3711         c->tcp_do_close = 0;
3712         c->do_not_close = 0;
3713         c->tcp_do_toggle_rw = 1;
3714         c->tcp_check_nb_connect = 1;
3715 #ifdef USE_MSG_FASTOPEN
3716         c->tcp_do_fastopen = 1;
3717 #endif
3718 #ifdef USE_DNSCRYPT
3719         c->dnscrypt = 0;
3720         c->dnscrypt_buffer = c->buffer;
3721 #endif
3722         c->repinfo.c = c;
3723         c->callback = callback;
3724         c->cb_arg = callback_arg;
3725         evbits = UB_EV_PERSIST | UB_EV_WRITE;
3726         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3727                 comm_point_tcp_handle_callback, c);
3728         if(c->ev->ev == NULL)
3729         {
3730                 log_err("could not baseset tcpout event");
3731                 sldns_buffer_free(c->buffer);
3732                 free(c->ev);
3733                 free(c);
3734                 return NULL;
3735         }
3736
3737         return c;
3738 }
3739
3740 struct comm_point* 
3741 comm_point_create_http_out(struct comm_base *base, size_t bufsize,
3742         comm_point_callback_type* callback, void* callback_arg,
3743         sldns_buffer* temp)
3744 {
3745         struct comm_point* c = (struct comm_point*)calloc(1,
3746                 sizeof(struct comm_point));
3747         short evbits;
3748         if(!c)
3749                 return NULL;
3750         c->ev = (struct internal_event*)calloc(1,
3751                 sizeof(struct internal_event));
3752         if(!c->ev) {
3753                 free(c);
3754                 return NULL;
3755         }
3756         c->ev->base = base;
3757         c->fd = -1;
3758         c->buffer = sldns_buffer_new(bufsize);
3759         if(!c->buffer) {
3760                 free(c->ev);
3761                 free(c);
3762                 return NULL;
3763         }
3764         c->timeout = NULL;
3765         c->tcp_is_reading = 0;
3766         c->tcp_byte_count = 0;
3767         c->tcp_parent = NULL;
3768         c->max_tcp_count = 0;
3769         c->cur_tcp_count = 0;
3770         c->tcp_handlers = NULL;
3771         c->tcp_free = NULL;
3772         c->type = comm_http;
3773         c->tcp_do_close = 0;
3774         c->do_not_close = 0;
3775         c->tcp_do_toggle_rw = 1;
3776         c->tcp_check_nb_connect = 1;
3777         c->http_in_headers = 1;
3778         c->http_in_chunk_headers = 0;
3779         c->http_is_chunked = 0;
3780         c->http_temp = temp;
3781 #ifdef USE_MSG_FASTOPEN
3782         c->tcp_do_fastopen = 1;
3783 #endif
3784 #ifdef USE_DNSCRYPT
3785         c->dnscrypt = 0;
3786         c->dnscrypt_buffer = c->buffer;
3787 #endif
3788         c->repinfo.c = c;
3789         c->callback = callback;
3790         c->cb_arg = callback_arg;
3791         evbits = UB_EV_PERSIST | UB_EV_WRITE;
3792         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3793                 comm_point_http_handle_callback, c);
3794         if(c->ev->ev == NULL)
3795         {
3796                 log_err("could not baseset tcpout event");
3797 #ifdef HAVE_SSL
3798                 SSL_free(c->ssl);
3799 #endif
3800                 sldns_buffer_free(c->buffer);
3801                 free(c->ev);
3802                 free(c);
3803                 return NULL;
3804         }
3805
3806         return c;
3807 }
3808
3809 struct comm_point* 
3810 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
3811         comm_point_callback_type* callback, void* callback_arg)
3812 {
3813         struct comm_point* c = (struct comm_point*)calloc(1,
3814                 sizeof(struct comm_point));
3815         short evbits;
3816         if(!c)
3817                 return NULL;
3818         c->ev = (struct internal_event*)calloc(1,
3819                 sizeof(struct internal_event));
3820         if(!c->ev) {
3821                 free(c);
3822                 return NULL;
3823         }
3824         c->ev->base = base;
3825         c->fd = fd;
3826         c->buffer = sldns_buffer_new(bufsize);
3827         if(!c->buffer) {
3828                 free(c->ev);
3829                 free(c);
3830                 return NULL;
3831         }
3832         c->timeout = NULL;
3833         c->tcp_is_reading = 1;
3834         c->tcp_byte_count = 0;
3835         c->tcp_parent = NULL;
3836         c->max_tcp_count = 0;
3837         c->cur_tcp_count = 0;
3838         c->tcp_handlers = NULL;
3839         c->tcp_free = NULL;
3840         c->type = comm_local;
3841         c->tcp_do_close = 0;
3842         c->do_not_close = 1;
3843         c->tcp_do_toggle_rw = 0;
3844         c->tcp_check_nb_connect = 0;
3845 #ifdef USE_MSG_FASTOPEN
3846         c->tcp_do_fastopen = 0;
3847 #endif
3848 #ifdef USE_DNSCRYPT
3849         c->dnscrypt = 0;
3850         c->dnscrypt_buffer = c->buffer;
3851 #endif
3852         c->callback = callback;
3853         c->cb_arg = callback_arg;
3854         /* ub_event stuff */
3855         evbits = UB_EV_PERSIST | UB_EV_READ;
3856         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3857                 comm_point_local_handle_callback, c);
3858         if(c->ev->ev == NULL) {
3859                 log_err("could not baseset localhdl event");
3860                 free(c->ev);
3861                 free(c);
3862                 return NULL;
3863         }
3864         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
3865                 log_err("could not add localhdl event");
3866                 ub_event_free(c->ev->ev);
3867                 free(c->ev);
3868                 free(c);
3869                 return NULL;
3870         }
3871         c->event_added = 1;
3872         return c;
3873 }
3874
3875 struct comm_point* 
3876 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
3877         comm_point_callback_type* callback, void* callback_arg)
3878 {
3879         struct comm_point* c = (struct comm_point*)calloc(1,
3880                 sizeof(struct comm_point));
3881         short evbits;
3882         if(!c)
3883                 return NULL;
3884         c->ev = (struct internal_event*)calloc(1,
3885                 sizeof(struct internal_event));
3886         if(!c->ev) {
3887                 free(c);
3888                 return NULL;
3889         }
3890         c->ev->base = base;
3891         c->fd = fd;
3892         c->buffer = NULL;
3893         c->timeout = NULL;
3894         c->tcp_is_reading = 0;
3895         c->tcp_byte_count = 0;
3896         c->tcp_parent = NULL;
3897         c->max_tcp_count = 0;
3898         c->cur_tcp_count = 0;
3899         c->tcp_handlers = NULL;
3900         c->tcp_free = NULL;
3901         c->type = comm_raw;
3902         c->tcp_do_close = 0;
3903         c->do_not_close = 1;
3904         c->tcp_do_toggle_rw = 0;
3905         c->tcp_check_nb_connect = 0;
3906 #ifdef USE_MSG_FASTOPEN
3907         c->tcp_do_fastopen = 0;
3908 #endif
3909 #ifdef USE_DNSCRYPT
3910         c->dnscrypt = 0;
3911         c->dnscrypt_buffer = c->buffer;
3912 #endif
3913         c->callback = callback;
3914         c->cb_arg = callback_arg;
3915         /* ub_event stuff */
3916         if(writing)
3917                 evbits = UB_EV_PERSIST | UB_EV_WRITE;
3918         else    evbits = UB_EV_PERSIST | UB_EV_READ;
3919         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3920                 comm_point_raw_handle_callback, c);
3921         if(c->ev->ev == NULL) {
3922                 log_err("could not baseset rawhdl event");
3923                 free(c->ev);
3924                 free(c);
3925                 return NULL;
3926         }
3927         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
3928                 log_err("could not add rawhdl event");
3929                 ub_event_free(c->ev->ev);
3930                 free(c->ev);
3931                 free(c);
3932                 return NULL;
3933         }
3934         c->event_added = 1;
3935         return c;
3936 }
3937
3938 void 
3939 comm_point_close(struct comm_point* c)
3940 {
3941         if(!c)
3942                 return;
3943         if(c->fd != -1) {
3944                 verbose(5, "comm_point_close of %d: event_del", c->fd);
3945                 if(c->event_added) {
3946                         if(ub_event_del(c->ev->ev) != 0) {
3947                                 log_err("could not event_del on close");
3948                         }
3949                         c->event_added = 0;
3950                 }
3951         }
3952         tcl_close_connection(c->tcl_addr);
3953         if(c->tcp_req_info)
3954                 tcp_req_info_clear(c->tcp_req_info);
3955         if(c->h2_session)
3956                 http2_session_server_delete(c->h2_session);
3957
3958         /* close fd after removing from event lists, or epoll.. is messed up */
3959         if(c->fd != -1 && !c->do_not_close) {
3960 #ifdef USE_WINSOCK
3961                 if(c->type == comm_tcp || c->type == comm_http) {
3962                         /* delete sticky events for the fd, it gets closed */
3963                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3964                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
3965                 }
3966 #endif
3967                 verbose(VERB_ALGO, "close fd %d", c->fd);
3968                 sock_close(c->fd);
3969         }
3970         c->fd = -1;
3971 }
3972
3973 void 
3974 comm_point_delete(struct comm_point* c)
3975 {
3976         if(!c) 
3977                 return;
3978         if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
3979 #ifdef HAVE_SSL
3980                 SSL_shutdown(c->ssl);
3981                 SSL_free(c->ssl);
3982 #endif
3983         }
3984         if(c->type == comm_http && c->http_endpoint) {
3985                 free(c->http_endpoint);
3986                 c->http_endpoint = NULL;
3987         }
3988         comm_point_close(c);
3989         if(c->tcp_handlers) {
3990                 int i;
3991                 for(i=0; i<c->max_tcp_count; i++)
3992                         comm_point_delete(c->tcp_handlers[i]);
3993                 free(c->tcp_handlers);
3994         }
3995         free(c->timeout);
3996         if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
3997                 sldns_buffer_free(c->buffer);
3998 #ifdef USE_DNSCRYPT
3999                 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
4000                         sldns_buffer_free(c->dnscrypt_buffer);
4001                 }
4002 #endif
4003                 if(c->tcp_req_info) {
4004                         tcp_req_info_delete(c->tcp_req_info);
4005                 }
4006                 if(c->h2_session) {
4007                         http2_session_delete(c->h2_session);
4008                 }
4009         }
4010         ub_event_free(c->ev->ev);
4011         free(c->ev);
4012         free(c);
4013 }
4014
4015 void 
4016 comm_point_send_reply(struct comm_reply *repinfo)
4017 {
4018         struct sldns_buffer* buffer;
4019         log_assert(repinfo && repinfo->c);
4020 #ifdef USE_DNSCRYPT
4021         buffer = repinfo->c->dnscrypt_buffer;
4022         if(!dnsc_handle_uncurved_request(repinfo)) {
4023                 return;
4024         }
4025 #else
4026         buffer = repinfo->c->buffer;
4027 #endif
4028         if(repinfo->c->type == comm_udp) {
4029                 if(repinfo->srctype)
4030                         comm_point_send_udp_msg_if(repinfo->c, 
4031                         buffer, (struct sockaddr*)&repinfo->addr, 
4032                         repinfo->addrlen, repinfo);
4033                 else
4034                         comm_point_send_udp_msg(repinfo->c, buffer,
4035                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen, 0);
4036 #ifdef USE_DNSTAP
4037                 /*
4038                  * sending src (client)/dst (local service) addresses over DNSTAP from udp callback
4039                  */
4040                 if(repinfo->c->dtenv != NULL && repinfo->c->dtenv->log_client_response_messages) {
4041                         log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen);
4042                         log_addr(VERB_ALGO, "response to client", &repinfo->addr, repinfo->addrlen);
4043                         dt_msg_send_client_response(repinfo->c->dtenv, &repinfo->addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type, repinfo->c->buffer);
4044                 }
4045 #endif
4046         } else {
4047 #ifdef USE_DNSTAP
4048                 /*
4049                  * sending src (client)/dst (local service) addresses over DNSTAP from TCP callback
4050                  */
4051                 if(repinfo->c->tcp_parent->dtenv != NULL && repinfo->c->tcp_parent->dtenv->log_client_response_messages) {
4052                         log_addr(VERB_ALGO, "from local addr", (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->socket->addr->ai_addrlen);
4053                         log_addr(VERB_ALGO, "response to client", &repinfo->addr, repinfo->addrlen);
4054                         dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv, &repinfo->addr, (void*)repinfo->c->socket->addr->ai_addr, repinfo->c->type,
4055                                 ( repinfo->c->tcp_req_info? repinfo->c->tcp_req_info->spool_buffer: repinfo->c->buffer ));
4056                 }
4057 #endif
4058                 if(repinfo->c->tcp_req_info) {
4059                         tcp_req_info_send_reply(repinfo->c->tcp_req_info);
4060                 } else if(repinfo->c->use_h2) {
4061                         if(!http2_submit_dns_response(repinfo->c->h2_session)) {
4062                                 comm_point_drop_reply(repinfo);
4063                                 return;
4064                         }
4065                         repinfo->c->h2_stream = NULL;
4066                         repinfo->c->tcp_is_reading = 0;
4067                         comm_point_stop_listening(repinfo->c);
4068                         comm_point_start_listening(repinfo->c, -1,
4069                                 adjusted_tcp_timeout(repinfo->c));
4070                         return;
4071                 } else {
4072                         comm_point_start_listening(repinfo->c, -1,
4073                                 adjusted_tcp_timeout(repinfo->c));
4074                 }
4075         }
4076 }
4077
4078 void 
4079 comm_point_drop_reply(struct comm_reply* repinfo)
4080 {
4081         if(!repinfo)
4082                 return;
4083         log_assert(repinfo->c);
4084         log_assert(repinfo->c->type != comm_tcp_accept);
4085         if(repinfo->c->type == comm_udp)
4086                 return;
4087         if(repinfo->c->tcp_req_info)
4088                 repinfo->c->tcp_req_info->is_drop = 1;
4089         if(repinfo->c->type == comm_http) {
4090                 if(repinfo->c->h2_session) {
4091                         repinfo->c->h2_session->is_drop = 1;
4092                         if(!repinfo->c->h2_session->postpone_drop)
4093                                 reclaim_http_handler(repinfo->c);
4094                         return;
4095                 }
4096                 reclaim_http_handler(repinfo->c);
4097                 return;
4098         }
4099         reclaim_tcp_handler(repinfo->c);
4100 }
4101
4102 void 
4103 comm_point_stop_listening(struct comm_point* c)
4104 {
4105         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
4106         if(c->event_added) {
4107                 if(ub_event_del(c->ev->ev) != 0) {
4108                         log_err("event_del error to stoplisten");
4109                 }
4110                 c->event_added = 0;
4111         }
4112 }
4113
4114 void 
4115 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
4116 {
4117         verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 
4118                 c->fd==-1?newfd:c->fd, msec);
4119         if(c->type == comm_tcp_accept && !c->tcp_free) {
4120                 /* no use to start listening no free slots. */
4121                 return;
4122         }
4123         if(c->event_added) {
4124                 if(ub_event_del(c->ev->ev) != 0) {
4125                         log_err("event_del error to startlisten");
4126                 }
4127                 c->event_added = 0;
4128         }
4129         if(msec != -1 && msec != 0) {
4130                 if(!c->timeout) {
4131                         c->timeout = (struct timeval*)malloc(sizeof(
4132                                 struct timeval));
4133                         if(!c->timeout) {
4134                                 log_err("cpsl: malloc failed. No net read.");
4135                                 return;
4136                         }
4137                 }
4138                 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
4139 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
4140                 c->timeout->tv_sec = msec/1000;
4141                 c->timeout->tv_usec = (msec%1000)*1000;
4142 #endif /* S_SPLINT_S */
4143         }
4144         if(c->type == comm_tcp || c->type == comm_http) {
4145                 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
4146                 if(c->tcp_write_and_read) {
4147                         verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd));
4148                         ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
4149                 } else if(c->tcp_is_reading) {
4150                         verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd));
4151                         ub_event_add_bits(c->ev->ev, UB_EV_READ);
4152                 } else  {
4153                         verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd));
4154                         ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
4155                 }
4156         }
4157         if(newfd != -1) {
4158                 if(c->fd != -1 && c->fd != newfd) {
4159                         verbose(5, "cpsl close of fd %d for %d", c->fd, newfd);
4160                         sock_close(c->fd);
4161                 }
4162                 c->fd = newfd;
4163                 ub_event_set_fd(c->ev->ev, c->fd);
4164         }
4165         if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
4166                 log_err("event_add failed. in cpsl.");
4167         }
4168         c->event_added = 1;
4169 }
4170
4171 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
4172 {
4173         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
4174         if(c->event_added) {
4175                 if(ub_event_del(c->ev->ev) != 0) {
4176                         log_err("event_del error to cplf");
4177                 }
4178                 c->event_added = 0;
4179         }
4180         ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
4181         if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
4182         if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
4183         if(ub_event_add(c->ev->ev, c->timeout) != 0) {
4184                 log_err("event_add failed. in cplf.");
4185         }
4186         c->event_added = 1;
4187 }
4188
4189 size_t comm_point_get_mem(struct comm_point* c)
4190 {
4191         size_t s;
4192         if(!c) 
4193                 return 0;
4194         s = sizeof(*c) + sizeof(*c->ev);
4195         if(c->timeout) 
4196                 s += sizeof(*c->timeout);
4197         if(c->type == comm_tcp || c->type == comm_local) {
4198                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
4199 #ifdef USE_DNSCRYPT
4200                 s += sizeof(*c->dnscrypt_buffer);
4201                 if(c->buffer != c->dnscrypt_buffer) {
4202                         s += sldns_buffer_capacity(c->dnscrypt_buffer);
4203                 }
4204 #endif
4205         }
4206         if(c->type == comm_tcp_accept) {
4207                 int i;
4208                 for(i=0; i<c->max_tcp_count; i++)
4209                         s += comm_point_get_mem(c->tcp_handlers[i]);
4210         }
4211         return s;
4212 }
4213
4214 struct comm_timer* 
4215 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
4216 {
4217         struct internal_timer *tm = (struct internal_timer*)calloc(1,
4218                 sizeof(struct internal_timer));
4219         if(!tm) {
4220                 log_err("malloc failed");
4221                 return NULL;
4222         }
4223         tm->super.ev_timer = tm;
4224         tm->base = base;
4225         tm->super.callback = cb;
4226         tm->super.cb_arg = cb_arg;
4227         tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 
4228                 comm_timer_callback, &tm->super);
4229         if(tm->ev == NULL) {
4230                 log_err("timer_create: event_base_set failed.");
4231                 free(tm);
4232                 return NULL;
4233         }
4234         return &tm->super;
4235 }
4236
4237 void 
4238 comm_timer_disable(struct comm_timer* timer)
4239 {
4240         if(!timer)
4241                 return;
4242         ub_timer_del(timer->ev_timer->ev);
4243         timer->ev_timer->enabled = 0;
4244 }
4245
4246 void 
4247 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
4248 {
4249         log_assert(tv);
4250         if(timer->ev_timer->enabled)
4251                 comm_timer_disable(timer);
4252         if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
4253                 comm_timer_callback, timer, tv) != 0)
4254                 log_err("comm_timer_set: evtimer_add failed.");
4255         timer->ev_timer->enabled = 1;
4256 }
4257
4258 void 
4259 comm_timer_delete(struct comm_timer* timer)
4260 {
4261         if(!timer)
4262                 return;
4263         comm_timer_disable(timer);
4264         /* Free the sub struct timer->ev_timer derived from the super struct timer.
4265          * i.e. assert(timer == timer->ev_timer)
4266          */
4267         ub_event_free(timer->ev_timer->ev);
4268         free(timer->ev_timer);
4269 }
4270
4271 void 
4272 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
4273 {
4274         struct comm_timer* tm = (struct comm_timer*)arg;
4275         if(!(event&UB_EV_TIMEOUT))
4276                 return;
4277         ub_comm_base_now(tm->ev_timer->base);
4278         tm->ev_timer->enabled = 0;
4279         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
4280         (*tm->callback)(tm->cb_arg);
4281 }
4282
4283 int 
4284 comm_timer_is_set(struct comm_timer* timer)
4285 {
4286         return (int)timer->ev_timer->enabled;
4287 }
4288
4289 size_t 
4290 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer))
4291 {
4292         return sizeof(struct internal_timer);
4293 }
4294
4295 struct comm_signal* 
4296 comm_signal_create(struct comm_base* base,
4297         void (*callback)(int, void*), void* cb_arg)
4298 {
4299         struct comm_signal* com = (struct comm_signal*)malloc(
4300                 sizeof(struct comm_signal));
4301         if(!com) {
4302                 log_err("malloc failed");
4303                 return NULL;
4304         }
4305         com->base = base;
4306         com->callback = callback;
4307         com->cb_arg = cb_arg;
4308         com->ev_signal = NULL;
4309         return com;
4310 }
4311
4312 void 
4313 comm_signal_callback(int sig, short event, void* arg)
4314 {
4315         struct comm_signal* comsig = (struct comm_signal*)arg;
4316         if(!(event & UB_EV_SIGNAL))
4317                 return;
4318         ub_comm_base_now(comsig->base);
4319         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
4320         (*comsig->callback)(sig, comsig->cb_arg);
4321 }
4322
4323 int 
4324 comm_signal_bind(struct comm_signal* comsig, int sig)
4325 {
4326         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
4327                 sizeof(struct internal_signal));
4328         if(!entry) {
4329                 log_err("malloc failed");
4330                 return 0;
4331         }
4332         log_assert(comsig);
4333         /* add signal event */
4334         entry->ev = ub_signal_new(comsig->base->eb->base, sig,
4335                 comm_signal_callback, comsig);
4336         if(entry->ev == NULL) {
4337                 log_err("Could not create signal event");
4338                 free(entry);
4339                 return 0;
4340         }
4341         if(ub_signal_add(entry->ev, NULL) != 0) {
4342                 log_err("Could not add signal handler");
4343                 ub_event_free(entry->ev);
4344                 free(entry);
4345                 return 0;
4346         }
4347         /* link into list */
4348         entry->next = comsig->ev_signal;
4349         comsig->ev_signal = entry;
4350         return 1;
4351 }
4352
4353 void 
4354 comm_signal_delete(struct comm_signal* comsig)
4355 {
4356         struct internal_signal* p, *np;
4357         if(!comsig)
4358                 return;
4359         p=comsig->ev_signal;
4360         while(p) {
4361                 np = p->next;
4362                 ub_signal_del(p->ev);
4363                 ub_event_free(p->ev);
4364                 free(p);
4365                 p = np;
4366         }
4367         free(comsig);
4368 }