]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/util/netevent.c
MFV r368746:
[FreeBSD/FreeBSD.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/ub_event.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/tcp_conn_limit.h"
47 #include "util/fptr_wlist.h"
48 #include "sldns/pkthdr.h"
49 #include "sldns/sbuffer.h"
50 #include "sldns/str2wire.h"
51 #include "dnstap/dnstap.h"
52 #include "dnscrypt/dnscrypt.h"
53 #include "services/listen_dnsport.h"
54 #ifdef HAVE_OPENSSL_SSL_H
55 #include <openssl/ssl.h>
56 #endif
57 #ifdef HAVE_OPENSSL_ERR_H
58 #include <openssl/err.h>
59 #endif
60
61 /* -------- Start of local definitions -------- */
/**
 * Fallback for platforms whose headers do not define CMSG_ALIGN.
 * A platform-private alignment macro is used when one is visible; otherwise
 * the length is rounded up to a multiple of sizeof(long).
 * NOTE(review): the second branch tests CMSG_DATA_ALIGN but aliases
 * _CMSG_DATA_ALIGN (leading underscore) - confirm that is intended.
 */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(CMSG_DATA_ALIGN)
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/**
 * Fallback for a missing CMSG_LEN: aligned header size plus the payload
 * length (the payload itself is not padded).
 */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/**
 * Fallback for a missing CMSG_SPACE: aligned payload plus aligned header,
 * using the platform header-alignment macro when it exists.
 */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
86
/** Timeout for writing a TCP query, in milliseconds */
#define TCP_QUERY_TIMEOUT 120000
/** Floor, in milliseconds, for the TCP timeout that is actually applied,
 * whatever value is advertised */
#define TCP_QUERY_TIMEOUT_MINIMUM 200

/** how many UDP reads are attempted for one read indication from select;
 * only a single read when nonblocking sockets are broken */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
99
/**
 * Internal event record: keeps the ub_event for an event together with the
 * comm base it was made for. It may additionally be linked into other
 * structures (list, tree).
 */
struct internal_event {
	/** comm base this event belongs to */
	struct comm_base* base;
	/** the underlying ub_event */
	struct ub_event* ev;
};
110
/**
 * Internal base record; one per comm base, so that every thread has its
 * own set of events.
 */
struct internal_base {
	/** the ub_event event base */
	struct ub_event_base* base;
	/** current time in seconds; comm_base_timept() hands out a pointer
	 * to this field */
	time_t secs;
	/** current time as a timeval; also exposed by comm_base_timept() */
	struct timeval now;
	/** event that fires when the slow_accept wait is over */
	struct ub_event* slow_accept;
	/** nonzero while slow_accept is enabled */
	int slow_accept_enabled;
};
126
127 /**
128  * Internal timer structure, to store timer event in.
129  */
130 struct internal_timer {
131         /** the super struct from which derived */
132         struct comm_timer super;
133         /** the comm base */
134         struct comm_base* base;
135         /** ub_event event type */
136         struct ub_event* ev;
137         /** is timer enabled */
138         uint8_t enabled;
139 };
140
/**
 * Internal signal record that holds one signal event; records are chained
 * into a singly linked list.
 */
struct internal_signal {
	/** the underlying ub_event */
	struct ub_event* ev;
	/** next record in the signal list */
	struct internal_signal* next;
};
150
151 /** create a tcp handler with a parent */
152 static struct comm_point* comm_point_create_tcp_handler(
153         struct comm_base *base, struct comm_point* parent, size_t bufsize,
154         struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
155         void* callback_arg);
156
157 /* -------- End of local definitions -------- */
158
159 struct comm_base* 
160 comm_base_create(int sigs)
161 {
162         struct comm_base* b = (struct comm_base*)calloc(1,
163                 sizeof(struct comm_base));
164         const char *evnm="event", *evsys="", *evmethod="";
165
166         if(!b)
167                 return NULL;
168         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
169         if(!b->eb) {
170                 free(b);
171                 return NULL;
172         }
173         b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
174         if(!b->eb->base) {
175                 free(b->eb);
176                 free(b);
177                 return NULL;
178         }
179         ub_comm_base_now(b);
180         ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
181         verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod);
182         return b;
183 }
184
185 struct comm_base*
186 comm_base_create_event(struct ub_event_base* base)
187 {
188         struct comm_base* b = (struct comm_base*)calloc(1,
189                 sizeof(struct comm_base));
190         if(!b)
191                 return NULL;
192         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
193         if(!b->eb) {
194                 free(b);
195                 return NULL;
196         }
197         b->eb->base = base;
198         ub_comm_base_now(b);
199         return b;
200 }
201
202 void 
203 comm_base_delete(struct comm_base* b)
204 {
205         if(!b)
206                 return;
207         if(b->eb->slow_accept_enabled) {
208                 if(ub_event_del(b->eb->slow_accept) != 0) {
209                         log_err("could not event_del slow_accept");
210                 }
211                 ub_event_free(b->eb->slow_accept);
212         }
213         ub_event_base_free(b->eb->base);
214         b->eb->base = NULL;
215         free(b->eb);
216         free(b);
217 }
218
219 void 
220 comm_base_delete_no_base(struct comm_base* b)
221 {
222         if(!b)
223                 return;
224         if(b->eb->slow_accept_enabled) {
225                 if(ub_event_del(b->eb->slow_accept) != 0) {
226                         log_err("could not event_del slow_accept");
227                 }
228                 ub_event_free(b->eb->slow_accept);
229         }
230         b->eb->base = NULL;
231         free(b->eb);
232         free(b);
233 }
234
235 void 
236 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
237 {
238         *tt = &b->eb->secs;
239         *tv = &b->eb->now;
240 }
241
242 void 
243 comm_base_dispatch(struct comm_base* b)
244 {
245         int retval;
246         retval = ub_event_base_dispatch(b->eb->base);
247         if(retval < 0) {
248                 fatal_exit("event_dispatch returned error %d, "
249                         "errno is %s", retval, strerror(errno));
250         }
251 }
252
253 void comm_base_exit(struct comm_base* b)
254 {
255         if(ub_event_base_loopexit(b->eb->base) != 0) {
256                 log_err("Could not loopexit");
257         }
258 }
259
260 void comm_base_set_slow_accept_handlers(struct comm_base* b,
261         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
262 {
263         b->stop_accept = stop_acc;
264         b->start_accept = start_acc;
265         b->cb_arg = arg;
266 }
267
268 struct ub_event_base* comm_base_internal(struct comm_base* b)
269 {
270         return b->eb->base;
271 }
272
273 /** see if errno for udp has to be logged or not uses globals */
274 static int
275 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
276 {
277         /* do not log transient errors (unless high verbosity) */
278 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
279         switch(errno) {
280 #  ifdef ENETUNREACH
281                 case ENETUNREACH:
282 #  endif
283 #  ifdef EHOSTDOWN
284                 case EHOSTDOWN:
285 #  endif
286 #  ifdef EHOSTUNREACH
287                 case EHOSTUNREACH:
288 #  endif
289 #  ifdef ENETDOWN
290                 case ENETDOWN:
291 #  endif
292                         if(verbosity < VERB_ALGO)
293                                 return 0;
294                 default:
295                         break;
296         }
297 #endif
298         /* permission denied is gotten for every send if the
299          * network is disconnected (on some OS), squelch it */
300         if( ((errno == EPERM)
301 #  ifdef EADDRNOTAVAIL
302                 /* 'Cannot assign requested address' also when disconnected */
303                 || (errno == EADDRNOTAVAIL)
304 #  endif
305                 ) && verbosity < VERB_DETAIL)
306                 return 0;
307 #  ifdef EADDRINUSE
308         /* If SO_REUSEADDR is set, we could try to connect to the same server
309          * from the same source port twice. */
310         if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
311                 return 0;
312 #  endif
313         /* squelch errors where people deploy AAAA ::ffff:bla for
314          * authority servers, which we try for intranets. */
315         if(errno == EINVAL && addr_is_ip4mapped(
316                 (struct sockaddr_storage*)addr, addrlen) &&
317                 verbosity < VERB_DETAIL)
318                 return 0;
319         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
320          * but a dns cache does not need it. */
321         if(errno == EACCES && addr_is_broadcast(
322                 (struct sockaddr_storage*)addr, addrlen) &&
323                 verbosity < VERB_DETAIL)
324                 return 0;
325         return 1;
326 }
327
/**
 * Decide whether the errno of a failed TCP connect deserves a log line;
 * applies exactly the same rules as for a failed UDP send.
 * @param addr: destination address.
 * @param addrlen: length of addr.
 * @return 0 to squelch the error, 1 to log it.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);

	return needs_log;
}
332
333 /* send a UDP reply */
334 int
335 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
336         struct sockaddr* addr, socklen_t addrlen, int is_connected)
337 {
338         ssize_t sent;
339         log_assert(c->fd != -1);
340 #ifdef UNBOUND_DEBUG
341         if(sldns_buffer_remaining(packet) == 0)
342                 log_err("error: send empty UDP packet");
343 #endif
344         log_assert(addr && addrlen > 0);
345         if(!is_connected) {
346                 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
347                         sldns_buffer_remaining(packet), 0,
348                         addr, addrlen);
349         } else {
350                 sent = send(c->fd, (void*)sldns_buffer_begin(packet),
351                         sldns_buffer_remaining(packet), 0);
352         }
353         if(sent == -1) {
354                 /* try again and block, waiting for IO to complete,
355                  * we want to send the answer, and we will wait for
356                  * the ethernet interface buffer to have space. */
357 #ifndef USE_WINSOCK
358                 if(errno == EAGAIN || 
359 #  ifdef EWOULDBLOCK
360                         errno == EWOULDBLOCK ||
361 #  endif
362                         errno == ENOBUFS) {
363 #else
364                 if(WSAGetLastError() == WSAEINPROGRESS ||
365                         WSAGetLastError() == WSAENOBUFS ||
366                         WSAGetLastError() == WSAEWOULDBLOCK) {
367 #endif
368                         int e;
369                         fd_set_block(c->fd);
370                         if (!is_connected) {
371                                 sent = sendto(c->fd, (void*)sldns_buffer_begin(packet),
372                                         sldns_buffer_remaining(packet), 0,
373                                         addr, addrlen);
374                         } else {
375                                 sent = send(c->fd, (void*)sldns_buffer_begin(packet),
376                                         sldns_buffer_remaining(packet), 0);
377                         }
378                         e = errno;
379                         fd_set_nonblock(c->fd);
380                         errno = e;
381                 }
382         }
383         if(sent == -1) {
384                 if(!udp_send_errno_needs_log(addr, addrlen))
385                         return 0;
386                 if (!is_connected) {
387                         verbose(VERB_OPS, "sendto failed: %s", sock_strerror(errno));
388                 } else {
389                         verbose(VERB_OPS, "send failed: %s", sock_strerror(errno));
390                 }
391                 log_addr(VERB_OPS, "remote address is",
392                         (struct sockaddr_storage*)addr, addrlen);
393                 return 0;
394         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
395                 log_err("sent %d in place of %d bytes", 
396                         (int)sent, (int)sldns_buffer_remaining(packet));
397                 return 0;
398         }
399         return 1;
400 }
401
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Log the ancillary packet info stored in a comm_reply, for debugging.
 * @param str: prefix put in front of the log line.
 * @param r: reply whose srctype selects which pktinfo member is printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	switch(r->srctype) {
	case 6: {
		char addr6[1024];
		if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			addr6, (socklen_t)sizeof(addr6)) == NULL) {
			(void)strlcpy(addr6, "(inet_ntop error)", sizeof(addr6));
		}
		addr6[sizeof(addr6)-1]=0;
		log_info("%s: %s %d", str, addr6, r->pktinfo.v6info.ipi6_ifindex);
		break;
	}
	case 4: {
#ifdef IP_PKTINFO
		char dst[1024], spec_dst[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
			dst, (socklen_t)sizeof(dst)) == NULL) {
			(void)strlcpy(dst, "(inet_ntop error)", sizeof(dst));
		}
		dst[sizeof(dst)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			spec_dst, (socklen_t)sizeof(spec_dst)) == NULL) {
			(void)strlcpy(spec_dst, "(inet_ntop error)", sizeof(spec_dst));
		}
		spec_dst[sizeof(spec_dst)-1]=0;
#else
		/* no ipi_spec_dst member: print an empty string for it */
		spec_dst[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			dst, spec_dst);
#elif defined(IP_RECVDSTADDR)
		char dst[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
			dst, (socklen_t)sizeof(dst)) == NULL) {
			(void)strlcpy(dst, "(inet_ntop error)", sizeof(dst));
		}
		dst[sizeof(dst)-1]=0;
		log_info("%s: %s", str, dst);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		break;
	}
	default:
		log_info("%s: unknown srctype %d", str, r->srctype);
		break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
449
450 /** send a UDP reply over specified interface*/
451 static int
452 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
453         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
454 {
455 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
456         ssize_t sent;
457         struct msghdr msg;
458         struct iovec iov[1];
459         union {
460                 struct cmsghdr hdr;
461                 char buf[256];
462         } control;
463 #ifndef S_SPLINT_S
464         struct cmsghdr *cmsg;
465 #endif /* S_SPLINT_S */
466
467         log_assert(c->fd != -1);
468 #ifdef UNBOUND_DEBUG
469         if(sldns_buffer_remaining(packet) == 0)
470                 log_err("error: send empty UDP packet");
471 #endif
472         log_assert(addr && addrlen > 0);
473
474         msg.msg_name = addr;
475         msg.msg_namelen = addrlen;
476         iov[0].iov_base = sldns_buffer_begin(packet);
477         iov[0].iov_len = sldns_buffer_remaining(packet);
478         msg.msg_iov = iov;
479         msg.msg_iovlen = 1;
480         msg.msg_control = control.buf;
481 #ifndef S_SPLINT_S
482         msg.msg_controllen = sizeof(control.buf);
483 #endif /* S_SPLINT_S */
484         msg.msg_flags = 0;
485
486 #ifndef S_SPLINT_S
487         cmsg = CMSG_FIRSTHDR(&msg);
488         if(r->srctype == 4) {
489 #ifdef IP_PKTINFO
490                 void* cmsg_data;
491                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
492                 log_assert(msg.msg_controllen <= sizeof(control.buf));
493                 cmsg->cmsg_level = IPPROTO_IP;
494                 cmsg->cmsg_type = IP_PKTINFO;
495                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
496                         sizeof(struct in_pktinfo));
497                 /* unset the ifindex to not bypass the routing tables */
498                 cmsg_data = CMSG_DATA(cmsg);
499                 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
500                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
501 #elif defined(IP_SENDSRCADDR)
502                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
503                 log_assert(msg.msg_controllen <= sizeof(control.buf));
504                 cmsg->cmsg_level = IPPROTO_IP;
505                 cmsg->cmsg_type = IP_SENDSRCADDR;
506                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
507                         sizeof(struct in_addr));
508                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
509 #else
510                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
511                 msg.msg_control = NULL;
512 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
513         } else if(r->srctype == 6) {
514                 void* cmsg_data;
515                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
516                 log_assert(msg.msg_controllen <= sizeof(control.buf));
517                 cmsg->cmsg_level = IPPROTO_IPV6;
518                 cmsg->cmsg_type = IPV6_PKTINFO;
519                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
520                         sizeof(struct in6_pktinfo));
521                 /* unset the ifindex to not bypass the routing tables */
522                 cmsg_data = CMSG_DATA(cmsg);
523                 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
524                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
525         } else {
526                 /* try to pass all 0 to use default route */
527                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
528                 log_assert(msg.msg_controllen <= sizeof(control.buf));
529                 cmsg->cmsg_level = IPPROTO_IPV6;
530                 cmsg->cmsg_type = IPV6_PKTINFO;
531                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
532                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
533         }
534 #endif /* S_SPLINT_S */
535         if(verbosity >= VERB_ALGO)
536                 p_ancil("send_udp over interface", r);
537         sent = sendmsg(c->fd, &msg, 0);
538         if(sent == -1) {
539                 /* try again and block, waiting for IO to complete,
540                  * we want to send the answer, and we will wait for
541                  * the ethernet interface buffer to have space. */
542 #ifndef USE_WINSOCK
543                 if(errno == EAGAIN || 
544 #  ifdef EWOULDBLOCK
545                         errno == EWOULDBLOCK ||
546 #  endif
547                         errno == ENOBUFS) {
548 #else
549                 if(WSAGetLastError() == WSAEINPROGRESS ||
550                         WSAGetLastError() == WSAENOBUFS ||
551                         WSAGetLastError() == WSAEWOULDBLOCK) {
552 #endif
553                         int e;
554                         fd_set_block(c->fd);
555                         sent = sendmsg(c->fd, &msg, 0);
556                         e = errno;
557                         fd_set_nonblock(c->fd);
558                         errno = e;
559                 }
560         }
561         if(sent == -1) {
562                 if(!udp_send_errno_needs_log(addr, addrlen))
563                         return 0;
564                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
565                 log_addr(VERB_OPS, "remote address is", 
566                         (struct sockaddr_storage*)addr, addrlen);
567 #ifdef __NetBSD__
568                 /* netbsd 7 has IP_PKTINFO for recv but not send */
569                 if(errno == EINVAL && r->srctype == 4)
570                         log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
571                                 "Please disable interface-automatic");
572 #endif
573                 return 0;
574         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
575                 log_err("sent %d in place of %d bytes", 
576                         (int)sent, (int)sldns_buffer_remaining(packet));
577                 return 0;
578         }
579         return 1;
580 #else
581         (void)c;
582         (void)packet;
583         (void)addr;
584         (void)addrlen;
585         (void)r;
586         log_err("sendmsg: IPV6_PKTINFO not supported");
587         return 0;
588 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
589 }
590
591 /** return true is UDP receive error needs to be logged */
592 static int udp_recv_needs_log(int err)
593 {
594         switch(err) {
595         case ECONNREFUSED:
596 #  ifdef ENETUNREACH
597         case ENETUNREACH:
598 #  endif
599 #  ifdef EHOSTDOWN
600         case EHOSTDOWN:
601 #  endif
602 #  ifdef EHOSTUNREACH
603         case EHOSTUNREACH:
604 #  endif
605 #  ifdef ENETDOWN
606         case ENETDOWN:
607 #  endif
608                 if(verbosity >= VERB_ALGO)
609                         return 1;
610                 return 0;
611         default:
612                 break;
613         }
614         return 1;
615 }
616
/**
 * Event callback for a UDP comm point that uses ancillary data.
 * Reads up to NUM_UDP_PER_SELECT datagrams with recvmsg(), records the
 * packet info from the control messages in the reply, and calls the comm
 * point callback for each datagram. When the callback returns nonzero the
 * reply is sent straight back with comm_point_send_udp_msg_if().
 * Without AF_INET6, IPV6_PKTINFO and recvmsg support this is a fatal error.
 * @param fd: the socket the event fired on.
 * @param event: event bits; anything without UB_EV_READ is ignored.
 * @param arg: the comm point.
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* the union gives the control buffer cmsghdr alignment */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	/* check for NULL before the assert looks inside the comm point */
	log_assert(rep.c && rep.c->type == comm_udp);

	if(!(event&UB_EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	ub_comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			/* EAGAIN and EINTR are expected here and never logged */
			if(errno != EAGAIN && errno != EINTR
				&& udp_recv_needs_log(errno)) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		/* srctype stays 0 when no packet info message is found */
		rep.srctype = 0;
#ifndef S_SPLINT_S
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			/* NOTE(review): comm_point_udp_callback replies from
			 * dnscrypt_buffer under USE_DNSCRYPT, this path always
			 * uses buffer - confirm that is intended */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		/* stop when the commpoint was closed (fd -1) or now has
		 * another fd; same test as comm_point_udp_callback, so a
		 * stale descriptor is never read again */
		if(!rep.c || rep.c->fd != fd)
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
715
716 void 
717 comm_point_udp_callback(int fd, short event, void* arg)
718 {
719         struct comm_reply rep;
720         ssize_t rcv;
721         int i;
722         struct sldns_buffer *buffer;
723
724         rep.c = (struct comm_point*)arg;
725         log_assert(rep.c->type == comm_udp);
726
727         if(!(event&UB_EV_READ))
728                 return;
729         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
730         ub_comm_base_now(rep.c->ev->base);
731         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
732                 sldns_buffer_clear(rep.c->buffer);
733                 rep.addrlen = (socklen_t)sizeof(rep.addr);
734                 log_assert(fd != -1);
735                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
736                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
737                         sldns_buffer_remaining(rep.c->buffer), 0, 
738                         (struct sockaddr*)&rep.addr, &rep.addrlen);
739                 if(rcv == -1) {
740 #ifndef USE_WINSOCK
741                         if(errno != EAGAIN && errno != EINTR
742                                 && udp_recv_needs_log(errno))
743                                 log_err("recvfrom %d failed: %s", 
744                                         fd, strerror(errno));
745 #else
746                         if(WSAGetLastError() != WSAEINPROGRESS &&
747                                 WSAGetLastError() != WSAECONNRESET &&
748                                 WSAGetLastError()!= WSAEWOULDBLOCK)
749                                 log_err("recvfrom failed: %s",
750                                         wsa_strerror(WSAGetLastError()));
751 #endif
752                         return;
753                 }
754                 sldns_buffer_skip(rep.c->buffer, rcv);
755                 sldns_buffer_flip(rep.c->buffer);
756                 rep.srctype = 0;
757                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
758                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
759                         /* send back immediate reply */
760 #ifdef USE_DNSCRYPT
761                         buffer = rep.c->dnscrypt_buffer;
762 #else
763                         buffer = rep.c->buffer;
764 #endif
765                         (void)comm_point_send_udp_msg(rep.c, buffer,
766                                 (struct sockaddr*)&rep.addr, rep.addrlen, 0);
767                 }
768                 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
769                 another UDP port. Note rep.c cannot be reused with TCP fd. */
770                         break;
771         }
772 }
773
774 /** Use a new tcp handler for new query fd, set to read query */
775 static void
776 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 
777 {
778         int handler_usage;
779         log_assert(c->type == comm_tcp || c->type == comm_http);
780         log_assert(c->fd == -1);
781         sldns_buffer_clear(c->buffer);
782 #ifdef USE_DNSCRYPT
783         if (c->dnscrypt)
784                 sldns_buffer_clear(c->dnscrypt_buffer);
785 #endif
786         c->tcp_is_reading = 1;
787         c->tcp_byte_count = 0;
788         /* if more than half the tcp handlers are in use, use a shorter
789          * timeout for this TCP connection, we need to make space for
790          * other connections to be able to get attention */
791         /* If > 50% TCP handler structures in use, set timeout to 1/100th
792          *      configured value.
793          * If > 65%TCP handler structures in use, set to 1/500th configured
794          *      value.
795          * If > 80% TCP handler structures in use, set to 0.
796          *
797          * If the timeout to use falls below 200 milliseconds, an actual
798          * timeout of 200ms is used.
799          */
800         handler_usage = (cur * 100) / max;
801         if(handler_usage > 50 && handler_usage <= 65)
802                 c->tcp_timeout_msec /= 100;
803         else if (handler_usage > 65 && handler_usage <= 80)
804                 c->tcp_timeout_msec /= 500;
805         else if (handler_usage > 80)
806                 c->tcp_timeout_msec = 0;
807         comm_point_start_listening(c, fd,
808                 c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM
809                         ? TCP_QUERY_TIMEOUT_MINIMUM
810                         : c->tcp_timeout_msec);
811 }
812
813 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
814         short ATTR_UNUSED(event), void* arg)
815 {
816         struct comm_base* b = (struct comm_base*)arg;
817         /* timeout for the slow accept, re-enable accepts again */
818         if(b->start_accept) {
819                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
820                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
821                 (*b->start_accept)(b->cb_arg);
822                 b->eb->slow_accept_enabled = 0;
823         }
824 }
825
826 int comm_point_perform_accept(struct comm_point* c,
827         struct sockaddr_storage* addr, socklen_t* addrlen)
828 {
829         int new_fd;
830         *addrlen = (socklen_t)sizeof(*addr);
831 #ifndef HAVE_ACCEPT4
832         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
833 #else
834         /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
835         new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
836 #endif
837         if(new_fd == -1) {
838 #ifndef USE_WINSOCK
839                 /* EINTR is signal interrupt. others are closed connection. */
840                 if(     errno == EINTR || errno == EAGAIN
841 #ifdef EWOULDBLOCK
842                         || errno == EWOULDBLOCK 
843 #endif
844 #ifdef ECONNABORTED
845                         || errno == ECONNABORTED 
846 #endif
847 #ifdef EPROTO
848                         || errno == EPROTO
849 #endif /* EPROTO */
850                         )
851                         return -1;
852 #if defined(ENFILE) && defined(EMFILE)
853                 if(errno == ENFILE || errno == EMFILE) {
854                         /* out of file descriptors, likely outside of our
855                          * control. stop accept() calls for some time */
856                         if(c->ev->base->stop_accept) {
857                                 struct comm_base* b = c->ev->base;
858                                 struct timeval tv;
859                                 verbose(VERB_ALGO, "out of file descriptors: "
860                                         "slow accept");
861                                 b->eb->slow_accept_enabled = 1;
862                                 fptr_ok(fptr_whitelist_stop_accept(
863                                         b->stop_accept));
864                                 (*b->stop_accept)(b->cb_arg);
865                                 /* set timeout, no mallocs */
866                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
867                                 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
868                                 b->eb->slow_accept = ub_event_new(b->eb->base,
869                                         -1, UB_EV_TIMEOUT,
870                                         comm_base_handle_slow_accept, b);
871                                 if(b->eb->slow_accept == NULL) {
872                                         /* we do not want to log here, because
873                                          * that would spam the logfiles.
874                                          * error: "event_base_set failed." */
875                                 }
876                                 else if(ub_event_add(b->eb->slow_accept, &tv)
877                                         != 0) {
878                                         /* we do not want to log here,
879                                          * error: "event_add failed." */
880                                 }
881                         }
882                         return -1;
883                 }
884 #endif
885 #else /* USE_WINSOCK */
886                 if(WSAGetLastError() == WSAEINPROGRESS ||
887                         WSAGetLastError() == WSAECONNRESET)
888                         return -1;
889                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
890                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
891                         return -1;
892                 }
893 #endif
894                 log_err_addr("accept failed", sock_strerror(errno), addr,
895                         *addrlen);
896                 return -1;
897         }
898         if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
899                 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
900                 if(!tcl_new_connection(c->tcl_addr)) {
901                         if(verbosity >= 3)
902                                 log_err_addr("accept rejected",
903                                 "connection limit exceeded", addr, *addrlen);
904                         close(new_fd);
905                         return -1;
906                 }
907         }
908 #ifndef HAVE_ACCEPT4
909         fd_set_nonblock(new_fd);
910 #endif
911         return new_fd;
912 }
913
914 #ifdef USE_WINSOCK
915 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
916         int ATTR_UNUSED(argi), long argl, long retvalue)
917 {
918         int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
919         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
920                 (oper&BIO_CB_RETURN)?"return":"before",
921                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
922                 wsa_err==WSAEWOULDBLOCK?"wsawb":"");
923         /* on windows, check if previous operation caused EWOULDBLOCK */
924         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
925                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
926                 if(wsa_err == WSAEWOULDBLOCK)
927                         ub_winsock_tcp_wouldblock((struct ub_event*)
928                                 BIO_get_callback_arg(b), UB_EV_READ);
929         }
930         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
931                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
932                 if(wsa_err == WSAEWOULDBLOCK)
933                         ub_winsock_tcp_wouldblock((struct ub_event*)
934                                 BIO_get_callback_arg(b), UB_EV_WRITE);
935         }
936         /* return original return value */
937         return retvalue;
938 }
939
940 /** set win bio callbacks for nonblocking operations */
941 void
942 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
943 {
944         SSL* ssl = (SSL*)thessl;
945         /* set them both just in case, but usually they are the same BIO */
946         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
947         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
948         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
949         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
950 }
951 #endif
952
953 #ifdef HAVE_NGHTTP2
954 /** Create http2 session server.  Per connection, after TCP accepted.*/
955 static int http2_session_server_create(struct http2_session* h2_session)
956 {
957         log_assert(h2_session->callbacks);
958         h2_session->is_drop = 0;
959         if(nghttp2_session_server_new(&h2_session->session,
960                         h2_session->callbacks,
961                 h2_session) == NGHTTP2_ERR_NOMEM) {
962                 log_err("failed to create nghttp2 session server");
963                 return 0;
964         }
965
966         return 1;
967 }
968
969 /** Submit http2 setting to session. Once per session. */
970 static int http2_submit_settings(struct http2_session* h2_session)
971 {
972         int ret;
973         nghttp2_settings_entry settings[1] = {
974                 {NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS,
975                  h2_session->c->http2_max_streams}};
976
977         ret = nghttp2_submit_settings(h2_session->session, NGHTTP2_FLAG_NONE,
978                 settings, 1);
979         if(ret) {
980                 verbose(VERB_QUERY, "http2: submit_settings failed, "
981                         "error: %s", nghttp2_strerror(ret));
982                 return 0;
983         }
984         return 1;
985 }
986 #endif /* HAVE_NGHTTP2 */
987
988
989 void 
990 comm_point_tcp_accept_callback(int fd, short event, void* arg)
991 {
992         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
993         int new_fd;
994         log_assert(c->type == comm_tcp_accept);
995         if(!(event & UB_EV_READ)) {
996                 log_info("ignoring tcp accept event %d", (int)event);
997                 return;
998         }
999         ub_comm_base_now(c->ev->base);
1000         /* find free tcp handler. */
1001         if(!c->tcp_free) {
1002                 log_warn("accepted too many tcp, connections full");
1003                 return;
1004         }
1005         /* accept incoming connection. */
1006         c_hdl = c->tcp_free;
1007         /* clear leftover flags from previous use, and then set the
1008          * correct event base for the event structure for libevent */
1009         ub_event_free(c_hdl->ev->ev);
1010         if((c_hdl->type == comm_tcp && c_hdl->tcp_req_info) ||
1011                 c_hdl->type == comm_local || c_hdl->type == comm_raw)
1012                 c_hdl->tcp_do_toggle_rw = 0;
1013         else    c_hdl->tcp_do_toggle_rw = 1;
1014
1015         if(c_hdl->type == comm_http) {
1016 #ifdef HAVE_NGHTTP2
1017                 if(!c_hdl->h2_session ||
1018                         !http2_session_server_create(c_hdl->h2_session)) {
1019                         log_warn("failed to create nghttp2");
1020                         return;
1021                 }
1022                 if(!c_hdl->h2_session ||
1023                         !http2_submit_settings(c_hdl->h2_session)) {
1024                         log_warn("failed to submit http2 settings");
1025                         return;
1026                 }
1027                 if(!c->ssl) {
1028                         c_hdl->tcp_do_toggle_rw = 0;
1029                         c_hdl->use_h2 = 1;
1030                 }
1031 #endif
1032                 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
1033                         UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
1034                         comm_point_http_handle_callback, c_hdl);
1035         } else {
1036                 c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1,
1037                         UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT,
1038                         comm_point_tcp_handle_callback, c_hdl);
1039         }
1040         if(!c_hdl->ev->ev) {
1041                 log_warn("could not ub_event_new, dropped tcp");
1042                 return;
1043         }
1044         log_assert(fd != -1);
1045         (void)fd;
1046         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
1047                 &c_hdl->repinfo.addrlen);
1048         if(new_fd == -1)
1049                 return;
1050         if(c->ssl) {
1051                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
1052                 if(!c_hdl->ssl) {
1053                         c_hdl->fd = new_fd;
1054                         comm_point_close(c_hdl);
1055                         return;
1056                 }
1057                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
1058 #ifdef USE_WINSOCK
1059                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
1060 #endif
1061         }
1062
1063         /* grab the tcp handler buffers */
1064         c->cur_tcp_count++;
1065         c->tcp_free = c_hdl->tcp_free;
1066         if(!c->tcp_free) {
1067                 /* stop accepting incoming queries for now. */
1068                 comm_point_stop_listening(c);
1069         }
1070         setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
1071 }
1072
1073 /** Make tcp handler free for next assignment */
1074 static void
1075 reclaim_tcp_handler(struct comm_point* c)
1076 {
1077         log_assert(c->type == comm_tcp);
1078         if(c->ssl) {
1079 #ifdef HAVE_SSL
1080                 SSL_shutdown(c->ssl);
1081                 SSL_free(c->ssl);
1082                 c->ssl = NULL;
1083 #endif
1084         }
1085         comm_point_close(c);
1086         if(c->tcp_parent) {
1087                 c->tcp_parent->cur_tcp_count--;
1088                 c->tcp_free = c->tcp_parent->tcp_free;
1089                 c->tcp_parent->tcp_free = c;
1090                 if(!c->tcp_free) {
1091                         /* re-enable listening on accept socket */
1092                         comm_point_start_listening(c->tcp_parent, -1, -1);
1093                 }
1094         }
1095         c->tcp_more_read_again = NULL;
1096         c->tcp_more_write_again = NULL;
1097 }
1098
1099 /** do the callback when writing is done */
1100 static void
1101 tcp_callback_writer(struct comm_point* c)
1102 {
1103         log_assert(c->type == comm_tcp);
1104         if(!c->tcp_write_and_read) {
1105                 sldns_buffer_clear(c->buffer);
1106                 c->tcp_byte_count = 0;
1107         }
1108         if(c->tcp_do_toggle_rw)
1109                 c->tcp_is_reading = 1;
1110         /* switch from listening(write) to listening(read) */
1111         if(c->tcp_req_info) {
1112                 tcp_req_info_handle_writedone(c->tcp_req_info);
1113         } else {
1114                 comm_point_stop_listening(c);
1115                 if(c->tcp_write_and_read) {
1116                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1117                         if( (*c->callback)(c, c->cb_arg, NETEVENT_PKT_WRITTEN,
1118                                 &c->repinfo) ) {
1119                                 comm_point_start_listening(c, -1,
1120                                         c->tcp_timeout_msec);
1121                         }
1122                 } else {
1123                         comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1124                 }
1125         }
1126 }
1127
1128 /** do the callback when reading is done */
1129 static void
1130 tcp_callback_reader(struct comm_point* c)
1131 {
1132         log_assert(c->type == comm_tcp || c->type == comm_local);
1133         sldns_buffer_flip(c->buffer);
1134         if(c->tcp_do_toggle_rw)
1135                 c->tcp_is_reading = 0;
1136         c->tcp_byte_count = 0;
1137         if(c->tcp_req_info) {
1138                 tcp_req_info_handle_readdone(c->tcp_req_info);
1139         } else {
1140                 if(c->type == comm_tcp)
1141                         comm_point_stop_listening(c);
1142                 fptr_ok(fptr_whitelist_comm_point(c->callback));
1143                 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1144                         comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1145                 }
1146         }
1147 }
1148
1149 #ifdef HAVE_SSL
1150 /** true if the ssl handshake error has to be squelched from the logs */
1151 int
1152 squelch_err_ssl_handshake(unsigned long err)
1153 {
1154         if(verbosity >= VERB_QUERY)
1155                 return 0; /* only squelch on low verbosity */
1156         /* this is very specific, we could filter on ERR_GET_REASON()
1157          * (the third element in ERR_PACK) */
1158         if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) ||
1159                 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) ||
1160                 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) ||
1161                 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE)
1162 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
1163                 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER)
1164 #endif
1165 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
1166                 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL)
1167                 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL)
1168 #  ifdef SSL_R_VERSION_TOO_LOW
1169                 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW)
1170 #  endif
1171 #endif
1172                 )
1173                 return 1;
1174         return 0;
1175 }
1176 #endif /* HAVE_SSL */
1177
1178 /** continue ssl handshake */
1179 #ifdef HAVE_SSL
1180 static int
1181 ssl_handshake(struct comm_point* c)
1182 {
1183         int r;
1184         if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
1185                 /* read condition satisfied back to writing */
1186                 comm_point_listen_for_rw(c, 1, 1);
1187                 c->ssl_shake_state = comm_ssl_shake_none;
1188                 return 1;
1189         }
1190         if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
1191                 /* write condition satisfied, back to reading */
1192                 comm_point_listen_for_rw(c, 1, 0);
1193                 c->ssl_shake_state = comm_ssl_shake_none;
1194                 return 1;
1195         }
1196
1197         ERR_clear_error();
1198         r = SSL_do_handshake(c->ssl);
1199         if(r != 1) {
1200                 int want = SSL_get_error(c->ssl, r);
1201                 if(want == SSL_ERROR_WANT_READ) {
1202                         if(c->ssl_shake_state == comm_ssl_shake_read)
1203                                 return 1;
1204                         c->ssl_shake_state = comm_ssl_shake_read;
1205                         comm_point_listen_for_rw(c, 1, 0);
1206                         return 1;
1207                 } else if(want == SSL_ERROR_WANT_WRITE) {
1208                         if(c->ssl_shake_state == comm_ssl_shake_write)
1209                                 return 1;
1210                         c->ssl_shake_state = comm_ssl_shake_write;
1211                         comm_point_listen_for_rw(c, 0, 1);
1212                         return 1;
1213                 } else if(r == 0) {
1214                         return 0; /* closed */
1215                 } else if(want == SSL_ERROR_SYSCALL) {
1216                         /* SYSCALL and errno==0 means closed uncleanly */
1217 #ifdef EPIPE
1218                         if(errno == EPIPE && verbosity < 2)
1219                                 return 0; /* silence 'broken pipe' */
1220 #endif
1221 #ifdef ECONNRESET
1222                         if(errno == ECONNRESET && verbosity < 2)
1223                                 return 0; /* silence reset by peer */
1224 #endif
1225                         if(errno != 0)
1226                                 log_err("SSL_handshake syscall: %s",
1227                                         strerror(errno));
1228                         return 0;
1229                 } else {
1230                         unsigned long err = ERR_get_error();
1231                         if(!squelch_err_ssl_handshake(err)) {
1232                                 log_crypto_err_code("ssl handshake failed", err);
1233                                 log_addr(VERB_OPS, "ssl handshake failed", &c->repinfo.addr,
1234                                         c->repinfo.addrlen);
1235                         }
1236                         return 0;
1237                 }
1238         }
1239         /* this is where peer verification could take place */
1240         if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
1241                 /* verification */
1242                 if(SSL_get_verify_result(c->ssl) == X509_V_OK) {
1243                         X509* x = SSL_get_peer_certificate(c->ssl);
1244                         if(!x) {
1245                                 log_addr(VERB_ALGO, "SSL connection failed: "
1246                                         "no certificate",
1247                                         &c->repinfo.addr, c->repinfo.addrlen);
1248                                 return 0;
1249                         }
1250                         log_cert(VERB_ALGO, "peer certificate", x);
1251 #ifdef HAVE_SSL_GET0_PEERNAME
1252                         if(SSL_get0_peername(c->ssl)) {
1253                                 char buf[255];
1254                                 snprintf(buf, sizeof(buf), "SSL connection "
1255                                         "to %s authenticated",
1256                                         SSL_get0_peername(c->ssl));
1257                                 log_addr(VERB_ALGO, buf, &c->repinfo.addr,
1258                                         c->repinfo.addrlen);
1259                         } else {
1260 #endif
1261                                 log_addr(VERB_ALGO, "SSL connection "
1262                                         "authenticated", &c->repinfo.addr,
1263                                         c->repinfo.addrlen);
1264 #ifdef HAVE_SSL_GET0_PEERNAME
1265                         }
1266 #endif
1267                         X509_free(x);
1268                 } else {
1269                         X509* x = SSL_get_peer_certificate(c->ssl);
1270                         if(x) {
1271                                 log_cert(VERB_ALGO, "peer certificate", x);
1272                                 X509_free(x);
1273                         }
1274                         log_addr(VERB_ALGO, "SSL connection failed: "
1275                                 "failed to authenticate",
1276                                 &c->repinfo.addr, c->repinfo.addrlen);
1277                         return 0;
1278                 }
1279         } else {
1280                 /* unauthenticated, the verify peer flag was not set
1281                  * in c->ssl when the ssl object was created from ssl_ctx */
1282                 log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr,
1283                         c->repinfo.addrlen);
1284         }
1285
1286         /* check if http2 use is negotiated */
1287         if(c->type == comm_http && c->h2_session) {
1288                 const unsigned char *alpn;
1289                 unsigned int alpnlen = 0;
1290                 SSL_get0_alpn_selected(c->ssl, &alpn, &alpnlen);
1291                 if(alpnlen == 2 && memcmp("h2", alpn, 2) == 0) {
1292                         /* connection upgraded to HTTP2 */
1293                         c->tcp_do_toggle_rw = 0;
1294                         c->use_h2 = 1;
1295                 }
1296         }
1297
1298         /* setup listen rw correctly */
1299         if(c->tcp_is_reading) {
1300                 if(c->ssl_shake_state != comm_ssl_shake_read)
1301                         comm_point_listen_for_rw(c, 1, 0);
1302         } else {
1303                 comm_point_listen_for_rw(c, 1, 1);
1304         }
1305         c->ssl_shake_state = comm_ssl_shake_none;
1306         return 1;
1307 }
1308 #endif /* HAVE_SSL */
1309
#ifdef HAVE_SSL
/**
 * Handle an SSL_read() call that returned r <= 0.
 * @param c: comm point the read was attempted on.
 * @param r: return value of SSL_read().
 * @return the value that ssl_handle_read() has to return: 1 if the read
 *	must be retried later (the event is adjusted for that), 0 on close
 *	or error. After a clean SSL shutdown with a tcp_req_info present,
 *	the result of tcp_req_info_handle_read_close() is returned.
 */
static int
ssl_handle_read_fail(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0; /* shutdown, closed */
	}
	if(want == SSL_ERROR_WANT_READ) {
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
		return 1; /* read more later */
	}
	if(want == SSL_ERROR_WANT_WRITE) {
		/* the read needs to write first, wait for writability */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	}
	if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Continues the handshake if it is not finished, then reads the two byte
 * length prefix followed by the message of that length into c->buffer.
 * When the message is complete tcp_callback_reader() is called.
 * @param c: the comm point with the SSL connection.
 * @return 0 on error or closed connection, 1 otherwise. Without HAVE_SSL
 *	this always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake not finished yet, continue on a later event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		ERR_clear_error();
		r = SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
			c->tcp_byte_count), (int)(sizeof(uint16_t) -
			c->tcp_byte_count));
		if(r <= 0)
			return ssl_handle_read_fail(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* length prefix is complete, check it and set the limit */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	if(sldns_buffer_remaining(c->buffer) > 0) {
		/* read (more of) the message itself */
		ERR_clear_error();
		r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
		if(r <= 0)
			return ssl_handle_read_fail(c, r);
		sldns_buffer_skip(c->buffer, (ssize_t)r);
	}
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1412
1413 /** ssl write callback on TCP */
1414 static int
1415 ssl_handle_write(struct comm_point* c)
1416 {
1417 #ifdef HAVE_SSL
1418         int r;
1419         if(c->ssl_shake_state != comm_ssl_shake_none) {
1420                 if(!ssl_handshake(c))
1421                         return 0;
1422                 if(c->ssl_shake_state != comm_ssl_shake_none)
1423                         return 1;
1424         }
1425         /* ignore return, if fails we may simply block */
1426         (void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
1427         if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
1428                 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(c->buffer));
1429                 ERR_clear_error();
1430                 if(c->tcp_write_and_read) {
1431                         if(c->tcp_write_pkt_len + 2 < LDNS_RR_BUF_SIZE) {
1432                                 /* combine the tcp length and the query for
1433                                  * write, this emulates writev */
1434                                 uint8_t buf[LDNS_RR_BUF_SIZE];
1435                                 memmove(buf, &len, sizeof(uint16_t));
1436                                 memmove(buf+sizeof(uint16_t),
1437                                         c->tcp_write_pkt,
1438                                         c->tcp_write_pkt_len);
1439                                 r = SSL_write(c->ssl,
1440                                         (void*)(buf+c->tcp_write_byte_count),
1441                                         c->tcp_write_pkt_len + 2 -
1442                                         c->tcp_write_byte_count);
1443                         } else {
1444                                 r = SSL_write(c->ssl,
1445                                         (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
1446                                         (int)(sizeof(uint16_t)-c->tcp_write_byte_count));
1447                         }
1448                 } else if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
1449                         LDNS_RR_BUF_SIZE) {
1450                         /* combine the tcp length and the query for write,
1451                          * this emulates writev */
1452                         uint8_t buf[LDNS_RR_BUF_SIZE];
1453                         memmove(buf, &len, sizeof(uint16_t));
1454                         memmove(buf+sizeof(uint16_t),
1455                                 sldns_buffer_current(c->buffer),
1456                                 sldns_buffer_remaining(c->buffer));
1457                         r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
1458                                 (int)(sizeof(uint16_t)+
1459                                 sldns_buffer_remaining(c->buffer)
1460                                 - c->tcp_byte_count));
1461                 } else {
1462                         r = SSL_write(c->ssl,
1463                                 (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1464                                 (int)(sizeof(uint16_t)-c->tcp_byte_count));
1465                 }
1466                 if(r <= 0) {
1467                         int want = SSL_get_error(c->ssl, r);
1468                         if(want == SSL_ERROR_ZERO_RETURN) {
1469                                 return 0; /* closed */
1470                         } else if(want == SSL_ERROR_WANT_READ) {
1471                                 c->ssl_shake_state = comm_ssl_shake_hs_read;
1472                                 comm_point_listen_for_rw(c, 1, 0);
1473                                 return 1; /* wait for read condition */
1474                         } else if(want == SSL_ERROR_WANT_WRITE) {
1475                                 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1476                                 return 1; /* write more later */
1477                         } else if(want == SSL_ERROR_SYSCALL) {
1478 #ifdef EPIPE
1479                                 if(errno == EPIPE && verbosity < 2)
1480                                         return 0; /* silence 'broken pipe' */
1481 #endif
1482                                 if(errno != 0)
1483                                         log_err("SSL_write syscall: %s",
1484                                                 strerror(errno));
1485                                 return 0;
1486                         }
1487                         log_crypto_err("could not SSL_write");
1488                         return 0;
1489                 }
1490                 if(c->tcp_write_and_read) {
1491                         c->tcp_write_byte_count += r;
1492                         if(c->tcp_write_byte_count < sizeof(uint16_t))
1493                                 return 1;
1494                 } else {
1495                         c->tcp_byte_count += r;
1496                         if(c->tcp_byte_count < sizeof(uint16_t))
1497                                 return 1;
1498                         sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1499                                 sizeof(uint16_t));
1500                 }
1501                 if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
1502                         tcp_callback_writer(c);
1503                         return 1;
1504                 }
1505         }
1506         log_assert(c->tcp_write_and_read || sldns_buffer_remaining(c->buffer) > 0);
1507         log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
1508         ERR_clear_error();
1509         if(c->tcp_write_and_read) {
1510                 r = SSL_write(c->ssl, (void*)(c->tcp_write_pkt + c->tcp_write_byte_count - 2),
1511                         (int)(c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count));
1512         } else {
1513                 r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
1514                         (int)sldns_buffer_remaining(c->buffer));
1515         }
1516         if(r <= 0) {
1517                 int want = SSL_get_error(c->ssl, r);
1518                 if(want == SSL_ERROR_ZERO_RETURN) {
1519                         return 0; /* closed */
1520                 } else if(want == SSL_ERROR_WANT_READ) {
1521                         c->ssl_shake_state = comm_ssl_shake_hs_read;
1522                         comm_point_listen_for_rw(c, 1, 0);
1523                         return 1; /* wait for read condition */
1524                 } else if(want == SSL_ERROR_WANT_WRITE) {
1525                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1526                         return 1; /* write more later */
1527                 } else if(want == SSL_ERROR_SYSCALL) {
1528 #ifdef EPIPE
1529                         if(errno == EPIPE && verbosity < 2)
1530                                 return 0; /* silence 'broken pipe' */
1531 #endif
1532                         if(errno != 0)
1533                                 log_err("SSL_write syscall: %s",
1534                                         strerror(errno));
1535                         return 0;
1536                 }
1537                 log_crypto_err("could not SSL_write");
1538                 return 0;
1539         }
1540         if(c->tcp_write_and_read) {
1541                 c->tcp_write_byte_count += r;
1542         } else {
1543                 sldns_buffer_skip(c->buffer, (ssize_t)r);
1544         }
1545
1546         if((!c->tcp_write_and_read && sldns_buffer_remaining(c->buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
1547                 tcp_callback_writer(c);
1548         }
1549         return 1;
1550 #else
1551         (void)c;
1552         return 0;
1553 #endif /* HAVE_SSL */
1554 }
1555
1556 /** handle ssl tcp connection with dns contents */
1557 static int
1558 ssl_handle_it(struct comm_point* c, int is_write)
1559 {
1560         /* handle case where renegotiation wants read during write call
1561          * or write during read calls */
1562         if(is_write && c->ssl_shake_state == comm_ssl_shake_hs_write)
1563                 return ssl_handle_read(c);
1564         else if(!is_write && c->ssl_shake_state == comm_ssl_shake_hs_read)
1565                 return ssl_handle_write(c);
1566         /* handle read events for read operation and write events for a
1567          * write operation */
1568         else if(!is_write)
1569                 return ssl_handle_read(c);
1570         return ssl_handle_write(c);
1571 }
1572
1573 /** Handle tcp reading callback. 
1574  * @param fd: file descriptor of socket.
1575  * @param c: comm point to read from into buffer.
1576  * @param short_ok: if true, very short packets are OK (for comm_local).
1577  * @return: 0 on error 
1578  */
1579 static int
1580 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1581 {
1582         ssize_t r;
1583         log_assert(c->type == comm_tcp || c->type == comm_local);
1584         if(c->ssl)
1585                 return ssl_handle_it(c, 0);
1586         if(!c->tcp_is_reading && !c->tcp_write_and_read)
1587                 return 0;
1588
1589         log_assert(fd != -1);
1590         if(c->tcp_byte_count < sizeof(uint16_t)) {
1591                 /* read length bytes */
1592                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1593                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1594                 if(r == 0) {
1595                         if(c->tcp_req_info)
1596                                 return tcp_req_info_handle_read_close(c->tcp_req_info);
1597                         return 0;
1598                 } else if(r == -1) {
1599 #ifndef USE_WINSOCK
1600                         if(errno == EINTR || errno == EAGAIN)
1601                                 return 1;
1602 #ifdef ECONNRESET
1603                         if(errno == ECONNRESET && verbosity < 2)
1604                                 return 0; /* silence reset by peer */
1605 #endif
1606 #else /* USE_WINSOCK */
1607                         if(WSAGetLastError() == WSAECONNRESET)
1608                                 return 0;
1609                         if(WSAGetLastError() == WSAEINPROGRESS)
1610                                 return 1;
1611                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1612                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1613                                         UB_EV_READ);
1614                                 return 1;
1615                         }
1616 #endif
1617                         log_err_addr("read (in tcp s)", sock_strerror(errno),
1618                                 &c->repinfo.addr, c->repinfo.addrlen);
1619                         return 0;
1620                 } 
1621                 c->tcp_byte_count += r;
1622                 if(c->tcp_byte_count != sizeof(uint16_t))
1623                         return 1;
1624                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1625                         sldns_buffer_capacity(c->buffer)) {
1626                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1627                         return 0;
1628                 }
1629                 sldns_buffer_set_limit(c->buffer, 
1630                         sldns_buffer_read_u16_at(c->buffer, 0));
1631                 if(!short_ok && 
1632                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1633                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1634                         return 0;
1635                 }
1636                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1637                         (int)sldns_buffer_limit(c->buffer));
1638         }
1639
1640         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1641         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1642                 sldns_buffer_remaining(c->buffer), 0);
1643         if(r == 0) {
1644                 if(c->tcp_req_info)
1645                         return tcp_req_info_handle_read_close(c->tcp_req_info);
1646                 return 0;
1647         } else if(r == -1) {
1648 #ifndef USE_WINSOCK
1649                 if(errno == EINTR || errno == EAGAIN)
1650                         return 1;
1651 #else /* USE_WINSOCK */
1652                 if(WSAGetLastError() == WSAECONNRESET)
1653                         return 0;
1654                 if(WSAGetLastError() == WSAEINPROGRESS)
1655                         return 1;
1656                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1657                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1658                         return 1;
1659                 }
1660 #endif
1661                 log_err_addr("read (in tcp r)", sock_strerror(errno),
1662                         &c->repinfo.addr, c->repinfo.addrlen);
1663                 return 0;
1664         }
1665         sldns_buffer_skip(c->buffer, r);
1666         if(sldns_buffer_remaining(c->buffer) <= 0) {
1667                 tcp_callback_reader(c);
1668         }
1669         return 1;
1670 }
1671
1672 /** 
1673  * Handle tcp writing callback. 
1674  * @param fd: file descriptor of socket.
1675  * @param c: comm point to write buffer out of.
1676  * @return: 0 on error
1677  */
1678 static int
1679 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1680 {
1681         ssize_t r;
1682         struct sldns_buffer *buffer;
1683         log_assert(c->type == comm_tcp);
1684 #ifdef USE_DNSCRYPT
1685         buffer = c->dnscrypt_buffer;
1686 #else
1687         buffer = c->buffer;
1688 #endif
1689         if(c->tcp_is_reading && !c->ssl && !c->tcp_write_and_read)
1690                 return 0;
1691         log_assert(fd != -1);
1692         if(((!c->tcp_write_and_read && c->tcp_byte_count == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == 0)) && c->tcp_check_nb_connect) {
1693                 /* check for pending error from nonblocking connect */
1694                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1695                 int error = 0;
1696                 socklen_t len = (socklen_t)sizeof(error);
1697                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1698                         &len) < 0){
1699 #ifndef USE_WINSOCK
1700                         error = errno; /* on solaris errno is error */
1701 #else /* USE_WINSOCK */
1702                         error = WSAGetLastError();
1703 #endif
1704                 }
1705 #ifndef USE_WINSOCK
1706 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1707                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1708                         return 1; /* try again later */
1709                 else
1710 #endif
1711                 if(error != 0 && verbosity < 2)
1712                         return 0; /* silence lots of chatter in the logs */
1713                 else if(error != 0) {
1714                         log_err_addr("tcp connect", strerror(error),
1715                                 &c->repinfo.addr, c->repinfo.addrlen);
1716 #else /* USE_WINSOCK */
1717                 /* examine error */
1718                 if(error == WSAEINPROGRESS)
1719                         return 1;
1720                 else if(error == WSAEWOULDBLOCK) {
1721                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1722                         return 1;
1723                 } else if(error != 0 && verbosity < 2)
1724                         return 0;
1725                 else if(error != 0) {
1726                         log_err_addr("tcp connect", wsa_strerror(error),
1727                                 &c->repinfo.addr, c->repinfo.addrlen);
1728 #endif /* USE_WINSOCK */
1729                         return 0;
1730                 }
1731         }
1732         if(c->ssl)
1733                 return ssl_handle_it(c, 1);
1734
1735 #ifdef USE_MSG_FASTOPEN
1736         /* Only try this on first use of a connection that uses tfo, 
1737            otherwise fall through to normal write */
1738         /* Also, TFO support on WINDOWS not implemented at the moment */
1739         if(c->tcp_do_fastopen == 1) {
1740                 /* this form of sendmsg() does both a connect() and send() so need to
1741                    look for various flavours of error*/
1742                 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
1743                 struct msghdr msg;
1744                 struct iovec iov[2];
1745                 c->tcp_do_fastopen = 0;
1746                 memset(&msg, 0, sizeof(msg));
1747                 if(c->tcp_write_and_read) {
1748                         iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
1749                         iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
1750                         iov[1].iov_base = c->tcp_write_pkt;
1751                         iov[1].iov_len = c->tcp_write_pkt_len;
1752                 } else {
1753                         iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1754                         iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1755                         iov[1].iov_base = sldns_buffer_begin(buffer);
1756                         iov[1].iov_len = sldns_buffer_limit(buffer);
1757                 }
1758                 log_assert(iov[0].iov_len > 0);
1759                 msg.msg_name = &c->repinfo.addr;
1760                 msg.msg_namelen = c->repinfo.addrlen;
1761                 msg.msg_iov = iov;
1762                 msg.msg_iovlen = 2;
1763                 r = sendmsg(fd, &msg, MSG_FASTOPEN);
1764                 if (r == -1) {
1765 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1766                         /* Handshake is underway, maybe because no TFO cookie available.
1767                            Come back to write the message*/
1768                         if(errno == EINPROGRESS || errno == EWOULDBLOCK)
1769                                 return 1;
1770 #endif
1771                         if(errno == EINTR || errno == EAGAIN)
1772                                 return 1;
1773                         /* Not handling EISCONN here as shouldn't ever hit that case.*/
1774                         if(errno != EPIPE && errno != 0 && verbosity < 2)
1775                                 return 0; /* silence lots of chatter in the logs */
1776                         if(errno != EPIPE && errno != 0) {
1777                                 log_err_addr("tcp sendmsg", strerror(errno),
1778                                         &c->repinfo.addr, c->repinfo.addrlen);
1779                                 return 0;
1780                         }
1781                         /* fallthrough to nonFASTOPEN
1782                          * (MSG_FASTOPEN on Linux 3 produces EPIPE)
1783                          * we need to perform connect() */
1784                         if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) {
1785 #ifdef EINPROGRESS
1786                                 if(errno == EINPROGRESS)
1787                                         return 1; /* wait until connect done*/
1788 #endif
1789 #ifdef USE_WINSOCK
1790                                 if(WSAGetLastError() == WSAEINPROGRESS ||
1791                                         WSAGetLastError() == WSAEWOULDBLOCK)
1792                                         return 1; /* wait until connect done*/
1793 #endif
1794                                 if(tcp_connect_errno_needs_log(
1795                                         (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) {
1796                                         log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
1797                                                 strerror(errno), &c->repinfo.addr, c->repinfo.addrlen);
1798                                 }
1799                                 return 0;
1800                         }
1801
1802                 } else {
1803                         if(c->tcp_write_and_read) {
1804                                 c->tcp_write_byte_count += r;
1805                                 if(c->tcp_write_byte_count < sizeof(uint16_t))
1806                                         return 1;
1807                         } else {
1808                                 c->tcp_byte_count += r;
1809                                 if(c->tcp_byte_count < sizeof(uint16_t))
1810                                         return 1;
1811                                 sldns_buffer_set_position(buffer, c->tcp_byte_count -
1812                                         sizeof(uint16_t));
1813                         }
1814                         if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
1815                                 tcp_callback_writer(c);
1816                                 return 1;
1817                         }
1818                 }
1819         }
1820 #endif /* USE_MSG_FASTOPEN */
1821
1822         if((c->tcp_write_and_read?c->tcp_write_byte_count:c->tcp_byte_count) < sizeof(uint16_t)) {
1823                 uint16_t len = htons(c->tcp_write_and_read?c->tcp_write_pkt_len:sldns_buffer_limit(buffer));
1824 #ifdef HAVE_WRITEV
1825                 struct iovec iov[2];
1826                 if(c->tcp_write_and_read) {
1827                         iov[0].iov_base = (uint8_t*)&len + c->tcp_write_byte_count;
1828                         iov[0].iov_len = sizeof(uint16_t) - c->tcp_write_byte_count;
1829                         iov[1].iov_base = c->tcp_write_pkt;
1830                         iov[1].iov_len = c->tcp_write_pkt_len;
1831                 } else {
1832                         iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1833                         iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1834                         iov[1].iov_base = sldns_buffer_begin(buffer);
1835                         iov[1].iov_len = sldns_buffer_limit(buffer);
1836                 }
1837                 log_assert(iov[0].iov_len > 0);
1838                 r = writev(fd, iov, 2);
1839 #else /* HAVE_WRITEV */
1840                 if(c->tcp_write_and_read) {
1841                         r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_write_byte_count),
1842                                 sizeof(uint16_t)-c->tcp_write_byte_count, 0);
1843                 } else {
1844                         r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1845                                 sizeof(uint16_t)-c->tcp_byte_count, 0);
1846                 }
1847 #endif /* HAVE_WRITEV */
1848                 if(r == -1) {
1849 #ifndef USE_WINSOCK
1850 #  ifdef EPIPE
1851                         if(errno == EPIPE && verbosity < 2)
1852                                 return 0; /* silence 'broken pipe' */
1853   #endif
1854                         if(errno == EINTR || errno == EAGAIN)
1855                                 return 1;
1856 #ifdef ECONNRESET
1857                         if(errno == ECONNRESET && verbosity < 2)
1858                                 return 0; /* silence reset by peer */
1859 #endif
1860 #  ifdef HAVE_WRITEV
1861                         log_err_addr("tcp writev", strerror(errno),
1862                                 &c->repinfo.addr, c->repinfo.addrlen);
1863 #  else /* HAVE_WRITEV */
1864                         log_err_addr("tcp send s", strerror(errno),
1865                                 &c->repinfo.addr, c->repinfo.addrlen);
1866 #  endif /* HAVE_WRITEV */
1867 #else
1868                         if(WSAGetLastError() == WSAENOTCONN)
1869                                 return 1;
1870                         if(WSAGetLastError() == WSAEINPROGRESS)
1871                                 return 1;
1872                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1873                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1874                                         UB_EV_WRITE);
1875                                 return 1; 
1876                         }
1877                         if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
1878                                 return 0; /* silence reset by peer */
1879                         log_err_addr("tcp send s",
1880                                 wsa_strerror(WSAGetLastError()),
1881                                 &c->repinfo.addr, c->repinfo.addrlen);
1882 #endif
1883                         return 0;
1884                 }
1885                 if(c->tcp_write_and_read) {
1886                         c->tcp_write_byte_count += r;
1887                         if(c->tcp_write_byte_count < sizeof(uint16_t))
1888                                 return 1;
1889                 } else {
1890                         c->tcp_byte_count += r;
1891                         if(c->tcp_byte_count < sizeof(uint16_t))
1892                                 return 1;
1893                         sldns_buffer_set_position(buffer, c->tcp_byte_count -
1894                                 sizeof(uint16_t));
1895                 }
1896                 if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
1897                         tcp_callback_writer(c);
1898                         return 1;
1899                 }
1900         }
1901         log_assert(c->tcp_write_and_read || sldns_buffer_remaining(buffer) > 0);
1902         log_assert(!c->tcp_write_and_read || c->tcp_write_byte_count < c->tcp_write_pkt_len + 2);
1903         if(c->tcp_write_and_read) {
1904                 r = send(fd, (void*)c->tcp_write_pkt + c->tcp_write_byte_count - 2,
1905                         c->tcp_write_pkt_len + 2 - c->tcp_write_byte_count, 0);
1906         } else {
1907                 r = send(fd, (void*)sldns_buffer_current(buffer),
1908                         sldns_buffer_remaining(buffer), 0);
1909         }
1910         if(r == -1) {
1911 #ifndef USE_WINSOCK
1912                 if(errno == EINTR || errno == EAGAIN)
1913                         return 1;
1914 #ifdef ECONNRESET
1915                 if(errno == ECONNRESET && verbosity < 2)
1916                         return 0; /* silence reset by peer */
1917 #endif
1918 #else
1919                 if(WSAGetLastError() == WSAEINPROGRESS)
1920                         return 1;
1921                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1922                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1923                         return 1; 
1924                 }
1925                 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
1926                         return 0; /* silence reset by peer */
1927 #endif
1928                 log_err_addr("tcp send r", sock_strerror(errno),
1929                         &c->repinfo.addr, c->repinfo.addrlen);
1930                 return 0;
1931         }
1932         if(c->tcp_write_and_read) {
1933                 c->tcp_write_byte_count += r;
1934         } else {
1935                 sldns_buffer_skip(buffer, r);
1936         }
1937
1938         if((!c->tcp_write_and_read && sldns_buffer_remaining(buffer) == 0) || (c->tcp_write_and_read && c->tcp_write_byte_count == c->tcp_write_pkt_len + 2)) {
1939                 tcp_callback_writer(c);
1940         }
1941         
1942         return 1;
1943 }
1944
1945 /** read again to drain buffers when there could be more to read */
1946 static void
1947 tcp_req_info_read_again(int fd, struct comm_point* c)
1948 {
1949         while(c->tcp_req_info->read_again) {
1950                 int r;
1951                 c->tcp_req_info->read_again = 0;
1952                 if(c->tcp_is_reading)
1953                         r = comm_point_tcp_handle_read(fd, c, 0);
1954                 else    r = comm_point_tcp_handle_write(fd, c);
1955                 if(!r) {
1956                         reclaim_tcp_handler(c);
1957                         if(!c->tcp_do_close) {
1958                                 fptr_ok(fptr_whitelist_comm_point(
1959                                         c->callback));
1960                                 (void)(*c->callback)(c, c->cb_arg, 
1961                                         NETEVENT_CLOSED, NULL);
1962                         }
1963                         return;
1964                 }
1965         }
1966 }
1967
1968 /** read again to drain buffers when there could be more to read */
1969 static void
1970 tcp_more_read_again(int fd, struct comm_point* c)
1971 {
1972         /* if the packet is done, but another one could be waiting on
1973          * the connection, the callback signals this, and we try again */
1974         /* this continues until the read routines get EAGAIN or so,
1975          * and thus does not call the callback, and the bool is 0 */
1976         int* moreread = c->tcp_more_read_again;
1977         while(moreread && *moreread) {
1978                 *moreread = 0;
1979                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1980                         reclaim_tcp_handler(c);
1981                         if(!c->tcp_do_close) {
1982                                 fptr_ok(fptr_whitelist_comm_point(
1983                                         c->callback));
1984                                 (void)(*c->callback)(c, c->cb_arg,
1985                                         NETEVENT_CLOSED, NULL);
1986                         }
1987                         return;
1988                 }
1989         }
1990 }
1991
1992 /** write again to fill up when there could be more to write */
1993 static void
1994 tcp_more_write_again(int fd, struct comm_point* c)
1995 {
1996         /* if the packet is done, but another is waiting to be written,
1997          * the callback signals it and we try again. */
1998         /* this continues until the write routines get EAGAIN or so,
1999          * and thus does not call the callback, and the bool is 0 */
2000         int* morewrite = c->tcp_more_write_again;
2001         while(morewrite && *morewrite) {
2002                 *morewrite = 0;
2003                 if(!comm_point_tcp_handle_write(fd, c)) {
2004                         reclaim_tcp_handler(c);
2005                         if(!c->tcp_do_close) {
2006                                 fptr_ok(fptr_whitelist_comm_point(
2007                                         c->callback));
2008                                 (void)(*c->callback)(c, c->cb_arg,
2009                                         NETEVENT_CLOSED, NULL);
2010                         }
2011                         return;
2012                 }
2013         }
2014 }
2015
2016 void 
2017 comm_point_tcp_handle_callback(int fd, short event, void* arg)
2018 {
2019         struct comm_point* c = (struct comm_point*)arg;
2020         log_assert(c->type == comm_tcp);
2021         ub_comm_base_now(c->ev->base);
2022
2023 #ifdef USE_DNSCRYPT
2024         /* Initialize if this is a dnscrypt socket */
2025         if(c->tcp_parent) {
2026                 c->dnscrypt = c->tcp_parent->dnscrypt;
2027         }
2028         if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
2029                 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
2030                 if(!c->dnscrypt_buffer) {
2031                         log_err("Could not allocate dnscrypt buffer");
2032                         reclaim_tcp_handler(c);
2033                         if(!c->tcp_do_close) {
2034                                 fptr_ok(fptr_whitelist_comm_point(
2035                                         c->callback));
2036                                 (void)(*c->callback)(c, c->cb_arg,
2037                                         NETEVENT_CLOSED, NULL);
2038                         }
2039                         return;
2040                 }
2041         }
2042 #endif
2043
2044         if(event&UB_EV_TIMEOUT) {
2045                 verbose(VERB_QUERY, "tcp took too long, dropped");
2046                 reclaim_tcp_handler(c);
2047                 if(!c->tcp_do_close) {
2048                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2049                         (void)(*c->callback)(c, c->cb_arg,
2050                                 NETEVENT_TIMEOUT, NULL);
2051                 }
2052                 return;
2053         }
2054         if(event&UB_EV_READ
2055 #ifdef USE_MSG_FASTOPEN
2056                 && !(c->tcp_do_fastopen && (event&UB_EV_WRITE))
2057 #endif
2058                 ) {
2059                 int has_tcpq = (c->tcp_req_info != NULL);
2060                 int* moreread = c->tcp_more_read_again;
2061                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
2062                         reclaim_tcp_handler(c);
2063                         if(!c->tcp_do_close) {
2064                                 fptr_ok(fptr_whitelist_comm_point(
2065                                         c->callback));
2066                                 (void)(*c->callback)(c, c->cb_arg,
2067                                         NETEVENT_CLOSED, NULL);
2068                         }
2069                         return;
2070                 }
2071                 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again)
2072                         tcp_req_info_read_again(fd, c);
2073                 if(moreread && *moreread)
2074                         tcp_more_read_again(fd, c);
2075                 return;
2076         }
2077         if(event&UB_EV_WRITE) {
2078                 int has_tcpq = (c->tcp_req_info != NULL);
2079                 int* morewrite = c->tcp_more_write_again;
2080                 if(!comm_point_tcp_handle_write(fd, c)) {
2081                         reclaim_tcp_handler(c);
2082                         if(!c->tcp_do_close) {
2083                                 fptr_ok(fptr_whitelist_comm_point(
2084                                         c->callback));
2085                                 (void)(*c->callback)(c, c->cb_arg,
2086                                         NETEVENT_CLOSED, NULL);
2087                         }
2088                         return;
2089                 }
2090                 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again)
2091                         tcp_req_info_read_again(fd, c);
2092                 if(morewrite && *morewrite)
2093                         tcp_more_write_again(fd, c);
2094                 return;
2095         }
2096         log_err("Ignored event %d for tcphdl.", event);
2097 }
2098
2099 /** Make http handler free for next assignment */
2100 static void
2101 reclaim_http_handler(struct comm_point* c)
2102 {
2103         log_assert(c->type == comm_http);
2104         if(c->ssl) {
2105 #ifdef HAVE_SSL
2106                 SSL_shutdown(c->ssl);
2107                 SSL_free(c->ssl);
2108                 c->ssl = NULL;
2109 #endif
2110         }
2111         comm_point_close(c);
2112         if(c->tcp_parent) {
2113                 c->tcp_parent->cur_tcp_count--;
2114                 c->tcp_free = c->tcp_parent->tcp_free;
2115                 c->tcp_parent->tcp_free = c;
2116                 if(!c->tcp_free) {
2117                         /* re-enable listening on accept socket */
2118                         comm_point_start_listening(c->tcp_parent, -1, -1);
2119                 }
2120         }
2121 }
2122
/**
 * Read more data for http over SSL into the free space of c->buffer.
 * The buffer position is advanced by the number of bytes read.
 * @param c: http comm point with an SSL object.
 * @return 0 if the connection was shut down or failed (always 0 when
 *	built without SSL), 1 if data was read or the read has to be
 *	retried later.
 */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int nread;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	nread = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(nread > 0) {
		sldns_buffer_skip(c->buffer, (ssize_t)nread);
		return 1;
	}
	switch(SSL_get_error(c->ssl, nread)) {
	case SSL_ERROR_ZERO_RETURN:
		/* shutdown, closed */
		return 0;
	case SSL_ERROR_WANT_READ:
		/* read more later */
		return 1;
	case SSL_ERROR_WANT_WRITE:
		/* SSL needs to write first; wait for the write event */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
2163
2164 /** read more data for http */
2165 static int
2166 http_read_more(int fd, struct comm_point* c)
2167 {
2168         ssize_t r;
2169         log_assert(sldns_buffer_remaining(c->buffer) > 0);
2170         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
2171                 sldns_buffer_remaining(c->buffer), 0);
2172         if(r == 0) {
2173                 return 0;
2174         } else if(r == -1) {
2175 #ifndef USE_WINSOCK
2176                 if(errno == EINTR || errno == EAGAIN)
2177                         return 1;
2178 #else /* USE_WINSOCK */
2179                 if(WSAGetLastError() == WSAECONNRESET)
2180                         return 0;
2181                 if(WSAGetLastError() == WSAEINPROGRESS)
2182                         return 1;
2183                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2184                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
2185                         return 1;
2186                 }
2187 #endif
2188                 log_err_addr("read (in http r)", sock_strerror(errno),
2189                         &c->repinfo.addr, c->repinfo.addrlen);
2190                 return 0;
2191         }
2192         sldns_buffer_skip(c->buffer, r);
2193         return 1;
2194 }
2195
2196 /** return true if http header has been read (one line complete) */
2197 static int
2198 http_header_done(sldns_buffer* buf)
2199 {
2200         size_t i;
2201         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
2202                 /* there was a \r before the \n, but we ignore that */
2203                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
2204                         return 1;
2205         }
2206         return 0;
2207 }
2208
2209 /** return character string into buffer for header line, moves buffer
2210  * past that line and puts zero terminator into linefeed-newline */
2211 static char*
2212 http_header_line(sldns_buffer* buf)
2213 {
2214         char* result = (char*)sldns_buffer_current(buf);
2215         size_t i;
2216         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
2217                 /* terminate the string on the \r */
2218                 if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
2219                         sldns_buffer_write_u8_at(buf, i, 0);
2220                 /* terminate on the \n and skip past the it and done */
2221                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
2222                         sldns_buffer_write_u8_at(buf, i, 0);
2223                         sldns_buffer_set_position(buf, i+1);
2224                         return result;
2225                 }
2226         }
2227         return NULL;
2228 }
2229
2230 /** move unread buffer to start and clear rest for putting the rest into it */
2231 static void
2232 http_moveover_buffer(sldns_buffer* buf)
2233 {
2234         size_t pos = sldns_buffer_position(buf);
2235         size_t len = sldns_buffer_remaining(buf);
2236         sldns_buffer_clear(buf);
2237         memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
2238         sldns_buffer_set_position(buf, len);
2239 }
2240
2241 /** a http header is complete, process it */
2242 static int
2243 http_process_initial_header(struct comm_point* c)
2244 {
2245         char* line = http_header_line(c->buffer);
2246         if(!line) return 1;
2247         verbose(VERB_ALGO, "http header: %s", line);
2248         if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
2249                 /* check returncode */
2250                 if(line[9] != '2') {
2251                         verbose(VERB_ALGO, "http bad status %s", line+9);
2252                         return 0;
2253                 }
2254         } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
2255                 if(!c->http_is_chunked)
2256                         c->tcp_byte_count = (size_t)atoi(line+16);
2257         } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
2258                 c->tcp_byte_count = 0;
2259                 c->http_is_chunked = 1;
2260         } else if(line[0] == 0) {
2261                 /* end of initial headers */
2262                 c->http_in_headers = 0;
2263                 if(c->http_is_chunked)
2264                         c->http_in_chunk_headers = 1;
2265                 /* remove header text from front of buffer
2266                  * the buffer is going to be used to return the data segment
2267                  * itself and we don't want the header to get returned
2268                  * prepended with it */
2269                 http_moveover_buffer(c->buffer);
2270                 sldns_buffer_flip(c->buffer);
2271                 return 1;
2272         }
2273         /* ignore other headers */
2274         return 1;
2275 }
2276
2277 /** a chunk header is complete, process it, return 0=fail, 1=continue next
2278  * header line, 2=done with chunked transfer*/
2279 static int
2280 http_process_chunk_header(struct comm_point* c)
2281 {
2282         char* line = http_header_line(c->buffer);
2283         if(!line) return 1;
2284         if(c->http_in_chunk_headers == 3) {
2285                 verbose(VERB_ALGO, "http chunk trailer: %s", line);
2286                 /* are we done ? */
2287                 if(line[0] == 0 && c->tcp_byte_count == 0) {
2288                         /* callback of http reader when NETEVENT_DONE,
2289                          * end of data, with no data in buffer */
2290                         sldns_buffer_set_position(c->buffer, 0);
2291                         sldns_buffer_set_limit(c->buffer, 0);
2292                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2293                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
2294                         /* return that we are done */
2295                         return 2;
2296                 }
2297                 if(line[0] == 0) {
2298                         /* continue with header of the next chunk */
2299                         c->http_in_chunk_headers = 1;
2300                         /* remove header text from front of buffer */
2301                         http_moveover_buffer(c->buffer);
2302                         sldns_buffer_flip(c->buffer);
2303                         return 1;
2304                 }
2305                 /* ignore further trail headers */
2306                 return 1;
2307         }
2308         verbose(VERB_ALGO, "http chunk header: %s", line);
2309         if(c->http_in_chunk_headers == 1) {
2310                 /* read chunked start line */
2311                 char* end = NULL;
2312                 c->tcp_byte_count = (size_t)strtol(line, &end, 16);
2313                 if(end == line)
2314                         return 0;
2315                 c->http_in_chunk_headers = 0;
2316                 /* remove header text from front of buffer */
2317                 http_moveover_buffer(c->buffer);
2318                 sldns_buffer_flip(c->buffer);
2319                 if(c->tcp_byte_count == 0) {
2320                         /* done with chunks, process chunk_trailer lines */
2321                         c->http_in_chunk_headers = 3;
2322                 }
2323                 return 1;
2324         }
2325         /* ignore other headers */
2326         return 1;
2327 }
2328
2329 /** handle nonchunked data segment */
2330 static int
2331 http_nonchunk_segment(struct comm_point* c)
2332 {
2333         /* c->buffer at position..limit has new data we read in.
2334          * the buffer itself is full of nonchunked data.
2335          * we are looking to read tcp_byte_count more data
2336          * and then the transfer is done. */
2337         size_t remainbufferlen;
2338         size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
2339         if(c->tcp_byte_count <= got_now) {
2340                 /* done, this is the last data fragment */
2341                 c->http_stored = 0;
2342                 sldns_buffer_set_position(c->buffer, 0);
2343                 fptr_ok(fptr_whitelist_comm_point(c->callback));
2344                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
2345                 return 1;
2346         }
2347         c->tcp_byte_count -= got_now;
2348         /* if we have the buffer space,
2349          * read more data collected into the buffer */
2350         remainbufferlen = sldns_buffer_capacity(c->buffer) -
2351                 sldns_buffer_limit(c->buffer);
2352         if(remainbufferlen >= c->tcp_byte_count ||
2353                 remainbufferlen >= 2048) {
2354                 size_t total = sldns_buffer_limit(c->buffer);
2355                 sldns_buffer_clear(c->buffer);
2356                 sldns_buffer_set_position(c->buffer, total);
2357                 c->http_stored = total;
2358                 /* return and wait to read more */
2359                 return 1;
2360         }
2361         /* call callback with this data amount, then
2362          * wait for more */
2363         c->http_stored = 0;
2364         sldns_buffer_set_position(c->buffer, 0);
2365         fptr_ok(fptr_whitelist_comm_point(c->callback));
2366         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2367         /* c->callback has to buffer_clear(c->buffer). */
2368         /* return and wait to read more */
2369         return 1;
2370 }
2371
2372 /** handle nonchunked data segment, return 0=fail, 1=wait, 2=process more */
2373 static int
2374 http_chunked_segment(struct comm_point* c)
2375 {
2376         /* the c->buffer has from position..limit new data we read. */
2377         /* the current chunk has length tcp_byte_count.
2378          * once we read that read more chunk headers.
2379          */
2380         size_t remainbufferlen;
2381         size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
2382         if(c->tcp_byte_count <= got_now) {
2383                 /* the chunk has completed (with perhaps some extra data
2384                  * from next chunk header and next chunk) */
2385                 /* save too much info into temp buffer */
2386                 size_t fraglen;
2387                 struct comm_reply repinfo;
2388                 c->http_stored = 0;
2389                 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
2390                 sldns_buffer_clear(c->http_temp);
2391                 sldns_buffer_write(c->http_temp,
2392                         sldns_buffer_current(c->buffer),
2393                         sldns_buffer_remaining(c->buffer));
2394                 sldns_buffer_flip(c->http_temp);
2395
2396                 /* callback with this fragment */
2397                 fraglen = sldns_buffer_position(c->buffer);
2398                 sldns_buffer_set_position(c->buffer, 0);
2399                 sldns_buffer_set_limit(c->buffer, fraglen);
2400                 repinfo = c->repinfo;
2401                 fptr_ok(fptr_whitelist_comm_point(c->callback));
2402                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
2403                 /* c->callback has to buffer_clear(). */
2404
2405                 /* is commpoint deleted? */
2406                 if(!repinfo.c) {
2407                         return 1;
2408                 }
2409                 /* copy waiting info */
2410                 sldns_buffer_clear(c->buffer);
2411                 sldns_buffer_write(c->buffer,
2412                         sldns_buffer_begin(c->http_temp),
2413                         sldns_buffer_remaining(c->http_temp));
2414                 sldns_buffer_flip(c->buffer);
2415                 /* process end of chunk trailer header lines, until
2416                  * an empty line */
2417                 c->http_in_chunk_headers = 3;
2418                 /* process more data in buffer (if any) */
2419                 return 2;
2420         }
2421         c->tcp_byte_count -= got_now;
2422
2423         /* if we have the buffer space,
2424          * read more data collected into the buffer */
2425         remainbufferlen = sldns_buffer_capacity(c->buffer) -
2426                 sldns_buffer_limit(c->buffer);
2427         if(remainbufferlen >= c->tcp_byte_count ||
2428                 remainbufferlen >= 2048) {
2429                 size_t total = sldns_buffer_limit(c->buffer);
2430                 sldns_buffer_clear(c->buffer);
2431                 sldns_buffer_set_position(c->buffer, total);
2432                 c->http_stored = total;
2433                 /* return and wait to read more */
2434                 return 1;
2435         }
2436         
2437         /* callback of http reader for a new part of the data */
2438         c->http_stored = 0;
2439         sldns_buffer_set_position(c->buffer, 0);
2440         fptr_ok(fptr_whitelist_comm_point(c->callback));
2441         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2442         /* c->callback has to buffer_clear(c->buffer). */
2443         /* return and wait to read more */
2444         return 1;
2445 }
2446
2447 #ifdef HAVE_NGHTTP2
2448 /** Create new http2 session. Called when creating handling comm point. */
2449 struct http2_session* http2_session_create(struct comm_point* c)
2450 {
2451         struct http2_session* session = calloc(1, sizeof(*session));
2452         if(!session) {
2453                 log_err("malloc failure while creating http2 session");
2454                 return NULL;
2455         }
2456         session->c = c;
2457
2458         return session;
2459 }
2460 #endif
2461
/**
 * Delete http2 session. After closing connection or on error.
 * Deletes the nghttp2 callbacks object, if there is one, and frees the
 * session structure. Does nothing when built without nghttp2.
 * @param h2_session: session to delete.
 */
void http2_session_delete(struct http2_session* h2_session)
{
#ifndef HAVE_NGHTTP2
	(void)h2_session;
#else
	if(h2_session->callbacks != NULL)
		nghttp2_session_callbacks_del(h2_session->callbacks);
	free(h2_session);
#endif
}
2473
2474 #ifdef HAVE_NGHTTP2
2475 struct http2_stream* http2_stream_create(int32_t stream_id)
2476 {
2477         struct http2_stream* h2_stream = calloc(1, sizeof(*h2_stream));
2478         if(!h2_stream) {
2479                 log_err("malloc failure while creating http2 stream");
2480                 return NULL;
2481         }
2482         h2_stream->stream_id = stream_id;
2483         return h2_stream;
2484 }
2485
2486 /** Delete http2 stream. After session delete or stream close callback */
2487 static void http2_stream_delete(struct http2_session* h2_session,
2488         struct http2_stream* h2_stream)
2489 {
2490         if(h2_stream->mesh_state) {
2491                 mesh_state_remove_reply(h2_stream->mesh, h2_stream->mesh_state,
2492                         h2_session->c);
2493                 h2_stream->mesh_state = NULL;
2494         }
2495         http2_req_stream_clear(h2_stream);
2496         free(h2_stream);
2497 }
2498 #endif
2499
2500 void http2_stream_add_meshstate(struct http2_stream* h2_stream,
2501         struct mesh_area* mesh, struct mesh_state* m)
2502 {
2503         h2_stream->mesh = mesh;
2504         h2_stream->mesh_state = m;
2505 }
2506
/**
 * Delete http2 session server. After closing connection.
 * Deletes the nghttp2 session and all streams in the session's list, and
 * resets the drop flags and the comm point's stream reference. The
 * http2_session structure itself is not freed here. Does nothing when
 * built without nghttp2.
 * @param h2_session: session to clean up.
 */
static void http2_session_server_delete(struct http2_session* h2_session)
{
#ifdef HAVE_NGHTTP2
	struct http2_stream* cur;
	nghttp2_session_del(h2_session->session); /* NULL input is fine */
	h2_session->session = NULL;
	cur = h2_session->first_stream;
	while(cur != NULL) {
		/* pick up the next pointer before the stream is freed */
		struct http2_stream* following = cur->next;
		http2_stream_delete(h2_session, cur);
		cur = following;
	}
	h2_session->first_stream = NULL;
	h2_session->is_drop = 0;
	h2_session->postpone_drop = 0;
	h2_session->c->h2_stream = NULL;
#endif
	(void)h2_session;
}
2526
2527 #ifdef HAVE_NGHTTP2
2528 void http2_session_add_stream(struct http2_session* h2_session,
2529         struct http2_stream* h2_stream)
2530 {
2531         if(h2_session->first_stream)
2532                 h2_session->first_stream->prev = h2_stream;
2533         h2_stream->next = h2_session->first_stream;
2534         h2_session->first_stream = h2_stream;
2535 }
2536
2537 /** remove stream from session linked list. After stream close callback or
2538  * closing connection */
2539 void http2_session_remove_stream(struct http2_session* h2_session,
2540         struct http2_stream* h2_stream)
2541 {
2542         if(h2_stream->prev)
2543                 h2_stream->prev->next = h2_stream->next;
2544         else
2545                 h2_session->first_stream = h2_stream->next;
2546         if(h2_stream->next)
2547                 h2_stream->next->prev = h2_stream->prev;
2548
2549 }
2550
2551 int http2_stream_close_cb(nghttp2_session* ATTR_UNUSED(session),
2552         int32_t stream_id, uint32_t ATTR_UNUSED(error_code), void* cb_arg)
2553 {
2554         struct http2_stream* h2_stream;
2555         struct http2_session* h2_session = (struct http2_session*)cb_arg;
2556         if(!(h2_stream = nghttp2_session_get_stream_user_data(
2557                 h2_session->session, stream_id))) {
2558                 return 0;
2559         }
2560         http2_session_remove_stream(h2_session, h2_stream);
2561         http2_stream_delete(h2_session, h2_stream);
2562         return 0;
2563 }
2564
2565 ssize_t http2_recv_cb(nghttp2_session* ATTR_UNUSED(session), uint8_t* buf,
2566         size_t len, int ATTR_UNUSED(flags), void* cb_arg)
2567 {
2568         struct http2_session* h2_session = (struct http2_session*)cb_arg;
2569         ssize_t ret;
2570
2571         log_assert(h2_session->c->type == comm_http);
2572         log_assert(h2_session->c->h2_session);
2573
2574 #ifdef HAVE_SSL
2575         if(h2_session->c->ssl) {
2576                 int r;
2577                 ERR_clear_error();
2578                 r = SSL_read(h2_session->c->ssl, buf, len);
2579                 if(r <= 0) {
2580                         int want = SSL_get_error(h2_session->c->ssl, r);
2581                         if(want == SSL_ERROR_ZERO_RETURN) {
2582                                 return NGHTTP2_ERR_EOF;
2583                         } else if(want == SSL_ERROR_WANT_READ) {
2584                                 return NGHTTP2_ERR_WOULDBLOCK;
2585                         } else if(want == SSL_ERROR_WANT_WRITE) {
2586                                 h2_session->c->ssl_shake_state = comm_ssl_shake_hs_write;
2587                                 comm_point_listen_for_rw(h2_session->c, 0, 1);
2588                                 return NGHTTP2_ERR_WOULDBLOCK;
2589                         } else if(want == SSL_ERROR_SYSCALL) {
2590 #ifdef ECONNRESET
2591                                 if(errno == ECONNRESET && verbosity < 2)
2592                                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2593 #endif
2594                                 if(errno != 0)
2595                                         log_err("SSL_read syscall: %s",
2596                                                 strerror(errno));
2597                                 return NGHTTP2_ERR_CALLBACK_FAILURE;
2598                         }
2599                         log_crypto_err("could not SSL_read");
2600                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2601                 }
2602                 return r;
2603         }
2604 #endif /* HAVE_SSL */
2605
2606         ret = recv(h2_session->c->fd, buf, len, 0);
2607         if(ret == 0) {
2608                 return NGHTTP2_ERR_EOF;
2609         } else if(ret < 0) {
2610 #ifndef USE_WINSOCK
2611                 if(errno == EINTR || errno == EAGAIN)
2612                         return NGHTTP2_ERR_WOULDBLOCK;
2613 #ifdef ECONNRESET
2614                 if(errno == ECONNRESET && verbosity < 2)
2615                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2616 #endif
2617                 log_err_addr("could not http2 recv: %s", strerror(errno),
2618                         &h2_session->c->repinfo.addr,
2619                         h2_session->c->repinfo.addrlen);
2620 #else /* USE_WINSOCK */
2621                 if(WSAGetLastError() == WSAECONNRESET)
2622                         return NGHTTP2_ERR_CALLBACK_FAILURE;
2623                 if(WSAGetLastError() == WSAEINPROGRESS)
2624                         return NGHTTP2_ERR_WOULDBLOCK;
2625                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2626                         ub_winsock_tcp_wouldblock(h2_session->c->ev->ev,
2627                                 UB_EV_READ);
2628                         return NGHTTP2_ERR_WOULDBLOCK;
2629                 }
2630                 log_err_addr("could not http2 recv: %s",
2631                         wsa_strerror(WSAGetLastError()),
2632                         &h2_session->c->repinfo.addr,
2633                         h2_session->c->repinfo.addrlen);
2634 #endif
2635                 return NGHTTP2_ERR_CALLBACK_FAILURE;
2636         }
2637         return ret;
2638 }
2639 #endif /* HAVE_NGHTTP2 */
2640
/**
 * Handle http2 read.
 * Lets nghttp2 receive data for the session. If the session then wants
 * to write, the comm point is switched to writing.
 * @param fd: unused.
 * @param c: http comm point with a h2_session.
 * @return 0 if the connection has to be closed: on error, or when the
 *	session wants neither to read nor to write (always 0 when built
 *	without nghttp2); 1 to keep the connection.
 */
static int
comm_point_http2_handle_read(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
	int rv;
	log_assert(c->h2_session);

	/* reading until recv cb returns NGHTTP2_ERR_WOULDBLOCK */
	rv = nghttp2_session_recv(c->h2_session->session);
	if(rv != 0) {
		/* EOF and callback failure are not reported here */
		if(!(rv == NGHTTP2_ERR_EOF ||
			rv == NGHTTP2_ERR_CALLBACK_FAILURE)) {
			char a[256];
			addr_to_str(&c->repinfo.addr, c->repinfo.addrlen,
				a, sizeof(a));
			verbose(VERB_QUERY, "http2: session_recv from %s failed, "
				"error: %s", a, nghttp2_strerror(rv));
		}
		return 0;
	}
	if(nghttp2_session_want_write(c->h2_session->session)) {
		/* switch over to writing */
		c->tcp_is_reading = 0;
		comm_point_stop_listening(c);
		comm_point_start_listening(c, -1, c->tcp_timeout_msec);
		return 1;
	}
	if(!nghttp2_session_want_read(c->h2_session->session))
		return 0; /* connection can be closed */
	return 1;
#else
	(void)c;
	return 0;
#endif
}
2674
2675 /**
2676  * Handle http reading callback.
2677  * @param fd: file descriptor of socket.
2678  * @param c: comm point to read from into buffer.
2679  * @return: 0 on error
2680  */
2681 static int
2682 comm_point_http_handle_read(int fd, struct comm_point* c)
2683 {
2684         log_assert(c->type == comm_http);
2685         log_assert(fd != -1);
2686
2687         /* if we are in ssl handshake, handle SSL handshake */
2688 #ifdef HAVE_SSL
2689         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2690                 if(!ssl_handshake(c))
2691                         return 0;
2692                 if(c->ssl_shake_state != comm_ssl_shake_none)
2693                         return 1;
2694         }
2695 #endif /* HAVE_SSL */
2696
2697         if(!c->tcp_is_reading)
2698                 return 1;
2699
2700         if(c->use_h2) {
2701                 return comm_point_http2_handle_read(fd, c);
2702         }
2703
2704         /* http version is <= http/1.1 */
2705
2706         if(c->http_min_version >= http_version_2) {
2707                 /* HTTP/2 failed, not allowed to use lower version. */
2708                 return 0;
2709         }
2710
2711         /* read more data */
2712         if(c->ssl) {
2713                 if(!ssl_http_read_more(c))
2714                         return 0;
2715         } else {
2716                 if(!http_read_more(fd, c))
2717                         return 0;
2718         }
2719
2720         sldns_buffer_flip(c->buffer);
2721
2722         while(sldns_buffer_remaining(c->buffer) > 0) {
2723                 /* Handle HTTP/1.x data */
2724                 /* if we are reading headers, read more headers */
2725                 if(c->http_in_headers || c->http_in_chunk_headers) {
2726                         /* if header is done, process the header */
2727                         if(!http_header_done(c->buffer)) {
2728                                 /* copy remaining data to front of buffer
2729                                  * and set rest for writing into it */
2730                                 http_moveover_buffer(c->buffer);
2731                                 /* return and wait to read more */
2732                                 return 1;
2733                         }
2734                         if(!c->http_in_chunk_headers) {
2735                                 /* process initial headers */
2736                                 if(!http_process_initial_header(c))
2737                                         return 0;
2738                         } else {
2739                                 /* process chunk headers */
2740                                 int r = http_process_chunk_header(c);
2741                                 if(r == 0) return 0;
2742                                 if(r == 2) return 1; /* done */
2743                                 /* r == 1, continue */
2744                         }
2745                         /* see if we have more to process */
2746                         continue;
2747                 }
2748
2749                 if(!c->http_is_chunked) {
2750                         /* if we are reading nonchunks, process that*/
2751                         return http_nonchunk_segment(c);
2752                 } else {
2753                         /* if we are reading chunks, read the chunk */
2754                         int r = http_chunked_segment(c);
2755                         if(r == 0) return 0;
2756                         if(r == 1) return 1;
2757                         continue;
2758                 }
2759         }
2760         /* broke out of the loop; could not process header instead need
2761          * to read more */
2762         /* moveover any remaining data and read more data */
2763         http_moveover_buffer(c->buffer);
2764         /* return and wait to read more */
2765         return 1;
2766 }
2767
2768 /** check pending connect for http */
2769 static int
2770 http_check_connect(int fd, struct comm_point* c)
2771 {
2772         /* check for pending error from nonblocking connect */
2773         /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
2774         int error = 0;
2775         socklen_t len = (socklen_t)sizeof(error);
2776         if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
2777                 &len) < 0){
2778 #ifndef USE_WINSOCK
2779                 error = errno; /* on solaris errno is error */
2780 #else /* USE_WINSOCK */
2781                 error = WSAGetLastError();
2782 #endif
2783         }
2784 #ifndef USE_WINSOCK
2785 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
2786         if(error == EINPROGRESS || error == EWOULDBLOCK)
2787                 return 1; /* try again later */
2788         else
2789 #endif
2790         if(error != 0 && verbosity < 2)
2791                 return 0; /* silence lots of chatter in the logs */
2792         else if(error != 0) {
2793                 log_err_addr("http connect", strerror(error),
2794                         &c->repinfo.addr, c->repinfo.addrlen);
2795 #else /* USE_WINSOCK */
2796         /* examine error */
2797         if(error == WSAEINPROGRESS)
2798                 return 1;
2799         else if(error == WSAEWOULDBLOCK) {
2800                 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2801                 return 1;
2802         } else if(error != 0 && verbosity < 2)
2803                 return 0;
2804         else if(error != 0) {
2805                 log_err_addr("http connect", wsa_strerror(error),
2806                         &c->repinfo.addr, c->repinfo.addrlen);
2807 #endif /* USE_WINSOCK */
2808                 return 0;
2809         }
2810         /* keep on processing this socket */
2811         return 2;
2812 }
2813
/**
 * Write more data for http over SSL, from the current position of
 * c->buffer. The buffer position is advanced by the number of bytes
 * written.
 * @param c: http comm point with an SSL object.
 * @return 0 if the connection was closed or failed (always 0 when built
 *	without SSL), 1 if data was written or the write has to be
 *	retried later.
 */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int nwritten;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	nwritten = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(nwritten > 0) {
		sldns_buffer_skip(c->buffer, (ssize_t)nwritten);
		return 1;
	}
	switch(SSL_get_error(c->ssl, nwritten)) {
	case SSL_ERROR_ZERO_RETURN:
		/* closed */
		return 0;
	case SSL_ERROR_WANT_READ:
		/* SSL needs to read first; wait for read condition */
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1;
	case SSL_ERROR_WANT_WRITE:
		/* write more later */
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
2854
2855 /** write more data for http */
2856 static int
2857 http_write_more(int fd, struct comm_point* c)
2858 {
2859         ssize_t r;
2860         log_assert(sldns_buffer_remaining(c->buffer) > 0);
2861         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
2862                 sldns_buffer_remaining(c->buffer), 0);
2863         if(r == -1) {
2864 #ifndef USE_WINSOCK
2865                 if(errno == EINTR || errno == EAGAIN)
2866                         return 1;
2867 #else
2868                 if(WSAGetLastError() == WSAEINPROGRESS)
2869                         return 1;
2870                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2871                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2872                         return 1; 
2873                 }
2874 #endif
2875                 log_err_addr("http send r", sock_strerror(errno),
2876                         &c->repinfo.addr, c->repinfo.addrlen);
2877                 return 0;
2878         }
2879         sldns_buffer_skip(c->buffer, r);
2880         return 1;
2881 }
2882
#ifdef HAVE_NGHTTP2
/**
 * nghttp2 send callback: write session output to the connection.
 * Uses SSL_write when the comm point has an ssl object, otherwise send()
 * on the comm point fd.
 * @param buf: bytes to transmit.
 * @param len: number of bytes in buf.
 * @param cb_arg: the struct http2_session of this connection.
 * @return the number of bytes written, NGHTTP2_ERR_WOULDBLOCK when the
 *	write has to be retried later, or NGHTTP2_ERR_CALLBACK_FAILURE when
 *	the connection is closed or the write failed.
 */
ssize_t http2_send_cb(nghttp2_session* ATTR_UNUSED(session), const uint8_t* buf,
	size_t len, int ATTR_UNUSED(flags), void* cb_arg)
{
	ssize_t ret;
	struct http2_session* h2_session = (struct http2_session*)cb_arg;
	log_assert(h2_session->c->type == comm_http);
	log_assert(h2_session->c->h2_session);

#ifdef HAVE_SSL
	if(h2_session->c->ssl) {
		int r;
		ERR_clear_error();
		/* SSL_write takes an int length; make the conversion
		 * explicit, as ssl_http_write_more does */
		r = SSL_write(h2_session->c->ssl, buf, (int)len);
		if(r <= 0) {
			int want = SSL_get_error(h2_session->c->ssl, r);
			if(want == SSL_ERROR_ZERO_RETURN) {
				/* closed */
				return NGHTTP2_ERR_CALLBACK_FAILURE;
			} else if(want == SSL_ERROR_WANT_READ) {
				/* wait for read condition */
				h2_session->c->ssl_shake_state = comm_ssl_shake_hs_read;
				comm_point_listen_for_rw(h2_session->c, 1, 0);
				return NGHTTP2_ERR_WOULDBLOCK;
			} else if(want == SSL_ERROR_WANT_WRITE) {
				/* write more later */
				return NGHTTP2_ERR_WOULDBLOCK;
			} else if(want == SSL_ERROR_SYSCALL) {
#ifdef EPIPE
				/* silence 'broken pipe' */
				if(errno == EPIPE && verbosity < 2)
					return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
				if(errno != 0)
					log_err("SSL_write syscall: %s",
						strerror(errno));
				return NGHTTP2_ERR_CALLBACK_FAILURE;
			}
			log_crypto_err("could not SSL_write");
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		}
		return r;
	}
#endif /* HAVE_SSL */

	ret = send(h2_session->c->fd, buf, len, 0);
	if(ret == 0) {
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	} else if(ret < 0) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return NGHTTP2_ERR_WOULDBLOCK;
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return NGHTTP2_ERR_CALLBACK_FAILURE;
#endif
		/* the first argument is a plain label (like the other
		 * log_err_addr callers in this file), not a format string */
		log_err_addr("could not http2 write", strerror(errno),
			&h2_session->c->repinfo.addr,
			h2_session->c->repinfo.addrlen);
#else /* USE_WINSOCK */
		if(WSAGetLastError() == WSAENOTCONN)
			return NGHTTP2_ERR_WOULDBLOCK;
		if(WSAGetLastError() == WSAEINPROGRESS)
			return NGHTTP2_ERR_WOULDBLOCK;
		if(WSAGetLastError() == WSAEWOULDBLOCK) {
			ub_winsock_tcp_wouldblock(h2_session->c->ev->ev,
				UB_EV_WRITE);
			return NGHTTP2_ERR_WOULDBLOCK;
		}
		if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
			return NGHTTP2_ERR_CALLBACK_FAILURE;
		log_err_addr("could not http2 write",
			wsa_strerror(WSAGetLastError()),
			&h2_session->c->repinfo.addr,
			h2_session->c->repinfo.addrlen);
#endif
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}
	return ret;
}
#endif /* HAVE_NGHTTP2 */
2964
/**
 * Handle http2 writing: let nghttp2 send its pending output.
 * @param c: comm point with the http2 session.
 * @return 0 when the connection can be closed (send failure, or nghttp2
 *	wants neither to read nor to write), 1 to keep the connection.
 */
static int
comm_point_http2_handle_write(int ATTR_UNUSED(fd), struct comm_point* c)
{
#ifdef HAVE_NGHTTP2
	int rv;
	log_assert(c->h2_session);

	rv = nghttp2_session_send(c->h2_session->session);
	if(rv != 0) {
		verbose(VERB_QUERY, "http2: session_send failed, "
			"error: %s", nghttp2_strerror(rv));
		return 0;
	}

	if(nghttp2_session_want_read(c->h2_session->session)) {
		/* flip the comm point back to reading */
		c->tcp_is_reading = 1;
		comm_point_stop_listening(c);
		comm_point_start_listening(c, -1, c->tcp_timeout_msec);
		return 1;
	}
	/* no read wanted: keep going only if there is still output */
	if(nghttp2_session_want_write(c->h2_session->session))
		return 1;
	return 0; /* connection can be closed */
#else
	(void)c;
	return 0;
#endif
}
2992
2993 /** 
2994  * Handle http writing callback. 
2995  * @param fd: file descriptor of socket.
2996  * @param c: comm point to write buffer out of.
2997  * @return: 0 on error
2998  */
2999 static int
3000 comm_point_http_handle_write(int fd, struct comm_point* c)
3001 {
3002         log_assert(c->type == comm_http);
3003         log_assert(fd != -1);
3004
3005         /* check pending connect errors, if that fails, we wait for more,
3006          * or we can continue to write contents */
3007         if(c->tcp_check_nb_connect) {
3008                 int r = http_check_connect(fd, c);
3009                 if(r == 0) return 0;
3010                 if(r == 1) return 1;
3011                 c->tcp_check_nb_connect = 0;
3012         }
3013         /* if we are in ssl handshake, handle SSL handshake */
3014 #ifdef HAVE_SSL
3015         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
3016                 if(!ssl_handshake(c))
3017                         return 0;
3018                 if(c->ssl_shake_state != comm_ssl_shake_none)
3019                         return 1;
3020         }
3021 #endif /* HAVE_SSL */
3022         if(c->tcp_is_reading)
3023                 return 1;
3024
3025         if(c->use_h2) {
3026                 return comm_point_http2_handle_write(fd, c);
3027         }
3028
3029         /* http version is <= http/1.1 */
3030
3031         if(c->http_min_version >= http_version_2) {
3032                 /* HTTP/2 failed, not allowed to use lower version. */
3033                 return 0;
3034         }
3035
3036         /* if we are writing, write more */
3037         if(c->ssl) {
3038                 if(!ssl_http_write_more(c))
3039                         return 0;
3040         } else {
3041                 if(!http_write_more(fd, c))
3042                         return 0;
3043         }
3044
3045         /* we write a single buffer contents, that can contain
3046          * the http request, and then flip to read the results */
3047         /* see if write is done */
3048         if(sldns_buffer_remaining(c->buffer) == 0) {
3049                 sldns_buffer_clear(c->buffer);
3050                 if(c->tcp_do_toggle_rw)
3051                         c->tcp_is_reading = 1;
3052                 c->tcp_byte_count = 0;
3053                 /* switch from listening(write) to listening(read) */
3054                 comm_point_stop_listening(c);
3055                 comm_point_start_listening(c, -1, -1);
3056         }
3057         return 1;
3058 }
3059
3060 void 
3061 comm_point_http_handle_callback(int fd, short event, void* arg)
3062 {
3063         struct comm_point* c = (struct comm_point*)arg;
3064         log_assert(c->type == comm_http);
3065         ub_comm_base_now(c->ev->base);
3066
3067         if(event&UB_EV_TIMEOUT) {
3068                 verbose(VERB_QUERY, "http took too long, dropped");
3069                 reclaim_http_handler(c);
3070                 if(!c->tcp_do_close) {
3071                         fptr_ok(fptr_whitelist_comm_point(c->callback));
3072                         (void)(*c->callback)(c, c->cb_arg,
3073                                 NETEVENT_TIMEOUT, NULL);
3074                 }
3075                 return;
3076         }
3077         if(event&UB_EV_READ) {
3078                 if(!comm_point_http_handle_read(fd, c)) {
3079                         reclaim_http_handler(c);
3080                         if(!c->tcp_do_close) {
3081                                 fptr_ok(fptr_whitelist_comm_point(
3082                                         c->callback));
3083                                 (void)(*c->callback)(c, c->cb_arg,
3084                                         NETEVENT_CLOSED, NULL);
3085                         }
3086                 }
3087                 return;
3088         }
3089         if(event&UB_EV_WRITE) {
3090                 if(!comm_point_http_handle_write(fd, c)) {
3091                         reclaim_http_handler(c);
3092                         if(!c->tcp_do_close) {
3093                                 fptr_ok(fptr_whitelist_comm_point(
3094                                         c->callback));
3095                                 (void)(*c->callback)(c, c->cb_arg,
3096                                         NETEVENT_CLOSED, NULL);
3097                         }
3098                 }
3099                 return;
3100         }
3101         log_err("Ignored event %d for httphdl.", event);
3102 }
3103
3104 void comm_point_local_handle_callback(int fd, short event, void* arg)
3105 {
3106         struct comm_point* c = (struct comm_point*)arg;
3107         log_assert(c->type == comm_local);
3108         ub_comm_base_now(c->ev->base);
3109
3110         if(event&UB_EV_READ) {
3111                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
3112                         fptr_ok(fptr_whitelist_comm_point(c->callback));
3113                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
3114                                 NULL);
3115                 }
3116                 return;
3117         }
3118         log_err("Ignored event %d for localhdl.", event);
3119 }
3120
3121 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
3122         short event, void* arg)
3123 {
3124         struct comm_point* c = (struct comm_point*)arg;
3125         int err = NETEVENT_NOERROR;
3126         log_assert(c->type == comm_raw);
3127         ub_comm_base_now(c->ev->base);
3128         
3129         if(event&UB_EV_TIMEOUT)
3130                 err = NETEVENT_TIMEOUT;
3131         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
3132         (void)(*c->callback)(c, c->cb_arg, err, NULL);
3133 }
3134
3135 struct comm_point* 
3136 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
3137         comm_point_callback_type* callback, void* callback_arg)
3138 {
3139         struct comm_point* c = (struct comm_point*)calloc(1,
3140                 sizeof(struct comm_point));
3141         short evbits;
3142         if(!c)
3143                 return NULL;
3144         c->ev = (struct internal_event*)calloc(1,
3145                 sizeof(struct internal_event));
3146         if(!c->ev) {
3147                 free(c);
3148                 return NULL;
3149         }
3150         c->ev->base = base;
3151         c->fd = fd;
3152         c->buffer = buffer;
3153         c->timeout = NULL;
3154         c->tcp_is_reading = 0;
3155         c->tcp_byte_count = 0;
3156         c->tcp_parent = NULL;
3157         c->max_tcp_count = 0;
3158         c->cur_tcp_count = 0;
3159         c->tcp_handlers = NULL;
3160         c->tcp_free = NULL;
3161         c->type = comm_udp;
3162         c->tcp_do_close = 0;
3163         c->do_not_close = 0;
3164         c->tcp_do_toggle_rw = 0;
3165         c->tcp_check_nb_connect = 0;
3166 #ifdef USE_MSG_FASTOPEN
3167         c->tcp_do_fastopen = 0;
3168 #endif
3169 #ifdef USE_DNSCRYPT
3170         c->dnscrypt = 0;
3171         c->dnscrypt_buffer = buffer;
3172 #endif
3173         c->inuse = 0;
3174         c->callback = callback;
3175         c->cb_arg = callback_arg;
3176         evbits = UB_EV_READ | UB_EV_PERSIST;
3177         /* ub_event stuff */
3178         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3179                 comm_point_udp_callback, c);
3180         if(c->ev->ev == NULL) {
3181                 log_err("could not baseset udp event");
3182                 comm_point_delete(c);
3183                 return NULL;
3184         }
3185         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
3186                 log_err("could not add udp event");
3187                 comm_point_delete(c);
3188                 return NULL;
3189         }
3190         return c;
3191 }
3192
3193 struct comm_point* 
3194 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
3195         sldns_buffer* buffer, 
3196         comm_point_callback_type* callback, void* callback_arg)
3197 {
3198         struct comm_point* c = (struct comm_point*)calloc(1,
3199                 sizeof(struct comm_point));
3200         short evbits;
3201         if(!c)
3202                 return NULL;
3203         c->ev = (struct internal_event*)calloc(1,
3204                 sizeof(struct internal_event));
3205         if(!c->ev) {
3206                 free(c);
3207                 return NULL;
3208         }
3209         c->ev->base = base;
3210         c->fd = fd;
3211         c->buffer = buffer;
3212         c->timeout = NULL;
3213         c->tcp_is_reading = 0;
3214         c->tcp_byte_count = 0;
3215         c->tcp_parent = NULL;
3216         c->max_tcp_count = 0;
3217         c->cur_tcp_count = 0;
3218         c->tcp_handlers = NULL;
3219         c->tcp_free = NULL;
3220         c->type = comm_udp;
3221         c->tcp_do_close = 0;
3222         c->do_not_close = 0;
3223 #ifdef USE_DNSCRYPT
3224         c->dnscrypt = 0;
3225         c->dnscrypt_buffer = buffer;
3226 #endif
3227         c->inuse = 0;
3228         c->tcp_do_toggle_rw = 0;
3229         c->tcp_check_nb_connect = 0;
3230 #ifdef USE_MSG_FASTOPEN
3231         c->tcp_do_fastopen = 0;
3232 #endif
3233         c->callback = callback;
3234         c->cb_arg = callback_arg;
3235         evbits = UB_EV_READ | UB_EV_PERSIST;
3236         /* ub_event stuff */
3237         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3238                 comm_point_udp_ancil_callback, c);
3239         if(c->ev->ev == NULL) {
3240                 log_err("could not baseset udp event");
3241                 comm_point_delete(c);
3242                 return NULL;
3243         }
3244         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
3245                 log_err("could not add udp event");
3246                 comm_point_delete(c);
3247                 return NULL;
3248         }
3249         return c;
3250 }
3251
3252 static struct comm_point* 
3253 comm_point_create_tcp_handler(struct comm_base *base, 
3254         struct comm_point* parent, size_t bufsize,
3255         struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
3256         void* callback_arg)
3257 {
3258         struct comm_point* c = (struct comm_point*)calloc(1,
3259                 sizeof(struct comm_point));
3260         short evbits;
3261         if(!c)
3262                 return NULL;
3263         c->ev = (struct internal_event*)calloc(1,
3264                 sizeof(struct internal_event));
3265         if(!c->ev) {
3266                 free(c);
3267                 return NULL;
3268         }
3269         c->ev->base = base;
3270         c->fd = -1;
3271         c->buffer = sldns_buffer_new(bufsize);
3272         if(!c->buffer) {
3273                 free(c->ev);
3274                 free(c);
3275                 return NULL;
3276         }
3277         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
3278         if(!c->timeout) {
3279                 sldns_buffer_free(c->buffer);
3280                 free(c->ev);
3281                 free(c);
3282                 return NULL;
3283         }
3284         c->tcp_is_reading = 0;
3285         c->tcp_byte_count = 0;
3286         c->tcp_parent = parent;
3287         c->tcp_timeout_msec = parent->tcp_timeout_msec;
3288         c->tcp_conn_limit = parent->tcp_conn_limit;
3289         c->tcl_addr = NULL;
3290         c->tcp_keepalive = 0;
3291         c->max_tcp_count = 0;
3292         c->cur_tcp_count = 0;
3293         c->tcp_handlers = NULL;
3294         c->tcp_free = NULL;
3295         c->type = comm_tcp;
3296         c->tcp_do_close = 0;
3297         c->do_not_close = 0;
3298         c->tcp_do_toggle_rw = 1;
3299         c->tcp_check_nb_connect = 0;
3300 #ifdef USE_MSG_FASTOPEN
3301         c->tcp_do_fastopen = 0;
3302 #endif
3303 #ifdef USE_DNSCRYPT
3304         c->dnscrypt = 0;
3305         /* We don't know just yet if this is a dnscrypt channel. Allocation
3306          * will be done when handling the callback. */
3307         c->dnscrypt_buffer = c->buffer;
3308 #endif
3309         c->repinfo.c = c;
3310         c->callback = callback;
3311         c->cb_arg = callback_arg;
3312         if(spoolbuf) {
3313                 c->tcp_req_info = tcp_req_info_create(spoolbuf);
3314                 if(!c->tcp_req_info) {
3315                         log_err("could not create tcp commpoint");
3316                         sldns_buffer_free(c->buffer);
3317                         free(c->timeout);
3318                         free(c->ev);
3319                         free(c);
3320                         return NULL;
3321                 }
3322                 c->tcp_req_info->cp = c;
3323                 c->tcp_do_close = 1;
3324                 c->tcp_do_toggle_rw = 0;
3325         }
3326         /* add to parent free list */
3327         c->tcp_free = parent->tcp_free;
3328         parent->tcp_free = c;
3329         /* ub_event stuff */
3330         evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
3331         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3332                 comm_point_tcp_handle_callback, c);
3333         if(c->ev->ev == NULL)
3334         {
3335                 log_err("could not basetset tcphdl event");
3336                 parent->tcp_free = c->tcp_free;
3337                 tcp_req_info_delete(c->tcp_req_info);
3338                 sldns_buffer_free(c->buffer);
3339                 free(c->timeout);
3340                 free(c->ev);
3341                 free(c);
3342                 return NULL;
3343         }
3344         return c;
3345 }
3346
3347 static struct comm_point* 
3348 comm_point_create_http_handler(struct comm_base *base, 
3349         struct comm_point* parent, size_t bufsize, int harden_large_queries,
3350         uint32_t http_max_streams, char* http_endpoint,
3351         comm_point_callback_type* callback, void* callback_arg)
3352 {
3353         struct comm_point* c = (struct comm_point*)calloc(1,
3354                 sizeof(struct comm_point));
3355         short evbits;
3356         if(!c)
3357                 return NULL;
3358         c->ev = (struct internal_event*)calloc(1,
3359                 sizeof(struct internal_event));
3360         if(!c->ev) {
3361                 free(c);
3362                 return NULL;
3363         }
3364         c->ev->base = base;
3365         c->fd = -1;
3366         c->buffer = sldns_buffer_new(bufsize);
3367         if(!c->buffer) {
3368                 free(c->ev);
3369                 free(c);
3370                 return NULL;
3371         }
3372         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
3373         if(!c->timeout) {
3374                 sldns_buffer_free(c->buffer);
3375                 free(c->ev);
3376                 free(c);
3377                 return NULL;
3378         }
3379         c->tcp_is_reading = 0;
3380         c->tcp_byte_count = 0;
3381         c->tcp_parent = parent;
3382         c->tcp_timeout_msec = parent->tcp_timeout_msec;
3383         c->tcp_conn_limit = parent->tcp_conn_limit;
3384         c->tcl_addr = NULL;
3385         c->tcp_keepalive = 0;
3386         c->max_tcp_count = 0;
3387         c->cur_tcp_count = 0;
3388         c->tcp_handlers = NULL;
3389         c->tcp_free = NULL;
3390         c->type = comm_http;
3391         c->tcp_do_close = 1;
3392         c->do_not_close = 0;
3393         c->tcp_do_toggle_rw = 1; /* will be set to 0 after http2 upgrade */
3394         c->tcp_check_nb_connect = 0;
3395 #ifdef USE_MSG_FASTOPEN
3396         c->tcp_do_fastopen = 0;
3397 #endif
3398 #ifdef USE_DNSCRYPT
3399         c->dnscrypt = 0;
3400         c->dnscrypt_buffer = NULL;
3401 #endif
3402         c->repinfo.c = c;
3403         c->callback = callback;
3404         c->cb_arg = callback_arg;
3405
3406         c->http_min_version = http_version_2;
3407         c->http2_stream_max_qbuffer_size = bufsize;
3408         if(harden_large_queries && bufsize > 512)
3409                 c->http2_stream_max_qbuffer_size = 512;
3410         c->http2_max_streams = http_max_streams;
3411         if(!(c->http_endpoint = strdup(http_endpoint))) {
3412                 log_err("could not strdup http_endpoint");
3413                 sldns_buffer_free(c->buffer);
3414                 free(c->timeout);
3415                 free(c->ev);
3416                 free(c);
3417                 return NULL;
3418         }
3419         c->use_h2 = 0;
3420 #ifdef HAVE_NGHTTP2
3421         if(!(c->h2_session = http2_session_create(c))) {
3422                 log_err("could not create http2 session");
3423                 free(c->http_endpoint);
3424                 sldns_buffer_free(c->buffer);
3425                 free(c->timeout);
3426                 free(c->ev);
3427                 free(c);
3428                 return NULL;
3429         }
3430         if(!(c->h2_session->callbacks = http2_req_callbacks_create())) {
3431                 log_err("could not create http2 callbacks");
3432                 http2_session_delete(c->h2_session);
3433                 free(c->http_endpoint);
3434                 sldns_buffer_free(c->buffer);
3435                 free(c->timeout);
3436                 free(c->ev);
3437                 free(c);
3438                 return NULL;
3439         }
3440 #endif
3441         
3442         /* add to parent free list */
3443         c->tcp_free = parent->tcp_free;
3444         parent->tcp_free = c;
3445         /* ub_event stuff */
3446         evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
3447         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3448                 comm_point_http_handle_callback, c);
3449         if(c->ev->ev == NULL)
3450         {
3451                 log_err("could not set http handler event");
3452                 parent->tcp_free = c->tcp_free;
3453                 http2_session_delete(c->h2_session);
3454                 sldns_buffer_free(c->buffer);
3455                 free(c->timeout);
3456                 free(c->ev);
3457                 free(c);
3458                 return NULL;
3459         }
3460         return c;
3461 }
3462
3463 struct comm_point* 
3464 comm_point_create_tcp(struct comm_base *base, int fd, int num,
3465         int idle_timeout, int harden_large_queries,
3466         uint32_t http_max_streams, char* http_endpoint,
3467         struct tcl_list* tcp_conn_limit, size_t bufsize,
3468         struct sldns_buffer* spoolbuf, enum listen_type port_type,
3469         comm_point_callback_type* callback, void* callback_arg)
3470 {
3471         struct comm_point* c = (struct comm_point*)calloc(1,
3472                 sizeof(struct comm_point));
3473         short evbits;
3474         int i;
3475         /* first allocate the TCP accept listener */
3476         if(!c)
3477                 return NULL;
3478         c->ev = (struct internal_event*)calloc(1,
3479                 sizeof(struct internal_event));
3480         if(!c->ev) {
3481                 free(c);
3482                 return NULL;
3483         }
3484         c->ev->base = base;
3485         c->fd = fd;
3486         c->buffer = NULL;
3487         c->timeout = NULL;
3488         c->tcp_is_reading = 0;
3489         c->tcp_byte_count = 0;
3490         c->tcp_timeout_msec = idle_timeout;
3491         c->tcp_conn_limit = tcp_conn_limit;
3492         c->tcl_addr = NULL;
3493         c->tcp_keepalive = 0;
3494         c->tcp_parent = NULL;
3495         c->max_tcp_count = num;
3496         c->cur_tcp_count = 0;
3497         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
3498                 sizeof(struct comm_point*));
3499         if(!c->tcp_handlers) {
3500                 free(c->ev);
3501                 free(c);
3502                 return NULL;
3503         }
3504         c->tcp_free = NULL;
3505         c->type = comm_tcp_accept;
3506         c->tcp_do_close = 0;
3507         c->do_not_close = 0;
3508         c->tcp_do_toggle_rw = 0;
3509         c->tcp_check_nb_connect = 0;
3510 #ifdef USE_MSG_FASTOPEN
3511         c->tcp_do_fastopen = 0;
3512 #endif
3513 #ifdef USE_DNSCRYPT
3514         c->dnscrypt = 0;
3515         c->dnscrypt_buffer = NULL;
3516 #endif
3517         c->callback = NULL;
3518         c->cb_arg = NULL;
3519         evbits = UB_EV_READ | UB_EV_PERSIST;
3520         /* ub_event stuff */
3521         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3522                 comm_point_tcp_accept_callback, c);
3523         if(c->ev->ev == NULL) {
3524                 log_err("could not baseset tcpacc event");
3525                 comm_point_delete(c);
3526                 return NULL;
3527         }
3528         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
3529                 log_err("could not add tcpacc event");
3530                 comm_point_delete(c);
3531                 return NULL;
3532         }
3533         /* now prealloc the handlers */
3534         for(i=0; i<num; i++) {
3535                 if(port_type == listen_type_tcp ||
3536                         port_type == listen_type_ssl ||
3537                         port_type == listen_type_tcp_dnscrypt) {
3538                         c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
3539                                 c, bufsize, spoolbuf, callback, callback_arg);
3540                 } else if(port_type == listen_type_http) {
3541                         c->tcp_handlers[i] = comm_point_create_http_handler(
3542                                 base, c, bufsize, harden_large_queries,
3543                                 http_max_streams, http_endpoint,
3544                                 callback, callback_arg);
3545                 }
3546                 else {
3547                         log_err("could not create tcp handler, unknown listen "
3548                                 "type");
3549                         return NULL;
3550                 }
3551                 if(!c->tcp_handlers[i]) {
3552                         comm_point_delete(c);
3553                         return NULL;
3554                 }
3555         }
3556         
3557         return c;
3558 }
3559
3560 struct comm_point* 
3561 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
3562         comm_point_callback_type* callback, void* callback_arg)
3563 {
3564         struct comm_point* c = (struct comm_point*)calloc(1,
3565                 sizeof(struct comm_point));
3566         short evbits;
3567         if(!c)
3568                 return NULL;
3569         c->ev = (struct internal_event*)calloc(1,
3570                 sizeof(struct internal_event));
3571         if(!c->ev) {
3572                 free(c);
3573                 return NULL;
3574         }
3575         c->ev->base = base;
3576         c->fd = -1;
3577         c->buffer = sldns_buffer_new(bufsize);
3578         if(!c->buffer) {
3579                 free(c->ev);
3580                 free(c);
3581                 return NULL;
3582         }
3583         c->timeout = NULL;
3584         c->tcp_is_reading = 0;
3585         c->tcp_byte_count = 0;
3586         c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
3587         c->tcp_conn_limit = NULL;
3588         c->tcl_addr = NULL;
3589         c->tcp_keepalive = 0;
3590         c->tcp_parent = NULL;
3591         c->max_tcp_count = 0;
3592         c->cur_tcp_count = 0;
3593         c->tcp_handlers = NULL;
3594         c->tcp_free = NULL;
3595         c->type = comm_tcp;
3596         c->tcp_do_close = 0;
3597         c->do_not_close = 0;
3598         c->tcp_do_toggle_rw = 1;
3599         c->tcp_check_nb_connect = 1;
3600 #ifdef USE_MSG_FASTOPEN
3601         c->tcp_do_fastopen = 1;
3602 #endif
3603 #ifdef USE_DNSCRYPT
3604         c->dnscrypt = 0;
3605         c->dnscrypt_buffer = c->buffer;
3606 #endif
3607         c->repinfo.c = c;
3608         c->callback = callback;
3609         c->cb_arg = callback_arg;
3610         evbits = UB_EV_PERSIST | UB_EV_WRITE;
3611         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3612                 comm_point_tcp_handle_callback, c);
3613         if(c->ev->ev == NULL)
3614         {
3615                 log_err("could not baseset tcpout event");
3616                 sldns_buffer_free(c->buffer);
3617                 free(c->ev);
3618                 free(c);
3619                 return NULL;
3620         }
3621
3622         return c;
3623 }
3624
3625 struct comm_point* 
3626 comm_point_create_http_out(struct comm_base *base, size_t bufsize,
3627         comm_point_callback_type* callback, void* callback_arg,
3628         sldns_buffer* temp)
3629 {
3630         struct comm_point* c = (struct comm_point*)calloc(1,
3631                 sizeof(struct comm_point));
3632         short evbits;
3633         if(!c)
3634                 return NULL;
3635         c->ev = (struct internal_event*)calloc(1,
3636                 sizeof(struct internal_event));
3637         if(!c->ev) {
3638                 free(c);
3639                 return NULL;
3640         }
3641         c->ev->base = base;
3642         c->fd = -1;
3643         c->buffer = sldns_buffer_new(bufsize);
3644         if(!c->buffer) {
3645                 free(c->ev);
3646                 free(c);
3647                 return NULL;
3648         }
3649         c->timeout = NULL;
3650         c->tcp_is_reading = 0;
3651         c->tcp_byte_count = 0;
3652         c->tcp_parent = NULL;
3653         c->max_tcp_count = 0;
3654         c->cur_tcp_count = 0;
3655         c->tcp_handlers = NULL;
3656         c->tcp_free = NULL;
3657         c->type = comm_http;
3658         c->tcp_do_close = 0;
3659         c->do_not_close = 0;
3660         c->tcp_do_toggle_rw = 1;
3661         c->tcp_check_nb_connect = 1;
3662         c->http_in_headers = 1;
3663         c->http_in_chunk_headers = 0;
3664         c->http_is_chunked = 0;
3665         c->http_temp = temp;
3666 #ifdef USE_MSG_FASTOPEN
3667         c->tcp_do_fastopen = 1;
3668 #endif
3669 #ifdef USE_DNSCRYPT
3670         c->dnscrypt = 0;
3671         c->dnscrypt_buffer = c->buffer;
3672 #endif
3673         c->repinfo.c = c;
3674         c->callback = callback;
3675         c->cb_arg = callback_arg;
3676         evbits = UB_EV_PERSIST | UB_EV_WRITE;
3677         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3678                 comm_point_http_handle_callback, c);
3679         if(c->ev->ev == NULL)
3680         {
3681                 log_err("could not baseset tcpout event");
3682 #ifdef HAVE_SSL
3683                 SSL_free(c->ssl);
3684 #endif
3685                 sldns_buffer_free(c->buffer);
3686                 free(c->ev);
3687                 free(c);
3688                 return NULL;
3689         }
3690
3691         return c;
3692 }
3693
3694 struct comm_point* 
3695 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
3696         comm_point_callback_type* callback, void* callback_arg)
3697 {
3698         struct comm_point* c = (struct comm_point*)calloc(1,
3699                 sizeof(struct comm_point));
3700         short evbits;
3701         if(!c)
3702                 return NULL;
3703         c->ev = (struct internal_event*)calloc(1,
3704                 sizeof(struct internal_event));
3705         if(!c->ev) {
3706                 free(c);
3707                 return NULL;
3708         }
3709         c->ev->base = base;
3710         c->fd = fd;
3711         c->buffer = sldns_buffer_new(bufsize);
3712         if(!c->buffer) {
3713                 free(c->ev);
3714                 free(c);
3715                 return NULL;
3716         }
3717         c->timeout = NULL;
3718         c->tcp_is_reading = 1;
3719         c->tcp_byte_count = 0;
3720         c->tcp_parent = NULL;
3721         c->max_tcp_count = 0;
3722         c->cur_tcp_count = 0;
3723         c->tcp_handlers = NULL;
3724         c->tcp_free = NULL;
3725         c->type = comm_local;
3726         c->tcp_do_close = 0;
3727         c->do_not_close = 1;
3728         c->tcp_do_toggle_rw = 0;
3729         c->tcp_check_nb_connect = 0;
3730 #ifdef USE_MSG_FASTOPEN
3731         c->tcp_do_fastopen = 0;
3732 #endif
3733 #ifdef USE_DNSCRYPT
3734         c->dnscrypt = 0;
3735         c->dnscrypt_buffer = c->buffer;
3736 #endif
3737         c->callback = callback;
3738         c->cb_arg = callback_arg;
3739         /* ub_event stuff */
3740         evbits = UB_EV_PERSIST | UB_EV_READ;
3741         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3742                 comm_point_local_handle_callback, c);
3743         if(c->ev->ev == NULL) {
3744                 log_err("could not baseset localhdl event");
3745                 free(c->ev);
3746                 free(c);
3747                 return NULL;
3748         }
3749         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
3750                 log_err("could not add localhdl event");
3751                 ub_event_free(c->ev->ev);
3752                 free(c->ev);
3753                 free(c);
3754                 return NULL;
3755         }
3756         return c;
3757 }
3758
3759 struct comm_point* 
3760 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
3761         comm_point_callback_type* callback, void* callback_arg)
3762 {
3763         struct comm_point* c = (struct comm_point*)calloc(1,
3764                 sizeof(struct comm_point));
3765         short evbits;
3766         if(!c)
3767                 return NULL;
3768         c->ev = (struct internal_event*)calloc(1,
3769                 sizeof(struct internal_event));
3770         if(!c->ev) {
3771                 free(c);
3772                 return NULL;
3773         }
3774         c->ev->base = base;
3775         c->fd = fd;
3776         c->buffer = NULL;
3777         c->timeout = NULL;
3778         c->tcp_is_reading = 0;
3779         c->tcp_byte_count = 0;
3780         c->tcp_parent = NULL;
3781         c->max_tcp_count = 0;
3782         c->cur_tcp_count = 0;
3783         c->tcp_handlers = NULL;
3784         c->tcp_free = NULL;
3785         c->type = comm_raw;
3786         c->tcp_do_close = 0;
3787         c->do_not_close = 1;
3788         c->tcp_do_toggle_rw = 0;
3789         c->tcp_check_nb_connect = 0;
3790 #ifdef USE_MSG_FASTOPEN
3791         c->tcp_do_fastopen = 0;
3792 #endif
3793 #ifdef USE_DNSCRYPT
3794         c->dnscrypt = 0;
3795         c->dnscrypt_buffer = c->buffer;
3796 #endif
3797         c->callback = callback;
3798         c->cb_arg = callback_arg;
3799         /* ub_event stuff */
3800         if(writing)
3801                 evbits = UB_EV_PERSIST | UB_EV_WRITE;
3802         else    evbits = UB_EV_PERSIST | UB_EV_READ;
3803         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3804                 comm_point_raw_handle_callback, c);
3805         if(c->ev->ev == NULL) {
3806                 log_err("could not baseset rawhdl event");
3807                 free(c->ev);
3808                 free(c);
3809                 return NULL;
3810         }
3811         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
3812                 log_err("could not add rawhdl event");
3813                 ub_event_free(c->ev->ev);
3814                 free(c->ev);
3815                 free(c);
3816                 return NULL;
3817         }
3818         return c;
3819 }
3820
3821 void 
3822 comm_point_close(struct comm_point* c)
3823 {
3824         if(!c)
3825                 return;
3826         if(c->fd != -1) {
3827                 verbose(5, "comm_point_close of %d: event_del", c->fd);
3828                 if(ub_event_del(c->ev->ev) != 0) {
3829                         log_err("could not event_del on close");
3830                 }
3831         }
3832         tcl_close_connection(c->tcl_addr);
3833         if(c->tcp_req_info)
3834                 tcp_req_info_clear(c->tcp_req_info);
3835         if(c->h2_session)
3836                 http2_session_server_delete(c->h2_session);
3837
3838         /* close fd after removing from event lists, or epoll.. is messed up */
3839         if(c->fd != -1 && !c->do_not_close) {
3840                 if(c->type == comm_tcp || c->type == comm_http) {
3841                         /* delete sticky events for the fd, it gets closed */
3842                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3843                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
3844                 }
3845                 verbose(VERB_ALGO, "close fd %d", c->fd);
3846                 sock_close(c->fd);
3847         }
3848         c->fd = -1;
3849 }
3850
3851 void 
3852 comm_point_delete(struct comm_point* c)
3853 {
3854         if(!c) 
3855                 return;
3856         if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
3857 #ifdef HAVE_SSL
3858                 SSL_shutdown(c->ssl);
3859                 SSL_free(c->ssl);
3860 #endif
3861         }
3862         if(c->type == comm_http && c->http_endpoint) {
3863                 free(c->http_endpoint);
3864                 c->http_endpoint = NULL;
3865         }
3866         comm_point_close(c);
3867         if(c->tcp_handlers) {
3868                 int i;
3869                 for(i=0; i<c->max_tcp_count; i++)
3870                         comm_point_delete(c->tcp_handlers[i]);
3871                 free(c->tcp_handlers);
3872         }
3873         free(c->timeout);
3874         if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
3875                 sldns_buffer_free(c->buffer);
3876 #ifdef USE_DNSCRYPT
3877                 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
3878                         sldns_buffer_free(c->dnscrypt_buffer);
3879                 }
3880 #endif
3881                 if(c->tcp_req_info) {
3882                         tcp_req_info_delete(c->tcp_req_info);
3883                 }
3884                 if(c->h2_session) {
3885                         http2_session_delete(c->h2_session);
3886                 }
3887         }
3888         ub_event_free(c->ev->ev);
3889         free(c->ev);
3890         free(c);
3891 }
3892
3893 void 
3894 comm_point_send_reply(struct comm_reply *repinfo)
3895 {
3896         struct sldns_buffer* buffer;
3897         log_assert(repinfo && repinfo->c);
3898 #ifdef USE_DNSCRYPT
3899         buffer = repinfo->c->dnscrypt_buffer;
3900         if(!dnsc_handle_uncurved_request(repinfo)) {
3901                 return;
3902         }
3903 #else
3904         buffer = repinfo->c->buffer;
3905 #endif
3906         if(repinfo->c->type == comm_udp) {
3907                 if(repinfo->srctype)
3908                         comm_point_send_udp_msg_if(repinfo->c, 
3909                         buffer, (struct sockaddr*)&repinfo->addr, 
3910                         repinfo->addrlen, repinfo);
3911                 else
3912                         comm_point_send_udp_msg(repinfo->c, buffer,
3913                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen, 0);
3914 #ifdef USE_DNSTAP
3915                 if(repinfo->c->dtenv != NULL &&
3916                    repinfo->c->dtenv->log_client_response_messages)
3917                         dt_msg_send_client_response(repinfo->c->dtenv,
3918                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
3919 #endif
3920         } else {
3921 #ifdef USE_DNSTAP
3922                 if(repinfo->c->tcp_parent->dtenv != NULL &&
3923                    repinfo->c->tcp_parent->dtenv->log_client_response_messages)
3924                         dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
3925                         &repinfo->addr, repinfo->c->type,
3926                         ( repinfo->c->tcp_req_info
3927                         ? repinfo->c->tcp_req_info->spool_buffer
3928                         : repinfo->c->buffer ));
3929 #endif
3930                 if(repinfo->c->tcp_req_info) {
3931                         tcp_req_info_send_reply(repinfo->c->tcp_req_info);
3932                 } else if(repinfo->c->use_h2) {
3933                         if(!http2_submit_dns_response(repinfo->c->h2_session)) {
3934                                 comm_point_drop_reply(repinfo);
3935                                 return;
3936                         }
3937                         repinfo->c->h2_stream = NULL;
3938                         repinfo->c->tcp_is_reading = 0;
3939                         comm_point_stop_listening(repinfo->c);
3940                         comm_point_start_listening(repinfo->c, -1,
3941                                 repinfo->c->tcp_timeout_msec);
3942                         return;
3943                 } else {
3944                         comm_point_start_listening(repinfo->c, -1,
3945                                 repinfo->c->tcp_timeout_msec);
3946                 }
3947         }
3948 }
3949
3950 void 
3951 comm_point_drop_reply(struct comm_reply* repinfo)
3952 {
3953         if(!repinfo)
3954                 return;
3955         log_assert(repinfo->c);
3956         log_assert(repinfo->c->type != comm_tcp_accept);
3957         if(repinfo->c->type == comm_udp)
3958                 return;
3959         if(repinfo->c->tcp_req_info)
3960                 repinfo->c->tcp_req_info->is_drop = 1;
3961         if(repinfo->c->type == comm_http) {
3962                 if(repinfo->c->h2_session) {
3963                         repinfo->c->h2_session->is_drop = 1;
3964                         if(!repinfo->c->h2_session->postpone_drop)
3965                                 reclaim_http_handler(repinfo->c);
3966                         return;
3967                 }
3968                 reclaim_http_handler(repinfo->c);
3969                 return;
3970         }
3971         reclaim_tcp_handler(repinfo->c);
3972 }
3973
3974 void 
3975 comm_point_stop_listening(struct comm_point* c)
3976 {
3977         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
3978         if(ub_event_del(c->ev->ev) != 0) {
3979                 log_err("event_del error to stoplisten");
3980         }
3981 }
3982
3983 void 
3984 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
3985 {
3986         verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 
3987                 c->fd==-1?newfd:c->fd, msec);
3988         if(c->type == comm_tcp_accept && !c->tcp_free) {
3989                 /* no use to start listening no free slots. */
3990                 return;
3991         }
3992         if(msec != -1 && msec != 0) {
3993                 if(!c->timeout) {
3994                         c->timeout = (struct timeval*)malloc(sizeof(
3995                                 struct timeval));
3996                         if(!c->timeout) {
3997                                 log_err("cpsl: malloc failed. No net read.");
3998                                 return;
3999                         }
4000                 }
4001                 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
4002 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
4003                 c->timeout->tv_sec = msec/1000;
4004                 c->timeout->tv_usec = (msec%1000)*1000;
4005 #endif /* S_SPLINT_S */
4006         }
4007         if(c->type == comm_tcp || c->type == comm_http) {
4008                 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
4009                 if(c->tcp_write_and_read) {
4010                         verbose(5, "startlistening %d mode rw", (newfd==-1?c->fd:newfd));
4011                         ub_event_add_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
4012                 } else if(c->tcp_is_reading) {
4013                         verbose(5, "startlistening %d mode r", (newfd==-1?c->fd:newfd));
4014                         ub_event_add_bits(c->ev->ev, UB_EV_READ);
4015                 } else  {
4016                         verbose(5, "startlistening %d mode w", (newfd==-1?c->fd:newfd));
4017                         ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
4018                 }
4019         }
4020         if(newfd != -1) {
4021                 if(c->fd != -1 && c->fd != newfd) {
4022                         verbose(5, "cpsl close of fd %d for %d", c->fd, newfd);
4023                         sock_close(c->fd);
4024                 }
4025                 c->fd = newfd;
4026                 ub_event_set_fd(c->ev->ev, c->fd);
4027         }
4028         if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
4029                 log_err("event_add failed. in cpsl.");
4030         }
4031 }
4032
4033 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
4034 {
4035         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
4036         if(ub_event_del(c->ev->ev) != 0) {
4037                 log_err("event_del error to cplf");
4038         }
4039         ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
4040         if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
4041         if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
4042         if(ub_event_add(c->ev->ev, c->timeout) != 0) {
4043                 log_err("event_add failed. in cplf.");
4044         }
4045 }
4046
4047 size_t comm_point_get_mem(struct comm_point* c)
4048 {
4049         size_t s;
4050         if(!c) 
4051                 return 0;
4052         s = sizeof(*c) + sizeof(*c->ev);
4053         if(c->timeout) 
4054                 s += sizeof(*c->timeout);
4055         if(c->type == comm_tcp || c->type == comm_local) {
4056                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
4057 #ifdef USE_DNSCRYPT
4058                 s += sizeof(*c->dnscrypt_buffer);
4059                 if(c->buffer != c->dnscrypt_buffer) {
4060                         s += sldns_buffer_capacity(c->dnscrypt_buffer);
4061                 }
4062 #endif
4063         }
4064         if(c->type == comm_tcp_accept) {
4065                 int i;
4066                 for(i=0; i<c->max_tcp_count; i++)
4067                         s += comm_point_get_mem(c->tcp_handlers[i]);
4068         }
4069         return s;
4070 }
4071
4072 struct comm_timer* 
4073 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
4074 {
4075         struct internal_timer *tm = (struct internal_timer*)calloc(1,
4076                 sizeof(struct internal_timer));
4077         if(!tm) {
4078                 log_err("malloc failed");
4079                 return NULL;
4080         }
4081         tm->super.ev_timer = tm;
4082         tm->base = base;
4083         tm->super.callback = cb;
4084         tm->super.cb_arg = cb_arg;
4085         tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 
4086                 comm_timer_callback, &tm->super);
4087         if(tm->ev == NULL) {
4088                 log_err("timer_create: event_base_set failed.");
4089                 free(tm);
4090                 return NULL;
4091         }
4092         return &tm->super;
4093 }
4094
4095 void 
4096 comm_timer_disable(struct comm_timer* timer)
4097 {
4098         if(!timer)
4099                 return;
4100         ub_timer_del(timer->ev_timer->ev);
4101         timer->ev_timer->enabled = 0;
4102 }
4103
4104 void 
4105 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
4106 {
4107         log_assert(tv);
4108         if(timer->ev_timer->enabled)
4109                 comm_timer_disable(timer);
4110         if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
4111                 comm_timer_callback, timer, tv) != 0)
4112                 log_err("comm_timer_set: evtimer_add failed.");
4113         timer->ev_timer->enabled = 1;
4114 }
4115
4116 void 
4117 comm_timer_delete(struct comm_timer* timer)
4118 {
4119         if(!timer)
4120                 return;
4121         comm_timer_disable(timer);
4122         /* Free the sub struct timer->ev_timer derived from the super struct timer.
4123          * i.e. assert(timer == timer->ev_timer)
4124          */
4125         ub_event_free(timer->ev_timer->ev);
4126         free(timer->ev_timer);
4127 }
4128
4129 void 
4130 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
4131 {
4132         struct comm_timer* tm = (struct comm_timer*)arg;
4133         if(!(event&UB_EV_TIMEOUT))
4134                 return;
4135         ub_comm_base_now(tm->ev_timer->base);
4136         tm->ev_timer->enabled = 0;
4137         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
4138         (*tm->callback)(tm->cb_arg);
4139 }
4140
4141 int 
4142 comm_timer_is_set(struct comm_timer* timer)
4143 {
4144         return (int)timer->ev_timer->enabled;
4145 }
4146
4147 size_t 
4148 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer))
4149 {
4150         return sizeof(struct internal_timer);
4151 }
4152
4153 struct comm_signal* 
4154 comm_signal_create(struct comm_base* base,
4155         void (*callback)(int, void*), void* cb_arg)
4156 {
4157         struct comm_signal* com = (struct comm_signal*)malloc(
4158                 sizeof(struct comm_signal));
4159         if(!com) {
4160                 log_err("malloc failed");
4161                 return NULL;
4162         }
4163         com->base = base;
4164         com->callback = callback;
4165         com->cb_arg = cb_arg;
4166         com->ev_signal = NULL;
4167         return com;
4168 }
4169
4170 void 
4171 comm_signal_callback(int sig, short event, void* arg)
4172 {
4173         struct comm_signal* comsig = (struct comm_signal*)arg;
4174         if(!(event & UB_EV_SIGNAL))
4175                 return;
4176         ub_comm_base_now(comsig->base);
4177         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
4178         (*comsig->callback)(sig, comsig->cb_arg);
4179 }
4180
4181 int 
4182 comm_signal_bind(struct comm_signal* comsig, int sig)
4183 {
4184         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
4185                 sizeof(struct internal_signal));
4186         if(!entry) {
4187                 log_err("malloc failed");
4188                 return 0;
4189         }
4190         log_assert(comsig);
4191         /* add signal event */
4192         entry->ev = ub_signal_new(comsig->base->eb->base, sig,
4193                 comm_signal_callback, comsig);
4194         if(entry->ev == NULL) {
4195                 log_err("Could not create signal event");
4196                 free(entry);
4197                 return 0;
4198         }
4199         if(ub_signal_add(entry->ev, NULL) != 0) {
4200                 log_err("Could not add signal handler");
4201                 ub_event_free(entry->ev);
4202                 free(entry);
4203                 return 0;
4204         }
4205         /* link into list */
4206         entry->next = comsig->ev_signal;
4207         comsig->ev_signal = entry;
4208         return 1;
4209 }
4210
4211 void 
4212 comm_signal_delete(struct comm_signal* comsig)
4213 {
4214         struct internal_signal* p, *np;
4215         if(!comsig)
4216                 return;
4217         p=comsig->ev_signal;
4218         while(p) {
4219                 np = p->next;
4220                 ub_signal_del(p->ev);
4221                 ub_event_free(p->ev);
4222                 free(p);
4223                 p = np;
4224         }
4225         free(comsig);
4226 }