]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - util/netevent.c
Vendor import of Unbound 1.8.0.
[FreeBSD/FreeBSD.git] / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/ub_event.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/tcp_conn_limit.h"
47 #include "util/fptr_wlist.h"
48 #include "sldns/pkthdr.h"
49 #include "sldns/sbuffer.h"
50 #include "sldns/str2wire.h"
51 #include "dnstap/dnstap.h"
52 #include "dnscrypt/dnscrypt.h"
53 #ifdef HAVE_OPENSSL_SSL_H
54 #include <openssl/ssl.h>
55 #endif
56 #ifdef HAVE_OPENSSL_ERR_H
57 #include <openssl/err.h>
58 #endif
59
60 /* -------- Start of local definitions -------- */
/**
 * If CMSG_ALIGN is not defined on this platform, a workaround.
 * Preference order: the platform's private __CMSG_ALIGN, then
 * _CMSG_DATA_ALIGN, and otherwise a generic round-up of the length to a
 * multiple of sizeof(long).
 * The guard tests the same macro name that the definition expands to, so a
 * platform that defines only one spelling cannot end up with a dangling
 * reference.
 */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif
71
/**
 * If CMSG_LEN is not defined on this platform, a workaround.
 * The fallback is the aligned size of struct cmsghdr plus the payload
 * length; the payload length itself is not rounded up.
 */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif
76
/**
 * If CMSG_SPACE is not defined on this platform, a workaround.
 * The fallback adds the aligned payload length to the aligned header size.
 * The header is aligned with _CMSG_HDR_ALIGN when the platform provides it,
 * and with CMSG_ALIGN otherwise.
 */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
85
/** The TCP writing query timeout in milliseconds */
#define TCP_QUERY_TIMEOUT 120000
/** The minimum actual TCP timeout to use, regardless of what we advertise,
 * in msec. setup_tcp_handler() raises any shorter timeout to this value. */
#define TCP_QUERY_TIMEOUT_MINIMUM 200

/* Upper bound on the receive loop in the UDP callbacks: how many datagrams
 * are read per read event before returning to the event loop. */
#ifndef NONBLOCKING_IS_BROKEN
/** number of UDP reads to perform per read indication from select */
#define NUM_UDP_PER_SELECT 100
#else
/* NOTE(review): presumably limited to one read because a second read could
 * block when nonblocking sockets do not work -- confirm. */
#define NUM_UDP_PER_SELECT 1
#endif
98
/**
 * The internal event structure for keeping ub_event info for the event.
 * Possibly other structures (list, tree) this is part of.
 * The UDP callbacks reach the owning comm_base through the base member
 * (c->ev->base).
 */
struct internal_event {
        /** the comm base */
        struct comm_base* base;
        /** ub_event event type */
        struct ub_event* ev;
};
109
/**
 * Internal base structure, so that every thread has its own events.
 * Allocated zeroed by comm_base_create() and comm_base_create_event(),
 * and released by comm_base_delete() and comm_base_delete_no_base().
 */
struct internal_base {
        /** ub_event event_base type. */
        struct ub_event_base* base;
        /** seconds time pointer points here; its address is handed out by
         * comm_base_timept() */
        time_t secs;
        /** timeval with current time; its address is handed out by
         * comm_base_timept() */
        struct timeval now;
        /** the event used for slow_accept timeouts */
        struct ub_event* slow_accept;
        /** true if slow_accept is enabled; the delete functions only
         * remove and free the slow_accept event when this is set */
        int slow_accept_enabled;
};
125
/**
 * Internal timer structure, to store timer event in.
 * The public comm_timer is embedded as the first member.
 */
struct internal_timer {
        /** the super struct from which derived */
        struct comm_timer super;
        /** the comm base */
        struct comm_base* base;
        /** ub_event event type */
        struct ub_event* ev;
        /** is timer enabled */
        uint8_t enabled;
};
139
/**
 * Internal signal structure, to store signal event in.
 * Entries are chained through the next pointer into a singly linked list.
 */
struct internal_signal {
        /** ub_event event type */
        struct ub_event* ev;
        /** next in signal list */
        struct internal_signal* next;
};
149
150 /** create a tcp handler with a parent */
151 static struct comm_point* comm_point_create_tcp_handler(
152         struct comm_base *base, struct comm_point* parent, size_t bufsize,
153         comm_point_callback_type* callback, void* callback_arg);
154
155 /* -------- End of local definitions -------- */
156
157 struct comm_base* 
158 comm_base_create(int sigs)
159 {
160         struct comm_base* b = (struct comm_base*)calloc(1,
161                 sizeof(struct comm_base));
162         const char *evnm="event", *evsys="", *evmethod="";
163
164         if(!b)
165                 return NULL;
166         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
167         if(!b->eb) {
168                 free(b);
169                 return NULL;
170         }
171         b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
172         if(!b->eb->base) {
173                 free(b->eb);
174                 free(b);
175                 return NULL;
176         }
177         ub_comm_base_now(b);
178         ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
179         verbose(VERB_ALGO, "%s %s user %s method.", evnm, evsys, evmethod);
180         return b;
181 }
182
183 struct comm_base*
184 comm_base_create_event(struct ub_event_base* base)
185 {
186         struct comm_base* b = (struct comm_base*)calloc(1,
187                 sizeof(struct comm_base));
188         if(!b)
189                 return NULL;
190         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
191         if(!b->eb) {
192                 free(b);
193                 return NULL;
194         }
195         b->eb->base = base;
196         ub_comm_base_now(b);
197         return b;
198 }
199
200 void 
201 comm_base_delete(struct comm_base* b)
202 {
203         if(!b)
204                 return;
205         if(b->eb->slow_accept_enabled) {
206                 if(ub_event_del(b->eb->slow_accept) != 0) {
207                         log_err("could not event_del slow_accept");
208                 }
209                 ub_event_free(b->eb->slow_accept);
210         }
211         ub_event_base_free(b->eb->base);
212         b->eb->base = NULL;
213         free(b->eb);
214         free(b);
215 }
216
217 void 
218 comm_base_delete_no_base(struct comm_base* b)
219 {
220         if(!b)
221                 return;
222         if(b->eb->slow_accept_enabled) {
223                 if(ub_event_del(b->eb->slow_accept) != 0) {
224                         log_err("could not event_del slow_accept");
225                 }
226                 ub_event_free(b->eb->slow_accept);
227         }
228         b->eb->base = NULL;
229         free(b->eb);
230         free(b);
231 }
232
233 void 
234 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
235 {
236         *tt = &b->eb->secs;
237         *tv = &b->eb->now;
238 }
239
240 void 
241 comm_base_dispatch(struct comm_base* b)
242 {
243         int retval;
244         retval = ub_event_base_dispatch(b->eb->base);
245         if(retval < 0) {
246                 fatal_exit("event_dispatch returned error %d, "
247                         "errno is %s", retval, strerror(errno));
248         }
249 }
250
251 void comm_base_exit(struct comm_base* b)
252 {
253         if(ub_event_base_loopexit(b->eb->base) != 0) {
254                 log_err("Could not loopexit");
255         }
256 }
257
258 void comm_base_set_slow_accept_handlers(struct comm_base* b,
259         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
260 {
261         b->stop_accept = stop_acc;
262         b->start_accept = start_acc;
263         b->cb_arg = arg;
264 }
265
266 struct ub_event_base* comm_base_internal(struct comm_base* b)
267 {
268         return b->eb->base;
269 }
270
271 /** see if errno for udp has to be logged or not uses globals */
272 static int
273 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
274 {
275         /* do not log transient errors (unless high verbosity) */
276 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
277         switch(errno) {
278 #  ifdef ENETUNREACH
279                 case ENETUNREACH:
280 #  endif
281 #  ifdef EHOSTDOWN
282                 case EHOSTDOWN:
283 #  endif
284 #  ifdef EHOSTUNREACH
285                 case EHOSTUNREACH:
286 #  endif
287 #  ifdef ENETDOWN
288                 case ENETDOWN:
289 #  endif
290                         if(verbosity < VERB_ALGO)
291                                 return 0;
292                 default:
293                         break;
294         }
295 #endif
296         /* permission denied is gotten for every send if the
297          * network is disconnected (on some OS), squelch it */
298         if( ((errno == EPERM)
299 #  ifdef EADDRNOTAVAIL
300                 /* 'Cannot assign requested address' also when disconnected */
301                 || (errno == EADDRNOTAVAIL)
302 #  endif
303                 ) && verbosity < VERB_DETAIL)
304                 return 0;
305 #  ifdef EADDRINUSE
306         /* If SO_REUSEADDR is set, we could try to connect to the same server
307          * from the same source port twice. */
308         if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
309                 return 0;
310 #  endif
311         /* squelch errors where people deploy AAAA ::ffff:bla for
312          * authority servers, which we try for intranets. */
313         if(errno == EINVAL && addr_is_ip4mapped(
314                 (struct sockaddr_storage*)addr, addrlen) &&
315                 verbosity < VERB_DETAIL)
316                 return 0;
317         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
318          * but a dns cache does not need it. */
319         if(errno == EACCES && addr_is_broadcast(
320                 (struct sockaddr_storage*)addr, addrlen) &&
321                 verbosity < VERB_DETAIL)
322                 return 0;
323         return 1;
324 }
325
/**
 * Decide whether a TCP connect error is worth a log line.
 * Applies exactly the same rules as for UDP send errors.
 * @param addr: address that was connected to.
 * @param addrlen: length of addr.
 * @return 0 if the error should be squelched, 1 if it should be logged.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
        int needs_log = udp_send_errno_needs_log(addr, addrlen);

        return needs_log;
}
330
331 /* send a UDP reply */
332 int
333 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
334         struct sockaddr* addr, socklen_t addrlen) 
335 {
336         ssize_t sent;
337         log_assert(c->fd != -1);
338 #ifdef UNBOUND_DEBUG
339         if(sldns_buffer_remaining(packet) == 0)
340                 log_err("error: send empty UDP packet");
341 #endif
342         log_assert(addr && addrlen > 0);
343         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
344                 sldns_buffer_remaining(packet), 0,
345                 addr, addrlen);
346         if(sent == -1) {
347                 /* try again and block, waiting for IO to complete,
348                  * we want to send the answer, and we will wait for
349                  * the ethernet interface buffer to have space. */
350 #ifndef USE_WINSOCK
351                 if(errno == EAGAIN || 
352 #  ifdef EWOULDBLOCK
353                         errno == EWOULDBLOCK ||
354 #  endif
355                         errno == ENOBUFS) {
356 #else
357                 if(WSAGetLastError() == WSAEINPROGRESS ||
358                         WSAGetLastError() == WSAENOBUFS ||
359                         WSAGetLastError() == WSAEWOULDBLOCK) {
360 #endif
361                         int e;
362                         fd_set_block(c->fd);
363                         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
364                                 sldns_buffer_remaining(packet), 0,
365                                 addr, addrlen);
366                         e = errno;
367                         fd_set_nonblock(c->fd);
368                         errno = e;
369                 }
370         }
371         if(sent == -1) {
372                 if(!udp_send_errno_needs_log(addr, addrlen))
373                         return 0;
374 #ifndef USE_WINSOCK
375                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
376 #else
377                 verbose(VERB_OPS, "sendto failed: %s", 
378                         wsa_strerror(WSAGetLastError()));
379 #endif
380                 log_addr(VERB_OPS, "remote address is", 
381                         (struct sockaddr_storage*)addr, addrlen);
382                 return 0;
383         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
384                 log_err("sent %d in place of %d bytes", 
385                         (int)sent, (int)sldns_buffer_remaining(packet));
386                 return 0;
387         }
388         return 1;
389 }
390
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Print the ancillary (packet info) data of a reply to the log, for debug.
 * @param str: prefix for the log line.
 * @param r: reply info; srctype selects which pktinfo member is printed
 *      (6 for IPv6, 4 for IPv4). Other values are logged as unknown.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
        switch(r->srctype) {
        case 6: {
                char text[1024];
                if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 
                        text, (socklen_t)sizeof(text)) == NULL) {
                        (void)strlcpy(text, "(inet_ntop error)",
                                sizeof(text));
                }
                text[sizeof(text)-1]=0;
                log_info("%s: %s %d", str, text,
                        r->pktinfo.v6info.ipi6_ifindex);
                break;
        }
        case 4: {
#ifdef IP_PKTINFO
                char addr_text[1024], spec_text[1024];
                if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 
                        addr_text, (socklen_t)sizeof(addr_text)) == NULL) {
                        (void)strlcpy(addr_text, "(inet_ntop error)",
                                sizeof(addr_text));
                }
                addr_text[sizeof(addr_text)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
                if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 
                        spec_text, (socklen_t)sizeof(spec_text)) == NULL) {
                        (void)strlcpy(spec_text, "(inet_ntop error)",
                                sizeof(spec_text));
                }
                spec_text[sizeof(spec_text)-1]=0;
#else
                /* no ipi_spec_dst member, print an empty string for it */
                spec_text[0]=0;
#endif
                log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
                        addr_text, spec_text);
#elif defined(IP_RECVDSTADDR)
                char addr_text[1024];
                if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 
                        addr_text, (socklen_t)sizeof(addr_text)) == NULL) {
                        (void)strlcpy(addr_text, "(inet_ntop error)",
                                sizeof(addr_text));
                }
                addr_text[sizeof(addr_text)-1]=0;
                log_info("%s: %s", str, addr_text);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
                break;
        }
        default:
                log_info("%s: unknown srctype %d", str, r->srctype);
                break;
        }
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
438
439 /** send a UDP reply over specified interface*/
440 static int
441 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
442         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
443 {
444 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
445         ssize_t sent;
446         struct msghdr msg;
447         struct iovec iov[1];
448         char control[256];
449 #ifndef S_SPLINT_S
450         struct cmsghdr *cmsg;
451 #endif /* S_SPLINT_S */
452
453         log_assert(c->fd != -1);
454 #ifdef UNBOUND_DEBUG
455         if(sldns_buffer_remaining(packet) == 0)
456                 log_err("error: send empty UDP packet");
457 #endif
458         log_assert(addr && addrlen > 0);
459
460         msg.msg_name = addr;
461         msg.msg_namelen = addrlen;
462         iov[0].iov_base = sldns_buffer_begin(packet);
463         iov[0].iov_len = sldns_buffer_remaining(packet);
464         msg.msg_iov = iov;
465         msg.msg_iovlen = 1;
466         msg.msg_control = control;
467 #ifndef S_SPLINT_S
468         msg.msg_controllen = sizeof(control);
469 #endif /* S_SPLINT_S */
470         msg.msg_flags = 0;
471
472 #ifndef S_SPLINT_S
473         cmsg = CMSG_FIRSTHDR(&msg);
474         if(r->srctype == 4) {
475 #ifdef IP_PKTINFO
476                 void* cmsg_data;
477                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
478                 log_assert(msg.msg_controllen <= sizeof(control));
479                 cmsg->cmsg_level = IPPROTO_IP;
480                 cmsg->cmsg_type = IP_PKTINFO;
481                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
482                         sizeof(struct in_pktinfo));
483                 /* unset the ifindex to not bypass the routing tables */
484                 cmsg_data = CMSG_DATA(cmsg);
485                 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
486                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
487 #elif defined(IP_SENDSRCADDR)
488                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
489                 log_assert(msg.msg_controllen <= sizeof(control));
490                 cmsg->cmsg_level = IPPROTO_IP;
491                 cmsg->cmsg_type = IP_SENDSRCADDR;
492                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
493                         sizeof(struct in_addr));
494                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
495 #else
496                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
497                 msg.msg_control = NULL;
498 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
499         } else if(r->srctype == 6) {
500                 void* cmsg_data;
501                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
502                 log_assert(msg.msg_controllen <= sizeof(control));
503                 cmsg->cmsg_level = IPPROTO_IPV6;
504                 cmsg->cmsg_type = IPV6_PKTINFO;
505                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
506                         sizeof(struct in6_pktinfo));
507                 /* unset the ifindex to not bypass the routing tables */
508                 cmsg_data = CMSG_DATA(cmsg);
509                 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
510                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
511         } else {
512                 /* try to pass all 0 to use default route */
513                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
514                 log_assert(msg.msg_controllen <= sizeof(control));
515                 cmsg->cmsg_level = IPPROTO_IPV6;
516                 cmsg->cmsg_type = IPV6_PKTINFO;
517                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
518                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
519         }
520 #endif /* S_SPLINT_S */
521         if(verbosity >= VERB_ALGO)
522                 p_ancil("send_udp over interface", r);
523         sent = sendmsg(c->fd, &msg, 0);
524         if(sent == -1) {
525                 /* try again and block, waiting for IO to complete,
526                  * we want to send the answer, and we will wait for
527                  * the ethernet interface buffer to have space. */
528 #ifndef USE_WINSOCK
529                 if(errno == EAGAIN || 
530 #  ifdef EWOULDBLOCK
531                         errno == EWOULDBLOCK ||
532 #  endif
533                         errno == ENOBUFS) {
534 #else
535                 if(WSAGetLastError() == WSAEINPROGRESS ||
536                         WSAGetLastError() == WSAENOBUFS ||
537                         WSAGetLastError() == WSAEWOULDBLOCK) {
538 #endif
539                         int e;
540                         fd_set_block(c->fd);
541                         sent = sendmsg(c->fd, &msg, 0);
542                         e = errno;
543                         fd_set_nonblock(c->fd);
544                         errno = e;
545                 }
546         }
547         if(sent == -1) {
548                 if(!udp_send_errno_needs_log(addr, addrlen))
549                         return 0;
550                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
551                 log_addr(VERB_OPS, "remote address is", 
552                         (struct sockaddr_storage*)addr, addrlen);
553 #ifdef __NetBSD__
554                 /* netbsd 7 has IP_PKTINFO for recv but not send */
555                 if(errno == EINVAL && r->srctype == 4)
556                         log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
557                                 "Please disable interface-automatic");
558 #endif
559                 return 0;
560         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
561                 log_err("sent %d in place of %d bytes", 
562                         (int)sent, (int)sldns_buffer_remaining(packet));
563                 return 0;
564         }
565         return 1;
566 #else
567         (void)c;
568         (void)packet;
569         (void)addr;
570         (void)addrlen;
571         (void)r;
572         log_err("sendmsg: IPV6_PKTINFO not supported");
573         return 0;
574 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
575 }
576
/**
 * Event callback for a UDP comm point that receives with ancillary data.
 * Reads up to NUM_UDP_PER_SELECT datagrams with recvmsg(), records the
 * packet info of each one (IPv6 pktinfo, IPv4 pktinfo or IPv4 destination
 * address) in the reply info, and calls the comm point callback. When the
 * callback returns true the reply is sent back right away over the same
 * interface.
 * Without IPV6_PKTINFO and recvmsg support this function exits fatally.
 * @param fd: the socket that is readable.
 * @param event: event flags; nothing is done unless UB_EV_READ is set.
 * @param arg: the comm point.
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
        struct comm_reply rep;
        struct msghdr msg;
        struct iovec iov[1];
        ssize_t rcv;
        /* a union, so that the buffer is suitably aligned for the
         * struct cmsghdr items that are read from it through the CMSG_
         * macros; a plain char array has no such guarantee */
        union {
                struct cmsghdr hdr;
                char buf[256];
        } ancil;
        int i;
#ifndef S_SPLINT_S
        struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

        rep.c = (struct comm_point*)arg;
        log_assert(rep.c->type == comm_udp);

        if(!(event&UB_EV_READ))
                return;
        log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
        ub_comm_base_now(rep.c->ev->base);
        for(i=0; i<NUM_UDP_PER_SELECT; i++) {
                sldns_buffer_clear(rep.c->buffer);
                rep.addrlen = (socklen_t)sizeof(rep.addr);
                log_assert(fd != -1);
                log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
                msg.msg_name = &rep.addr;
                msg.msg_namelen = (socklen_t)sizeof(rep.addr);
                iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
                iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
                msg.msg_iov = iov;
                msg.msg_iovlen = 1;
                msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
                msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
                msg.msg_flags = 0;
                rcv = recvmsg(fd, &msg, 0);
                if(rcv == -1) {
                        /* EAGAIN and EINTR are not logged */
                        if(errno != EAGAIN && errno != EINTR) {
                                log_err("recvmsg failed: %s", strerror(errno));
                        }
                        return;
                }
                rep.addrlen = msg.msg_namelen;
                sldns_buffer_skip(rep.c->buffer, rcv);
                sldns_buffer_flip(rep.c->buffer);
                /* stays 0 when no packet info item is found */
                rep.srctype = 0;
#ifndef S_SPLINT_S
                for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
                        cmsg = CMSG_NXTHDR(&msg, cmsg)) {
                        if( cmsg->cmsg_level == IPPROTO_IPV6 &&
                                cmsg->cmsg_type == IPV6_PKTINFO) {
                                rep.srctype = 6;
                                memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
                                        sizeof(struct in6_pktinfo));
                                break;
#ifdef IP_PKTINFO
                        } else if( cmsg->cmsg_level == IPPROTO_IP &&
                                cmsg->cmsg_type == IP_PKTINFO) {
                                rep.srctype = 4;
                                memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
                                        sizeof(struct in_pktinfo));
                                break;
#elif defined(IP_RECVDSTADDR)
                        } else if( cmsg->cmsg_level == IPPROTO_IP &&
                                cmsg->cmsg_type == IP_RECVDSTADDR) {
                                rep.srctype = 4;
                                memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
                                        sizeof(struct in_addr));
                                break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
                        }
                }
                if(verbosity >= VERB_ALGO)
                        p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
                fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
                if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
                        /* send back immediate reply; pick the buffer the
                         * same way comm_point_udp_callback does, so that
                         * a build with dnscrypt sends from the dnscrypt
                         * buffer here as well */
                        struct sldns_buffer *buffer;
#ifdef USE_DNSCRYPT
                        buffer = rep.c->dnscrypt_buffer;
#else
                        buffer = rep.c->buffer;
#endif
                        (void)comm_point_send_udp_msg_if(rep.c, buffer,
                                (struct sockaddr*)&rep.addr, rep.addrlen, &rep);
                }
                if(!rep.c || rep.c->fd == -1) /* commpoint closed */
                        break;
        }
#else
        (void)fd;
        (void)event;
        (void)arg;
        fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. "
                "Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
671
672 void 
673 comm_point_udp_callback(int fd, short event, void* arg)
674 {
675         struct comm_reply rep;
676         ssize_t rcv;
677         int i;
678         struct sldns_buffer *buffer;
679
680         rep.c = (struct comm_point*)arg;
681         log_assert(rep.c->type == comm_udp);
682
683         if(!(event&UB_EV_READ))
684                 return;
685         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
686         ub_comm_base_now(rep.c->ev->base);
687         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
688                 sldns_buffer_clear(rep.c->buffer);
689                 rep.addrlen = (socklen_t)sizeof(rep.addr);
690                 log_assert(fd != -1);
691                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
692                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
693                         sldns_buffer_remaining(rep.c->buffer), 0, 
694                         (struct sockaddr*)&rep.addr, &rep.addrlen);
695                 if(rcv == -1) {
696 #ifndef USE_WINSOCK
697                         if(errno != EAGAIN && errno != EINTR)
698                                 log_err("recvfrom %d failed: %s", 
699                                         fd, strerror(errno));
700 #else
701                         if(WSAGetLastError() != WSAEINPROGRESS &&
702                                 WSAGetLastError() != WSAECONNRESET &&
703                                 WSAGetLastError()!= WSAEWOULDBLOCK)
704                                 log_err("recvfrom failed: %s",
705                                         wsa_strerror(WSAGetLastError()));
706 #endif
707                         return;
708                 }
709                 sldns_buffer_skip(rep.c->buffer, rcv);
710                 sldns_buffer_flip(rep.c->buffer);
711                 rep.srctype = 0;
712                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
713                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
714                         /* send back immediate reply */
715 #ifdef USE_DNSCRYPT
716                         buffer = rep.c->dnscrypt_buffer;
717 #else
718                         buffer = rep.c->buffer;
719 #endif
720                         (void)comm_point_send_udp_msg(rep.c, buffer,
721                                 (struct sockaddr*)&rep.addr, rep.addrlen);
722                 }
723                 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
724                 another UDP port. Note rep.c cannot be reused with TCP fd. */
725                         break;
726         }
727 }
728
729 /** Use a new tcp handler for new query fd, set to read query */
730 static void
731 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 
732 {
733         int handler_usage;
734         log_assert(c->type == comm_tcp);
735         log_assert(c->fd == -1);
736         sldns_buffer_clear(c->buffer);
737 #ifdef USE_DNSCRYPT
738         if (c->dnscrypt)
739                 sldns_buffer_clear(c->dnscrypt_buffer);
740 #endif
741         c->tcp_is_reading = 1;
742         c->tcp_byte_count = 0;
743         /* if more than half the tcp handlers are in use, use a shorter
744          * timeout for this TCP connection, we need to make space for
745          * other connections to be able to get attention */
746         /* If > 50% TCP handler structures in use, set timeout to 1/100th
747          *      configured value.
748          * If > 65%TCP handler structures in use, set to 1/500th configured
749          *      value.
750          * If > 80% TCP handler structures in use, set to 0.
751          *
752          * If the timeout to use falls below 200 milliseconds, an actual
753          * timeout of 200ms is used.
754          */
755         handler_usage = (cur * 100) / max;
756         if(handler_usage > 50 && handler_usage <= 65)
757                 c->tcp_timeout_msec /= 100;
758         else if (handler_usage > 65 && handler_usage <= 80)
759                 c->tcp_timeout_msec /= 500;
760         else if (handler_usage > 80)
761                 c->tcp_timeout_msec = 0;
762         comm_point_start_listening(c, fd,
763                 c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM
764                         ? TCP_QUERY_TIMEOUT_MINIMUM
765                         : c->tcp_timeout_msec);
766 }
767
768 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
769         short ATTR_UNUSED(event), void* arg)
770 {
771         struct comm_base* b = (struct comm_base*)arg;
772         /* timeout for the slow accept, re-enable accepts again */
773         if(b->start_accept) {
774                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
775                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
776                 (*b->start_accept)(b->cb_arg);
777                 b->eb->slow_accept_enabled = 0;
778         }
779 }
780
781 int comm_point_perform_accept(struct comm_point* c,
782         struct sockaddr_storage* addr, socklen_t* addrlen)
783 {
784         int new_fd;
785         *addrlen = (socklen_t)sizeof(*addr);
786 #ifndef HAVE_ACCEPT4
787         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
788 #else
789         /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
790         new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
791 #endif
792         if(new_fd == -1) {
793 #ifndef USE_WINSOCK
794                 /* EINTR is signal interrupt. others are closed connection. */
795                 if(     errno == EINTR || errno == EAGAIN
796 #ifdef EWOULDBLOCK
797                         || errno == EWOULDBLOCK 
798 #endif
799 #ifdef ECONNABORTED
800                         || errno == ECONNABORTED 
801 #endif
802 #ifdef EPROTO
803                         || errno == EPROTO
804 #endif /* EPROTO */
805                         )
806                         return -1;
807 #if defined(ENFILE) && defined(EMFILE)
808                 if(errno == ENFILE || errno == EMFILE) {
809                         /* out of file descriptors, likely outside of our
810                          * control. stop accept() calls for some time */
811                         if(c->ev->base->stop_accept) {
812                                 struct comm_base* b = c->ev->base;
813                                 struct timeval tv;
814                                 verbose(VERB_ALGO, "out of file descriptors: "
815                                         "slow accept");
816                                 b->eb->slow_accept_enabled = 1;
817                                 fptr_ok(fptr_whitelist_stop_accept(
818                                         b->stop_accept));
819                                 (*b->stop_accept)(b->cb_arg);
820                                 /* set timeout, no mallocs */
821                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
822                                 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
823                                 b->eb->slow_accept = ub_event_new(b->eb->base,
824                                         -1, UB_EV_TIMEOUT,
825                                         comm_base_handle_slow_accept, b);
826                                 if(b->eb->slow_accept == NULL) {
827                                         /* we do not want to log here, because
828                                          * that would spam the logfiles.
829                                          * error: "event_base_set failed." */
830                                 }
831                                 else if(ub_event_add(b->eb->slow_accept, &tv)
832                                         != 0) {
833                                         /* we do not want to log here,
834                                          * error: "event_add failed." */
835                                 }
836                         }
837                         return -1;
838                 }
839 #endif
840                 log_err_addr("accept failed", strerror(errno), addr, *addrlen);
841 #else /* USE_WINSOCK */
842                 if(WSAGetLastError() == WSAEINPROGRESS ||
843                         WSAGetLastError() == WSAECONNRESET)
844                         return -1;
845                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
846                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
847                         return -1;
848                 }
849                 log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
850                         addr, *addrlen);
851 #endif
852                 return -1;
853         }
854         if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
855                 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
856                 if(!tcl_new_connection(c->tcl_addr)) {
857                         if(verbosity >= 3)
858                                 log_err_addr("accept rejected",
859                                 "connection limit exceeded", addr, *addrlen);
860                         close(new_fd);
861                         return -1;
862                 }
863         }
864 #ifndef HAVE_ACCEPT4
865         fd_set_nonblock(new_fd);
866 #endif
867         return new_fd;
868 }
869
870 #ifdef USE_WINSOCK
871 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
872         int ATTR_UNUSED(argi), long argl, long retvalue)
873 {
874         int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
875         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
876                 (oper&BIO_CB_RETURN)?"return":"before",
877                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
878                 wsa_err==WSAEWOULDBLOCK?"wsawb":"");
879         /* on windows, check if previous operation caused EWOULDBLOCK */
880         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
881                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
882                 if(wsa_err == WSAEWOULDBLOCK)
883                         ub_winsock_tcp_wouldblock((struct ub_event*)
884                                 BIO_get_callback_arg(b), UB_EV_READ);
885         }
886         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
887                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
888                 if(wsa_err == WSAEWOULDBLOCK)
889                         ub_winsock_tcp_wouldblock((struct ub_event*)
890                                 BIO_get_callback_arg(b), UB_EV_WRITE);
891         }
892         /* return original return value */
893         return retvalue;
894 }
895
896 /** set win bio callbacks for nonblocking operations */
897 void
898 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
899 {
900         SSL* ssl = (SSL*)thessl;
901         /* set them both just in case, but usually they are the same BIO */
902         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
903         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
904         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
905         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
906 }
907 #endif
908
909 void 
910 comm_point_tcp_accept_callback(int fd, short event, void* arg)
911 {
912         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
913         int new_fd;
914         log_assert(c->type == comm_tcp_accept);
915         if(!(event & UB_EV_READ)) {
916                 log_info("ignoring tcp accept event %d", (int)event);
917                 return;
918         }
919         ub_comm_base_now(c->ev->base);
920         /* find free tcp handler. */
921         if(!c->tcp_free) {
922                 log_warn("accepted too many tcp, connections full");
923                 return;
924         }
925         /* accept incoming connection. */
926         c_hdl = c->tcp_free;
927         log_assert(fd != -1);
928         (void)fd;
929         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
930                 &c_hdl->repinfo.addrlen);
931         if(new_fd == -1)
932                 return;
933         if(c->ssl) {
934                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
935                 if(!c_hdl->ssl) {
936                         c_hdl->fd = new_fd;
937                         comm_point_close(c_hdl);
938                         return;
939                 }
940                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
941 #ifdef USE_WINSOCK
942                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
943 #endif
944         }
945
946         /* grab the tcp handler buffers */
947         c->cur_tcp_count++;
948         c->tcp_free = c_hdl->tcp_free;
949         if(!c->tcp_free) {
950                 /* stop accepting incoming queries for now. */
951                 comm_point_stop_listening(c);
952         }
953         setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
954 }
955
956 /** Make tcp handler free for next assignment */
957 static void
958 reclaim_tcp_handler(struct comm_point* c)
959 {
960         log_assert(c->type == comm_tcp);
961         if(c->ssl) {
962 #ifdef HAVE_SSL
963                 SSL_shutdown(c->ssl);
964                 SSL_free(c->ssl);
965                 c->ssl = NULL;
966 #endif
967         }
968         comm_point_close(c);
969         if(c->tcp_parent) {
970                 c->tcp_parent->cur_tcp_count--;
971                 c->tcp_free = c->tcp_parent->tcp_free;
972                 c->tcp_parent->tcp_free = c;
973                 if(!c->tcp_free) {
974                         /* re-enable listening on accept socket */
975                         comm_point_start_listening(c->tcp_parent, -1, -1);
976                 }
977         }
978 }
979
980 /** do the callback when writing is done */
981 static void
982 tcp_callback_writer(struct comm_point* c)
983 {
984         log_assert(c->type == comm_tcp);
985         sldns_buffer_clear(c->buffer);
986         if(c->tcp_do_toggle_rw)
987                 c->tcp_is_reading = 1;
988         c->tcp_byte_count = 0;
989         /* switch from listening(write) to listening(read) */
990         comm_point_stop_listening(c);
991         comm_point_start_listening(c, -1, -1);
992 }
993
994 /** do the callback when reading is done */
995 static void
996 tcp_callback_reader(struct comm_point* c)
997 {
998         log_assert(c->type == comm_tcp || c->type == comm_local);
999         sldns_buffer_flip(c->buffer);
1000         if(c->tcp_do_toggle_rw)
1001                 c->tcp_is_reading = 0;
1002         c->tcp_byte_count = 0;
1003         if(c->type == comm_tcp)
1004                 comm_point_stop_listening(c);
1005         fptr_ok(fptr_whitelist_comm_point(c->callback));
1006         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1007                 comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1008         }
1009 }
1010
1011 #ifdef HAVE_SSL
1012 /** log certificate details */
1013 static void
1014 log_cert(unsigned level, const char* str, X509* cert)
1015 {
1016         BIO* bio;
1017         char nul = 0;
1018         char* pp = NULL;
1019         long len;
1020         if(verbosity < level) return;
1021         bio = BIO_new(BIO_s_mem());
1022         if(!bio) return;
1023         X509_print_ex(bio, cert, 0, (unsigned long)-1
1024                 ^(X509_FLAG_NO_SUBJECT
1025                         |X509_FLAG_NO_ISSUER|X509_FLAG_NO_VALIDITY
1026                         |X509_FLAG_NO_EXTENSIONS|X509_FLAG_NO_AUX
1027                         |X509_FLAG_NO_ATTRIBUTES));
1028         BIO_write(bio, &nul, (int)sizeof(nul));
1029         len = BIO_get_mem_data(bio, &pp);
1030         if(len != 0 && pp) {
1031                 verbose(level, "%s: \n%s", str, pp);
1032         }
1033         BIO_free(bio);
1034 }
1035 #endif /* HAVE_SSL */
1036
/**
 * Continue the ssl handshake, or resume after a read/write condition
 * that was awaited for something other than the handshake.
 * @param c: comm point with the ssl object and ssl_shake_state.
 * @return 0 if the connection failed or was closed, 1 otherwise. When 1
 *	is returned and ssl_shake_state is not comm_ssl_shake_none, the
 *	handshake still waits for a socket event.
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int rc;

	/* the hs_ states only wait for a socket condition, no handshake
	 * call is needed for them */
	switch(c->ssl_shake_state) {
	case comm_ssl_shake_hs_read:
		/* read condition satisfied back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	case comm_ssl_shake_hs_write:
		/* write condition satisfied, back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	default:
		break;
	}

	ERR_clear_error();
	rc = SSL_do_handshake(c->ssl);
	if(rc != 1) {
		int want = SSL_get_error(c->ssl, rc);
		if(want == SSL_ERROR_WANT_READ) {
			/* only re-register if not waiting for read already */
			if(c->ssl_shake_state != comm_ssl_shake_read) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
			}
			return 1;
		}
		if(want == SSL_ERROR_WANT_WRITE) {
			/* only re-register if not waiting for write already */
			if(c->ssl_shake_state != comm_ssl_shake_write) {
				c->ssl_shake_state = comm_ssl_shake_write;
				comm_point_listen_for_rw(c, 0, 1);
			}
			return 1;
		}
		if(rc == 0)
			return 0; /* closed */
		if(want == SSL_ERROR_SYSCALL) {
			/* SYSCALL and errno==0 means closed uncleanly */
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err("ssl handshake failed");
		log_addr(1, "ssl handshake failed", &c->repinfo.addr,
			c->repinfo.addrlen);
		return 0;
	}

	/* handshake is done; this is where peer verification takes place */
	if((SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER) == 0) {
		/* unauthenticated, the verify peer flag was not set
		 * in c->ssl when the ssl object was created from ssl_ctx */
		log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr,
			c->repinfo.addrlen);
	} else if(SSL_get_verify_result(c->ssl) != X509_V_OK) {
		X509* peer = SSL_get_peer_certificate(c->ssl);
		if(peer) {
			log_cert(VERB_ALGO, "peer certificate", peer);
			X509_free(peer);
		}
		log_addr(VERB_ALGO, "SSL connection failed: "
			"failed to authenticate",
			&c->repinfo.addr, c->repinfo.addrlen);
		return 0;
	} else {
		X509* peer = SSL_get_peer_certificate(c->ssl);
		if(!peer) {
			log_addr(VERB_ALGO, "SSL connection failed: "
				"no certificate",
				&c->repinfo.addr, c->repinfo.addrlen);
			return 0;
		}
		log_cert(VERB_ALGO, "peer certificate", peer);
#ifdef HAVE_SSL_GET0_PEERNAME
		if(SSL_get0_peername(c->ssl)) {
			char msg[255];
			snprintf(msg, sizeof(msg), "SSL connection "
				"to %s authenticated",
				SSL_get0_peername(c->ssl));
			log_addr(VERB_ALGO, msg, &c->repinfo.addr,
				c->repinfo.addrlen);
		} else
#endif
		{
			log_addr(VERB_ALGO, "SSL connection "
				"authenticated", &c->repinfo.addr,
				c->repinfo.addrlen);
		}
		X509_free(peer);
	}

	/* set the event up for the direction the connection is in */
	if(!c->tcp_is_reading) {
		comm_point_listen_for_rw(c, 1, 1);
	} else if(c->ssl_shake_state != comm_ssl_shake_read) {
		comm_point_listen_for_rw(c, 1, 0);
	}
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
1145
#ifdef HAVE_SSL
/**
 * Handle an SSL_read that returned r <= 0.
 * @param c: comm point the read was done on.
 * @param r: return value of SSL_read.
 * @return 1 to wait for a later event, 0 to give up on the connection.
 */
static int
ssl_read_failure(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* wait until writable, then ssl_handshake switches the
		 * event back to reading */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Finishes a pending handshake first, then reads the two byte length
 * prefix and after that the message itself into c->buffer. When the
 * buffer is full, tcp_callback_reader is called.
 * @param c: comm point to read on.
 * @return 0 if the connection has to be dropped, 1 otherwise. Without
 *	HAVE_SSL it always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int got;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read (the rest of) the length bytes */
		ERR_clear_error();
		got = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(got <= 0)
			return ssl_read_failure(c, got);
		c->tcp_byte_count += got;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;

		/* length is complete, validate it and size the buffer */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		sldns_buffer_skip(c->buffer,
			(ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	if(sldns_buffer_remaining(c->buffer) > 0) {
		/* read message contents */
		ERR_clear_error();
		got = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
		if(got <= 0)
			return ssl_read_failure(c, got);
		sldns_buffer_skip(c->buffer, (ssize_t)got);
	}
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		/* whole message is in, hand it to the reader callback */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1236
#ifdef HAVE_SSL
/**
 * Handle an SSL_write that returned r <= 0.
 * @param c: comm point the write was done on.
 * @param r: return value of SSL_write.
 * @return 1 to wait for a later event, 0 to give up on the connection.
 */
static int
ssl_write_failure(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN)
		return 0; /* closed */
	if(want == SSL_ERROR_WANT_READ) {
		/* The write needs a read condition, this is not a
		 * handshake. With the hs_read state ssl_handshake() only
		 * switches the event back to writing once the socket is
		 * readable, instead of running the handshake and the peer
		 * verification again. This mirrors the hs_write state that
		 * the read path uses for SSL_ERROR_WANT_WRITE. */
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	}
	if(want == SSL_ERROR_WANT_WRITE) {
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
		return 1; /* write more later */
	}
	if(want == SSL_ERROR_SYSCALL) {
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("could not SSL_write");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Finishes a pending handshake first, then writes the two byte length
 * prefix followed by the contents of c->buffer. When everything has been
 * written, tcp_callback_writer is called.
 * @param c: comm point to write on.
 * @return 0 if the connection has to be dropped, 1 otherwise. Without
 *	HAVE_SSL it always returns 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* the length prefix has not been fully written yet */
		uint16_t len = htons(sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
			LDNS_RR_BUF_SIZE) {
			/* combine the tcp length and the query for write,
			 * this emulates writev */
			uint8_t buf[LDNS_RR_BUF_SIZE];
			memmove(buf, &len, sizeof(uint16_t));
			memmove(buf+sizeof(uint16_t),
				sldns_buffer_current(c->buffer),
				sldns_buffer_remaining(c->buffer));
			r = SSL_write(c->ssl, (void*)(buf+c->tcp_byte_count),
				(int)(sizeof(uint16_t)+
				sldns_buffer_remaining(c->buffer)
				- c->tcp_byte_count));
		} else {
			/* too large to combine, write the length on its own */
			r = SSL_write(c->ssl,
				(void*)(((uint8_t*)&len)+c->tcp_byte_count),
				(int)(sizeof(uint16_t)-c->tcp_byte_count));
		}
		if(r <= 0)
			return ssl_write_failure(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* bytes written beyond the prefix were message contents */
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_write_failure(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1337
1338 /** handle ssl tcp connection with dns contents */
1339 static int
1340 ssl_handle_it(struct comm_point* c)
1341 {
1342         if(c->tcp_is_reading)
1343                 return ssl_handle_read(c);
1344         return ssl_handle_write(c);
1345 }
1346
1347 /** Handle tcp reading callback. 
1348  * @param fd: file descriptor of socket.
1349  * @param c: comm point to read from into buffer.
1350  * @param short_ok: if true, very short packets are OK (for comm_local).
1351  * @return: 0 on error 
1352  */
1353 static int
1354 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1355 {
1356         ssize_t r;
1357         log_assert(c->type == comm_tcp || c->type == comm_local);
1358         if(c->ssl)
1359                 return ssl_handle_it(c);
1360         if(!c->tcp_is_reading)
1361                 return 0;
1362
1363         log_assert(fd != -1);
1364         if(c->tcp_byte_count < sizeof(uint16_t)) {
1365                 /* read length bytes */
1366                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1367                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1368                 if(r == 0)
1369                         return 0;
1370                 else if(r == -1) {
1371 #ifndef USE_WINSOCK
1372                         if(errno == EINTR || errno == EAGAIN)
1373                                 return 1;
1374 #ifdef ECONNRESET
1375                         if(errno == ECONNRESET && verbosity < 2)
1376                                 return 0; /* silence reset by peer */
1377 #endif
1378                         log_err_addr("read (in tcp s)", strerror(errno),
1379                                 &c->repinfo.addr, c->repinfo.addrlen);
1380 #else /* USE_WINSOCK */
1381                         if(WSAGetLastError() == WSAECONNRESET)
1382                                 return 0;
1383                         if(WSAGetLastError() == WSAEINPROGRESS)
1384                                 return 1;
1385                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1386                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1387                                         UB_EV_READ);
1388                                 return 1;
1389                         }
1390                         log_err_addr("read (in tcp s)", 
1391                                 wsa_strerror(WSAGetLastError()),
1392                                 &c->repinfo.addr, c->repinfo.addrlen);
1393 #endif
1394                         return 0;
1395                 } 
1396                 c->tcp_byte_count += r;
1397                 if(c->tcp_byte_count != sizeof(uint16_t))
1398                         return 1;
1399                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1400                         sldns_buffer_capacity(c->buffer)) {
1401                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1402                         return 0;
1403                 }
1404                 sldns_buffer_set_limit(c->buffer, 
1405                         sldns_buffer_read_u16_at(c->buffer, 0));
1406                 if(!short_ok && 
1407                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1408                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1409                         return 0;
1410                 }
1411                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1412                         (int)sldns_buffer_limit(c->buffer));
1413         }
1414
1415         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1416         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1417                 sldns_buffer_remaining(c->buffer), 0);
1418         if(r == 0) {
1419                 return 0;
1420         } else if(r == -1) {
1421 #ifndef USE_WINSOCK
1422                 if(errno == EINTR || errno == EAGAIN)
1423                         return 1;
1424                 log_err_addr("read (in tcp r)", strerror(errno),
1425                         &c->repinfo.addr, c->repinfo.addrlen);
1426 #else /* USE_WINSOCK */
1427                 if(WSAGetLastError() == WSAECONNRESET)
1428                         return 0;
1429                 if(WSAGetLastError() == WSAEINPROGRESS)
1430                         return 1;
1431                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1432                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1433                         return 1;
1434                 }
1435                 log_err_addr("read (in tcp r)",
1436                         wsa_strerror(WSAGetLastError()),
1437                         &c->repinfo.addr, c->repinfo.addrlen);
1438 #endif
1439                 return 0;
1440         }
1441         sldns_buffer_skip(c->buffer, r);
1442         if(sldns_buffer_remaining(c->buffer) <= 0) {
1443                 tcp_callback_reader(c);
1444         }
1445         return 1;
1446 }
1447
1448 /** 
1449  * Handle tcp writing callback. 
      * Optionally checks the result of a pending nonblocking connect, hands
      * ssl connections to ssl_handle_it, and otherwise sends the 2-byte
      * length prefix followed by the message from the write buffer.
      * c->tcp_byte_count counts the bytes sent so far, prefix included.
1450  * @param fd: file descriptor of socket.
1451  * @param c: comm point to write buffer out of.
1452  * @return: 0 on error
      *      1 otherwise: data was sent, or the operation has to be retried
      *      on a later event.
1453  */
1454 static int
1455 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1456 {
1457         ssize_t r;
1458         struct sldns_buffer *buffer;
1459         log_assert(c->type == comm_tcp);
             /* when built with USE_DNSCRYPT the data is sent from the
              * dnscrypt buffer instead of c->buffer */
1460 #ifdef USE_DNSCRYPT
1461         buffer = c->dnscrypt_buffer;
1462 #else
1463         buffer = c->buffer;
1464 #endif
             /* a write event while the comm point is in reading state is
              * treated as an error, except when ssl is in use */
1465         if(c->tcp_is_reading && !c->ssl)
1466                 return 0;
1467         log_assert(fd != -1);
             /* only before the first byte is sent, and only if the comm point
              * is flagged to check a nonblocking connect */
1468         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1469                 /* check for pending error from nonblocking connect */
1470                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1471                 int error = 0;
1472                 socklen_t len = (socklen_t)sizeof(error);
1473                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1474                         &len) < 0){
1475 #ifndef USE_WINSOCK
1476                         error = errno; /* on solaris errno is error */
1477 #else /* USE_WINSOCK */
1478                         error = WSAGetLastError();
1479 #endif
1480                 }
                     /* NOTE: the two preprocessor branches below each open an
                      * if/else chain; the shared 'return 0; }' after the #endif
                      * closes the final 'else if(error != 0) {' of either one */
1481 #ifndef USE_WINSOCK
1482 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1483                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1484                         return 1; /* try again later */
1485                 else
1486 #endif
1487                 if(error != 0 && verbosity < 2)
1488                         return 0; /* silence lots of chatter in the logs */
1489                 else if(error != 0) {
1490                         log_err_addr("tcp connect", strerror(error),
1491                                 &c->repinfo.addr, c->repinfo.addrlen);
1492 #else /* USE_WINSOCK */
1493                 /* examine error */
1494                 if(error == WSAEINPROGRESS)
1495                         return 1;
1496                 else if(error == WSAEWOULDBLOCK) {
1497                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1498                         return 1;
1499                 } else if(error != 0 && verbosity < 2)
1500                         return 0;
1501                 else if(error != 0) {
1502                         log_err_addr("tcp connect", wsa_strerror(error),
1503                                 &c->repinfo.addr, c->repinfo.addrlen);
1504 #endif /* USE_WINSOCK */
1505                         return 0;
1506                 }
1507         }
             /* ssl connections are written by ssl_handle_it; the plain tcp
              * code below is not used for them */
1508         if(c->ssl)
1509                 return ssl_handle_it(c);
1510
1511 #ifdef USE_MSG_FASTOPEN
1512         /* Only try this on first use of a connection that uses tfo, 
1513            otherwise fall through to normal write */
1514         /* Also, TFO support on WINDOWS not implemented at the moment */
1515         if(c->tcp_do_fastopen == 1) {
1516                 /* this form of sendmsg() does both a connect() and send() so need to
1517                    look for various flavours of error*/
1518                 uint16_t len = htons(sldns_buffer_limit(buffer));
1519                 struct msghdr msg;
1520                 struct iovec iov[2];
                     /* the flag is cleared first, so the fastopen sendmsg is
                      * attempted at most once for this connection */
1521                 c->tcp_do_fastopen = 0;
1522                 memset(&msg, 0, sizeof(msg));
                     /* iov[0]: unsent part of the length prefix,
                      * iov[1]: the whole message */
1523                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1524                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1525                 iov[1].iov_base = sldns_buffer_begin(buffer);
1526                 iov[1].iov_len = sldns_buffer_limit(buffer);
1527                 log_assert(iov[0].iov_len > 0);
1528                 log_assert(iov[1].iov_len > 0);
1529                 msg.msg_name = &c->repinfo.addr;
1530                 msg.msg_namelen = c->repinfo.addrlen;
1531                 msg.msg_iov = iov;
1532                 msg.msg_iovlen = 2;
1533                 r = sendmsg(fd, &msg, MSG_FASTOPEN);
1534                 if (r == -1) {
1535 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1536                         /* Handshake is underway, maybe because no TFO cookie available.
1537                            Come back to write the message*/
1538                         if(errno == EINPROGRESS || errno == EWOULDBLOCK)
1539                                 return 1;
1540 #endif
1541                         if(errno == EINTR || errno == EAGAIN)
1542                                 return 1;
1543                         /* Not handling EISCONN here as shouldn't ever hit that case.*/
1544                         if(errno != EPIPE && errno != 0 && verbosity < 2)
1545                                 return 0; /* silence lots of chatter in the logs */
1546                         if(errno != EPIPE && errno != 0) {
1547                                 log_err_addr("tcp sendmsg", strerror(errno),
1548                                         &c->repinfo.addr, c->repinfo.addrlen);
1549                                 return 0;
1550                         }
1551                         /* fallthrough to nonFASTOPEN
1552                          * (MSG_FASTOPEN on Linux 3 produces EPIPE)
1553                          * we need to perform connect() */
1554                         if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) {
1555 #ifdef EINPROGRESS
1556                                 if(errno == EINPROGRESS)
1557                                         return 1; /* wait until connect done*/
1558 #endif
1559 #ifdef USE_WINSOCK
1560                                 if(WSAGetLastError() == WSAEINPROGRESS ||
1561                                         WSAGetLastError() == WSAEWOULDBLOCK)
1562                                         return 1; /* wait until connect done*/
1563 #endif
1564                                 if(tcp_connect_errno_needs_log(
1565                                         (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) {
1566                                         log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
1567                                                 strerror(errno), &c->repinfo.addr, c->repinfo.addrlen);
1568                                 }
1569                                 return 0;
1570                         }
                             /* connect() did not fail: continue below with the
                              * normal (non fastopen) write */
1571
1572                 } else {
                             /* sendmsg sent r bytes; account for them like the
                              * normal write path below does */
1573                         c->tcp_byte_count += r;
1574                         if(c->tcp_byte_count < sizeof(uint16_t))
1575                                 return 1;
1576                         sldns_buffer_set_position(buffer, c->tcp_byte_count - 
1577                                 sizeof(uint16_t));
1578                         if(sldns_buffer_remaining(buffer) == 0) {
1579                                 tcp_callback_writer(c);
1580                                 return 1;
1581                         }
1582                 }
1583         }
1584 #endif /* USE_MSG_FASTOPEN */
1585
             /* the 2-byte length prefix has not been sent completely yet */
1586         if(c->tcp_byte_count < sizeof(uint16_t)) {
1587                 uint16_t len = htons(sldns_buffer_limit(buffer));
1588 #ifdef HAVE_WRITEV
                     /* with writev: the unsent part of the prefix and the whole
                      * message go out in a single call */
1589                 struct iovec iov[2];
1590                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1591                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1592                 iov[1].iov_base = sldns_buffer_begin(buffer);
1593                 iov[1].iov_len = sldns_buffer_limit(buffer);
1594                 log_assert(iov[0].iov_len > 0);
1595                 log_assert(iov[1].iov_len > 0);
1596                 r = writev(fd, iov, 2);
1597 #else /* HAVE_WRITEV */
                     /* without writev: only the unsent part of the prefix is
                      * sent here, the message follows in the send() further
                      * down */
1598                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1599                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1600 #endif /* HAVE_WRITEV */
1601                 if(r == -1) {
1602 #ifndef USE_WINSOCK
1603 #  ifdef EPIPE
1604                         if(errno == EPIPE && verbosity < 2)
1605                                 return 0; /* silence 'broken pipe' */
1606   #endif
1607                         if(errno == EINTR || errno == EAGAIN)
1608                                 return 1;
1609 #  ifdef HAVE_WRITEV
1610                         log_err_addr("tcp writev", strerror(errno),
1611                                 &c->repinfo.addr, c->repinfo.addrlen);
1612 #  else /* HAVE_WRITEV */
1613                         log_err_addr("tcp send s", strerror(errno),
1614                                 &c->repinfo.addr, c->repinfo.addrlen);
1615 #  endif /* HAVE_WRITEV */
1616 #else
1617                         if(WSAGetLastError() == WSAENOTCONN)
1618                                 return 1;
1619                         if(WSAGetLastError() == WSAEINPROGRESS)
1620                                 return 1;
1621                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1622                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1623                                         UB_EV_WRITE);
1624                                 return 1; 
1625                         }
1626                         log_err_addr("tcp send s",
1627                                 wsa_strerror(WSAGetLastError()),
1628                                 &c->repinfo.addr, c->repinfo.addrlen);
1629 #endif
1630                         return 0;
1631                 }
1632                 c->tcp_byte_count += r;
                     /* prefix still incomplete: wait for the next write event */
1633                 if(c->tcp_byte_count < sizeof(uint16_t))
1634                         return 1;
                     /* bytes sent beyond the prefix are message bytes; move the
                      * buffer position past them */
1635                 sldns_buffer_set_position(buffer, c->tcp_byte_count - 
1636                         sizeof(uint16_t));
1637                 if(sldns_buffer_remaining(buffer) == 0) {
1638                         tcp_callback_writer(c);
1639                         return 1;
1640                 }
1641         }
             /* send (the rest of) the message itself */
1642         log_assert(sldns_buffer_remaining(buffer) > 0);
1643         r = send(fd, (void*)sldns_buffer_current(buffer), 
1644                 sldns_buffer_remaining(buffer), 0);
1645         if(r == -1) {
1646 #ifndef USE_WINSOCK
1647                 if(errno == EINTR || errno == EAGAIN)
1648                         return 1;
1649                 log_err_addr("tcp send r", strerror(errno),
1650                         &c->repinfo.addr, c->repinfo.addrlen);
1651 #else
1652                 if(WSAGetLastError() == WSAEINPROGRESS)
1653                         return 1;
1654                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1655                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1656                         return 1; 
1657                 }
1658                 log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
1659                         &c->repinfo.addr, c->repinfo.addrlen);
1660 #endif
1661                 return 0;
1662         }
1663         sldns_buffer_skip(buffer, r);
1664
             /* nothing left in the buffer: call tcp_callback_writer */
1665         if(sldns_buffer_remaining(buffer) == 0) {
1666                 tcp_callback_writer(c);
1667         }
1668         
1669         return 1;
1670 }
1671
1672 void 
1673 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1674 {
1675         struct comm_point* c = (struct comm_point*)arg;
1676         log_assert(c->type == comm_tcp);
1677         ub_comm_base_now(c->ev->base);
1678
1679 #ifdef USE_DNSCRYPT
1680         /* Initialize if this is a dnscrypt socket */
1681         if(c->tcp_parent) {
1682                 c->dnscrypt = c->tcp_parent->dnscrypt;
1683         }
1684         if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
1685                 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
1686                 if(!c->dnscrypt_buffer) {
1687                         log_err("Could not allocate dnscrypt buffer");
1688                         reclaim_tcp_handler(c);
1689                         if(!c->tcp_do_close) {
1690                                 fptr_ok(fptr_whitelist_comm_point(
1691                                         c->callback));
1692                                 (void)(*c->callback)(c, c->cb_arg, 
1693                                         NETEVENT_CLOSED, NULL);
1694                         }
1695                         return;
1696                 }
1697         }
1698 #endif
1699
1700         if(event&UB_EV_READ) {
1701                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1702                         reclaim_tcp_handler(c);
1703                         if(!c->tcp_do_close) {
1704                                 fptr_ok(fptr_whitelist_comm_point(
1705                                         c->callback));
1706                                 (void)(*c->callback)(c, c->cb_arg, 
1707                                         NETEVENT_CLOSED, NULL);
1708                         }
1709                 }
1710                 return;
1711         }
1712         if(event&UB_EV_WRITE) {
1713                 if(!comm_point_tcp_handle_write(fd, c)) {
1714                         reclaim_tcp_handler(c);
1715                         if(!c->tcp_do_close) {
1716                                 fptr_ok(fptr_whitelist_comm_point(
1717                                         c->callback));
1718                                 (void)(*c->callback)(c, c->cb_arg, 
1719                                         NETEVENT_CLOSED, NULL);
1720                         }
1721                 }
1722                 return;
1723         }
1724         if(event&UB_EV_TIMEOUT) {
1725                 verbose(VERB_QUERY, "tcp took too long, dropped");
1726                 reclaim_tcp_handler(c);
1727                 if(!c->tcp_do_close) {
1728                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1729                         (void)(*c->callback)(c, c->cb_arg,
1730                                 NETEVENT_TIMEOUT, NULL);
1731                 }
1732                 return;
1733         }
1734         log_err("Ignored event %d for tcphdl.", event);
1735 }
1736
1737 /** Make http handler free for next assignment */
1738 static void
1739 reclaim_http_handler(struct comm_point* c)
1740 {
1741         log_assert(c->type == comm_http);
1742         if(c->ssl) {
1743 #ifdef HAVE_SSL
1744                 SSL_shutdown(c->ssl);
1745                 SSL_free(c->ssl);
1746                 c->ssl = NULL;
1747 #endif
1748         }
1749         comm_point_close(c);
1750         if(c->tcp_parent) {
1751                 c->tcp_parent->cur_tcp_count--;
1752                 c->tcp_free = c->tcp_parent->tcp_free;
1753                 c->tcp_parent->tcp_free = c;
1754                 if(!c->tcp_free) {
1755                         /* re-enable listening on accept socket */
1756                         comm_point_start_listening(c->tcp_parent, -1, -1);
1757                 }
1758         }
1759 }
1760
/** read more data for http (with ssl) into c->buffer.
 * @return 0 when the connection is closed or failed, 1 when data was read
 * or the read has to be retried later. Without HAVE_SSL always 0. */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
        int got;
        log_assert(sldns_buffer_remaining(c->buffer) > 0);
        ERR_clear_error();
        got = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
                (int)sldns_buffer_remaining(c->buffer));
        if(got > 0) {
                /* data arrived, advance past it */
                sldns_buffer_skip(c->buffer, (ssize_t)got);
                return 1;
        }
        switch(SSL_get_error(c->ssl, got)) {
        case SSL_ERROR_ZERO_RETURN:
                /* shutdown, closed */
                return 0;
        case SSL_ERROR_WANT_READ:
                /* read more later */
                return 1;
        case SSL_ERROR_WANT_WRITE:
                /* ssl wants to write first: listen for write events */
                c->ssl_shake_state = comm_ssl_shake_hs_write;
                comm_point_listen_for_rw(c, 0, 1);
                return 1;
        case SSL_ERROR_SYSCALL:
                if(errno != 0)
                        log_err("SSL_read syscall: %s",
                                strerror(errno));
                return 0;
        default:
                break;
        }
        log_crypto_err("could not SSL_read");
        return 0;
#else
        (void)c;
        return 0;
#endif /* HAVE_SSL */
}
1797
1798 /** read more data for http */
1799 static int
1800 http_read_more(int fd, struct comm_point* c)
1801 {
1802         ssize_t r;
1803         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1804         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1805                 sldns_buffer_remaining(c->buffer), 0);
1806         if(r == 0) {
1807                 return 0;
1808         } else if(r == -1) {
1809 #ifndef USE_WINSOCK
1810                 if(errno == EINTR || errno == EAGAIN)
1811                         return 1;
1812                 log_err_addr("read (in http r)", strerror(errno),
1813                         &c->repinfo.addr, c->repinfo.addrlen);
1814 #else /* USE_WINSOCK */
1815                 if(WSAGetLastError() == WSAECONNRESET)
1816                         return 0;
1817                 if(WSAGetLastError() == WSAEINPROGRESS)
1818                         return 1;
1819                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1820                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1821                         return 1;
1822                 }
1823                 log_err_addr("read (in http r)",
1824                         wsa_strerror(WSAGetLastError()),
1825                         &c->repinfo.addr, c->repinfo.addrlen);
1826 #endif
1827                 return 0;
1828         }
1829         sldns_buffer_skip(c->buffer, r);
1830         return 1;
1831 }
1832
1833 /** return true if http header has been read (one line complete) */
1834 static int
1835 http_header_done(sldns_buffer* buf)
1836 {
1837         size_t i;
1838         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
1839                 /* there was a \r before the \n, but we ignore that */
1840                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
1841                         return 1;
1842         }
1843         return 0;
1844 }
1845
1846 /** return character string into buffer for header line, moves buffer
1847  * past that line and puts zero terminator into linefeed-newline */
1848 static char*
1849 http_header_line(sldns_buffer* buf)
1850 {
1851         char* result = (char*)sldns_buffer_current(buf);
1852         size_t i;
1853         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
1854                 /* terminate the string on the \r */
1855                 if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
1856                         sldns_buffer_write_u8_at(buf, i, 0);
1857                 /* terminate on the \n and skip past the it and done */
1858                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
1859                         sldns_buffer_write_u8_at(buf, i, 0);
1860                         sldns_buffer_set_position(buf, i+1);
1861                         return result;
1862                 }
1863         }
1864         return NULL;
1865 }
1866
1867 /** move unread buffer to start and clear rest for putting the rest into it */
1868 static void
1869 http_moveover_buffer(sldns_buffer* buf)
1870 {
1871         size_t pos = sldns_buffer_position(buf);
1872         size_t len = sldns_buffer_remaining(buf);
1873         sldns_buffer_clear(buf);
1874         memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
1875         sldns_buffer_set_position(buf, len);
1876 }
1877
1878 /** a http header is complete, process it */
1879 static int
1880 http_process_initial_header(struct comm_point* c)
1881 {
1882         char* line = http_header_line(c->buffer);
1883         if(!line) return 1;
1884         verbose(VERB_ALGO, "http header: %s", line);
1885         if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
1886                 /* check returncode */
1887                 if(line[9] != '2') {
1888                         verbose(VERB_ALGO, "http bad status %s", line+9);
1889                         return 0;
1890                 }
1891         } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
1892                 if(!c->http_is_chunked)
1893                         c->tcp_byte_count = (size_t)atoi(line+16);
1894         } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
1895                 c->tcp_byte_count = 0;
1896                 c->http_is_chunked = 1;
1897         } else if(line[0] == 0) {
1898                 /* end of initial headers */
1899                 c->http_in_headers = 0;
1900                 if(c->http_is_chunked)
1901                         c->http_in_chunk_headers = 1;
1902                 /* remove header text from front of buffer
1903                  * the buffer is going to be used to return the data segment
1904                  * itself and we don't want the header to get returned
1905                  * prepended with it */
1906                 http_moveover_buffer(c->buffer);
1907                 sldns_buffer_flip(c->buffer);
1908                 return 1;
1909         }
1910         /* ignore other headers */
1911         return 1;
1912 }
1913
1914 /** a chunk header is complete, process it, return 0=fail, 1=continue next
1915  * header line, 2=done with chunked transfer*/
1916 static int
1917 http_process_chunk_header(struct comm_point* c)
1918 {
1919         char* line = http_header_line(c->buffer);
1920         if(!line) return 1;
1921         if(c->http_in_chunk_headers == 3) {
1922                 verbose(VERB_ALGO, "http chunk trailer: %s", line);
1923                 /* are we done ? */
1924                 if(line[0] == 0 && c->tcp_byte_count == 0) {
1925                         /* callback of http reader when NETEVENT_DONE,
1926                          * end of data, with no data in buffer */
1927                         sldns_buffer_set_position(c->buffer, 0);
1928                         sldns_buffer_set_limit(c->buffer, 0);
1929                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1930                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
1931                         /* return that we are done */
1932                         return 2;
1933                 }
1934                 if(line[0] == 0) {
1935                         /* continue with header of the next chunk */
1936                         c->http_in_chunk_headers = 1;
1937                         /* remove header text from front of buffer */
1938                         http_moveover_buffer(c->buffer);
1939                         sldns_buffer_flip(c->buffer);
1940                         return 1;
1941                 }
1942                 /* ignore further trail headers */
1943                 return 1;
1944         }
1945         verbose(VERB_ALGO, "http chunk header: %s", line);
1946         if(c->http_in_chunk_headers == 1) {
1947                 /* read chunked start line */
1948                 char* end = NULL;
1949                 c->tcp_byte_count = (size_t)strtol(line, &end, 16);
1950                 if(end == line)
1951                         return 0;
1952                 c->http_in_chunk_headers = 0;
1953                 /* remove header text from front of buffer */
1954                 http_moveover_buffer(c->buffer);
1955                 sldns_buffer_flip(c->buffer);
1956                 if(c->tcp_byte_count == 0) {
1957                         /* done with chunks, process chunk_trailer lines */
1958                         c->http_in_chunk_headers = 3;
1959                 }
1960                 return 1;
1961         }
1962         /* ignore other headers */
1963         return 1;
1964 }
1965
1966 /** handle nonchunked data segment */
1967 static int
1968 http_nonchunk_segment(struct comm_point* c)
1969 {
1970         /* c->buffer at position..limit has new data we read in.
1971          * the buffer itself is full of nonchunked data.
1972          * we are looking to read tcp_byte_count more data
1973          * and then the transfer is done. */
1974         size_t remainbufferlen;
1975         size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
1976         if(c->tcp_byte_count <= got_now) {
1977                 /* done, this is the last data fragment */
1978                 c->http_stored = 0;
1979                 sldns_buffer_set_position(c->buffer, 0);
1980                 fptr_ok(fptr_whitelist_comm_point(c->callback));
1981                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
1982                 return 1;
1983         }
1984         c->tcp_byte_count -= got_now;
1985         /* if we have the buffer space,
1986          * read more data collected into the buffer */
1987         remainbufferlen = sldns_buffer_capacity(c->buffer) -
1988                 sldns_buffer_limit(c->buffer);
1989         if(remainbufferlen >= c->tcp_byte_count ||
1990                 remainbufferlen >= 2048) {
1991                 size_t total = sldns_buffer_limit(c->buffer);
1992                 sldns_buffer_clear(c->buffer);
1993                 sldns_buffer_set_position(c->buffer, total);
1994                 c->http_stored = total;
1995                 /* return and wait to read more */
1996                 return 1;
1997         }
1998         /* call callback with this data amount, then
1999          * wait for more */
2000         c->http_stored = 0;
2001         sldns_buffer_set_position(c->buffer, 0);
2002         fptr_ok(fptr_whitelist_comm_point(c->callback));
2003         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2004         /* c->callback has to buffer_clear(c->buffer). */
2005         /* return and wait to read more */
2006         return 1;
2007 }
2008
2009 /** handle nonchunked data segment, return 0=fail, 1=wait, 2=process more */
2010 static int
2011 http_chunked_segment(struct comm_point* c)
2012 {
2013         /* the c->buffer has from position..limit new data we read. */
2014         /* the current chunk has length tcp_byte_count.
2015          * once we read that read more chunk headers.
2016          */
2017         size_t remainbufferlen;
2018         size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
2019         if(c->tcp_byte_count <= got_now) {
2020                 /* the chunk has completed (with perhaps some extra data
2021                  * from next chunk header and next chunk) */
2022                 /* save too much info into temp buffer */
2023                 size_t fraglen;
2024                 struct comm_reply repinfo;
2025                 c->http_stored = 0;
2026                 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
2027                 sldns_buffer_clear(c->http_temp);
2028                 sldns_buffer_write(c->http_temp,
2029                         sldns_buffer_current(c->buffer),
2030                         sldns_buffer_remaining(c->buffer));
2031                 sldns_buffer_flip(c->http_temp);
2032
2033                 /* callback with this fragment */
2034                 fraglen = sldns_buffer_position(c->buffer);
2035                 sldns_buffer_set_position(c->buffer, 0);
2036                 sldns_buffer_set_limit(c->buffer, fraglen);
2037                 repinfo = c->repinfo;
2038                 fptr_ok(fptr_whitelist_comm_point(c->callback));
2039                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
2040                 /* c->callback has to buffer_clear(). */
2041
2042                 /* is commpoint deleted? */
2043                 if(!repinfo.c) {
2044                         return 1;
2045                 }
2046                 /* copy waiting info */
2047                 sldns_buffer_clear(c->buffer);
2048                 sldns_buffer_write(c->buffer,
2049                         sldns_buffer_begin(c->http_temp),
2050                         sldns_buffer_remaining(c->http_temp));
2051                 sldns_buffer_flip(c->buffer);
2052                 /* process end of chunk trailer header lines, until
2053                  * an empty line */
2054                 c->http_in_chunk_headers = 3;
2055                 /* process more data in buffer (if any) */
2056                 return 2;
2057         }
2058         c->tcp_byte_count -= got_now;
2059
2060         /* if we have the buffer space,
2061          * read more data collected into the buffer */
2062         remainbufferlen = sldns_buffer_capacity(c->buffer) -
2063                 sldns_buffer_limit(c->buffer);
2064         if(remainbufferlen >= c->tcp_byte_count ||
2065                 remainbufferlen >= 2048) {
2066                 size_t total = sldns_buffer_limit(c->buffer);
2067                 sldns_buffer_clear(c->buffer);
2068                 sldns_buffer_set_position(c->buffer, total);
2069                 c->http_stored = total;
2070                 /* return and wait to read more */
2071                 return 1;
2072         }
2073         
2074         /* callback of http reader for a new part of the data */
2075         c->http_stored = 0;
2076         sldns_buffer_set_position(c->buffer, 0);
2077         fptr_ok(fptr_whitelist_comm_point(c->callback));
2078         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2079         /* c->callback has to buffer_clear(c->buffer). */
2080         /* return and wait to read more */
2081         return 1;
2082 }
2083
2084 /**
2085  * Handle http reading callback. 
2086  * @param fd: file descriptor of socket.
2087  * @param c: comm point to read from into buffer.
2088  * @return: 0 on error 
2089  */
2090 static int
2091 comm_point_http_handle_read(int fd, struct comm_point* c)
2092 {
2093         log_assert(c->type == comm_http);
2094         log_assert(fd != -1);
2095
2096         /* if we are in ssl handshake, handle SSL handshake */
2097 #ifdef HAVE_SSL
2098         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2099                 if(!ssl_handshake(c))
2100                         return 0;
2101                 if(c->ssl_shake_state != comm_ssl_shake_none)
2102                         return 1;
2103         }
2104 #endif /* HAVE_SSL */
2105
2106         if(!c->tcp_is_reading)
2107                 return 1;
2108         /* read more data */
2109         if(c->ssl) {
2110                 if(!ssl_http_read_more(c))
2111                         return 0;
2112         } else {
2113                 if(!http_read_more(fd, c))
2114                         return 0;
2115         }
2116
2117         sldns_buffer_flip(c->buffer);
2118         while(sldns_buffer_remaining(c->buffer) > 0) {
2119                 /* if we are reading headers, read more headers */
2120                 if(c->http_in_headers || c->http_in_chunk_headers) {
2121                         /* if header is done, process the header */
2122                         if(!http_header_done(c->buffer)) {
2123                                 /* copy remaining data to front of buffer
2124                                  * and set rest for writing into it */
2125                                 http_moveover_buffer(c->buffer);
2126                                 /* return and wait to read more */
2127                                 return 1;
2128                         }
2129                         if(!c->http_in_chunk_headers) {
2130                                 /* process initial headers */
2131                                 if(!http_process_initial_header(c))
2132                                         return 0;
2133                         } else {
2134                                 /* process chunk headers */
2135                                 int r = http_process_chunk_header(c);
2136                                 if(r == 0) return 0;
2137                                 if(r == 2) return 1; /* done */
2138                                 /* r == 1, continue */
2139                         }
2140                         /* see if we have more to process */
2141                         continue;
2142                 }
2143
2144                 if(!c->http_is_chunked) {
2145                         /* if we are reading nonchunks, process that*/
2146                         return http_nonchunk_segment(c);
2147                 } else {
2148                         /* if we are reading chunks, read the chunk */
2149                         int r = http_chunked_segment(c);
2150                         if(r == 0) return 0;
2151                         if(r == 1) return 1;
2152                         continue;
2153                 }
2154         }
2155         /* broke out of the loop; could not process header instead need
2156          * to read more */
2157         /* moveover any remaining data and read more data */
2158         http_moveover_buffer(c->buffer);
2159         /* return and wait to read more */
2160         return 1;
2161 }
2162
2163 /** check pending connect for http */
2164 static int
2165 http_check_connect(int fd, struct comm_point* c)
2166 {
2167         /* check for pending error from nonblocking connect */
2168         /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
2169         int error = 0;
2170         socklen_t len = (socklen_t)sizeof(error);
2171         if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
2172                 &len) < 0){
2173 #ifndef USE_WINSOCK
2174                 error = errno; /* on solaris errno is error */
2175 #else /* USE_WINSOCK */
2176                 error = WSAGetLastError();
2177 #endif
2178         }
2179 #ifndef USE_WINSOCK
2180 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
2181         if(error == EINPROGRESS || error == EWOULDBLOCK)
2182                 return 1; /* try again later */
2183         else
2184 #endif
2185         if(error != 0 && verbosity < 2)
2186                 return 0; /* silence lots of chatter in the logs */
2187         else if(error != 0) {
2188                 log_err_addr("http connect", strerror(error),
2189                         &c->repinfo.addr, c->repinfo.addrlen);
2190 #else /* USE_WINSOCK */
2191         /* examine error */
2192         if(error == WSAEINPROGRESS)
2193                 return 1;
2194         else if(error == WSAEWOULDBLOCK) {
2195                 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2196                 return 1;
2197         } else if(error != 0 && verbosity < 2)
2198                 return 0;
2199         else if(error != 0) {
2200                 log_err_addr("http connect", wsa_strerror(error),
2201                         &c->repinfo.addr, c->repinfo.addrlen);
2202 #endif /* USE_WINSOCK */
2203                 return 0;
2204         }
2205         /* keep on processing this socket */
2206         return 2;
2207 }
2208
/**
 * Write more data for http over an SSL connection.
 * When compiled without HAVE_SSL this always returns 0.
 * @param c: comm point; the remaining bytes of c->buffer are handed to
 *	SSL_write on c->ssl and the buffer position is advanced by the
 *	number of bytes written.
 * @return 0 on error or when the connection was closed, 1 if bytes were
 *	written or the write has to be retried later.
 */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int written;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	written = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(written > 0) {
		sldns_buffer_skip(c->buffer, (ssize_t)written);
		return 1;
	}
	switch(SSL_get_error(c->ssl, written)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		/* the SSL layer needs to read first; wait for read */
		c->ssl_shake_state = comm_ssl_shake_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1;
	case SSL_ERROR_WANT_WRITE:
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
2245
2246 /** write more data for http */
2247 static int
2248 http_write_more(int fd, struct comm_point* c)
2249 {
2250         ssize_t r;
2251         log_assert(sldns_buffer_remaining(c->buffer) > 0);
2252         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
2253                 sldns_buffer_remaining(c->buffer), 0);
2254         if(r == -1) {
2255 #ifndef USE_WINSOCK
2256                 if(errno == EINTR || errno == EAGAIN)
2257                         return 1;
2258                 log_err_addr("http send r", strerror(errno),
2259                         &c->repinfo.addr, c->repinfo.addrlen);
2260 #else
2261                 if(WSAGetLastError() == WSAEINPROGRESS)
2262                         return 1;
2263                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2264                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2265                         return 1; 
2266                 }
2267                 log_err_addr("http send r", wsa_strerror(WSAGetLastError()),
2268                         &c->repinfo.addr, c->repinfo.addrlen);
2269 #endif
2270                 return 0;
2271         }
2272         sldns_buffer_skip(c->buffer, r);
2273         return 1;
2274 }
2275
2276 /** 
2277  * Handle http writing callback. 
2278  * @param fd: file descriptor of socket.
2279  * @param c: comm point to write buffer out of.
2280  * @return: 0 on error
2281  */
2282 static int
2283 comm_point_http_handle_write(int fd, struct comm_point* c)
2284 {
2285         log_assert(c->type == comm_http);
2286         log_assert(fd != -1);
2287
2288         /* check pending connect errors, if that fails, we wait for more,
2289          * or we can continue to write contents */
2290         if(c->tcp_check_nb_connect) {
2291                 int r = http_check_connect(fd, c);
2292                 if(r == 0) return 0;
2293                 if(r == 1) return 1;
2294                 c->tcp_check_nb_connect = 0;
2295         }
2296         /* if we are in ssl handshake, handle SSL handshake */
2297 #ifdef HAVE_SSL
2298         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2299                 if(!ssl_handshake(c))
2300                         return 0;
2301                 if(c->ssl_shake_state != comm_ssl_shake_none)
2302                         return 1;
2303         }
2304 #endif /* HAVE_SSL */
2305         if(c->tcp_is_reading)
2306                 return 1;
2307         /* if we are writing, write more */
2308         if(c->ssl) {
2309                 if(!ssl_http_write_more(c))
2310                         return 0;
2311         } else {
2312                 if(!http_write_more(fd, c))
2313                         return 0;
2314         }
2315
2316         /* we write a single buffer contents, that can contain
2317          * the http request, and then flip to read the results */
2318         /* see if write is done */
2319         if(sldns_buffer_remaining(c->buffer) == 0) {
2320                 sldns_buffer_clear(c->buffer);
2321                 if(c->tcp_do_toggle_rw)
2322                         c->tcp_is_reading = 1;
2323                 c->tcp_byte_count = 0;
2324                 /* switch from listening(write) to listening(read) */
2325                 comm_point_stop_listening(c);
2326                 comm_point_start_listening(c, -1, -1);
2327         }
2328         return 1;
2329 }
2330
2331 void 
2332 comm_point_http_handle_callback(int fd, short event, void* arg)
2333 {
2334         struct comm_point* c = (struct comm_point*)arg;
2335         log_assert(c->type == comm_http);
2336         ub_comm_base_now(c->ev->base);
2337
2338         if(event&UB_EV_READ) {
2339                 if(!comm_point_http_handle_read(fd, c)) {
2340                         reclaim_http_handler(c);
2341                         if(!c->tcp_do_close) {
2342                                 fptr_ok(fptr_whitelist_comm_point(
2343                                         c->callback));
2344                                 (void)(*c->callback)(c, c->cb_arg, 
2345                                         NETEVENT_CLOSED, NULL);
2346                         }
2347                 }
2348                 return;
2349         }
2350         if(event&UB_EV_WRITE) {
2351                 if(!comm_point_http_handle_write(fd, c)) {
2352                         reclaim_http_handler(c);
2353                         if(!c->tcp_do_close) {
2354                                 fptr_ok(fptr_whitelist_comm_point(
2355                                         c->callback));
2356                                 (void)(*c->callback)(c, c->cb_arg, 
2357                                         NETEVENT_CLOSED, NULL);
2358                         }
2359                 }
2360                 return;
2361         }
2362         if(event&UB_EV_TIMEOUT) {
2363                 verbose(VERB_QUERY, "http took too long, dropped");
2364                 reclaim_http_handler(c);
2365                 if(!c->tcp_do_close) {
2366                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2367                         (void)(*c->callback)(c, c->cb_arg,
2368                                 NETEVENT_TIMEOUT, NULL);
2369                 }
2370                 return;
2371         }
2372         log_err("Ignored event %d for httphdl.", event);
2373 }
2374
2375 void comm_point_local_handle_callback(int fd, short event, void* arg)
2376 {
2377         struct comm_point* c = (struct comm_point*)arg;
2378         log_assert(c->type == comm_local);
2379         ub_comm_base_now(c->ev->base);
2380
2381         if(event&UB_EV_READ) {
2382                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
2383                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2384                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
2385                                 NULL);
2386                 }
2387                 return;
2388         }
2389         log_err("Ignored event %d for localhdl.", event);
2390 }
2391
2392 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
2393         short event, void* arg)
2394 {
2395         struct comm_point* c = (struct comm_point*)arg;
2396         int err = NETEVENT_NOERROR;
2397         log_assert(c->type == comm_raw);
2398         ub_comm_base_now(c->ev->base);
2399         
2400         if(event&UB_EV_TIMEOUT)
2401                 err = NETEVENT_TIMEOUT;
2402         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
2403         (void)(*c->callback)(c, c->cb_arg, err, NULL);
2404 }
2405
2406 struct comm_point* 
2407 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
2408         comm_point_callback_type* callback, void* callback_arg)
2409 {
2410         struct comm_point* c = (struct comm_point*)calloc(1,
2411                 sizeof(struct comm_point));
2412         short evbits;
2413         if(!c)
2414                 return NULL;
2415         c->ev = (struct internal_event*)calloc(1,
2416                 sizeof(struct internal_event));
2417         if(!c->ev) {
2418                 free(c);
2419                 return NULL;
2420         }
2421         c->ev->base = base;
2422         c->fd = fd;
2423         c->buffer = buffer;
2424         c->timeout = NULL;
2425         c->tcp_is_reading = 0;
2426         c->tcp_byte_count = 0;
2427         c->tcp_parent = NULL;
2428         c->max_tcp_count = 0;
2429         c->cur_tcp_count = 0;
2430         c->tcp_handlers = NULL;
2431         c->tcp_free = NULL;
2432         c->type = comm_udp;
2433         c->tcp_do_close = 0;
2434         c->do_not_close = 0;
2435         c->tcp_do_toggle_rw = 0;
2436         c->tcp_check_nb_connect = 0;
2437 #ifdef USE_MSG_FASTOPEN
2438         c->tcp_do_fastopen = 0;
2439 #endif
2440 #ifdef USE_DNSCRYPT
2441         c->dnscrypt = 0;
2442         c->dnscrypt_buffer = buffer;
2443 #endif
2444         c->inuse = 0;
2445         c->callback = callback;
2446         c->cb_arg = callback_arg;
2447         evbits = UB_EV_READ | UB_EV_PERSIST;
2448         /* ub_event stuff */
2449         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2450                 comm_point_udp_callback, c);
2451         if(c->ev->ev == NULL) {
2452                 log_err("could not baseset udp event");
2453                 comm_point_delete(c);
2454                 return NULL;
2455         }
2456         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
2457                 log_err("could not add udp event");
2458                 comm_point_delete(c);
2459                 return NULL;
2460         }
2461         return c;
2462 }
2463
2464 struct comm_point* 
2465 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
2466         sldns_buffer* buffer, 
2467         comm_point_callback_type* callback, void* callback_arg)
2468 {
2469         struct comm_point* c = (struct comm_point*)calloc(1,
2470                 sizeof(struct comm_point));
2471         short evbits;
2472         if(!c)
2473                 return NULL;
2474         c->ev = (struct internal_event*)calloc(1,
2475                 sizeof(struct internal_event));
2476         if(!c->ev) {
2477                 free(c);
2478                 return NULL;
2479         }
2480         c->ev->base = base;
2481         c->fd = fd;
2482         c->buffer = buffer;
2483         c->timeout = NULL;
2484         c->tcp_is_reading = 0;
2485         c->tcp_byte_count = 0;
2486         c->tcp_parent = NULL;
2487         c->max_tcp_count = 0;
2488         c->cur_tcp_count = 0;
2489         c->tcp_handlers = NULL;
2490         c->tcp_free = NULL;
2491         c->type = comm_udp;
2492         c->tcp_do_close = 0;
2493         c->do_not_close = 0;
2494 #ifdef USE_DNSCRYPT
2495         c->dnscrypt = 0;
2496         c->dnscrypt_buffer = buffer;
2497 #endif
2498         c->inuse = 0;
2499         c->tcp_do_toggle_rw = 0;
2500         c->tcp_check_nb_connect = 0;
2501 #ifdef USE_MSG_FASTOPEN
2502         c->tcp_do_fastopen = 0;
2503 #endif
2504         c->callback = callback;
2505         c->cb_arg = callback_arg;
2506         evbits = UB_EV_READ | UB_EV_PERSIST;
2507         /* ub_event stuff */
2508         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2509                 comm_point_udp_ancil_callback, c);
2510         if(c->ev->ev == NULL) {
2511                 log_err("could not baseset udp event");
2512                 comm_point_delete(c);
2513                 return NULL;
2514         }
2515         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
2516                 log_err("could not add udp event");
2517                 comm_point_delete(c);
2518                 return NULL;
2519         }
2520         return c;
2521 }
2522
2523 static struct comm_point* 
2524 comm_point_create_tcp_handler(struct comm_base *base, 
2525         struct comm_point* parent, size_t bufsize,
2526         comm_point_callback_type* callback, void* callback_arg)
2527 {
2528         struct comm_point* c = (struct comm_point*)calloc(1,
2529                 sizeof(struct comm_point));
2530         short evbits;
2531         if(!c)
2532                 return NULL;
2533         c->ev = (struct internal_event*)calloc(1,
2534                 sizeof(struct internal_event));
2535         if(!c->ev) {
2536                 free(c);
2537                 return NULL;
2538         }
2539         c->ev->base = base;
2540         c->fd = -1;
2541         c->buffer = sldns_buffer_new(bufsize);
2542         if(!c->buffer) {
2543                 free(c->ev);
2544                 free(c);
2545                 return NULL;
2546         }
2547         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
2548         if(!c->timeout) {
2549                 sldns_buffer_free(c->buffer);
2550                 free(c->ev);
2551                 free(c);
2552                 return NULL;
2553         }
2554         c->tcp_is_reading = 0;
2555         c->tcp_byte_count = 0;
2556         c->tcp_parent = parent;
2557         c->tcp_timeout_msec = parent->tcp_timeout_msec;
2558         c->tcp_conn_limit = parent->tcp_conn_limit;
2559         c->tcl_addr = NULL;
2560         c->tcp_keepalive = 0;
2561         c->max_tcp_count = 0;
2562         c->cur_tcp_count = 0;
2563         c->tcp_handlers = NULL;
2564         c->tcp_free = NULL;
2565         c->type = comm_tcp;
2566         c->tcp_do_close = 0;
2567         c->do_not_close = 0;
2568         c->tcp_do_toggle_rw = 1;
2569         c->tcp_check_nb_connect = 0;
2570 #ifdef USE_MSG_FASTOPEN
2571         c->tcp_do_fastopen = 0;
2572 #endif
2573 #ifdef USE_DNSCRYPT
2574         c->dnscrypt = 0;
2575         /* We don't know just yet if this is a dnscrypt channel. Allocation
2576          * will be done when handling the callback. */
2577         c->dnscrypt_buffer = c->buffer;
2578 #endif
2579         c->repinfo.c = c;
2580         c->callback = callback;
2581         c->cb_arg = callback_arg;
2582         /* add to parent free list */
2583         c->tcp_free = parent->tcp_free;
2584         parent->tcp_free = c;
2585         /* ub_event stuff */
2586         evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
2587         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2588                 comm_point_tcp_handle_callback, c);
2589         if(c->ev->ev == NULL)
2590         {
2591                 log_err("could not basetset tcphdl event");
2592                 parent->tcp_free = c->tcp_free;
2593                 free(c->ev);
2594                 free(c);
2595                 return NULL;
2596         }
2597         return c;
2598 }
2599
2600 struct comm_point* 
2601 comm_point_create_tcp(struct comm_base *base, int fd, int num,
2602         int idle_timeout, struct tcl_list* tcp_conn_limit, size_t bufsize,
2603         comm_point_callback_type* callback, void* callback_arg)
2604 {
2605         struct comm_point* c = (struct comm_point*)calloc(1,
2606                 sizeof(struct comm_point));
2607         short evbits;
2608         int i;
2609         /* first allocate the TCP accept listener */
2610         if(!c)
2611                 return NULL;
2612         c->ev = (struct internal_event*)calloc(1,
2613                 sizeof(struct internal_event));
2614         if(!c->ev) {
2615                 free(c);
2616                 return NULL;
2617         }
2618         c->ev->base = base;
2619         c->fd = fd;
2620         c->buffer = NULL;
2621         c->timeout = NULL;
2622         c->tcp_is_reading = 0;
2623         c->tcp_byte_count = 0;
2624         c->tcp_timeout_msec = idle_timeout;
2625         c->tcp_conn_limit = tcp_conn_limit;
2626         c->tcl_addr = NULL;
2627         c->tcp_keepalive = 0;
2628         c->tcp_parent = NULL;
2629         c->max_tcp_count = num;
2630         c->cur_tcp_count = 0;
2631         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
2632                 sizeof(struct comm_point*));
2633         if(!c->tcp_handlers) {
2634                 free(c->ev);
2635                 free(c);
2636                 return NULL;
2637         }
2638         c->tcp_free = NULL;
2639         c->type = comm_tcp_accept;
2640         c->tcp_do_close = 0;
2641         c->do_not_close = 0;
2642         c->tcp_do_toggle_rw = 0;
2643         c->tcp_check_nb_connect = 0;
2644 #ifdef USE_MSG_FASTOPEN
2645         c->tcp_do_fastopen = 0;
2646 #endif
2647 #ifdef USE_DNSCRYPT
2648         c->dnscrypt = 0;
2649         c->dnscrypt_buffer = NULL;
2650 #endif
2651         c->callback = NULL;
2652         c->cb_arg = NULL;
2653         evbits = UB_EV_READ | UB_EV_PERSIST;
2654         /* ub_event stuff */
2655         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2656                 comm_point_tcp_accept_callback, c);
2657         if(c->ev->ev == NULL) {
2658                 log_err("could not baseset tcpacc event");
2659                 comm_point_delete(c);
2660                 return NULL;
2661         }
2662         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2663                 log_err("could not add tcpacc event");
2664                 comm_point_delete(c);
2665                 return NULL;
2666         }
2667         /* now prealloc the tcp handlers */
2668         for(i=0; i<num; i++) {
2669                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
2670                         c, bufsize, callback, callback_arg);
2671                 if(!c->tcp_handlers[i]) {
2672                         comm_point_delete(c);
2673                         return NULL;
2674                 }
2675         }
2676         
2677         return c;
2678 }
2679
2680 struct comm_point* 
2681 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
2682         comm_point_callback_type* callback, void* callback_arg)
2683 {
2684         struct comm_point* c = (struct comm_point*)calloc(1,
2685                 sizeof(struct comm_point));
2686         short evbits;
2687         if(!c)
2688                 return NULL;
2689         c->ev = (struct internal_event*)calloc(1,
2690                 sizeof(struct internal_event));
2691         if(!c->ev) {
2692                 free(c);
2693                 return NULL;
2694         }
2695         c->ev->base = base;
2696         c->fd = -1;
2697         c->buffer = sldns_buffer_new(bufsize);
2698         if(!c->buffer) {
2699                 free(c->ev);
2700                 free(c);
2701                 return NULL;
2702         }
2703         c->timeout = NULL;
2704         c->tcp_is_reading = 0;
2705         c->tcp_byte_count = 0;
2706         c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
2707         c->tcp_conn_limit = NULL;
2708         c->tcl_addr = NULL;
2709         c->tcp_keepalive = 0;
2710         c->tcp_parent = NULL;
2711         c->max_tcp_count = 0;
2712         c->cur_tcp_count = 0;
2713         c->tcp_handlers = NULL;
2714         c->tcp_free = NULL;
2715         c->type = comm_tcp;
2716         c->tcp_do_close = 0;
2717         c->do_not_close = 0;
2718         c->tcp_do_toggle_rw = 1;
2719         c->tcp_check_nb_connect = 1;
2720 #ifdef USE_MSG_FASTOPEN
2721         c->tcp_do_fastopen = 1;
2722 #endif
2723 #ifdef USE_DNSCRYPT
2724         c->dnscrypt = 0;
2725         c->dnscrypt_buffer = c->buffer;
2726 #endif
2727         c->repinfo.c = c;
2728         c->callback = callback;
2729         c->cb_arg = callback_arg;
2730         evbits = UB_EV_PERSIST | UB_EV_WRITE;
2731         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2732                 comm_point_tcp_handle_callback, c);
2733         if(c->ev->ev == NULL)
2734         {
2735                 log_err("could not baseset tcpout event");
2736                 sldns_buffer_free(c->buffer);
2737                 free(c->ev);
2738                 free(c);
2739                 return NULL;
2740         }
2741
2742         return c;
2743 }
2744
2745 struct comm_point* 
2746 comm_point_create_http_out(struct comm_base *base, size_t bufsize,
2747         comm_point_callback_type* callback, void* callback_arg,
2748         sldns_buffer* temp)
2749 {
2750         struct comm_point* c = (struct comm_point*)calloc(1,
2751                 sizeof(struct comm_point));
2752         short evbits;
2753         if(!c)
2754                 return NULL;
2755         c->ev = (struct internal_event*)calloc(1,
2756                 sizeof(struct internal_event));
2757         if(!c->ev) {
2758                 free(c);
2759                 return NULL;
2760         }
2761         c->ev->base = base;
2762         c->fd = -1;
2763         c->buffer = sldns_buffer_new(bufsize);
2764         if(!c->buffer) {
2765                 free(c->ev);
2766                 free(c);
2767                 return NULL;
2768         }
2769         c->timeout = NULL;
2770         c->tcp_is_reading = 0;
2771         c->tcp_byte_count = 0;
2772         c->tcp_parent = NULL;
2773         c->max_tcp_count = 0;
2774         c->cur_tcp_count = 0;
2775         c->tcp_handlers = NULL;
2776         c->tcp_free = NULL;
2777         c->type = comm_http;
2778         c->tcp_do_close = 0;
2779         c->do_not_close = 0;
2780         c->tcp_do_toggle_rw = 1;
2781         c->tcp_check_nb_connect = 1;
2782         c->http_in_headers = 1;
2783         c->http_in_chunk_headers = 0;
2784         c->http_is_chunked = 0;
2785         c->http_temp = temp;
2786 #ifdef USE_MSG_FASTOPEN
2787         c->tcp_do_fastopen = 1;
2788 #endif
2789 #ifdef USE_DNSCRYPT
2790         c->dnscrypt = 0;
2791         c->dnscrypt_buffer = c->buffer;
2792 #endif
2793         c->repinfo.c = c;
2794         c->callback = callback;
2795         c->cb_arg = callback_arg;
2796         evbits = UB_EV_PERSIST | UB_EV_WRITE;
2797         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2798                 comm_point_http_handle_callback, c);
2799         if(c->ev->ev == NULL)
2800         {
2801                 log_err("could not baseset tcpout event");
2802 #ifdef HAVE_SSL
2803                 SSL_free(c->ssl);
2804 #endif
2805                 sldns_buffer_free(c->buffer);
2806                 free(c->ev);
2807                 free(c);
2808                 return NULL;
2809         }
2810
2811         return c;
2812 }
2813
2814 struct comm_point* 
2815 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
2816         comm_point_callback_type* callback, void* callback_arg)
2817 {
2818         struct comm_point* c = (struct comm_point*)calloc(1,
2819                 sizeof(struct comm_point));
2820         short evbits;
2821         if(!c)
2822                 return NULL;
2823         c->ev = (struct internal_event*)calloc(1,
2824                 sizeof(struct internal_event));
2825         if(!c->ev) {
2826                 free(c);
2827                 return NULL;
2828         }
2829         c->ev->base = base;
2830         c->fd = fd;
2831         c->buffer = sldns_buffer_new(bufsize);
2832         if(!c->buffer) {
2833                 free(c->ev);
2834                 free(c);
2835                 return NULL;
2836         }
2837         c->timeout = NULL;
2838         c->tcp_is_reading = 1;
2839         c->tcp_byte_count = 0;
2840         c->tcp_parent = NULL;
2841         c->max_tcp_count = 0;
2842         c->cur_tcp_count = 0;
2843         c->tcp_handlers = NULL;
2844         c->tcp_free = NULL;
2845         c->type = comm_local;
2846         c->tcp_do_close = 0;
2847         c->do_not_close = 1;
2848         c->tcp_do_toggle_rw = 0;
2849         c->tcp_check_nb_connect = 0;
2850 #ifdef USE_MSG_FASTOPEN
2851         c->tcp_do_fastopen = 0;
2852 #endif
2853 #ifdef USE_DNSCRYPT
2854         c->dnscrypt = 0;
2855         c->dnscrypt_buffer = c->buffer;
2856 #endif
2857         c->callback = callback;
2858         c->cb_arg = callback_arg;
2859         /* ub_event stuff */
2860         evbits = UB_EV_PERSIST | UB_EV_READ;
2861         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2862                 comm_point_local_handle_callback, c);
2863         if(c->ev->ev == NULL) {
2864                 log_err("could not baseset localhdl event");
2865                 free(c->ev);
2866                 free(c);
2867                 return NULL;
2868         }
2869         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2870                 log_err("could not add localhdl event");
2871                 ub_event_free(c->ev->ev);
2872                 free(c->ev);
2873                 free(c);
2874                 return NULL;
2875         }
2876         return c;
2877 }
2878
2879 struct comm_point* 
2880 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
2881         comm_point_callback_type* callback, void* callback_arg)
2882 {
2883         struct comm_point* c = (struct comm_point*)calloc(1,
2884                 sizeof(struct comm_point));
2885         short evbits;
2886         if(!c)
2887                 return NULL;
2888         c->ev = (struct internal_event*)calloc(1,
2889                 sizeof(struct internal_event));
2890         if(!c->ev) {
2891                 free(c);
2892                 return NULL;
2893         }
2894         c->ev->base = base;
2895         c->fd = fd;
2896         c->buffer = NULL;
2897         c->timeout = NULL;
2898         c->tcp_is_reading = 0;
2899         c->tcp_byte_count = 0;
2900         c->tcp_parent = NULL;
2901         c->max_tcp_count = 0;
2902         c->cur_tcp_count = 0;
2903         c->tcp_handlers = NULL;
2904         c->tcp_free = NULL;
2905         c->type = comm_raw;
2906         c->tcp_do_close = 0;
2907         c->do_not_close = 1;
2908         c->tcp_do_toggle_rw = 0;
2909         c->tcp_check_nb_connect = 0;
2910 #ifdef USE_MSG_FASTOPEN
2911         c->tcp_do_fastopen = 0;
2912 #endif
2913 #ifdef USE_DNSCRYPT
2914         c->dnscrypt = 0;
2915         c->dnscrypt_buffer = c->buffer;
2916 #endif
2917         c->callback = callback;
2918         c->cb_arg = callback_arg;
2919         /* ub_event stuff */
2920         if(writing)
2921                 evbits = UB_EV_PERSIST | UB_EV_WRITE;
2922         else    evbits = UB_EV_PERSIST | UB_EV_READ;
2923         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2924                 comm_point_raw_handle_callback, c);
2925         if(c->ev->ev == NULL) {
2926                 log_err("could not baseset rawhdl event");
2927                 free(c->ev);
2928                 free(c);
2929                 return NULL;
2930         }
2931         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2932                 log_err("could not add rawhdl event");
2933                 ub_event_free(c->ev->ev);
2934                 free(c->ev);
2935                 free(c);
2936                 return NULL;
2937         }
2938         return c;
2939 }
2940
2941 void 
2942 comm_point_close(struct comm_point* c)
2943 {
2944         if(!c)
2945                 return;
2946         if(c->fd != -1) {
2947                 if(ub_event_del(c->ev->ev) != 0) {
2948                         log_err("could not event_del on close");
2949                 }
2950         }
2951         tcl_close_connection(c->tcl_addr);
2952         /* close fd after removing from event lists, or epoll.. is messed up */
2953         if(c->fd != -1 && !c->do_not_close) {
2954                 if(c->type == comm_tcp || c->type == comm_http) {
2955                         /* delete sticky events for the fd, it gets closed */
2956                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
2957                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2958                 }
2959                 verbose(VERB_ALGO, "close fd %d", c->fd);
2960 #ifndef USE_WINSOCK
2961                 close(c->fd);
2962 #else
2963                 closesocket(c->fd);
2964 #endif
2965         }
2966         c->fd = -1;
2967 }
2968
2969 void 
2970 comm_point_delete(struct comm_point* c)
2971 {
2972         if(!c) 
2973                 return;
2974         if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
2975 #ifdef HAVE_SSL
2976                 SSL_shutdown(c->ssl);
2977                 SSL_free(c->ssl);
2978 #endif
2979         }
2980         comm_point_close(c);
2981         if(c->tcp_handlers) {
2982                 int i;
2983                 for(i=0; i<c->max_tcp_count; i++)
2984                         comm_point_delete(c->tcp_handlers[i]);
2985                 free(c->tcp_handlers);
2986         }
2987         free(c->timeout);
2988         if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
2989                 sldns_buffer_free(c->buffer);
2990 #ifdef USE_DNSCRYPT
2991                 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
2992                         sldns_buffer_free(c->dnscrypt_buffer);
2993                 }
2994 #endif
2995         }
2996         ub_event_free(c->ev->ev);
2997         free(c->ev);
2998         free(c);
2999 }
3000
3001 void 
3002 comm_point_send_reply(struct comm_reply *repinfo)
3003 {
3004         struct sldns_buffer* buffer;
3005         log_assert(repinfo && repinfo->c);
3006 #ifdef USE_DNSCRYPT
3007         buffer = repinfo->c->dnscrypt_buffer;
3008         if(!dnsc_handle_uncurved_request(repinfo)) {
3009                 return;
3010         }
3011 #else
3012         buffer = repinfo->c->buffer;
3013 #endif
3014         if(repinfo->c->type == comm_udp) {
3015                 if(repinfo->srctype)
3016                         comm_point_send_udp_msg_if(repinfo->c, 
3017                         buffer, (struct sockaddr*)&repinfo->addr, 
3018                         repinfo->addrlen, repinfo);
3019                 else
3020                         comm_point_send_udp_msg(repinfo->c, buffer,
3021                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
3022 #ifdef USE_DNSTAP
3023                 if(repinfo->c->dtenv != NULL &&
3024                    repinfo->c->dtenv->log_client_response_messages)
3025                         dt_msg_send_client_response(repinfo->c->dtenv,
3026                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
3027 #endif
3028         } else {
3029 #ifdef USE_DNSTAP
3030                 if(repinfo->c->tcp_parent->dtenv != NULL &&
3031                    repinfo->c->tcp_parent->dtenv->log_client_response_messages)
3032                         dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
3033                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
3034 #endif
3035                 comm_point_start_listening(repinfo->c, -1,
3036                         repinfo->c->tcp_timeout_msec);
3037         }
3038 }
3039
3040 void 
3041 comm_point_drop_reply(struct comm_reply* repinfo)
3042 {
3043         if(!repinfo)
3044                 return;
3045         log_assert(repinfo && repinfo->c);
3046         log_assert(repinfo->c->type != comm_tcp_accept);
3047         if(repinfo->c->type == comm_udp)
3048                 return;
3049         reclaim_tcp_handler(repinfo->c);
3050 }
3051
3052 void 
3053 comm_point_stop_listening(struct comm_point* c)
3054 {
3055         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
3056         if(ub_event_del(c->ev->ev) != 0) {
3057                 log_err("event_del error to stoplisten");
3058         }
3059 }
3060
3061 void 
3062 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
3063 {
3064         verbose(VERB_ALGO, "comm point start listening %d", 
3065                 c->fd==-1?newfd:c->fd);
3066         if(c->type == comm_tcp_accept && !c->tcp_free) {
3067                 /* no use to start listening no free slots. */
3068                 return;
3069         }
3070         if(msec != -1 && msec != 0) {
3071                 if(!c->timeout) {
3072                         c->timeout = (struct timeval*)malloc(sizeof(
3073                                 struct timeval));
3074                         if(!c->timeout) {
3075                                 log_err("cpsl: malloc failed. No net read.");
3076                                 return;
3077                         }
3078                 }
3079                 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
3080 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
3081                 c->timeout->tv_sec = msec/1000;
3082                 c->timeout->tv_usec = (msec%1000)*1000;
3083 #endif /* S_SPLINT_S */
3084         }
3085         if(c->type == comm_tcp || c->type == comm_http) {
3086                 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
3087                 if(c->tcp_is_reading)
3088                         ub_event_add_bits(c->ev->ev, UB_EV_READ);
3089                 else    ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
3090         }
3091         if(newfd != -1) {
3092                 if(c->fd != -1) {
3093 #ifndef USE_WINSOCK
3094                         close(c->fd);
3095 #else
3096                         closesocket(c->fd);
3097 #endif
3098                 }
3099                 c->fd = newfd;
3100                 ub_event_set_fd(c->ev->ev, c->fd);
3101         }
3102         if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
3103                 log_err("event_add failed. in cpsl.");
3104         }
3105 }
3106
3107 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
3108 {
3109         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
3110         if(ub_event_del(c->ev->ev) != 0) {
3111                 log_err("event_del error to cplf");
3112         }
3113         ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
3114         if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
3115         if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
3116         if(ub_event_add(c->ev->ev, c->timeout) != 0) {
3117                 log_err("event_add failed. in cplf.");
3118         }
3119 }
3120
3121 size_t comm_point_get_mem(struct comm_point* c)
3122 {
3123         size_t s;
3124         if(!c) 
3125                 return 0;
3126         s = sizeof(*c) + sizeof(*c->ev);
3127         if(c->timeout) 
3128                 s += sizeof(*c->timeout);
3129         if(c->type == comm_tcp || c->type == comm_local) {
3130                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
3131 #ifdef USE_DNSCRYPT
3132                 s += sizeof(*c->dnscrypt_buffer);
3133                 if(c->buffer != c->dnscrypt_buffer) {
3134                         s += sldns_buffer_capacity(c->dnscrypt_buffer);
3135                 }
3136 #endif
3137         }
3138         if(c->type == comm_tcp_accept) {
3139                 int i;
3140                 for(i=0; i<c->max_tcp_count; i++)
3141                         s += comm_point_get_mem(c->tcp_handlers[i]);
3142         }
3143         return s;
3144 }
3145
3146 struct comm_timer* 
3147 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
3148 {
3149         struct internal_timer *tm = (struct internal_timer*)calloc(1,
3150                 sizeof(struct internal_timer));
3151         if(!tm) {
3152                 log_err("malloc failed");
3153                 return NULL;
3154         }
3155         tm->super.ev_timer = tm;
3156         tm->base = base;
3157         tm->super.callback = cb;
3158         tm->super.cb_arg = cb_arg;
3159         tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 
3160                 comm_timer_callback, &tm->super);
3161         if(tm->ev == NULL) {
3162                 log_err("timer_create: event_base_set failed.");
3163                 free(tm);
3164                 return NULL;
3165         }
3166         return &tm->super;
3167 }
3168
3169 void 
3170 comm_timer_disable(struct comm_timer* timer)
3171 {
3172         if(!timer)
3173                 return;
3174         ub_timer_del(timer->ev_timer->ev);
3175         timer->ev_timer->enabled = 0;
3176 }
3177
3178 void 
3179 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
3180 {
3181         log_assert(tv);
3182         if(timer->ev_timer->enabled)
3183                 comm_timer_disable(timer);
3184         if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
3185                 comm_timer_callback, timer, tv) != 0)
3186                 log_err("comm_timer_set: evtimer_add failed.");
3187         timer->ev_timer->enabled = 1;
3188 }
3189
3190 void 
3191 comm_timer_delete(struct comm_timer* timer)
3192 {
3193         if(!timer)
3194                 return;
3195         comm_timer_disable(timer);
3196         /* Free the sub struct timer->ev_timer derived from the super struct timer.
3197          * i.e. assert(timer == timer->ev_timer)
3198          */
3199         ub_event_free(timer->ev_timer->ev);
3200         free(timer->ev_timer);
3201 }
3202
3203 void 
3204 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
3205 {
3206         struct comm_timer* tm = (struct comm_timer*)arg;
3207         if(!(event&UB_EV_TIMEOUT))
3208                 return;
3209         ub_comm_base_now(tm->ev_timer->base);
3210         tm->ev_timer->enabled = 0;
3211         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
3212         (*tm->callback)(tm->cb_arg);
3213 }
3214
3215 int 
3216 comm_timer_is_set(struct comm_timer* timer)
3217 {
3218         return (int)timer->ev_timer->enabled;
3219 }
3220
3221 size_t 
3222 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer))
3223 {
3224         return sizeof(struct internal_timer);
3225 }
3226
3227 struct comm_signal* 
3228 comm_signal_create(struct comm_base* base,
3229         void (*callback)(int, void*), void* cb_arg)
3230 {
3231         struct comm_signal* com = (struct comm_signal*)malloc(
3232                 sizeof(struct comm_signal));
3233         if(!com) {
3234                 log_err("malloc failed");
3235                 return NULL;
3236         }
3237         com->base = base;
3238         com->callback = callback;
3239         com->cb_arg = cb_arg;
3240         com->ev_signal = NULL;
3241         return com;
3242 }
3243
3244 void 
3245 comm_signal_callback(int sig, short event, void* arg)
3246 {
3247         struct comm_signal* comsig = (struct comm_signal*)arg;
3248         if(!(event & UB_EV_SIGNAL))
3249                 return;
3250         ub_comm_base_now(comsig->base);
3251         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
3252         (*comsig->callback)(sig, comsig->cb_arg);
3253 }
3254
3255 int 
3256 comm_signal_bind(struct comm_signal* comsig, int sig)
3257 {
3258         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
3259                 sizeof(struct internal_signal));
3260         if(!entry) {
3261                 log_err("malloc failed");
3262                 return 0;
3263         }
3264         log_assert(comsig);
3265         /* add signal event */
3266         entry->ev = ub_signal_new(comsig->base->eb->base, sig,
3267                 comm_signal_callback, comsig);
3268         if(entry->ev == NULL) {
3269                 log_err("Could not create signal event");
3270                 free(entry);
3271                 return 0;
3272         }
3273         if(ub_signal_add(entry->ev, NULL) != 0) {
3274                 log_err("Could not add signal handler");
3275                 ub_event_free(entry->ev);
3276                 free(entry);
3277                 return 0;
3278         }
3279         /* link into list */
3280         entry->next = comsig->ev_signal;
3281         comsig->ev_signal = entry;
3282         return 1;
3283 }
3284
3285 void 
3286 comm_signal_delete(struct comm_signal* comsig)
3287 {
3288         struct internal_signal* p, *np;
3289         if(!comsig)
3290                 return;
3291         p=comsig->ev_signal;
3292         while(p) {
3293                 np = p->next;
3294                 ub_signal_del(p->ev);
3295                 ub_event_free(p->ev);
3296                 free(p);
3297                 p = np;
3298         }
3299         free(comsig);
3300 }