]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/util/netevent.c
MFV 364468:
[FreeBSD/FreeBSD.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/ub_event.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/tcp_conn_limit.h"
47 #include "util/fptr_wlist.h"
48 #include "sldns/pkthdr.h"
49 #include "sldns/sbuffer.h"
50 #include "sldns/str2wire.h"
51 #include "dnstap/dnstap.h"
52 #include "dnscrypt/dnscrypt.h"
53 #include "services/listen_dnsport.h"
54 #ifdef HAVE_OPENSSL_SSL_H
55 #include <openssl/ssl.h>
56 #endif
57 #ifdef HAVE_OPENSSL_ERR_H
58 #include <openssl/err.h>
59 #endif
60
61 /* -------- Start of local definitions -------- */
/**
 * Fallback for CMSG_ALIGN on platforms whose headers do not provide it.
 * A platform-internal alignment macro is used when one is available
 * (the guard now tests the same macro name that the definition expands
 * to); otherwise the length is rounded up to a multiple of sizeof(long).
 */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  elif defined(CMSG_DATA_ALIGN)
#    define CMSG_ALIGN(n) CMSG_DATA_ALIGN(n)
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/** Fallback for CMSG_LEN: aligned header size plus the payload length */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/** Fallback for CMSG_SPACE: aligned payload plus aligned header size */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
86
/** Timeout for writing a TCP query, in milliseconds */
#define TCP_QUERY_TIMEOUT 120000
/** Lower bound, in msec, on the TCP timeout that is really applied,
 * independent of the value that is advertised */
#define TCP_QUERY_TIMEOUT_MINIMUM 200

/** How many UDP reads are attempted for one read indication from select;
 * reduced to a single read when NONBLOCKING_IS_BROKEN is defined */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
99
/**
 * Internal bookkeeping for one event: pairs the ub_event with the
 * comm base. This may itself be a member of other structures
 * (list, tree).
 */
struct internal_event {
	/** comm base associated with the event */
	struct comm_base *base;
	/** the ub_event itself */
	struct ub_event *ev;
};
110
/**
 * Internal state of a comm base; one per thread so that each thread
 * has its own set of events.
 */
struct internal_base {
	/** the ub_event event_base */
	struct ub_event_base *base;
	/** storage for the current time in seconds; the seconds time
	 * pointer refers to this field */
	time_t secs;
	/** current time as a timeval */
	struct timeval now;
	/** timeout event used while accepts are slowed down */
	struct ub_event *slow_accept;
	/** nonzero while slow_accept is enabled */
	int slow_accept_enabled;
};
126
127 /**
128  * Internal timer structure, to store timer event in.
129  */
130 struct internal_timer {
131         /** the super struct from which derived */
132         struct comm_timer super;
133         /** the comm base */
134         struct comm_base* base;
135         /** ub_event event type */
136         struct ub_event* ev;
137         /** is timer enabled */
138         uint8_t enabled;
139 };
140
/**
 * Internal signal entry: holds the event for a signal and links to the
 * next entry of the signal list.
 */
struct internal_signal {
	/** the ub_event for the signal */
	struct ub_event *ev;
	/** following entry in the signal list */
	struct internal_signal *next;
};
150
151 /** create a tcp handler with a parent */
152 static struct comm_point* comm_point_create_tcp_handler(
153         struct comm_base *base, struct comm_point* parent, size_t bufsize,
154         struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
155         void* callback_arg);
156
157 /* -------- End of local definitions -------- */
158
159 struct comm_base* 
160 comm_base_create(int sigs)
161 {
162         struct comm_base* b = (struct comm_base*)calloc(1,
163                 sizeof(struct comm_base));
164         const char *evnm="event", *evsys="", *evmethod="";
165
166         if(!b)
167                 return NULL;
168         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
169         if(!b->eb) {
170                 free(b);
171                 return NULL;
172         }
173         b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
174         if(!b->eb->base) {
175                 free(b->eb);
176                 free(b);
177                 return NULL;
178         }
179         ub_comm_base_now(b);
180         ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
181         verbose(VERB_ALGO, "%s %s uses %s method.", evnm, evsys, evmethod);
182         return b;
183 }
184
185 struct comm_base*
186 comm_base_create_event(struct ub_event_base* base)
187 {
188         struct comm_base* b = (struct comm_base*)calloc(1,
189                 sizeof(struct comm_base));
190         if(!b)
191                 return NULL;
192         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
193         if(!b->eb) {
194                 free(b);
195                 return NULL;
196         }
197         b->eb->base = base;
198         ub_comm_base_now(b);
199         return b;
200 }
201
202 void 
203 comm_base_delete(struct comm_base* b)
204 {
205         if(!b)
206                 return;
207         if(b->eb->slow_accept_enabled) {
208                 if(ub_event_del(b->eb->slow_accept) != 0) {
209                         log_err("could not event_del slow_accept");
210                 }
211                 ub_event_free(b->eb->slow_accept);
212         }
213         ub_event_base_free(b->eb->base);
214         b->eb->base = NULL;
215         free(b->eb);
216         free(b);
217 }
218
219 void 
220 comm_base_delete_no_base(struct comm_base* b)
221 {
222         if(!b)
223                 return;
224         if(b->eb->slow_accept_enabled) {
225                 if(ub_event_del(b->eb->slow_accept) != 0) {
226                         log_err("could not event_del slow_accept");
227                 }
228                 ub_event_free(b->eb->slow_accept);
229         }
230         b->eb->base = NULL;
231         free(b->eb);
232         free(b);
233 }
234
235 void 
236 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
237 {
238         *tt = &b->eb->secs;
239         *tv = &b->eb->now;
240 }
241
242 void 
243 comm_base_dispatch(struct comm_base* b)
244 {
245         int retval;
246         retval = ub_event_base_dispatch(b->eb->base);
247         if(retval < 0) {
248                 fatal_exit("event_dispatch returned error %d, "
249                         "errno is %s", retval, strerror(errno));
250         }
251 }
252
253 void comm_base_exit(struct comm_base* b)
254 {
255         if(ub_event_base_loopexit(b->eb->base) != 0) {
256                 log_err("Could not loopexit");
257         }
258 }
259
260 void comm_base_set_slow_accept_handlers(struct comm_base* b,
261         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
262 {
263         b->stop_accept = stop_acc;
264         b->start_accept = start_acc;
265         b->cb_arg = arg;
266 }
267
268 struct ub_event_base* comm_base_internal(struct comm_base* b)
269 {
270         return b->eb->base;
271 }
272
273 /** see if errno for udp has to be logged or not uses globals */
274 static int
275 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
276 {
277         /* do not log transient errors (unless high verbosity) */
278 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
279         switch(errno) {
280 #  ifdef ENETUNREACH
281                 case ENETUNREACH:
282 #  endif
283 #  ifdef EHOSTDOWN
284                 case EHOSTDOWN:
285 #  endif
286 #  ifdef EHOSTUNREACH
287                 case EHOSTUNREACH:
288 #  endif
289 #  ifdef ENETDOWN
290                 case ENETDOWN:
291 #  endif
292                         if(verbosity < VERB_ALGO)
293                                 return 0;
294                 default:
295                         break;
296         }
297 #endif
298         /* permission denied is gotten for every send if the
299          * network is disconnected (on some OS), squelch it */
300         if( ((errno == EPERM)
301 #  ifdef EADDRNOTAVAIL
302                 /* 'Cannot assign requested address' also when disconnected */
303                 || (errno == EADDRNOTAVAIL)
304 #  endif
305                 ) && verbosity < VERB_DETAIL)
306                 return 0;
307 #  ifdef EADDRINUSE
308         /* If SO_REUSEADDR is set, we could try to connect to the same server
309          * from the same source port twice. */
310         if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
311                 return 0;
312 #  endif
313         /* squelch errors where people deploy AAAA ::ffff:bla for
314          * authority servers, which we try for intranets. */
315         if(errno == EINVAL && addr_is_ip4mapped(
316                 (struct sockaddr_storage*)addr, addrlen) &&
317                 verbosity < VERB_DETAIL)
318                 return 0;
319         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
320          * but a dns cache does not need it. */
321         if(errno == EACCES && addr_is_broadcast(
322                 (struct sockaddr_storage*)addr, addrlen) &&
323                 verbosity < VERB_DETAIL)
324                 return 0;
325         return 1;
326 }
327
/* For tcp connect errors, apply the same logging decision as for
 * udp send errors. */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);

	return needs_log;
}
332
333 /* send a UDP reply */
334 int
335 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
336         struct sockaddr* addr, socklen_t addrlen) 
337 {
338         ssize_t sent;
339         log_assert(c->fd != -1);
340 #ifdef UNBOUND_DEBUG
341         if(sldns_buffer_remaining(packet) == 0)
342                 log_err("error: send empty UDP packet");
343 #endif
344         log_assert(addr && addrlen > 0);
345         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
346                 sldns_buffer_remaining(packet), 0,
347                 addr, addrlen);
348         if(sent == -1) {
349                 /* try again and block, waiting for IO to complete,
350                  * we want to send the answer, and we will wait for
351                  * the ethernet interface buffer to have space. */
352 #ifndef USE_WINSOCK
353                 if(errno == EAGAIN || 
354 #  ifdef EWOULDBLOCK
355                         errno == EWOULDBLOCK ||
356 #  endif
357                         errno == ENOBUFS) {
358 #else
359                 if(WSAGetLastError() == WSAEINPROGRESS ||
360                         WSAGetLastError() == WSAENOBUFS ||
361                         WSAGetLastError() == WSAEWOULDBLOCK) {
362 #endif
363                         int e;
364                         fd_set_block(c->fd);
365                         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
366                                 sldns_buffer_remaining(packet), 0,
367                                 addr, addrlen);
368                         e = errno;
369                         fd_set_nonblock(c->fd);
370                         errno = e;
371                 }
372         }
373         if(sent == -1) {
374                 if(!udp_send_errno_needs_log(addr, addrlen))
375                         return 0;
376 #ifndef USE_WINSOCK
377                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
378 #else
379                 verbose(VERB_OPS, "sendto failed: %s", 
380                         wsa_strerror(WSAGetLastError()));
381 #endif
382                 log_addr(VERB_OPS, "remote address is", 
383                         (struct sockaddr_storage*)addr, addrlen);
384                 return 0;
385         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
386                 log_err("sent %d in place of %d bytes", 
387                         (int)sent, (int)sldns_buffer_remaining(packet));
388                 return 0;
389         }
390         return 1;
391 }
392
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/** log the ancillary (packet info) data of a reply, for debugging;
 * str is printed as a prefix of the log line */
static void p_ancil(const char* str, struct comm_reply* r)
{
	switch(r->srctype) {
	case 6: {
		char text[1024];
		if(!inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			text, (socklen_t)sizeof(text)))
			(void)strlcpy(text, "(inet_ntop error)", sizeof(text));
		text[sizeof(text)-1] = 0;
		log_info("%s: %s %d", str, text,
			r->pktinfo.v6info.ipi6_ifindex);
		break;
	}
	case 4: {
#ifdef IP_PKTINFO
		char addr_text[1024], spec_text[1024];
		if(!inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
			addr_text, (socklen_t)sizeof(addr_text)))
			(void)strlcpy(addr_text, "(inet_ntop error)",
				sizeof(addr_text));
		addr_text[sizeof(addr_text)-1] = 0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(!inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			spec_text, (socklen_t)sizeof(spec_text)))
			(void)strlcpy(spec_text, "(inet_ntop error)",
				sizeof(spec_text));
		spec_text[sizeof(spec_text)-1] = 0;
#else
		/* no ipi_spec_dst member: print an empty string for it */
		spec_text[0] = 0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			addr_text, spec_text);
#elif defined(IP_RECVDSTADDR)
		char addr_text[1024];
		if(!inet_ntop(AF_INET, &r->pktinfo.v4addr,
			addr_text, (socklen_t)sizeof(addr_text)))
			(void)strlcpy(addr_text, "(inet_ntop error)",
				sizeof(addr_text));
		addr_text[sizeof(addr_text)-1] = 0;
		log_info("%s: %s", str, addr_text);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		break;
	}
	default:
		log_info("%s: unknown srctype %d", str, r->srctype);
		break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
440
441 /** send a UDP reply over specified interface*/
442 static int
443 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
444         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
445 {
446 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
447         ssize_t sent;
448         struct msghdr msg;
449         struct iovec iov[1];
450         union {
451                 struct cmsghdr hdr;
452                 char buf[256];
453         } control;
454 #ifndef S_SPLINT_S
455         struct cmsghdr *cmsg;
456 #endif /* S_SPLINT_S */
457
458         log_assert(c->fd != -1);
459 #ifdef UNBOUND_DEBUG
460         if(sldns_buffer_remaining(packet) == 0)
461                 log_err("error: send empty UDP packet");
462 #endif
463         log_assert(addr && addrlen > 0);
464
465         msg.msg_name = addr;
466         msg.msg_namelen = addrlen;
467         iov[0].iov_base = sldns_buffer_begin(packet);
468         iov[0].iov_len = sldns_buffer_remaining(packet);
469         msg.msg_iov = iov;
470         msg.msg_iovlen = 1;
471         msg.msg_control = control.buf;
472 #ifndef S_SPLINT_S
473         msg.msg_controllen = sizeof(control.buf);
474 #endif /* S_SPLINT_S */
475         msg.msg_flags = 0;
476
477 #ifndef S_SPLINT_S
478         cmsg = CMSG_FIRSTHDR(&msg);
479         if(r->srctype == 4) {
480 #ifdef IP_PKTINFO
481                 void* cmsg_data;
482                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
483                 log_assert(msg.msg_controllen <= sizeof(control.buf));
484                 cmsg->cmsg_level = IPPROTO_IP;
485                 cmsg->cmsg_type = IP_PKTINFO;
486                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
487                         sizeof(struct in_pktinfo));
488                 /* unset the ifindex to not bypass the routing tables */
489                 cmsg_data = CMSG_DATA(cmsg);
490                 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
491                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
492 #elif defined(IP_SENDSRCADDR)
493                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
494                 log_assert(msg.msg_controllen <= sizeof(control.buf));
495                 cmsg->cmsg_level = IPPROTO_IP;
496                 cmsg->cmsg_type = IP_SENDSRCADDR;
497                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
498                         sizeof(struct in_addr));
499                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
500 #else
501                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
502                 msg.msg_control = NULL;
503 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
504         } else if(r->srctype == 6) {
505                 void* cmsg_data;
506                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
507                 log_assert(msg.msg_controllen <= sizeof(control.buf));
508                 cmsg->cmsg_level = IPPROTO_IPV6;
509                 cmsg->cmsg_type = IPV6_PKTINFO;
510                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
511                         sizeof(struct in6_pktinfo));
512                 /* unset the ifindex to not bypass the routing tables */
513                 cmsg_data = CMSG_DATA(cmsg);
514                 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
515                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
516         } else {
517                 /* try to pass all 0 to use default route */
518                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
519                 log_assert(msg.msg_controllen <= sizeof(control.buf));
520                 cmsg->cmsg_level = IPPROTO_IPV6;
521                 cmsg->cmsg_type = IPV6_PKTINFO;
522                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
523                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
524         }
525 #endif /* S_SPLINT_S */
526         if(verbosity >= VERB_ALGO)
527                 p_ancil("send_udp over interface", r);
528         sent = sendmsg(c->fd, &msg, 0);
529         if(sent == -1) {
530                 /* try again and block, waiting for IO to complete,
531                  * we want to send the answer, and we will wait for
532                  * the ethernet interface buffer to have space. */
533 #ifndef USE_WINSOCK
534                 if(errno == EAGAIN || 
535 #  ifdef EWOULDBLOCK
536                         errno == EWOULDBLOCK ||
537 #  endif
538                         errno == ENOBUFS) {
539 #else
540                 if(WSAGetLastError() == WSAEINPROGRESS ||
541                         WSAGetLastError() == WSAENOBUFS ||
542                         WSAGetLastError() == WSAEWOULDBLOCK) {
543 #endif
544                         int e;
545                         fd_set_block(c->fd);
546                         sent = sendmsg(c->fd, &msg, 0);
547                         e = errno;
548                         fd_set_nonblock(c->fd);
549                         errno = e;
550                 }
551         }
552         if(sent == -1) {
553                 if(!udp_send_errno_needs_log(addr, addrlen))
554                         return 0;
555                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
556                 log_addr(VERB_OPS, "remote address is", 
557                         (struct sockaddr_storage*)addr, addrlen);
558 #ifdef __NetBSD__
559                 /* netbsd 7 has IP_PKTINFO for recv but not send */
560                 if(errno == EINVAL && r->srctype == 4)
561                         log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
562                                 "Please disable interface-automatic");
563 #endif
564                 return 0;
565         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
566                 log_err("sent %d in place of %d bytes", 
567                         (int)sent, (int)sldns_buffer_remaining(packet));
568                 return 0;
569         }
570         return 1;
571 #else
572         (void)c;
573         (void)packet;
574         (void)addr;
575         (void)addrlen;
576         (void)r;
577         log_err("sendmsg: IPV6_PKTINFO not supported");
578         return 0;
579 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
580 }
581
/**
 * Event callback for a UDP comm point that uses ancillary data: reads up
 * to NUM_UDP_PER_SELECT datagrams with recvmsg, records the packet info
 * of each in the reply, and calls the comm point callback. When the
 * callback returns nonzero, a reply is sent immediately with
 * comm_point_send_udp_msg_if.
 * Without IPV6_PKTINFO/recvmsg support this exits fatally.
 * @param fd: socket that became readable; must equal the comm point fd.
 * @param event: event bits; only UB_EV_READ is acted on.
 * @param arg: the struct comm_point.
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&UB_EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	ub_comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			/* nothing (more) to read, or interrupted: not logged */
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		/* srctype stays 0 when no packet info message is found */
		rep.srctype = 0;
#ifndef S_SPLINT_S
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply; pick the reply buffer
			 * the same way comm_point_udp_callback does */
			struct sldns_buffer *buffer;
#ifdef USE_DNSCRYPT
			buffer = rep.c->dnscrypt_buffer;
#else
			buffer = rep.c->buffer;
#endif
			(void)comm_point_send_udp_msg_if(rep.c, buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or
			reused for another fd; stop reading from fd */
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
679
680 void 
681 comm_point_udp_callback(int fd, short event, void* arg)
682 {
683         struct comm_reply rep;
684         ssize_t rcv;
685         int i;
686         struct sldns_buffer *buffer;
687
688         rep.c = (struct comm_point*)arg;
689         log_assert(rep.c->type == comm_udp);
690
691         if(!(event&UB_EV_READ))
692                 return;
693         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
694         ub_comm_base_now(rep.c->ev->base);
695         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
696                 sldns_buffer_clear(rep.c->buffer);
697                 rep.addrlen = (socklen_t)sizeof(rep.addr);
698                 log_assert(fd != -1);
699                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
700                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
701                         sldns_buffer_remaining(rep.c->buffer), 0, 
702                         (struct sockaddr*)&rep.addr, &rep.addrlen);
703                 if(rcv == -1) {
704 #ifndef USE_WINSOCK
705                         if(errno != EAGAIN && errno != EINTR)
706                                 log_err("recvfrom %d failed: %s", 
707                                         fd, strerror(errno));
708 #else
709                         if(WSAGetLastError() != WSAEINPROGRESS &&
710                                 WSAGetLastError() != WSAECONNRESET &&
711                                 WSAGetLastError()!= WSAEWOULDBLOCK)
712                                 log_err("recvfrom failed: %s",
713                                         wsa_strerror(WSAGetLastError()));
714 #endif
715                         return;
716                 }
717                 sldns_buffer_skip(rep.c->buffer, rcv);
718                 sldns_buffer_flip(rep.c->buffer);
719                 rep.srctype = 0;
720                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
721                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
722                         /* send back immediate reply */
723 #ifdef USE_DNSCRYPT
724                         buffer = rep.c->dnscrypt_buffer;
725 #else
726                         buffer = rep.c->buffer;
727 #endif
728                         (void)comm_point_send_udp_msg(rep.c, buffer,
729                                 (struct sockaddr*)&rep.addr, rep.addrlen);
730                 }
731                 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
732                 another UDP port. Note rep.c cannot be reused with TCP fd. */
733                         break;
734         }
735 }
736
737 /** Use a new tcp handler for new query fd, set to read query */
738 static void
739 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 
740 {
741         int handler_usage;
742         log_assert(c->type == comm_tcp);
743         log_assert(c->fd == -1);
744         sldns_buffer_clear(c->buffer);
745 #ifdef USE_DNSCRYPT
746         if (c->dnscrypt)
747                 sldns_buffer_clear(c->dnscrypt_buffer);
748 #endif
749         c->tcp_is_reading = 1;
750         c->tcp_byte_count = 0;
751         /* if more than half the tcp handlers are in use, use a shorter
752          * timeout for this TCP connection, we need to make space for
753          * other connections to be able to get attention */
754         /* If > 50% TCP handler structures in use, set timeout to 1/100th
755          *      configured value.
756          * If > 65%TCP handler structures in use, set to 1/500th configured
757          *      value.
758          * If > 80% TCP handler structures in use, set to 0.
759          *
760          * If the timeout to use falls below 200 milliseconds, an actual
761          * timeout of 200ms is used.
762          */
763         handler_usage = (cur * 100) / max;
764         if(handler_usage > 50 && handler_usage <= 65)
765                 c->tcp_timeout_msec /= 100;
766         else if (handler_usage > 65 && handler_usage <= 80)
767                 c->tcp_timeout_msec /= 500;
768         else if (handler_usage > 80)
769                 c->tcp_timeout_msec = 0;
770         comm_point_start_listening(c, fd,
771                 c->tcp_timeout_msec < TCP_QUERY_TIMEOUT_MINIMUM
772                         ? TCP_QUERY_TIMEOUT_MINIMUM
773                         : c->tcp_timeout_msec);
774 }
775
776 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
777         short ATTR_UNUSED(event), void* arg)
778 {
779         struct comm_base* b = (struct comm_base*)arg;
780         /* timeout for the slow accept, re-enable accepts again */
781         if(b->start_accept) {
782                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
783                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
784                 (*b->start_accept)(b->cb_arg);
785                 b->eb->slow_accept_enabled = 0;
786         }
787 }
788
789 int comm_point_perform_accept(struct comm_point* c,
790         struct sockaddr_storage* addr, socklen_t* addrlen)
791 {
792         int new_fd;
793         *addrlen = (socklen_t)sizeof(*addr);
794 #ifndef HAVE_ACCEPT4
795         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
796 #else
797         /* SOCK_NONBLOCK saves extra calls to fcntl for the same result */
798         new_fd = accept4(c->fd, (struct sockaddr*)addr, addrlen, SOCK_NONBLOCK);
799 #endif
800         if(new_fd == -1) {
801 #ifndef USE_WINSOCK
802                 /* EINTR is signal interrupt. others are closed connection. */
803                 if(     errno == EINTR || errno == EAGAIN
804 #ifdef EWOULDBLOCK
805                         || errno == EWOULDBLOCK 
806 #endif
807 #ifdef ECONNABORTED
808                         || errno == ECONNABORTED 
809 #endif
810 #ifdef EPROTO
811                         || errno == EPROTO
812 #endif /* EPROTO */
813                         )
814                         return -1;
815 #if defined(ENFILE) && defined(EMFILE)
816                 if(errno == ENFILE || errno == EMFILE) {
817                         /* out of file descriptors, likely outside of our
818                          * control. stop accept() calls for some time */
819                         if(c->ev->base->stop_accept) {
820                                 struct comm_base* b = c->ev->base;
821                                 struct timeval tv;
822                                 verbose(VERB_ALGO, "out of file descriptors: "
823                                         "slow accept");
824                                 b->eb->slow_accept_enabled = 1;
825                                 fptr_ok(fptr_whitelist_stop_accept(
826                                         b->stop_accept));
827                                 (*b->stop_accept)(b->cb_arg);
828                                 /* set timeout, no mallocs */
829                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
830                                 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
831                                 b->eb->slow_accept = ub_event_new(b->eb->base,
832                                         -1, UB_EV_TIMEOUT,
833                                         comm_base_handle_slow_accept, b);
834                                 if(b->eb->slow_accept == NULL) {
835                                         /* we do not want to log here, because
836                                          * that would spam the logfiles.
837                                          * error: "event_base_set failed." */
838                                 }
839                                 else if(ub_event_add(b->eb->slow_accept, &tv)
840                                         != 0) {
841                                         /* we do not want to log here,
842                                          * error: "event_add failed." */
843                                 }
844                         }
845                         return -1;
846                 }
847 #endif
848                 log_err_addr("accept failed", strerror(errno), addr, *addrlen);
849 #else /* USE_WINSOCK */
850                 if(WSAGetLastError() == WSAEINPROGRESS ||
851                         WSAGetLastError() == WSAECONNRESET)
852                         return -1;
853                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
854                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
855                         return -1;
856                 }
857                 log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
858                         addr, *addrlen);
859 #endif
860                 return -1;
861         }
862         if(c->tcp_conn_limit && c->type == comm_tcp_accept) {
863                 c->tcl_addr = tcl_addr_lookup(c->tcp_conn_limit, addr, *addrlen);
864                 if(!tcl_new_connection(c->tcl_addr)) {
865                         if(verbosity >= 3)
866                                 log_err_addr("accept rejected",
867                                 "connection limit exceeded", addr, *addrlen);
868                         close(new_fd);
869                         return -1;
870                 }
871         }
872 #ifndef HAVE_ACCEPT4
873         fd_set_nonblock(new_fd);
874 #endif
875         return new_fd;
876 }
877
878 #ifdef USE_WINSOCK
879 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
880         int ATTR_UNUSED(argi), long argl, long retvalue)
881 {
882         int wsa_err = WSAGetLastError(); /* store errcode before it is gone */
883         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
884                 (oper&BIO_CB_RETURN)?"return":"before",
885                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
886                 wsa_err==WSAEWOULDBLOCK?"wsawb":"");
887         /* on windows, check if previous operation caused EWOULDBLOCK */
888         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
889                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
890                 if(wsa_err == WSAEWOULDBLOCK)
891                         ub_winsock_tcp_wouldblock((struct ub_event*)
892                                 BIO_get_callback_arg(b), UB_EV_READ);
893         }
894         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
895                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
896                 if(wsa_err == WSAEWOULDBLOCK)
897                         ub_winsock_tcp_wouldblock((struct ub_event*)
898                                 BIO_get_callback_arg(b), UB_EV_WRITE);
899         }
900         /* return original return value */
901         return retvalue;
902 }
903
904 /** set win bio callbacks for nonblocking operations */
905 void
906 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
907 {
908         SSL* ssl = (SSL*)thessl;
909         /* set them both just in case, but usually they are the same BIO */
910         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
911         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
912         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
913         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
914 }
915 #endif
916
917 void 
918 comm_point_tcp_accept_callback(int fd, short event, void* arg)
919 {
920         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
921         int new_fd;
922         log_assert(c->type == comm_tcp_accept);
923         if(!(event & UB_EV_READ)) {
924                 log_info("ignoring tcp accept event %d", (int)event);
925                 return;
926         }
927         ub_comm_base_now(c->ev->base);
928         /* find free tcp handler. */
929         if(!c->tcp_free) {
930                 log_warn("accepted too many tcp, connections full");
931                 return;
932         }
933         /* accept incoming connection. */
934         c_hdl = c->tcp_free;
935         /* clear leftover flags from previous use, and then set the
936          * correct event base for the event structure for libevent */
937         ub_event_free(c_hdl->ev->ev);
938         c_hdl->ev->ev = ub_event_new(c_hdl->ev->base->eb->base, -1, UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT, comm_point_tcp_handle_callback, c_hdl);
939         if(!c_hdl->ev->ev) {
940                 log_warn("could not ub_event_new, dropped tcp");
941                 return;
942         }
943         log_assert(fd != -1);
944         (void)fd;
945         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
946                 &c_hdl->repinfo.addrlen);
947         if(new_fd == -1)
948                 return;
949         if(c->ssl) {
950                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
951                 if(!c_hdl->ssl) {
952                         c_hdl->fd = new_fd;
953                         comm_point_close(c_hdl);
954                         return;
955                 }
956                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
957 #ifdef USE_WINSOCK
958                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
959 #endif
960         }
961
962         /* grab the tcp handler buffers */
963         c->cur_tcp_count++;
964         c->tcp_free = c_hdl->tcp_free;
965         if(!c->tcp_free) {
966                 /* stop accepting incoming queries for now. */
967                 comm_point_stop_listening(c);
968         }
969         setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
970 }
971
972 /** Make tcp handler free for next assignment */
973 static void
974 reclaim_tcp_handler(struct comm_point* c)
975 {
976         log_assert(c->type == comm_tcp);
977         if(c->ssl) {
978 #ifdef HAVE_SSL
979                 SSL_shutdown(c->ssl);
980                 SSL_free(c->ssl);
981                 c->ssl = NULL;
982 #endif
983         }
984         comm_point_close(c);
985         if(c->tcp_parent) {
986                 c->tcp_parent->cur_tcp_count--;
987                 c->tcp_free = c->tcp_parent->tcp_free;
988                 c->tcp_parent->tcp_free = c;
989                 if(!c->tcp_free) {
990                         /* re-enable listening on accept socket */
991                         comm_point_start_listening(c->tcp_parent, -1, -1);
992                 }
993         }
994 }
995
996 /** do the callback when writing is done */
997 static void
998 tcp_callback_writer(struct comm_point* c)
999 {
1000         log_assert(c->type == comm_tcp);
1001         sldns_buffer_clear(c->buffer);
1002         if(c->tcp_do_toggle_rw)
1003                 c->tcp_is_reading = 1;
1004         c->tcp_byte_count = 0;
1005         /* switch from listening(write) to listening(read) */
1006         if(c->tcp_req_info) {
1007                 tcp_req_info_handle_writedone(c->tcp_req_info);
1008         } else {
1009                 comm_point_stop_listening(c);
1010                 comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1011         }
1012 }
1013
1014 /** do the callback when reading is done */
1015 static void
1016 tcp_callback_reader(struct comm_point* c)
1017 {
1018         log_assert(c->type == comm_tcp || c->type == comm_local);
1019         sldns_buffer_flip(c->buffer);
1020         if(c->tcp_do_toggle_rw)
1021                 c->tcp_is_reading = 0;
1022         c->tcp_byte_count = 0;
1023         if(c->tcp_req_info) {
1024                 tcp_req_info_handle_readdone(c->tcp_req_info);
1025         } else {
1026                 if(c->type == comm_tcp)
1027                         comm_point_stop_listening(c);
1028                 fptr_ok(fptr_whitelist_comm_point(c->callback));
1029                 if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1030                         comm_point_start_listening(c, -1, c->tcp_timeout_msec);
1031                 }
1032         }
1033 }
1034
1035 #ifdef HAVE_SSL
1036 /** true if the ssl handshake error has to be squelched from the logs */
1037 int
1038 squelch_err_ssl_handshake(unsigned long err)
1039 {
1040         if(verbosity >= VERB_QUERY)
1041                 return 0; /* only squelch on low verbosity */
1042         /* this is very specific, we could filter on ERR_GET_REASON()
1043          * (the third element in ERR_PACK) */
1044         if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) ||
1045                 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) ||
1046                 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) ||
1047                 err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE)
1048 #ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
1049                 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER)
1050 #endif
1051 #ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
1052                 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL)
1053                 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL)
1054 #  ifdef SSL_R_VERSION_TOO_LOW
1055                 || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW)
1056 #  endif
1057 #endif
1058                 )
1059                 return 1;
1060         return 0;
1061 }
1062 #endif /* HAVE_SSL */
1063
/**
 * Continue the ssl handshake on a comm point.
 * Handles the states where a read or write had to wait for the opposite
 * condition (comm_ssl_shake_hs_read, comm_ssl_shake_hs_write), otherwise
 * calls SSL_do_handshake and, once that succeeds, checks the peer
 * certificate if SSL_VERIFY_PEER is set on the connection.
 * @param c: comm point with c->ssl set.
 * @return 0 if the connection failed or was closed, 1 otherwise. On
 *	return 1 the handshake is only finished if c->ssl_shake_state is
 *	comm_ssl_shake_none.
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int r;
	if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
		/* the read condition was satisfied, go back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	} else if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
		/* the write condition was satisfied, go back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	ERR_clear_error();
	r = SSL_do_handshake(c->ssl);
	if(r != 1) {
		int want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_WANT_READ) {
			/* only change the event if not waiting for read */
			if(c->ssl_shake_state != comm_ssl_shake_read) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
			}
			return 1;
		}
		if(want == SSL_ERROR_WANT_WRITE) {
			/* only change the event if not waiting for write */
			if(c->ssl_shake_state != comm_ssl_shake_write) {
				c->ssl_shake_state = comm_ssl_shake_write;
				comm_point_listen_for_rw(c, 0, 1);
			}
			return 1;
		}
		if(r == 0)
			return 0; /* closed */
		if(want != SSL_ERROR_SYSCALL) {
			unsigned long err = ERR_get_error();
			if(!squelch_err_ssl_handshake(err)) {
				log_crypto_err_code("ssl handshake failed", err);
				log_addr(VERB_OPS, "ssl handshake failed", &c->repinfo.addr,
					c->repinfo.addrlen);
			}
			return 0;
		}
		/* SYSCALL and errno==0 means closed uncleanly */
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_handshake syscall: %s",
				strerror(errno));
		return 0;
	}

	/* handshake done; this is where peer verification takes place */
	if(!(SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
		/* unauthenticated, the verify peer flag was not set
		 * in c->ssl when the ssl object was created from ssl_ctx */
		log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr,
			c->repinfo.addrlen);
	} else if(SSL_get_verify_result(c->ssl) != X509_V_OK) {
		X509* cert = SSL_get_peer_certificate(c->ssl);
		if(cert) {
			log_cert(VERB_ALGO, "peer certificate", cert);
			X509_free(cert);
		}
		log_addr(VERB_ALGO, "SSL connection failed: "
			"failed to authenticate",
			&c->repinfo.addr, c->repinfo.addrlen);
		return 0;
	} else {
		X509* cert = SSL_get_peer_certificate(c->ssl);
		if(!cert) {
			log_addr(VERB_ALGO, "SSL connection failed: "
				"no certificate",
				&c->repinfo.addr, c->repinfo.addrlen);
			return 0;
		}
		log_cert(VERB_ALGO, "peer certificate", cert);
#ifdef HAVE_SSL_GET0_PEERNAME
		if(SSL_get0_peername(c->ssl)) {
			char msg[255];
			snprintf(msg, sizeof(msg), "SSL connection "
				"to %s authenticated",
				SSL_get0_peername(c->ssl));
			log_addr(VERB_ALGO, msg, &c->repinfo.addr,
				c->repinfo.addrlen);
		} else {
#endif
			log_addr(VERB_ALGO, "SSL connection "
				"authenticated", &c->repinfo.addr,
				c->repinfo.addrlen);
#ifdef HAVE_SSL_GET0_PEERNAME
		}
#endif
		X509_free(cert);
	}

	/* set the event to what the connection wants to do next */
	if(!c->tcp_is_reading)
		comm_point_listen_for_rw(c, 1, 1);
	else if(c->ssl_shake_state != comm_ssl_shake_read)
		comm_point_listen_for_rw(c, 1, 0);
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
1183
#ifdef HAVE_SSL
/**
 * Handle an SSL_read call that returned r <= 0.
 * @param c: comm point that was read from.
 * @param r: return value of SSL_read.
 * @return the value that the read handler has to return: 1 to keep the
 *	connection and wait, 0 to close it.
 */
static int
ssl_read_failed(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN) {
		if(c->tcp_req_info)
			return tcp_req_info_handle_read_close(c->tcp_req_info);
		return 0; /* shutdown, closed */
	}
	if(want == SSL_ERROR_WANT_READ) {
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
		return 1; /* read more later */
	}
	if(want == SSL_ERROR_WANT_WRITE) {
		/* wait for the write condition before reading again */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	}
	if(want == SSL_ERROR_SYSCALL) {
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Finishes the handshake if needed, reads the two byte length prefix,
 * then the message, and calls tcp_callback_reader once the buffer is full.
 * @param c: comm point with an ssl connection.
 * @return 0 if the connection has to be closed, 1 to continue. Without
 *	HAVE_SSL it always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1; /* handshake not done yet */
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read (the rest of) the length bytes */
		ERR_clear_error();
		r = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(r <= 0)
			return ssl_read_failed(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* length is complete: check it and set it as buffer limit */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		sldns_buffer_skip(c->buffer, (ssize_t)(c->tcp_byte_count-sizeof(uint16_t)));
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	if(sldns_buffer_remaining(c->buffer) > 0) {
		/* read message contents */
		ERR_clear_error();
		r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
			(int)sldns_buffer_remaining(c->buffer));
		if(r <= 0)
			return ssl_read_failed(c, r);
		sldns_buffer_skip(c->buffer, (ssize_t)r);
	}
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		/* the whole message is in the buffer */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1286
#ifdef HAVE_SSL
/**
 * Handle an SSL_write call that returned r <= 0.
 * @param c: comm point that was written to.
 * @param r: return value of SSL_write.
 * @return the value that the write handler has to return: 1 to keep the
 *	connection and wait, 0 to close it.
 */
static int
ssl_write_failed(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN)
		return 0; /* closed */
	if(want == SSL_ERROR_WANT_READ) {
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	}
	if(want == SSL_ERROR_WANT_WRITE) {
		ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
		return 1; /* write more later */
	}
	if(want == SSL_ERROR_SYSCALL) {
#ifdef EPIPE
		if(errno == EPIPE && verbosity < 2)
			return 0; /* silence 'broken pipe' */
#endif
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("could not SSL_write");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Finishes the handshake if needed, writes the two byte length prefix
 * (together with the message when both fit in LDNS_RR_BUF_SIZE), then the
 * rest of the message, and calls tcp_callback_writer when all is written.
 * @param c: comm point with an ssl connection.
 * @return 0 if the connection has to be closed, 1 to continue. Without
 *	HAVE_SSL it always returns 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1; /* handshake not done yet */
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, (long)SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		uint16_t len = htons(sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		if(sizeof(uint16_t)+sldns_buffer_remaining(c->buffer) <
			LDNS_RR_BUF_SIZE) {
			/* put the tcp length and the query in one buffer
			 * for a single write, this emulates writev */
			uint8_t combined[LDNS_RR_BUF_SIZE];
			memmove(combined, &len, sizeof(uint16_t));
			memmove(combined+sizeof(uint16_t),
				sldns_buffer_current(c->buffer),
				sldns_buffer_remaining(c->buffer));
			r = SSL_write(c->ssl,
				(void*)(combined+c->tcp_byte_count),
				(int)(sizeof(uint16_t)+
				sldns_buffer_remaining(c->buffer)
				- c->tcp_byte_count));
		} else {
			/* too large to combine, write only the length */
			r = SSL_write(c->ssl,
				(void*)(((uint8_t*)&len)+c->tcp_byte_count),
				(int)(sizeof(uint16_t)-c->tcp_byte_count));
		}
		if(r <= 0)
			return ssl_write_failed(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* position the buffer after the message bytes that went
		 * out together with the length */
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_write_failed(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		/* everything has been written */
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1395
1396 /** handle ssl tcp connection with dns contents */
1397 static int
1398 ssl_handle_it(struct comm_point* c)
1399 {
1400         if(c->tcp_is_reading)
1401                 return ssl_handle_read(c);
1402         return ssl_handle_write(c);
1403 }
1404
1405 /** Handle tcp reading callback. 
1406  * @param fd: file descriptor of socket.
1407  * @param c: comm point to read from into buffer.
1408  * @param short_ok: if true, very short packets are OK (for comm_local).
1409  * @return: 0 on error 
1410  */
1411 static int
1412 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1413 {
1414         ssize_t r;
1415         log_assert(c->type == comm_tcp || c->type == comm_local);
1416         if(c->ssl)
1417                 return ssl_handle_it(c);
1418         if(!c->tcp_is_reading)
1419                 return 0;
1420
1421         log_assert(fd != -1);
1422         if(c->tcp_byte_count < sizeof(uint16_t)) {
1423                 /* read length bytes */
1424                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1425                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1426                 if(r == 0) {
1427                         if(c->tcp_req_info)
1428                                 return tcp_req_info_handle_read_close(c->tcp_req_info);
1429                         return 0;
1430                 } else if(r == -1) {
1431 #ifndef USE_WINSOCK
1432                         if(errno == EINTR || errno == EAGAIN)
1433                                 return 1;
1434 #ifdef ECONNRESET
1435                         if(errno == ECONNRESET && verbosity < 2)
1436                                 return 0; /* silence reset by peer */
1437 #endif
1438                         log_err_addr("read (in tcp s)", strerror(errno),
1439                                 &c->repinfo.addr, c->repinfo.addrlen);
1440 #else /* USE_WINSOCK */
1441                         if(WSAGetLastError() == WSAECONNRESET)
1442                                 return 0;
1443                         if(WSAGetLastError() == WSAEINPROGRESS)
1444                                 return 1;
1445                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1446                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1447                                         UB_EV_READ);
1448                                 return 1;
1449                         }
1450                         log_err_addr("read (in tcp s)", 
1451                                 wsa_strerror(WSAGetLastError()),
1452                                 &c->repinfo.addr, c->repinfo.addrlen);
1453 #endif
1454                         return 0;
1455                 } 
1456                 c->tcp_byte_count += r;
1457                 if(c->tcp_byte_count != sizeof(uint16_t))
1458                         return 1;
1459                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1460                         sldns_buffer_capacity(c->buffer)) {
1461                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1462                         return 0;
1463                 }
1464                 sldns_buffer_set_limit(c->buffer, 
1465                         sldns_buffer_read_u16_at(c->buffer, 0));
1466                 if(!short_ok && 
1467                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1468                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1469                         return 0;
1470                 }
1471                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1472                         (int)sldns_buffer_limit(c->buffer));
1473         }
1474
1475         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1476         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1477                 sldns_buffer_remaining(c->buffer), 0);
1478         if(r == 0) {
1479                 if(c->tcp_req_info)
1480                         return tcp_req_info_handle_read_close(c->tcp_req_info);
1481                 return 0;
1482         } else if(r == -1) {
1483 #ifndef USE_WINSOCK
1484                 if(errno == EINTR || errno == EAGAIN)
1485                         return 1;
1486                 log_err_addr("read (in tcp r)", strerror(errno),
1487                         &c->repinfo.addr, c->repinfo.addrlen);
1488 #else /* USE_WINSOCK */
1489                 if(WSAGetLastError() == WSAECONNRESET)
1490                         return 0;
1491                 if(WSAGetLastError() == WSAEINPROGRESS)
1492                         return 1;
1493                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1494                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1495                         return 1;
1496                 }
1497                 log_err_addr("read (in tcp r)",
1498                         wsa_strerror(WSAGetLastError()),
1499                         &c->repinfo.addr, c->repinfo.addrlen);
1500 #endif
1501                 return 0;
1502         }
1503         sldns_buffer_skip(c->buffer, r);
1504         if(sldns_buffer_remaining(c->buffer) <= 0) {
1505                 tcp_callback_reader(c);
1506         }
1507         return 1;
1508 }
1509
1510 /** 
1511  * Handle tcp writing callback. 
      * Sends the 16-bit length (network byte order) followed by the contents
      * of the write buffer. While the length is being sent, progress is kept
      * in c->tcp_byte_count; after that the position of the buffer tells
      * how much data has gone out. The function is re-entered on every
      * write event until the buffer has no bytes remaining, at which point
      * tcp_callback_writer is called.
1512  * @param fd: file descriptor of socket.
1513  * @param c: comm point to write buffer out of.
1514  * @return: 0 on error
      *    1 if the connection can continue, also when the write has to be
      *    retried later. For comm points with ssl the result of
      *    ssl_handle_it is returned.
1515  */
1516 static int
1517 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1518 {
1519         ssize_t r;
1520         struct sldns_buffer *buffer;
1521         log_assert(c->type == comm_tcp);
             /* select the buffer that holds the bytes to send */
1522 #ifdef USE_DNSCRYPT
1523         buffer = c->dnscrypt_buffer;
1524 #else
1525         buffer = c->buffer;
1526 #endif
             /* a write event while reading is a failure for plain tcp; with
              * ssl the event is passed on to ssl_handle_it further down */
1527         if(c->tcp_is_reading && !c->ssl)
1528                 return 0;
1529         log_assert(fd != -1);
1530         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1531                 /* check for pending error from nonblocking connect */
1532                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1533                 int error = 0;
1534                 socklen_t len = (socklen_t)sizeof(error);
1535                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1536                         &len) < 0){
1537 #ifndef USE_WINSOCK
1538                         error = errno; /* on solaris errno is error */
1539 #else /* USE_WINSOCK */
1540                         error = WSAGetLastError();
1541 #endif
1542                 }
                     /* note: both preprocessor branches below end with an open
                      * 'else if(error != 0) {' that is closed after the #endif */
1543 #ifndef USE_WINSOCK
1544 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1545                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1546                         return 1; /* try again later */
1547                 else
1548 #endif
1549                 if(error != 0 && verbosity < 2)
1550                         return 0; /* silence lots of chatter in the logs */
1551                 else if(error != 0) {
1552                         log_err_addr("tcp connect", strerror(error),
1553                                 &c->repinfo.addr, c->repinfo.addrlen);
1554 #else /* USE_WINSOCK */
1555                 /* examine error */
1556                 if(error == WSAEINPROGRESS)
1557                         return 1;
1558                 else if(error == WSAEWOULDBLOCK) {
1559                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1560                         return 1;
1561                 } else if(error != 0 && verbosity < 2)
1562                         return 0;
1563                 else if(error != 0) {
1564                         log_err_addr("tcp connect", wsa_strerror(error),
1565                                 &c->repinfo.addr, c->repinfo.addrlen);
1566 #endif /* USE_WINSOCK */
1567                         return 0;
1568                 }
1569         }
1570         if(c->ssl)
1571                 return ssl_handle_it(c);
1572
1573 #ifdef USE_MSG_FASTOPEN
1574         /* Only try this on first use of a connection that uses tfo, 
1575            otherwise fall through to normal write */
1576         /* Also, TFO support on WINDOWS not implemented at the moment */
1577         if(c->tcp_do_fastopen == 1) {
1578                 /* this form of sendmsg() does both a connect() and send() so need to
1579                    look for various flavours of error*/
1580                 uint16_t len = htons(sldns_buffer_limit(buffer));
1581                 struct msghdr msg;
1582                 struct iovec iov[2];
                     /* the flag is cleared first, so fastopen is tried once */
1583                 c->tcp_do_fastopen = 0;
1584                 memset(&msg, 0, sizeof(msg));
                     /* iov[0]: the part of the length not sent yet,
                      * iov[1]: the whole buffer from its start up to the limit */
1585                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1586                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1587                 iov[1].iov_base = sldns_buffer_begin(buffer);
1588                 iov[1].iov_len = sldns_buffer_limit(buffer);
1589                 log_assert(iov[0].iov_len > 0);
1590                 msg.msg_name = &c->repinfo.addr;
1591                 msg.msg_namelen = c->repinfo.addrlen;
1592                 msg.msg_iov = iov;
1593                 msg.msg_iovlen = 2;
1594                 r = sendmsg(fd, &msg, MSG_FASTOPEN);
1595                 if (r == -1) {
1596 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1597                         /* Handshake is underway, maybe because no TFO cookie available.
1598                            Come back to write the message*/
1599                         if(errno == EINPROGRESS || errno == EWOULDBLOCK)
1600                                 return 1;
1601 #endif
1602                         if(errno == EINTR || errno == EAGAIN)
1603                                 return 1;
1604                         /* Not handling EISCONN here as shouldn't ever hit that case.*/
1605                         if(errno != EPIPE && errno != 0 && verbosity < 2)
1606                                 return 0; /* silence lots of chatter in the logs */
1607                         if(errno != EPIPE && errno != 0) {
1608                                 log_err_addr("tcp sendmsg", strerror(errno),
1609                                         &c->repinfo.addr, c->repinfo.addrlen);
1610                                 return 0;
1611                         }
1612                         /* fallthrough to nonFASTOPEN
1613                          * (MSG_FASTOPEN on Linux 3 produces EPIPE)
1614                          * we need to perform connect() */
1615                         if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) {
1616 #ifdef EINPROGRESS
1617                                 if(errno == EINPROGRESS)
1618                                         return 1; /* wait until connect done*/
1619 #endif
1620 #ifdef USE_WINSOCK
1621                                 if(WSAGetLastError() == WSAEINPROGRESS ||
1622                                         WSAGetLastError() == WSAEWOULDBLOCK)
1623                                         return 1; /* wait until connect done*/
1624 #endif
1625                                 if(tcp_connect_errno_needs_log(
1626                                         (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) {
1627                                         log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
1628                                                 strerror(errno), &c->repinfo.addr, c->repinfo.addrlen);
1629                                 }
1630                                 return 0;
1631                         }
                             /* connect() did not fail: continue with the
                              * regular write code after this block */
1632
1633                 } else {
                             /* r bytes of length and data were accepted by sendmsg */
1634                         c->tcp_byte_count += r;
1635                         if(c->tcp_byte_count < sizeof(uint16_t))
1636                                 return 1;
                             /* what was written beyond the length bytes was data */
1637                         sldns_buffer_set_position(buffer, c->tcp_byte_count - 
1638                                 sizeof(uint16_t));
1639                         if(sldns_buffer_remaining(buffer) == 0) {
1640                                 tcp_callback_writer(c);
1641                                 return 1;
1642                         }
1643                 }
1644         }
1645 #endif /* USE_MSG_FASTOPEN */
1646
             /* the length has not been sent completely yet */
1647         if(c->tcp_byte_count < sizeof(uint16_t)) {
1648                 uint16_t len = htons(sldns_buffer_limit(buffer));
1649 #ifdef HAVE_WRITEV
                     /* length remainder and buffer contents in one system call */
1650                 struct iovec iov[2];
1651                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1652                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1653                 iov[1].iov_base = sldns_buffer_begin(buffer);
1654                 iov[1].iov_len = sldns_buffer_limit(buffer);
1655                 log_assert(iov[0].iov_len > 0);
1656                 r = writev(fd, iov, 2);
1657 #else /* HAVE_WRITEV */
                     /* without writev only the length bytes are sent here, the
                      * data goes out with the send() near the end */
1658                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1659                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1660 #endif /* HAVE_WRITEV */
1661                 if(r == -1) {
1662 #ifndef USE_WINSOCK
1663 #  ifdef EPIPE
1664                         if(errno == EPIPE && verbosity < 2)
1665                                 return 0; /* silence 'broken pipe' */
1666   #endif
1667                         if(errno == EINTR || errno == EAGAIN)
1668                                 return 1;
1669 #ifdef ECONNRESET
1670                         if(errno == ECONNRESET && verbosity < 2)
1671                                 return 0; /* silence reset by peer */
1672 #endif
1673 #  ifdef HAVE_WRITEV
1674                         log_err_addr("tcp writev", strerror(errno),
1675                                 &c->repinfo.addr, c->repinfo.addrlen);
1676 #  else /* HAVE_WRITEV */
1677                         log_err_addr("tcp send s", strerror(errno),
1678                                 &c->repinfo.addr, c->repinfo.addrlen);
1679 #  endif /* HAVE_WRITEV */
1680 #else
1681                         if(WSAGetLastError() == WSAENOTCONN)
1682                                 return 1;
1683                         if(WSAGetLastError() == WSAEINPROGRESS)
1684                                 return 1;
1685                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1686                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1687                                         UB_EV_WRITE);
1688                                 return 1; 
1689                         }
1690                         if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
1691                                 return 0; /* silence reset by peer */
1692                         log_err_addr("tcp send s",
1693                                 wsa_strerror(WSAGetLastError()),
1694                                 &c->repinfo.addr, c->repinfo.addrlen);
1695 #endif
1696                         return 0;
1697                 }
1698                 c->tcp_byte_count += r;
1699                 if(c->tcp_byte_count < sizeof(uint16_t))
1700                         return 1;
                     /* bytes written beyond the length bytes were data, move the
                      * buffer position past them */
1701                 sldns_buffer_set_position(buffer, c->tcp_byte_count - 
1702                         sizeof(uint16_t));
1703                 if(sldns_buffer_remaining(buffer) == 0) {
1704                         tcp_callback_writer(c);
1705                         return 1;
1706                 }
1707         }
             /* send (the rest of) the data, from the buffer position on */
1708         log_assert(sldns_buffer_remaining(buffer) > 0);
1709         r = send(fd, (void*)sldns_buffer_current(buffer), 
1710                 sldns_buffer_remaining(buffer), 0);
1711         if(r == -1) {
1712 #ifndef USE_WINSOCK
1713                 if(errno == EINTR || errno == EAGAIN)
1714                         return 1;
1715 #ifdef ECONNRESET
1716                 if(errno == ECONNRESET && verbosity < 2)
1717                         return 0; /* silence reset by peer */
1718 #endif
1719                 log_err_addr("tcp send r", strerror(errno),
1720                         &c->repinfo.addr, c->repinfo.addrlen);
1721 #else
1722                 if(WSAGetLastError() == WSAEINPROGRESS)
1723                         return 1;
1724                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1725                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1726                         return 1; 
1727                 }
1728                 if(WSAGetLastError() == WSAECONNRESET && verbosity < 2)
1729                         return 0; /* silence reset by peer */
1730                 log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
1731                         &c->repinfo.addr, c->repinfo.addrlen);
1732 #endif
1733                 return 0;
1734         }
1735         sldns_buffer_skip(buffer, r);
1736
             /* nothing left in the buffer: hand over to tcp_callback_writer */
1737         if(sldns_buffer_remaining(buffer) == 0) {
1738                 tcp_callback_writer(c);
1739         }
1740         
1741         return 1;
1742 }
1743
1744 /** read again to drain buffers when there could be more to read */
1745 static void
1746 tcp_req_info_read_again(int fd, struct comm_point* c)
1747 {
1748         while(c->tcp_req_info->read_again) {
1749                 int r;
1750                 c->tcp_req_info->read_again = 0;
1751                 if(c->tcp_is_reading)
1752                         r = comm_point_tcp_handle_read(fd, c, 0);
1753                 else    r = comm_point_tcp_handle_write(fd, c);
1754                 if(!r) {
1755                         reclaim_tcp_handler(c);
1756                         if(!c->tcp_do_close) {
1757                                 fptr_ok(fptr_whitelist_comm_point(
1758                                         c->callback));
1759                                 (void)(*c->callback)(c, c->cb_arg, 
1760                                         NETEVENT_CLOSED, NULL);
1761                         }
1762                         return;
1763                 }
1764         }
1765 }
1766
1767 void 
1768 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1769 {
1770         struct comm_point* c = (struct comm_point*)arg;
1771         log_assert(c->type == comm_tcp);
1772         ub_comm_base_now(c->ev->base);
1773
1774 #ifdef USE_DNSCRYPT
1775         /* Initialize if this is a dnscrypt socket */
1776         if(c->tcp_parent) {
1777                 c->dnscrypt = c->tcp_parent->dnscrypt;
1778         }
1779         if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
1780                 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
1781                 if(!c->dnscrypt_buffer) {
1782                         log_err("Could not allocate dnscrypt buffer");
1783                         reclaim_tcp_handler(c);
1784                         if(!c->tcp_do_close) {
1785                                 fptr_ok(fptr_whitelist_comm_point(
1786                                         c->callback));
1787                                 (void)(*c->callback)(c, c->cb_arg, 
1788                                         NETEVENT_CLOSED, NULL);
1789                         }
1790                         return;
1791                 }
1792         }
1793 #endif
1794
1795         if(event&UB_EV_TIMEOUT) {
1796                 verbose(VERB_QUERY, "tcp took too long, dropped");
1797                 reclaim_tcp_handler(c);
1798                 if(!c->tcp_do_close) {
1799                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1800                         (void)(*c->callback)(c, c->cb_arg,
1801                                 NETEVENT_TIMEOUT, NULL);
1802                 }
1803                 return;
1804         }
1805         if(event&UB_EV_READ) {
1806                 int has_tcpq = (c->tcp_req_info != NULL);
1807                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1808                         reclaim_tcp_handler(c);
1809                         if(!c->tcp_do_close) {
1810                                 fptr_ok(fptr_whitelist_comm_point(
1811                                         c->callback));
1812                                 (void)(*c->callback)(c, c->cb_arg, 
1813                                         NETEVENT_CLOSED, NULL);
1814                         }
1815                 }
1816                 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again)
1817                         tcp_req_info_read_again(fd, c);
1818                 return;
1819         }
1820         if(event&UB_EV_WRITE) {
1821                 int has_tcpq = (c->tcp_req_info != NULL);
1822                 if(!comm_point_tcp_handle_write(fd, c)) {
1823                         reclaim_tcp_handler(c);
1824                         if(!c->tcp_do_close) {
1825                                 fptr_ok(fptr_whitelist_comm_point(
1826                                         c->callback));
1827                                 (void)(*c->callback)(c, c->cb_arg, 
1828                                         NETEVENT_CLOSED, NULL);
1829                         }
1830                 }
1831                 if(has_tcpq && c->tcp_req_info && c->tcp_req_info->read_again)
1832                         tcp_req_info_read_again(fd, c);
1833                 return;
1834         }
1835         log_err("Ignored event %d for tcphdl.", event);
1836 }
1837
1838 /** Make http handler free for next assignment */
1839 static void
1840 reclaim_http_handler(struct comm_point* c)
1841 {
1842         log_assert(c->type == comm_http);
1843         if(c->ssl) {
1844 #ifdef HAVE_SSL
1845                 SSL_shutdown(c->ssl);
1846                 SSL_free(c->ssl);
1847                 c->ssl = NULL;
1848 #endif
1849         }
1850         comm_point_close(c);
1851         if(c->tcp_parent) {
1852                 c->tcp_parent->cur_tcp_count--;
1853                 c->tcp_free = c->tcp_parent->tcp_free;
1854                 c->tcp_parent->tcp_free = c;
1855                 if(!c->tcp_free) {
1856                         /* re-enable listening on accept socket */
1857                         comm_point_start_listening(c->tcp_parent, -1, -1);
1858                 }
1859         }
1860 }
1861
/**
 * Read more http data over ssl into c->buffer, at its current position.
 * @param c: comm point with ssl object and buffer.
 * @return 1 if data was read or the read has to be retried later (for a
 *	wanted write the comm point is switched to listen for write),
 *	0 if the connection was shut down or failed. Without HAVE_SSL
 *	always 0.
 */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int nread;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	nread = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(nread > 0) {
		/* got data, advance the buffer over it */
		sldns_buffer_skip(c->buffer, (ssize_t)nread);
		return 1;
	}
	switch(SSL_get_error(c->ssl, nread)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
#ifdef ECONNRESET
		if(errno == ECONNRESET && verbosity < 2)
			return 0; /* silence reset by peer */
#endif
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1902
1903 /** read more data for http */
1904 static int
1905 http_read_more(int fd, struct comm_point* c)
1906 {
1907         ssize_t r;
1908         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1909         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1910                 sldns_buffer_remaining(c->buffer), 0);
1911         if(r == 0) {
1912                 return 0;
1913         } else if(r == -1) {
1914 #ifndef USE_WINSOCK
1915                 if(errno == EINTR || errno == EAGAIN)
1916                         return 1;
1917                 log_err_addr("read (in http r)", strerror(errno),
1918                         &c->repinfo.addr, c->repinfo.addrlen);
1919 #else /* USE_WINSOCK */
1920                 if(WSAGetLastError() == WSAECONNRESET)
1921                         return 0;
1922                 if(WSAGetLastError() == WSAEINPROGRESS)
1923                         return 1;
1924                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1925                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1926                         return 1;
1927                 }
1928                 log_err_addr("read (in http r)",
1929                         wsa_strerror(WSAGetLastError()),
1930                         &c->repinfo.addr, c->repinfo.addrlen);
1931 #endif
1932                 return 0;
1933         }
1934         sldns_buffer_skip(c->buffer, r);
1935         return 1;
1936 }
1937
1938 /** return true if http header has been read (one line complete) */
1939 static int
1940 http_header_done(sldns_buffer* buf)
1941 {
1942         size_t i;
1943         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
1944                 /* there was a \r before the \n, but we ignore that */
1945                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
1946                         return 1;
1947         }
1948         return 0;
1949 }
1950
1951 /** return character string into buffer for header line, moves buffer
1952  * past that line and puts zero terminator into linefeed-newline */
1953 static char*
1954 http_header_line(sldns_buffer* buf)
1955 {
1956         char* result = (char*)sldns_buffer_current(buf);
1957         size_t i;
1958         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
1959                 /* terminate the string on the \r */
1960                 if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
1961                         sldns_buffer_write_u8_at(buf, i, 0);
1962                 /* terminate on the \n and skip past the it and done */
1963                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
1964                         sldns_buffer_write_u8_at(buf, i, 0);
1965                         sldns_buffer_set_position(buf, i+1);
1966                         return result;
1967                 }
1968         }
1969         return NULL;
1970 }
1971
1972 /** move unread buffer to start and clear rest for putting the rest into it */
1973 static void
1974 http_moveover_buffer(sldns_buffer* buf)
1975 {
1976         size_t pos = sldns_buffer_position(buf);
1977         size_t len = sldns_buffer_remaining(buf);
1978         sldns_buffer_clear(buf);
1979         memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
1980         sldns_buffer_set_position(buf, len);
1981 }
1982
1983 /** a http header is complete, process it */
1984 static int
1985 http_process_initial_header(struct comm_point* c)
1986 {
1987         char* line = http_header_line(c->buffer);
1988         if(!line) return 1;
1989         verbose(VERB_ALGO, "http header: %s", line);
1990         if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
1991                 /* check returncode */
1992                 if(line[9] != '2') {
1993                         verbose(VERB_ALGO, "http bad status %s", line+9);
1994                         return 0;
1995                 }
1996         } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
1997                 if(!c->http_is_chunked)
1998                         c->tcp_byte_count = (size_t)atoi(line+16);
1999         } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
2000                 c->tcp_byte_count = 0;
2001                 c->http_is_chunked = 1;
2002         } else if(line[0] == 0) {
2003                 /* end of initial headers */
2004                 c->http_in_headers = 0;
2005                 if(c->http_is_chunked)
2006                         c->http_in_chunk_headers = 1;
2007                 /* remove header text from front of buffer
2008                  * the buffer is going to be used to return the data segment
2009                  * itself and we don't want the header to get returned
2010                  * prepended with it */
2011                 http_moveover_buffer(c->buffer);
2012                 sldns_buffer_flip(c->buffer);
2013                 return 1;
2014         }
2015         /* ignore other headers */
2016         return 1;
2017 }
2018
2019 /** a chunk header is complete, process it, return 0=fail, 1=continue next
2020  * header line, 2=done with chunked transfer*/
2021 static int
2022 http_process_chunk_header(struct comm_point* c)
2023 {
2024         char* line = http_header_line(c->buffer);
2025         if(!line) return 1;
2026         if(c->http_in_chunk_headers == 3) {
2027                 verbose(VERB_ALGO, "http chunk trailer: %s", line);
2028                 /* are we done ? */
2029                 if(line[0] == 0 && c->tcp_byte_count == 0) {
2030                         /* callback of http reader when NETEVENT_DONE,
2031                          * end of data, with no data in buffer */
2032                         sldns_buffer_set_position(c->buffer, 0);
2033                         sldns_buffer_set_limit(c->buffer, 0);
2034                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2035                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
2036                         /* return that we are done */
2037                         return 2;
2038                 }
2039                 if(line[0] == 0) {
2040                         /* continue with header of the next chunk */
2041                         c->http_in_chunk_headers = 1;
2042                         /* remove header text from front of buffer */
2043                         http_moveover_buffer(c->buffer);
2044                         sldns_buffer_flip(c->buffer);
2045                         return 1;
2046                 }
2047                 /* ignore further trail headers */
2048                 return 1;
2049         }
2050         verbose(VERB_ALGO, "http chunk header: %s", line);
2051         if(c->http_in_chunk_headers == 1) {
2052                 /* read chunked start line */
2053                 char* end = NULL;
2054                 c->tcp_byte_count = (size_t)strtol(line, &end, 16);
2055                 if(end == line)
2056                         return 0;
2057                 c->http_in_chunk_headers = 0;
2058                 /* remove header text from front of buffer */
2059                 http_moveover_buffer(c->buffer);
2060                 sldns_buffer_flip(c->buffer);
2061                 if(c->tcp_byte_count == 0) {
2062                         /* done with chunks, process chunk_trailer lines */
2063                         c->http_in_chunk_headers = 3;
2064                 }
2065                 return 1;
2066         }
2067         /* ignore other headers */
2068         return 1;
2069 }
2070
2071 /** handle nonchunked data segment */
2072 static int
2073 http_nonchunk_segment(struct comm_point* c)
2074 {
2075         /* c->buffer at position..limit has new data we read in.
2076          * the buffer itself is full of nonchunked data.
2077          * we are looking to read tcp_byte_count more data
2078          * and then the transfer is done. */
2079         size_t remainbufferlen;
2080         size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
2081         if(c->tcp_byte_count <= got_now) {
2082                 /* done, this is the last data fragment */
2083                 c->http_stored = 0;
2084                 sldns_buffer_set_position(c->buffer, 0);
2085                 fptr_ok(fptr_whitelist_comm_point(c->callback));
2086                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
2087                 return 1;
2088         }
2089         c->tcp_byte_count -= got_now;
2090         /* if we have the buffer space,
2091          * read more data collected into the buffer */
2092         remainbufferlen = sldns_buffer_capacity(c->buffer) -
2093                 sldns_buffer_limit(c->buffer);
2094         if(remainbufferlen >= c->tcp_byte_count ||
2095                 remainbufferlen >= 2048) {
2096                 size_t total = sldns_buffer_limit(c->buffer);
2097                 sldns_buffer_clear(c->buffer);
2098                 sldns_buffer_set_position(c->buffer, total);
2099                 c->http_stored = total;
2100                 /* return and wait to read more */
2101                 return 1;
2102         }
2103         /* call callback with this data amount, then
2104          * wait for more */
2105         c->http_stored = 0;
2106         sldns_buffer_set_position(c->buffer, 0);
2107         fptr_ok(fptr_whitelist_comm_point(c->callback));
2108         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2109         /* c->callback has to buffer_clear(c->buffer). */
2110         /* return and wait to read more */
2111         return 1;
2112 }
2113
2114 /** handle chunked data segment, return 0=fail, 1=wait, 2=process more */
2115 static int
2116 http_chunked_segment(struct comm_point* c)
2117 {
2118         /* the c->buffer has from position..limit new data we read. */
2119         /* the current chunk has length tcp_byte_count.
2120          * once we read that read more chunk headers.
2121          */
2122         size_t remainbufferlen;
2123         size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
2124         if(c->tcp_byte_count <= got_now) {
2125                 /* the chunk has completed (with perhaps some extra data
2126                  * from next chunk header and next chunk) */
2127                 /* save too much info into temp buffer */
2128                 size_t fraglen;
2129                 struct comm_reply repinfo;
2130                 c->http_stored = 0;
2131                 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
2132                 sldns_buffer_clear(c->http_temp);
2133                 sldns_buffer_write(c->http_temp,
2134                         sldns_buffer_current(c->buffer),
2135                         sldns_buffer_remaining(c->buffer));
2136                 sldns_buffer_flip(c->http_temp);
2137
2138                 /* callback with this fragment */
2139                 fraglen = sldns_buffer_position(c->buffer);
2140                 sldns_buffer_set_position(c->buffer, 0);
2141                 sldns_buffer_set_limit(c->buffer, fraglen);
2142                 repinfo = c->repinfo;
2143                 fptr_ok(fptr_whitelist_comm_point(c->callback));
2144                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
2145                 /* c->callback has to buffer_clear(). */
2146
2147                 /* is commpoint deleted? */
2148                 if(!repinfo.c) {
2149                         return 1;
2150                 }
2151                 /* copy waiting info */
2152                 sldns_buffer_clear(c->buffer);
2153                 sldns_buffer_write(c->buffer,
2154                         sldns_buffer_begin(c->http_temp),
2155                         sldns_buffer_remaining(c->http_temp));
2156                 sldns_buffer_flip(c->buffer);
2157                 /* process end of chunk trailer header lines, until
2158                  * an empty line */
2159                 c->http_in_chunk_headers = 3;
2160                 /* process more data in buffer (if any) */
2161                 return 2;
2162         }
2163         c->tcp_byte_count -= got_now;
2164
2165         /* if we have the buffer space,
2166          * read more data collected into the buffer */
2167         remainbufferlen = sldns_buffer_capacity(c->buffer) -
2168                 sldns_buffer_limit(c->buffer);
2169         if(remainbufferlen >= c->tcp_byte_count ||
2170                 remainbufferlen >= 2048) {
2171                 size_t total = sldns_buffer_limit(c->buffer);
2172                 sldns_buffer_clear(c->buffer);
2173                 sldns_buffer_set_position(c->buffer, total);
2174                 c->http_stored = total;
2175                 /* return and wait to read more */
2176                 return 1;
2177         }
2178         
2179         /* callback of http reader for a new part of the data */
2180         c->http_stored = 0;
2181         sldns_buffer_set_position(c->buffer, 0);
2182         fptr_ok(fptr_whitelist_comm_point(c->callback));
2183         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2184         /* c->callback has to buffer_clear(c->buffer). */
2185         /* return and wait to read more */
2186         return 1;
2187 }
2188
2189 /**
2190  * Handle http reading callback. 
2191  * @param fd: file descriptor of socket.
2192  * @param c: comm point to read from into buffer.
2193  * @return: 0 on error 
2194  */
2195 static int
2196 comm_point_http_handle_read(int fd, struct comm_point* c)
2197 {
2198         log_assert(c->type == comm_http);
2199         log_assert(fd != -1);
2200
2201         /* if we are in ssl handshake, handle SSL handshake */
2202 #ifdef HAVE_SSL
2203         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2204                 if(!ssl_handshake(c))
2205                         return 0;
2206                 if(c->ssl_shake_state != comm_ssl_shake_none)
2207                         return 1;
2208         }
2209 #endif /* HAVE_SSL */
2210
2211         if(!c->tcp_is_reading)
2212                 return 1;
2213         /* read more data */
2214         if(c->ssl) {
2215                 if(!ssl_http_read_more(c))
2216                         return 0;
2217         } else {
2218                 if(!http_read_more(fd, c))
2219                         return 0;
2220         }
2221
2222         sldns_buffer_flip(c->buffer);
2223         while(sldns_buffer_remaining(c->buffer) > 0) {
2224                 /* if we are reading headers, read more headers */
2225                 if(c->http_in_headers || c->http_in_chunk_headers) {
2226                         /* if header is done, process the header */
2227                         if(!http_header_done(c->buffer)) {
2228                                 /* copy remaining data to front of buffer
2229                                  * and set rest for writing into it */
2230                                 http_moveover_buffer(c->buffer);
2231                                 /* return and wait to read more */
2232                                 return 1;
2233                         }
2234                         if(!c->http_in_chunk_headers) {
2235                                 /* process initial headers */
2236                                 if(!http_process_initial_header(c))
2237                                         return 0;
2238                         } else {
2239                                 /* process chunk headers */
2240                                 int r = http_process_chunk_header(c);
2241                                 if(r == 0) return 0;
2242                                 if(r == 2) return 1; /* done */
2243                                 /* r == 1, continue */
2244                         }
2245                         /* see if we have more to process */
2246                         continue;
2247                 }
2248
2249                 if(!c->http_is_chunked) {
2250                         /* if we are reading nonchunks, process that*/
2251                         return http_nonchunk_segment(c);
2252                 } else {
2253                         /* if we are reading chunks, read the chunk */
2254                         int r = http_chunked_segment(c);
2255                         if(r == 0) return 0;
2256                         if(r == 1) return 1;
2257                         continue;
2258                 }
2259         }
2260         /* broke out of the loop; could not process header instead need
2261          * to read more */
2262         /* moveover any remaining data and read more data */
2263         http_moveover_buffer(c->buffer);
2264         /* return and wait to read more */
2265         return 1;
2266 }
2267
/**
 * Check pending connect for http.
 * Fetches the SO_ERROR status of the socket after a nonblocking connect.
 * @param fd: the socket.
 * @param c: comm point, its repinfo address is used in the error log.
 * @return 0 on a connect error (only logged if verbosity is 2 or more),
 *	1 if the connect is still in progress (try again later),
 *	2 if there is no error and the socket can be processed further.
 */
static int
http_check_connect(int fd, struct comm_point* c)
{
        /* check for pending error from nonblocking connect */
        /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
        int error = 0;
        socklen_t len = (socklen_t)sizeof(error);
        if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
                &len) < 0){
                /* getsockopt itself failed, use its error code instead */
#ifndef USE_WINSOCK
                error = errno; /* on solaris errno is error */
#else /* USE_WINSOCK */
                error = WSAGetLastError();
#endif
        }
        /* both platform variants below end inside the 'error != 0' block,
         * which is closed by the shared 'return 0; }' after the #endif */
#ifndef USE_WINSOCK
#if defined(EINPROGRESS) && defined(EWOULDBLOCK)
        if(error == EINPROGRESS || error == EWOULDBLOCK)
                return 1; /* try again later */
        else
#endif
        if(error != 0 && verbosity < 2)
                return 0; /* silence lots of chatter in the logs */
        else if(error != 0) {
                log_err_addr("http connect", strerror(error),
                        &c->repinfo.addr, c->repinfo.addrlen);
#else /* USE_WINSOCK */
        /* examine error */
        if(error == WSAEINPROGRESS)
                return 1;
        else if(error == WSAEWOULDBLOCK) {
                ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
                return 1;
        } else if(error != 0 && verbosity < 2)
                return 0;
        else if(error != 0) {
                log_err_addr("http connect", wsa_strerror(error),
                        &c->repinfo.addr, c->repinfo.addrlen);
#endif /* USE_WINSOCK */
                return 0;
        }
        /* keep on processing this socket */
        return 2;
}
2313
/**
 * Write more data for http over ssl.
 * Writes from the current position of c->buffer and advances the
 * position by the number of bytes that SSL_write accepted.
 * @param c: comm point with the ssl connection and the buffer.
 * @return 0 on failure or closed connection, 1 on success or when the
 *	write has to be retried later. Without HAVE_SSL always 0.
 */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
        int written;
        log_assert(sldns_buffer_remaining(c->buffer) > 0);
        ERR_clear_error();
        written = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
                (int)sldns_buffer_remaining(c->buffer));
        if(written > 0) {
                /* progress was made, move past the bytes sent */
                sldns_buffer_skip(c->buffer, (ssize_t)written);
                return 1;
        }

        switch(SSL_get_error(c->ssl, written)) {
        case SSL_ERROR_ZERO_RETURN:
                return 0; /* closed */
        case SSL_ERROR_WANT_READ:
                /* ssl needs to read first, listen for read only */
                c->ssl_shake_state = comm_ssl_shake_hs_read;
                comm_point_listen_for_rw(c, 1, 0);
                return 1; /* wait for read condition */
        case SSL_ERROR_WANT_WRITE:
                return 1; /* write more later */
        case SSL_ERROR_SYSCALL:
#ifdef EPIPE
                if(errno == EPIPE && verbosity < 2)
                        return 0; /* silence 'broken pipe' */
#endif
                if(errno != 0)
                        log_err("SSL_write syscall: %s",
                                strerror(errno));
                return 0;
        default:
                break;
        }
        log_crypto_err("could not SSL_write");
        return 0;
#else
        (void)c;
        return 0;
#endif /* HAVE_SSL */
}
2354
2355 /** write more data for http */
2356 static int
2357 http_write_more(int fd, struct comm_point* c)
2358 {
2359         ssize_t r;
2360         log_assert(sldns_buffer_remaining(c->buffer) > 0);
2361         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
2362                 sldns_buffer_remaining(c->buffer), 0);
2363         if(r == -1) {
2364 #ifndef USE_WINSOCK
2365                 if(errno == EINTR || errno == EAGAIN)
2366                         return 1;
2367                 log_err_addr("http send r", strerror(errno),
2368                         &c->repinfo.addr, c->repinfo.addrlen);
2369 #else
2370                 if(WSAGetLastError() == WSAEINPROGRESS)
2371                         return 1;
2372                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2373                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2374                         return 1; 
2375                 }
2376                 log_err_addr("http send r", wsa_strerror(WSAGetLastError()),
2377                         &c->repinfo.addr, c->repinfo.addrlen);
2378 #endif
2379                 return 0;
2380         }
2381         sldns_buffer_skip(c->buffer, r);
2382         return 1;
2383 }
2384
2385 /** 
2386  * Handle http writing callback. 
2387  * @param fd: file descriptor of socket.
2388  * @param c: comm point to write buffer out of.
2389  * @return: 0 on error
2390  */
2391 static int
2392 comm_point_http_handle_write(int fd, struct comm_point* c)
2393 {
2394         log_assert(c->type == comm_http);
2395         log_assert(fd != -1);
2396
2397         /* check pending connect errors, if that fails, we wait for more,
2398          * or we can continue to write contents */
2399         if(c->tcp_check_nb_connect) {
2400                 int r = http_check_connect(fd, c);
2401                 if(r == 0) return 0;
2402                 if(r == 1) return 1;
2403                 c->tcp_check_nb_connect = 0;
2404         }
2405         /* if we are in ssl handshake, handle SSL handshake */
2406 #ifdef HAVE_SSL
2407         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2408                 if(!ssl_handshake(c))
2409                         return 0;
2410                 if(c->ssl_shake_state != comm_ssl_shake_none)
2411                         return 1;
2412         }
2413 #endif /* HAVE_SSL */
2414         if(c->tcp_is_reading)
2415                 return 1;
2416         /* if we are writing, write more */
2417         if(c->ssl) {
2418                 if(!ssl_http_write_more(c))
2419                         return 0;
2420         } else {
2421                 if(!http_write_more(fd, c))
2422                         return 0;
2423         }
2424
2425         /* we write a single buffer contents, that can contain
2426          * the http request, and then flip to read the results */
2427         /* see if write is done */
2428         if(sldns_buffer_remaining(c->buffer) == 0) {
2429                 sldns_buffer_clear(c->buffer);
2430                 if(c->tcp_do_toggle_rw)
2431                         c->tcp_is_reading = 1;
2432                 c->tcp_byte_count = 0;
2433                 /* switch from listening(write) to listening(read) */
2434                 comm_point_stop_listening(c);
2435                 comm_point_start_listening(c, -1, -1);
2436         }
2437         return 1;
2438 }
2439
2440 void 
2441 comm_point_http_handle_callback(int fd, short event, void* arg)
2442 {
2443         struct comm_point* c = (struct comm_point*)arg;
2444         log_assert(c->type == comm_http);
2445         ub_comm_base_now(c->ev->base);
2446
2447         if(event&UB_EV_TIMEOUT) {
2448                 verbose(VERB_QUERY, "http took too long, dropped");
2449                 reclaim_http_handler(c);
2450                 if(!c->tcp_do_close) {
2451                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2452                         (void)(*c->callback)(c, c->cb_arg,
2453                                 NETEVENT_TIMEOUT, NULL);
2454                 }
2455                 return;
2456         }
2457         if(event&UB_EV_READ) {
2458                 if(!comm_point_http_handle_read(fd, c)) {
2459                         reclaim_http_handler(c);
2460                         if(!c->tcp_do_close) {
2461                                 fptr_ok(fptr_whitelist_comm_point(
2462                                         c->callback));
2463                                 (void)(*c->callback)(c, c->cb_arg, 
2464                                         NETEVENT_CLOSED, NULL);
2465                         }
2466                 }
2467                 return;
2468         }
2469         if(event&UB_EV_WRITE) {
2470                 if(!comm_point_http_handle_write(fd, c)) {
2471                         reclaim_http_handler(c);
2472                         if(!c->tcp_do_close) {
2473                                 fptr_ok(fptr_whitelist_comm_point(
2474                                         c->callback));
2475                                 (void)(*c->callback)(c, c->cb_arg, 
2476                                         NETEVENT_CLOSED, NULL);
2477                         }
2478                 }
2479                 return;
2480         }
2481         log_err("Ignored event %d for httphdl.", event);
2482 }
2483
2484 void comm_point_local_handle_callback(int fd, short event, void* arg)
2485 {
2486         struct comm_point* c = (struct comm_point*)arg;
2487         log_assert(c->type == comm_local);
2488         ub_comm_base_now(c->ev->base);
2489
2490         if(event&UB_EV_READ) {
2491                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
2492                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2493                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
2494                                 NULL);
2495                 }
2496                 return;
2497         }
2498         log_err("Ignored event %d for localhdl.", event);
2499 }
2500
2501 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
2502         short event, void* arg)
2503 {
2504         struct comm_point* c = (struct comm_point*)arg;
2505         int err = NETEVENT_NOERROR;
2506         log_assert(c->type == comm_raw);
2507         ub_comm_base_now(c->ev->base);
2508         
2509         if(event&UB_EV_TIMEOUT)
2510                 err = NETEVENT_TIMEOUT;
2511         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
2512         (void)(*c->callback)(c, c->cb_arg, err, NULL);
2513 }
2514
2515 struct comm_point* 
2516 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
2517         comm_point_callback_type* callback, void* callback_arg)
2518 {
2519         struct comm_point* c = (struct comm_point*)calloc(1,
2520                 sizeof(struct comm_point));
2521         short evbits;
2522         if(!c)
2523                 return NULL;
2524         c->ev = (struct internal_event*)calloc(1,
2525                 sizeof(struct internal_event));
2526         if(!c->ev) {
2527                 free(c);
2528                 return NULL;
2529         }
2530         c->ev->base = base;
2531         c->fd = fd;
2532         c->buffer = buffer;
2533         c->timeout = NULL;
2534         c->tcp_is_reading = 0;
2535         c->tcp_byte_count = 0;
2536         c->tcp_parent = NULL;
2537         c->max_tcp_count = 0;
2538         c->cur_tcp_count = 0;
2539         c->tcp_handlers = NULL;
2540         c->tcp_free = NULL;
2541         c->type = comm_udp;
2542         c->tcp_do_close = 0;
2543         c->do_not_close = 0;
2544         c->tcp_do_toggle_rw = 0;
2545         c->tcp_check_nb_connect = 0;
2546 #ifdef USE_MSG_FASTOPEN
2547         c->tcp_do_fastopen = 0;
2548 #endif
2549 #ifdef USE_DNSCRYPT
2550         c->dnscrypt = 0;
2551         c->dnscrypt_buffer = buffer;
2552 #endif
2553         c->inuse = 0;
2554         c->callback = callback;
2555         c->cb_arg = callback_arg;
2556         evbits = UB_EV_READ | UB_EV_PERSIST;
2557         /* ub_event stuff */
2558         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2559                 comm_point_udp_callback, c);
2560         if(c->ev->ev == NULL) {
2561                 log_err("could not baseset udp event");
2562                 comm_point_delete(c);
2563                 return NULL;
2564         }
2565         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
2566                 log_err("could not add udp event");
2567                 comm_point_delete(c);
2568                 return NULL;
2569         }
2570         return c;
2571 }
2572
2573 struct comm_point* 
2574 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
2575         sldns_buffer* buffer, 
2576         comm_point_callback_type* callback, void* callback_arg)
2577 {
2578         struct comm_point* c = (struct comm_point*)calloc(1,
2579                 sizeof(struct comm_point));
2580         short evbits;
2581         if(!c)
2582                 return NULL;
2583         c->ev = (struct internal_event*)calloc(1,
2584                 sizeof(struct internal_event));
2585         if(!c->ev) {
2586                 free(c);
2587                 return NULL;
2588         }
2589         c->ev->base = base;
2590         c->fd = fd;
2591         c->buffer = buffer;
2592         c->timeout = NULL;
2593         c->tcp_is_reading = 0;
2594         c->tcp_byte_count = 0;
2595         c->tcp_parent = NULL;
2596         c->max_tcp_count = 0;
2597         c->cur_tcp_count = 0;
2598         c->tcp_handlers = NULL;
2599         c->tcp_free = NULL;
2600         c->type = comm_udp;
2601         c->tcp_do_close = 0;
2602         c->do_not_close = 0;
2603 #ifdef USE_DNSCRYPT
2604         c->dnscrypt = 0;
2605         c->dnscrypt_buffer = buffer;
2606 #endif
2607         c->inuse = 0;
2608         c->tcp_do_toggle_rw = 0;
2609         c->tcp_check_nb_connect = 0;
2610 #ifdef USE_MSG_FASTOPEN
2611         c->tcp_do_fastopen = 0;
2612 #endif
2613         c->callback = callback;
2614         c->cb_arg = callback_arg;
2615         evbits = UB_EV_READ | UB_EV_PERSIST;
2616         /* ub_event stuff */
2617         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2618                 comm_point_udp_ancil_callback, c);
2619         if(c->ev->ev == NULL) {
2620                 log_err("could not baseset udp event");
2621                 comm_point_delete(c);
2622                 return NULL;
2623         }
2624         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
2625                 log_err("could not add udp event");
2626                 comm_point_delete(c);
2627                 return NULL;
2628         }
2629         return c;
2630 }
2631
2632 static struct comm_point* 
2633 comm_point_create_tcp_handler(struct comm_base *base, 
2634         struct comm_point* parent, size_t bufsize,
2635         struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
2636         void* callback_arg)
2637 {
2638         struct comm_point* c = (struct comm_point*)calloc(1,
2639                 sizeof(struct comm_point));
2640         short evbits;
2641         if(!c)
2642                 return NULL;
2643         c->ev = (struct internal_event*)calloc(1,
2644                 sizeof(struct internal_event));
2645         if(!c->ev) {
2646                 free(c);
2647                 return NULL;
2648         }
2649         c->ev->base = base;
2650         c->fd = -1;
2651         c->buffer = sldns_buffer_new(bufsize);
2652         if(!c->buffer) {
2653                 free(c->ev);
2654                 free(c);
2655                 return NULL;
2656         }
2657         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
2658         if(!c->timeout) {
2659                 sldns_buffer_free(c->buffer);
2660                 free(c->ev);
2661                 free(c);
2662                 return NULL;
2663         }
2664         c->tcp_is_reading = 0;
2665         c->tcp_byte_count = 0;
2666         c->tcp_parent = parent;
2667         c->tcp_timeout_msec = parent->tcp_timeout_msec;
2668         c->tcp_conn_limit = parent->tcp_conn_limit;
2669         c->tcl_addr = NULL;
2670         c->tcp_keepalive = 0;
2671         c->max_tcp_count = 0;
2672         c->cur_tcp_count = 0;
2673         c->tcp_handlers = NULL;
2674         c->tcp_free = NULL;
2675         c->type = comm_tcp;
2676         c->tcp_do_close = 0;
2677         c->do_not_close = 0;
2678         c->tcp_do_toggle_rw = 1;
2679         c->tcp_check_nb_connect = 0;
2680 #ifdef USE_MSG_FASTOPEN
2681         c->tcp_do_fastopen = 0;
2682 #endif
2683 #ifdef USE_DNSCRYPT
2684         c->dnscrypt = 0;
2685         /* We don't know just yet if this is a dnscrypt channel. Allocation
2686          * will be done when handling the callback. */
2687         c->dnscrypt_buffer = c->buffer;
2688 #endif
2689         c->repinfo.c = c;
2690         c->callback = callback;
2691         c->cb_arg = callback_arg;
2692         if(spoolbuf) {
2693                 c->tcp_req_info = tcp_req_info_create(spoolbuf);
2694                 if(!c->tcp_req_info) {
2695                         log_err("could not create tcp commpoint");
2696                         sldns_buffer_free(c->buffer);
2697                         free(c->timeout);
2698                         free(c->ev);
2699                         free(c);
2700                         return NULL;
2701                 }
2702                 c->tcp_req_info->cp = c;
2703                 c->tcp_do_close = 1;
2704                 c->tcp_do_toggle_rw = 0;
2705         }
2706         /* add to parent free list */
2707         c->tcp_free = parent->tcp_free;
2708         parent->tcp_free = c;
2709         /* ub_event stuff */
2710         evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
2711         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2712                 comm_point_tcp_handle_callback, c);
2713         if(c->ev->ev == NULL)
2714         {
2715                 log_err("could not basetset tcphdl event");
2716                 parent->tcp_free = c->tcp_free;
2717                 tcp_req_info_delete(c->tcp_req_info);
2718                 sldns_buffer_free(c->buffer);
2719                 free(c->timeout);
2720                 free(c->ev);
2721                 free(c);
2722                 return NULL;
2723         }
2724         return c;
2725 }
2726
2727 struct comm_point* 
2728 comm_point_create_tcp(struct comm_base *base, int fd, int num,
2729         int idle_timeout, struct tcl_list* tcp_conn_limit, size_t bufsize,
2730         struct sldns_buffer* spoolbuf, comm_point_callback_type* callback,
2731         void* callback_arg)
2732 {
2733         struct comm_point* c = (struct comm_point*)calloc(1,
2734                 sizeof(struct comm_point));
2735         short evbits;
2736         int i;
2737         /* first allocate the TCP accept listener */
2738         if(!c)
2739                 return NULL;
2740         c->ev = (struct internal_event*)calloc(1,
2741                 sizeof(struct internal_event));
2742         if(!c->ev) {
2743                 free(c);
2744                 return NULL;
2745         }
2746         c->ev->base = base;
2747         c->fd = fd;
2748         c->buffer = NULL;
2749         c->timeout = NULL;
2750         c->tcp_is_reading = 0;
2751         c->tcp_byte_count = 0;
2752         c->tcp_timeout_msec = idle_timeout;
2753         c->tcp_conn_limit = tcp_conn_limit;
2754         c->tcl_addr = NULL;
2755         c->tcp_keepalive = 0;
2756         c->tcp_parent = NULL;
2757         c->max_tcp_count = num;
2758         c->cur_tcp_count = 0;
2759         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
2760                 sizeof(struct comm_point*));
2761         if(!c->tcp_handlers) {
2762                 free(c->ev);
2763                 free(c);
2764                 return NULL;
2765         }
2766         c->tcp_free = NULL;
2767         c->type = comm_tcp_accept;
2768         c->tcp_do_close = 0;
2769         c->do_not_close = 0;
2770         c->tcp_do_toggle_rw = 0;
2771         c->tcp_check_nb_connect = 0;
2772 #ifdef USE_MSG_FASTOPEN
2773         c->tcp_do_fastopen = 0;
2774 #endif
2775 #ifdef USE_DNSCRYPT
2776         c->dnscrypt = 0;
2777         c->dnscrypt_buffer = NULL;
2778 #endif
2779         c->callback = NULL;
2780         c->cb_arg = NULL;
2781         evbits = UB_EV_READ | UB_EV_PERSIST;
2782         /* ub_event stuff */
2783         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2784                 comm_point_tcp_accept_callback, c);
2785         if(c->ev->ev == NULL) {
2786                 log_err("could not baseset tcpacc event");
2787                 comm_point_delete(c);
2788                 return NULL;
2789         }
2790         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2791                 log_err("could not add tcpacc event");
2792                 comm_point_delete(c);
2793                 return NULL;
2794         }
2795         /* now prealloc the tcp handlers */
2796         for(i=0; i<num; i++) {
2797                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
2798                         c, bufsize, spoolbuf, callback, callback_arg);
2799                 if(!c->tcp_handlers[i]) {
2800                         comm_point_delete(c);
2801                         return NULL;
2802                 }
2803         }
2804         
2805         return c;
2806 }
2807
2808 struct comm_point* 
2809 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
2810         comm_point_callback_type* callback, void* callback_arg)
2811 {
2812         struct comm_point* c = (struct comm_point*)calloc(1,
2813                 sizeof(struct comm_point));
2814         short evbits;
2815         if(!c)
2816                 return NULL;
2817         c->ev = (struct internal_event*)calloc(1,
2818                 sizeof(struct internal_event));
2819         if(!c->ev) {
2820                 free(c);
2821                 return NULL;
2822         }
2823         c->ev->base = base;
2824         c->fd = -1;
2825         c->buffer = sldns_buffer_new(bufsize);
2826         if(!c->buffer) {
2827                 free(c->ev);
2828                 free(c);
2829                 return NULL;
2830         }
2831         c->timeout = NULL;
2832         c->tcp_is_reading = 0;
2833         c->tcp_byte_count = 0;
2834         c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
2835         c->tcp_conn_limit = NULL;
2836         c->tcl_addr = NULL;
2837         c->tcp_keepalive = 0;
2838         c->tcp_parent = NULL;
2839         c->max_tcp_count = 0;
2840         c->cur_tcp_count = 0;
2841         c->tcp_handlers = NULL;
2842         c->tcp_free = NULL;
2843         c->type = comm_tcp;
2844         c->tcp_do_close = 0;
2845         c->do_not_close = 0;
2846         c->tcp_do_toggle_rw = 1;
2847         c->tcp_check_nb_connect = 1;
2848 #ifdef USE_MSG_FASTOPEN
2849         c->tcp_do_fastopen = 1;
2850 #endif
2851 #ifdef USE_DNSCRYPT
2852         c->dnscrypt = 0;
2853         c->dnscrypt_buffer = c->buffer;
2854 #endif
2855         c->repinfo.c = c;
2856         c->callback = callback;
2857         c->cb_arg = callback_arg;
2858         evbits = UB_EV_PERSIST | UB_EV_WRITE;
2859         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2860                 comm_point_tcp_handle_callback, c);
2861         if(c->ev->ev == NULL)
2862         {
2863                 log_err("could not baseset tcpout event");
2864                 sldns_buffer_free(c->buffer);
2865                 free(c->ev);
2866                 free(c);
2867                 return NULL;
2868         }
2869
2870         return c;
2871 }
2872
2873 struct comm_point* 
2874 comm_point_create_http_out(struct comm_base *base, size_t bufsize,
2875         comm_point_callback_type* callback, void* callback_arg,
2876         sldns_buffer* temp)
2877 {
2878         struct comm_point* c = (struct comm_point*)calloc(1,
2879                 sizeof(struct comm_point));
2880         short evbits;
2881         if(!c)
2882                 return NULL;
2883         c->ev = (struct internal_event*)calloc(1,
2884                 sizeof(struct internal_event));
2885         if(!c->ev) {
2886                 free(c);
2887                 return NULL;
2888         }
2889         c->ev->base = base;
2890         c->fd = -1;
2891         c->buffer = sldns_buffer_new(bufsize);
2892         if(!c->buffer) {
2893                 free(c->ev);
2894                 free(c);
2895                 return NULL;
2896         }
2897         c->timeout = NULL;
2898         c->tcp_is_reading = 0;
2899         c->tcp_byte_count = 0;
2900         c->tcp_parent = NULL;
2901         c->max_tcp_count = 0;
2902         c->cur_tcp_count = 0;
2903         c->tcp_handlers = NULL;
2904         c->tcp_free = NULL;
2905         c->type = comm_http;
2906         c->tcp_do_close = 0;
2907         c->do_not_close = 0;
2908         c->tcp_do_toggle_rw = 1;
2909         c->tcp_check_nb_connect = 1;
2910         c->http_in_headers = 1;
2911         c->http_in_chunk_headers = 0;
2912         c->http_is_chunked = 0;
2913         c->http_temp = temp;
2914 #ifdef USE_MSG_FASTOPEN
2915         c->tcp_do_fastopen = 1;
2916 #endif
2917 #ifdef USE_DNSCRYPT
2918         c->dnscrypt = 0;
2919         c->dnscrypt_buffer = c->buffer;
2920 #endif
2921         c->repinfo.c = c;
2922         c->callback = callback;
2923         c->cb_arg = callback_arg;
2924         evbits = UB_EV_PERSIST | UB_EV_WRITE;
2925         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2926                 comm_point_http_handle_callback, c);
2927         if(c->ev->ev == NULL)
2928         {
2929                 log_err("could not baseset tcpout event");
2930 #ifdef HAVE_SSL
2931                 SSL_free(c->ssl);
2932 #endif
2933                 sldns_buffer_free(c->buffer);
2934                 free(c->ev);
2935                 free(c);
2936                 return NULL;
2937         }
2938
2939         return c;
2940 }
2941
2942 struct comm_point* 
2943 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
2944         comm_point_callback_type* callback, void* callback_arg)
2945 {
2946         struct comm_point* c = (struct comm_point*)calloc(1,
2947                 sizeof(struct comm_point));
2948         short evbits;
2949         if(!c)
2950                 return NULL;
2951         c->ev = (struct internal_event*)calloc(1,
2952                 sizeof(struct internal_event));
2953         if(!c->ev) {
2954                 free(c);
2955                 return NULL;
2956         }
2957         c->ev->base = base;
2958         c->fd = fd;
2959         c->buffer = sldns_buffer_new(bufsize);
2960         if(!c->buffer) {
2961                 free(c->ev);
2962                 free(c);
2963                 return NULL;
2964         }
2965         c->timeout = NULL;
2966         c->tcp_is_reading = 1;
2967         c->tcp_byte_count = 0;
2968         c->tcp_parent = NULL;
2969         c->max_tcp_count = 0;
2970         c->cur_tcp_count = 0;
2971         c->tcp_handlers = NULL;
2972         c->tcp_free = NULL;
2973         c->type = comm_local;
2974         c->tcp_do_close = 0;
2975         c->do_not_close = 1;
2976         c->tcp_do_toggle_rw = 0;
2977         c->tcp_check_nb_connect = 0;
2978 #ifdef USE_MSG_FASTOPEN
2979         c->tcp_do_fastopen = 0;
2980 #endif
2981 #ifdef USE_DNSCRYPT
2982         c->dnscrypt = 0;
2983         c->dnscrypt_buffer = c->buffer;
2984 #endif
2985         c->callback = callback;
2986         c->cb_arg = callback_arg;
2987         /* ub_event stuff */
2988         evbits = UB_EV_PERSIST | UB_EV_READ;
2989         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2990                 comm_point_local_handle_callback, c);
2991         if(c->ev->ev == NULL) {
2992                 log_err("could not baseset localhdl event");
2993                 free(c->ev);
2994                 free(c);
2995                 return NULL;
2996         }
2997         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2998                 log_err("could not add localhdl event");
2999                 ub_event_free(c->ev->ev);
3000                 free(c->ev);
3001                 free(c);
3002                 return NULL;
3003         }
3004         return c;
3005 }
3006
3007 struct comm_point* 
3008 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
3009         comm_point_callback_type* callback, void* callback_arg)
3010 {
3011         struct comm_point* c = (struct comm_point*)calloc(1,
3012                 sizeof(struct comm_point));
3013         short evbits;
3014         if(!c)
3015                 return NULL;
3016         c->ev = (struct internal_event*)calloc(1,
3017                 sizeof(struct internal_event));
3018         if(!c->ev) {
3019                 free(c);
3020                 return NULL;
3021         }
3022         c->ev->base = base;
3023         c->fd = fd;
3024         c->buffer = NULL;
3025         c->timeout = NULL;
3026         c->tcp_is_reading = 0;
3027         c->tcp_byte_count = 0;
3028         c->tcp_parent = NULL;
3029         c->max_tcp_count = 0;
3030         c->cur_tcp_count = 0;
3031         c->tcp_handlers = NULL;
3032         c->tcp_free = NULL;
3033         c->type = comm_raw;
3034         c->tcp_do_close = 0;
3035         c->do_not_close = 1;
3036         c->tcp_do_toggle_rw = 0;
3037         c->tcp_check_nb_connect = 0;
3038 #ifdef USE_MSG_FASTOPEN
3039         c->tcp_do_fastopen = 0;
3040 #endif
3041 #ifdef USE_DNSCRYPT
3042         c->dnscrypt = 0;
3043         c->dnscrypt_buffer = c->buffer;
3044 #endif
3045         c->callback = callback;
3046         c->cb_arg = callback_arg;
3047         /* ub_event stuff */
3048         if(writing)
3049                 evbits = UB_EV_PERSIST | UB_EV_WRITE;
3050         else    evbits = UB_EV_PERSIST | UB_EV_READ;
3051         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
3052                 comm_point_raw_handle_callback, c);
3053         if(c->ev->ev == NULL) {
3054                 log_err("could not baseset rawhdl event");
3055                 free(c->ev);
3056                 free(c);
3057                 return NULL;
3058         }
3059         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
3060                 log_err("could not add rawhdl event");
3061                 ub_event_free(c->ev->ev);
3062                 free(c->ev);
3063                 free(c);
3064                 return NULL;
3065         }
3066         return c;
3067 }
3068
3069 void 
3070 comm_point_close(struct comm_point* c)
3071 {
3072         if(!c)
3073                 return;
3074         if(c->fd != -1) {
3075                 if(ub_event_del(c->ev->ev) != 0) {
3076                         log_err("could not event_del on close");
3077                 }
3078         }
3079         tcl_close_connection(c->tcl_addr);
3080         if(c->tcp_req_info)
3081                 tcp_req_info_clear(c->tcp_req_info);
3082         /* close fd after removing from event lists, or epoll.. is messed up */
3083         if(c->fd != -1 && !c->do_not_close) {
3084                 if(c->type == comm_tcp || c->type == comm_http) {
3085                         /* delete sticky events for the fd, it gets closed */
3086                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
3087                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
3088                 }
3089                 verbose(VERB_ALGO, "close fd %d", c->fd);
3090 #ifndef USE_WINSOCK
3091                 close(c->fd);
3092 #else
3093                 closesocket(c->fd);
3094 #endif
3095         }
3096         c->fd = -1;
3097 }
3098
3099 void 
3100 comm_point_delete(struct comm_point* c)
3101 {
3102         if(!c) 
3103                 return;
3104         if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
3105 #ifdef HAVE_SSL
3106                 SSL_shutdown(c->ssl);
3107                 SSL_free(c->ssl);
3108 #endif
3109         }
3110         comm_point_close(c);
3111         if(c->tcp_handlers) {
3112                 int i;
3113                 for(i=0; i<c->max_tcp_count; i++)
3114                         comm_point_delete(c->tcp_handlers[i]);
3115                 free(c->tcp_handlers);
3116         }
3117         free(c->timeout);
3118         if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
3119                 sldns_buffer_free(c->buffer);
3120 #ifdef USE_DNSCRYPT
3121                 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
3122                         sldns_buffer_free(c->dnscrypt_buffer);
3123                 }
3124 #endif
3125                 if(c->tcp_req_info) {
3126                         tcp_req_info_delete(c->tcp_req_info);
3127                 }
3128         }
3129         ub_event_free(c->ev->ev);
3130         free(c->ev);
3131         free(c);
3132 }
3133
3134 void 
3135 comm_point_send_reply(struct comm_reply *repinfo)
3136 {
3137         struct sldns_buffer* buffer;
3138         log_assert(repinfo && repinfo->c);
3139 #ifdef USE_DNSCRYPT
3140         buffer = repinfo->c->dnscrypt_buffer;
3141         if(!dnsc_handle_uncurved_request(repinfo)) {
3142                 return;
3143         }
3144 #else
3145         buffer = repinfo->c->buffer;
3146 #endif
3147         if(repinfo->c->type == comm_udp) {
3148                 if(repinfo->srctype)
3149                         comm_point_send_udp_msg_if(repinfo->c, 
3150                         buffer, (struct sockaddr*)&repinfo->addr, 
3151                         repinfo->addrlen, repinfo);
3152                 else
3153                         comm_point_send_udp_msg(repinfo->c, buffer,
3154                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
3155 #ifdef USE_DNSTAP
3156                 if(repinfo->c->dtenv != NULL &&
3157                    repinfo->c->dtenv->log_client_response_messages)
3158                         dt_msg_send_client_response(repinfo->c->dtenv,
3159                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
3160 #endif
3161         } else {
3162 #ifdef USE_DNSTAP
3163                 if(repinfo->c->tcp_parent->dtenv != NULL &&
3164                    repinfo->c->tcp_parent->dtenv->log_client_response_messages)
3165                         dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
3166                         &repinfo->addr, repinfo->c->type,
3167                         ( repinfo->c->tcp_req_info
3168                         ? repinfo->c->tcp_req_info->spool_buffer
3169                         : repinfo->c->buffer ));
3170 #endif
3171                 if(repinfo->c->tcp_req_info) {
3172                         tcp_req_info_send_reply(repinfo->c->tcp_req_info);
3173                 } else {
3174                         comm_point_start_listening(repinfo->c, -1,
3175                                 repinfo->c->tcp_timeout_msec);
3176                 }
3177         }
3178 }
3179
3180 void 
3181 comm_point_drop_reply(struct comm_reply* repinfo)
3182 {
3183         if(!repinfo)
3184                 return;
3185         log_assert(repinfo->c);
3186         log_assert(repinfo->c->type != comm_tcp_accept);
3187         if(repinfo->c->type == comm_udp)
3188                 return;
3189         if(repinfo->c->tcp_req_info)
3190                 repinfo->c->tcp_req_info->is_drop = 1;
3191         reclaim_tcp_handler(repinfo->c);
3192 }
3193
3194 void 
3195 comm_point_stop_listening(struct comm_point* c)
3196 {
3197         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
3198         if(ub_event_del(c->ev->ev) != 0) {
3199                 log_err("event_del error to stoplisten");
3200         }
3201 }
3202
3203 void 
3204 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
3205 {
3206         verbose(VERB_ALGO, "comm point start listening %d (%d msec)", 
3207                 c->fd==-1?newfd:c->fd, msec);
3208         if(c->type == comm_tcp_accept && !c->tcp_free) {
3209                 /* no use to start listening no free slots. */
3210                 return;
3211         }
3212         if(msec != -1 && msec != 0) {
3213                 if(!c->timeout) {
3214                         c->timeout = (struct timeval*)malloc(sizeof(
3215                                 struct timeval));
3216                         if(!c->timeout) {
3217                                 log_err("cpsl: malloc failed. No net read.");
3218                                 return;
3219                         }
3220                 }
3221                 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
3222 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
3223                 c->timeout->tv_sec = msec/1000;
3224                 c->timeout->tv_usec = (msec%1000)*1000;
3225 #endif /* S_SPLINT_S */
3226         }
3227         if(c->type == comm_tcp || c->type == comm_http) {
3228                 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
3229                 if(c->tcp_is_reading)
3230                         ub_event_add_bits(c->ev->ev, UB_EV_READ);
3231                 else    ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
3232         }
3233         if(newfd != -1) {
3234                 if(c->fd != -1) {
3235 #ifndef USE_WINSOCK
3236                         close(c->fd);
3237 #else
3238                         closesocket(c->fd);
3239 #endif
3240                 }
3241                 c->fd = newfd;
3242                 ub_event_set_fd(c->ev->ev, c->fd);
3243         }
3244         if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
3245                 log_err("event_add failed. in cpsl.");
3246         }
3247 }
3248
3249 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
3250 {
3251         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
3252         if(ub_event_del(c->ev->ev) != 0) {
3253                 log_err("event_del error to cplf");
3254         }
3255         ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
3256         if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
3257         if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
3258         if(ub_event_add(c->ev->ev, c->timeout) != 0) {
3259                 log_err("event_add failed. in cplf.");
3260         }
3261 }
3262
3263 size_t comm_point_get_mem(struct comm_point* c)
3264 {
3265         size_t s;
3266         if(!c) 
3267                 return 0;
3268         s = sizeof(*c) + sizeof(*c->ev);
3269         if(c->timeout) 
3270                 s += sizeof(*c->timeout);
3271         if(c->type == comm_tcp || c->type == comm_local) {
3272                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
3273 #ifdef USE_DNSCRYPT
3274                 s += sizeof(*c->dnscrypt_buffer);
3275                 if(c->buffer != c->dnscrypt_buffer) {
3276                         s += sldns_buffer_capacity(c->dnscrypt_buffer);
3277                 }
3278 #endif
3279         }
3280         if(c->type == comm_tcp_accept) {
3281                 int i;
3282                 for(i=0; i<c->max_tcp_count; i++)
3283                         s += comm_point_get_mem(c->tcp_handlers[i]);
3284         }
3285         return s;
3286 }
3287
3288 struct comm_timer* 
3289 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
3290 {
3291         struct internal_timer *tm = (struct internal_timer*)calloc(1,
3292                 sizeof(struct internal_timer));
3293         if(!tm) {
3294                 log_err("malloc failed");
3295                 return NULL;
3296         }
3297         tm->super.ev_timer = tm;
3298         tm->base = base;
3299         tm->super.callback = cb;
3300         tm->super.cb_arg = cb_arg;
3301         tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 
3302                 comm_timer_callback, &tm->super);
3303         if(tm->ev == NULL) {
3304                 log_err("timer_create: event_base_set failed.");
3305                 free(tm);
3306                 return NULL;
3307         }
3308         return &tm->super;
3309 }
3310
3311 void 
3312 comm_timer_disable(struct comm_timer* timer)
3313 {
3314         if(!timer)
3315                 return;
3316         ub_timer_del(timer->ev_timer->ev);
3317         timer->ev_timer->enabled = 0;
3318 }
3319
3320 void 
3321 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
3322 {
3323         log_assert(tv);
3324         if(timer->ev_timer->enabled)
3325                 comm_timer_disable(timer);
3326         if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
3327                 comm_timer_callback, timer, tv) != 0)
3328                 log_err("comm_timer_set: evtimer_add failed.");
3329         timer->ev_timer->enabled = 1;
3330 }
3331
3332 void 
3333 comm_timer_delete(struct comm_timer* timer)
3334 {
3335         if(!timer)
3336                 return;
3337         comm_timer_disable(timer);
3338         /* Free the sub struct timer->ev_timer derived from the super struct timer.
3339          * i.e. assert(timer == timer->ev_timer)
3340          */
3341         ub_event_free(timer->ev_timer->ev);
3342         free(timer->ev_timer);
3343 }
3344
3345 void 
3346 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
3347 {
3348         struct comm_timer* tm = (struct comm_timer*)arg;
3349         if(!(event&UB_EV_TIMEOUT))
3350                 return;
3351         ub_comm_base_now(tm->ev_timer->base);
3352         tm->ev_timer->enabled = 0;
3353         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
3354         (*tm->callback)(tm->cb_arg);
3355 }
3356
3357 int 
3358 comm_timer_is_set(struct comm_timer* timer)
3359 {
3360         return (int)timer->ev_timer->enabled;
3361 }
3362
3363 size_t 
3364 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer))
3365 {
3366         return sizeof(struct internal_timer);
3367 }
3368
3369 struct comm_signal* 
3370 comm_signal_create(struct comm_base* base,
3371         void (*callback)(int, void*), void* cb_arg)
3372 {
3373         struct comm_signal* com = (struct comm_signal*)malloc(
3374                 sizeof(struct comm_signal));
3375         if(!com) {
3376                 log_err("malloc failed");
3377                 return NULL;
3378         }
3379         com->base = base;
3380         com->callback = callback;
3381         com->cb_arg = cb_arg;
3382         com->ev_signal = NULL;
3383         return com;
3384 }
3385
3386 void 
3387 comm_signal_callback(int sig, short event, void* arg)
3388 {
3389         struct comm_signal* comsig = (struct comm_signal*)arg;
3390         if(!(event & UB_EV_SIGNAL))
3391                 return;
3392         ub_comm_base_now(comsig->base);
3393         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
3394         (*comsig->callback)(sig, comsig->cb_arg);
3395 }
3396
3397 int 
3398 comm_signal_bind(struct comm_signal* comsig, int sig)
3399 {
3400         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
3401                 sizeof(struct internal_signal));
3402         if(!entry) {
3403                 log_err("malloc failed");
3404                 return 0;
3405         }
3406         log_assert(comsig);
3407         /* add signal event */
3408         entry->ev = ub_signal_new(comsig->base->eb->base, sig,
3409                 comm_signal_callback, comsig);
3410         if(entry->ev == NULL) {
3411                 log_err("Could not create signal event");
3412                 free(entry);
3413                 return 0;
3414         }
3415         if(ub_signal_add(entry->ev, NULL) != 0) {
3416                 log_err("Could not add signal handler");
3417                 ub_event_free(entry->ev);
3418                 free(entry);
3419                 return 0;
3420         }
3421         /* link into list */
3422         entry->next = comsig->ev_signal;
3423         comsig->ev_signal = entry;
3424         return 1;
3425 }
3426
3427 void 
3428 comm_signal_delete(struct comm_signal* comsig)
3429 {
3430         struct internal_signal* p, *np;
3431         if(!comsig)
3432                 return;
3433         p=comsig->ev_signal;
3434         while(p) {
3435                 np = p->next;
3436                 ub_signal_del(p->ev);
3437                 ub_event_free(p->ev);
3438                 free(p);
3439                 p = np;
3440         }
3441         free(comsig);
3442 }