]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/util/netevent.c
Upgrade Unbound to 1.7.0. More to follow.
[FreeBSD/FreeBSD.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/ub_event.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/fptr_wlist.h"
47 #include "sldns/pkthdr.h"
48 #include "sldns/sbuffer.h"
49 #include "dnstap/dnstap.h"
50 #include "dnscrypt/dnscrypt.h"
51 #ifdef HAVE_OPENSSL_SSL_H
52 #include <openssl/ssl.h>
53 #endif
54 #ifdef HAVE_OPENSSL_ERR_H
55 #include <openssl/err.h>
56 #endif
57
58 /* -------- Start of local definitions -------- */
/**
 * If CMSG_ALIGN is not defined on this platform, a workaround.
 * Prefer the platform's private alignment macro when one exists, otherwise
 * round up to a multiple of sizeof(long).
 * The second test checks _CMSG_DATA_ALIGN, the macro that is actually used
 * in the definition, so the alias is never created for a macro that does
 * not exist. The alias is function-like so it expands like the others.
 */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/** if CMSG_LEN is not defined on this platform, a workaround:
 * aligned header size plus the unpadded payload length */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/** if CMSG_SPACE is not defined on this platform, a workaround:
 * aligned header size plus the aligned payload length */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
83
/** Timeout, in milliseconds, for reading or writing a query over TCP */
#define TCP_QUERY_TIMEOUT 120000
/** The shorter TCP timeout in msec for fast queries; it is used when
 * more than half of the TCP handlers are in use */
#define TCP_QUERY_TIMEOUT_FAST 200

/** number of UDP reads to perform per read indication from select;
 * only a single read is done when nonblocking sockets are broken */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
95
/**
 * Internal event structure; holds the ub_event for an event together
 * with the comm base pointer.
 * Possibly other structures (list, tree) this is part of.
 */
struct internal_event {
	/** comm base pointer */
	struct comm_base *base;
	/** the ub_event itself */
	struct ub_event *ev;
};
106
/**
 * Internal base structure; one per comm base, so that every thread
 * has its own events.
 */
struct internal_base {
	/** the ub_event event_base */
	struct ub_event_base *base;
	/** storage for the time in seconds; pointers to it are handed out */
	time_t secs;
	/** storage for the current time as a timeval */
	struct timeval now;
	/** timeout event used for slow_accept */
	struct ub_event *slow_accept;
	/** nonzero while slow_accept is enabled */
	int slow_accept_enabled;
};
122
123 /**
124  * Internal timer structure, to store timer event in.
125  */
126 struct internal_timer {
127         /** the super struct from which derived */
128         struct comm_timer super;
129         /** the comm base */
130         struct comm_base* base;
131         /** ub_event event type */
132         struct ub_event* ev;
133         /** is timer enabled */
134         uint8_t enabled;
135 };
136
/**
 * Internal signal structure; a singly linked list entry that stores
 * a signal event.
 */
struct internal_signal {
	/** the ub_event of the signal */
	struct ub_event *ev;
	/** following entry in the signal list */
	struct internal_signal *next;
};
146
147 /** create a tcp handler with a parent */
148 static struct comm_point* comm_point_create_tcp_handler(
149         struct comm_base *base, struct comm_point* parent, size_t bufsize,
150         comm_point_callback_type* callback, void* callback_arg);
151
152 /* -------- End of local definitions -------- */
153
154 struct comm_base* 
155 comm_base_create(int sigs)
156 {
157         struct comm_base* b = (struct comm_base*)calloc(1,
158                 sizeof(struct comm_base));
159         const char *evnm="event", *evsys="", *evmethod="";
160
161         if(!b)
162                 return NULL;
163         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
164         if(!b->eb) {
165                 free(b);
166                 return NULL;
167         }
168         b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
169         if(!b->eb->base) {
170                 free(b->eb);
171                 free(b);
172                 return NULL;
173         }
174         ub_comm_base_now(b);
175         ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
176         verbose(VERB_ALGO, "%s %s user %s method.", evnm, evsys, evmethod);
177         return b;
178 }
179
180 struct comm_base*
181 comm_base_create_event(struct ub_event_base* base)
182 {
183         struct comm_base* b = (struct comm_base*)calloc(1,
184                 sizeof(struct comm_base));
185         if(!b)
186                 return NULL;
187         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
188         if(!b->eb) {
189                 free(b);
190                 return NULL;
191         }
192         b->eb->base = base;
193         ub_comm_base_now(b);
194         return b;
195 }
196
197 void 
198 comm_base_delete(struct comm_base* b)
199 {
200         if(!b)
201                 return;
202         if(b->eb->slow_accept_enabled) {
203                 if(ub_event_del(b->eb->slow_accept) != 0) {
204                         log_err("could not event_del slow_accept");
205                 }
206                 ub_event_free(b->eb->slow_accept);
207         }
208         ub_event_base_free(b->eb->base);
209         b->eb->base = NULL;
210         free(b->eb);
211         free(b);
212 }
213
214 void 
215 comm_base_delete_no_base(struct comm_base* b)
216 {
217         if(!b)
218                 return;
219         if(b->eb->slow_accept_enabled) {
220                 if(ub_event_del(b->eb->slow_accept) != 0) {
221                         log_err("could not event_del slow_accept");
222                 }
223                 ub_event_free(b->eb->slow_accept);
224         }
225         b->eb->base = NULL;
226         free(b->eb);
227         free(b);
228 }
229
230 void 
231 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
232 {
233         *tt = &b->eb->secs;
234         *tv = &b->eb->now;
235 }
236
237 void 
238 comm_base_dispatch(struct comm_base* b)
239 {
240         int retval;
241         retval = ub_event_base_dispatch(b->eb->base);
242         if(retval < 0) {
243                 fatal_exit("event_dispatch returned error %d, "
244                         "errno is %s", retval, strerror(errno));
245         }
246 }
247
248 void comm_base_exit(struct comm_base* b)
249 {
250         if(ub_event_base_loopexit(b->eb->base) != 0) {
251                 log_err("Could not loopexit");
252         }
253 }
254
255 void comm_base_set_slow_accept_handlers(struct comm_base* b,
256         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
257 {
258         b->stop_accept = stop_acc;
259         b->start_accept = start_acc;
260         b->cb_arg = arg;
261 }
262
263 struct ub_event_base* comm_base_internal(struct comm_base* b)
264 {
265         return b->eb->base;
266 }
267
268 /** see if errno for udp has to be logged or not uses globals */
269 static int
270 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
271 {
272         /* do not log transient errors (unless high verbosity) */
273 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
274         switch(errno) {
275 #  ifdef ENETUNREACH
276                 case ENETUNREACH:
277 #  endif
278 #  ifdef EHOSTDOWN
279                 case EHOSTDOWN:
280 #  endif
281 #  ifdef EHOSTUNREACH
282                 case EHOSTUNREACH:
283 #  endif
284 #  ifdef ENETDOWN
285                 case ENETDOWN:
286 #  endif
287                         if(verbosity < VERB_ALGO)
288                                 return 0;
289                 default:
290                         break;
291         }
292 #endif
293         /* permission denied is gotten for every send if the
294          * network is disconnected (on some OS), squelch it */
295         if( ((errno == EPERM)
296 #  ifdef EADDRNOTAVAIL
297                 /* 'Cannot assign requested address' also when disconnected */
298                 || (errno == EADDRNOTAVAIL)
299 #  endif
300                 ) && verbosity < VERB_DETAIL)
301                 return 0;
302 #  ifdef EADDRINUSE
303         /* If SO_REUSEADDR is set, we could try to connect to the same server
304          * from the same source port twice. */
305         if(errno == EADDRINUSE && verbosity < VERB_DETAIL)
306                 return 0;
307 #  endif
308         /* squelch errors where people deploy AAAA ::ffff:bla for
309          * authority servers, which we try for intranets. */
310         if(errno == EINVAL && addr_is_ip4mapped(
311                 (struct sockaddr_storage*)addr, addrlen) &&
312                 verbosity < VERB_DETAIL)
313                 return 0;
314         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
315          * but a dns cache does not need it. */
316         if(errno == EACCES && addr_is_broadcast(
317                 (struct sockaddr_storage*)addr, addrlen) &&
318                 verbosity < VERB_DETAIL)
319                 return 0;
320         return 1;
321 }
322
/**
 * See if the errno of a failed tcp connect has to be logged.
 * The decision is delegated to udp_send_errno_needs_log().
 * @param addr: address of the connection attempt.
 * @param addrlen: length of addr.
 * @return 0 when the error is squelched, 1 when it has to be logged.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);
	return needs_log;
}
327
328 /* send a UDP reply */
329 int
330 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
331         struct sockaddr* addr, socklen_t addrlen) 
332 {
333         ssize_t sent;
334         log_assert(c->fd != -1);
335 #ifdef UNBOUND_DEBUG
336         if(sldns_buffer_remaining(packet) == 0)
337                 log_err("error: send empty UDP packet");
338 #endif
339         log_assert(addr && addrlen > 0);
340         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
341                 sldns_buffer_remaining(packet), 0,
342                 addr, addrlen);
343         if(sent == -1) {
344                 /* try again and block, waiting for IO to complete,
345                  * we want to send the answer, and we will wait for
346                  * the ethernet interface buffer to have space. */
347 #ifndef USE_WINSOCK
348                 if(errno == EAGAIN || 
349 #  ifdef EWOULDBLOCK
350                         errno == EWOULDBLOCK ||
351 #  endif
352                         errno == ENOBUFS) {
353 #else
354                 if(WSAGetLastError() == WSAEINPROGRESS ||
355                         WSAGetLastError() == WSAENOBUFS ||
356                         WSAGetLastError() == WSAEWOULDBLOCK) {
357 #endif
358                         int e;
359                         fd_set_block(c->fd);
360                         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
361                                 sldns_buffer_remaining(packet), 0,
362                                 addr, addrlen);
363                         e = errno;
364                         fd_set_nonblock(c->fd);
365                         errno = e;
366                 }
367         }
368         if(sent == -1) {
369                 if(!udp_send_errno_needs_log(addr, addrlen))
370                         return 0;
371 #ifndef USE_WINSOCK
372                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
373 #else
374                 verbose(VERB_OPS, "sendto failed: %s", 
375                         wsa_strerror(WSAGetLastError()));
376 #endif
377                 log_addr(VERB_OPS, "remote address is", 
378                         (struct sockaddr_storage*)addr, addrlen);
379                 return 0;
380         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
381                 log_err("sent %d in place of %d bytes", 
382                         (int)sent, (int)sldns_buffer_remaining(packet));
383                 return 0;
384         }
385         return 1;
386 }
387
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Print the ancillary info of a reply, for debugging.
 * @param str: text that is put in front of the log line.
 * @param r: reply of which the srctype and pktinfo are printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	if(r->srctype == 6) {
		char text[1024];
		if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 
			text, (socklen_t)sizeof(text)) == 0) {
			(void)strlcpy(text, "(inet_ntop error)", sizeof(text));
		}
		text[sizeof(text)-1]=0;
		log_info("%s: %s %d", str, text, r->pktinfo.v6info.ipi6_ifindex);
		return;
	}
	if(r->srctype != 4) {
		log_info("%s: unknown srctype %d", str, r->srctype);
		return;
	}
	/* srctype is 4 from here on */
	{
#ifdef IP_PKTINFO
		char addr_txt[1024], spec_txt[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 
			addr_txt, (socklen_t)sizeof(addr_txt)) == 0) {
			(void)strlcpy(addr_txt, "(inet_ntop error)",
				sizeof(addr_txt));
		}
		addr_txt[sizeof(addr_txt)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 
			spec_txt, (socklen_t)sizeof(spec_txt)) == 0) {
			(void)strlcpy(spec_txt, "(inet_ntop error)",
				sizeof(spec_txt));
		}
		spec_txt[sizeof(spec_txt)-1]=0;
#else
		/* no ipi_spec_dst member, print an empty string for it */
		spec_txt[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			addr_txt, spec_txt);
#elif defined(IP_RECVDSTADDR)
		char addr_txt[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 
			addr_txt, (socklen_t)sizeof(addr_txt)) == 0) {
			(void)strlcpy(addr_txt, "(inet_ntop error)",
				sizeof(addr_txt));
		}
		addr_txt[sizeof(addr_txt)-1]=0;
		log_info("%s: %s", str, addr_txt);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
435
436 /** send a UDP reply over specified interface*/
437 static int
438 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
439         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
440 {
441 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
442         ssize_t sent;
443         struct msghdr msg;
444         struct iovec iov[1];
445         char control[256];
446 #ifndef S_SPLINT_S
447         struct cmsghdr *cmsg;
448 #endif /* S_SPLINT_S */
449
450         log_assert(c->fd != -1);
451 #ifdef UNBOUND_DEBUG
452         if(sldns_buffer_remaining(packet) == 0)
453                 log_err("error: send empty UDP packet");
454 #endif
455         log_assert(addr && addrlen > 0);
456
457         msg.msg_name = addr;
458         msg.msg_namelen = addrlen;
459         iov[0].iov_base = sldns_buffer_begin(packet);
460         iov[0].iov_len = sldns_buffer_remaining(packet);
461         msg.msg_iov = iov;
462         msg.msg_iovlen = 1;
463         msg.msg_control = control;
464 #ifndef S_SPLINT_S
465         msg.msg_controllen = sizeof(control);
466 #endif /* S_SPLINT_S */
467         msg.msg_flags = 0;
468
469 #ifndef S_SPLINT_S
470         cmsg = CMSG_FIRSTHDR(&msg);
471         if(r->srctype == 4) {
472 #ifdef IP_PKTINFO
473                 void* cmsg_data;
474                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
475                 log_assert(msg.msg_controllen <= sizeof(control));
476                 cmsg->cmsg_level = IPPROTO_IP;
477                 cmsg->cmsg_type = IP_PKTINFO;
478                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
479                         sizeof(struct in_pktinfo));
480                 /* unset the ifindex to not bypass the routing tables */
481                 cmsg_data = CMSG_DATA(cmsg);
482                 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
483                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
484 #elif defined(IP_SENDSRCADDR)
485                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
486                 log_assert(msg.msg_controllen <= sizeof(control));
487                 cmsg->cmsg_level = IPPROTO_IP;
488                 cmsg->cmsg_type = IP_SENDSRCADDR;
489                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
490                         sizeof(struct in_addr));
491                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
492 #else
493                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
494                 msg.msg_control = NULL;
495 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
496         } else if(r->srctype == 6) {
497                 void* cmsg_data;
498                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
499                 log_assert(msg.msg_controllen <= sizeof(control));
500                 cmsg->cmsg_level = IPPROTO_IPV6;
501                 cmsg->cmsg_type = IPV6_PKTINFO;
502                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
503                         sizeof(struct in6_pktinfo));
504                 /* unset the ifindex to not bypass the routing tables */
505                 cmsg_data = CMSG_DATA(cmsg);
506                 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
507                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
508         } else {
509                 /* try to pass all 0 to use default route */
510                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
511                 log_assert(msg.msg_controllen <= sizeof(control));
512                 cmsg->cmsg_level = IPPROTO_IPV6;
513                 cmsg->cmsg_type = IPV6_PKTINFO;
514                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
515                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
516         }
517 #endif /* S_SPLINT_S */
518         if(verbosity >= VERB_ALGO)
519                 p_ancil("send_udp over interface", r);
520         sent = sendmsg(c->fd, &msg, 0);
521         if(sent == -1) {
522                 /* try again and block, waiting for IO to complete,
523                  * we want to send the answer, and we will wait for
524                  * the ethernet interface buffer to have space. */
525 #ifndef USE_WINSOCK
526                 if(errno == EAGAIN || 
527 #  ifdef EWOULDBLOCK
528                         errno == EWOULDBLOCK ||
529 #  endif
530                         errno == ENOBUFS) {
531 #else
532                 if(WSAGetLastError() == WSAEINPROGRESS ||
533                         WSAGetLastError() == WSAENOBUFS ||
534                         WSAGetLastError() == WSAEWOULDBLOCK) {
535 #endif
536                         int e;
537                         fd_set_block(c->fd);
538                         sent = sendmsg(c->fd, &msg, 0);
539                         e = errno;
540                         fd_set_nonblock(c->fd);
541                         errno = e;
542                 }
543         }
544         if(sent == -1) {
545                 if(!udp_send_errno_needs_log(addr, addrlen))
546                         return 0;
547                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
548                 log_addr(VERB_OPS, "remote address is", 
549                         (struct sockaddr_storage*)addr, addrlen);
550 #ifdef __NetBSD__
551                 /* netbsd 7 has IP_PKTINFO for recv but not send */
552                 if(errno == EINVAL && r->srctype == 4)
553                         log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
554                                 "Please disable interface-automatic");
555 #endif
556                 return 0;
557         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
558                 log_err("sent %d in place of %d bytes", 
559                         (int)sent, (int)sldns_buffer_remaining(packet));
560                 return 0;
561         }
562         return 1;
563 #else
564         (void)c;
565         (void)packet;
566         (void)addr;
567         (void)addrlen;
568         (void)r;
569         log_err("sendmsg: IPV6_PKTINFO not supported");
570         return 0;
571 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
572 }
573
/**
 * Event callback for a UDP comm point that uses ancillary data.
 * Reads up to NUM_UDP_PER_SELECT packets with recvmsg(), stores the packet
 * info from the ancillary data in the reply, and calls the comm point
 * callback for every packet. When that callback returns nonzero the buffer
 * is sent back at once with comm_point_send_udp_msg_if().
 * Exits fatally when support for IPV6_PKTINFO with recvmsg is not
 * compiled in.
 * @param fd: the file descriptor that is readable.
 * @param event: event bits; nothing is done without UB_EV_READ.
 * @param arg: the comm point.
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* CMSG_FIRSTHDR and CMSG_DATA access the ancillary buffer through
	 * struct pointers, the union gives the buffer the alignment of
	 * a struct cmsghdr, which a plain char array does not guarantee */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&UB_EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	ub_comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			/* nothing (more) to read is not an error */
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		/* srctype stays 0 if no packet info is found below */
		rep.srctype = 0;
#ifndef S_SPLINT_S
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			/* NOTE(review): comm_point_udp_callback sends
			 * dnscrypt_buffer under USE_DNSCRYPT, this path always
			 * sends buffer - confirm that is intended */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		if(!rep.c || rep.c->fd == -1) /* commpoint closed */
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
668
669 void 
670 comm_point_udp_callback(int fd, short event, void* arg)
671 {
672         struct comm_reply rep;
673         ssize_t rcv;
674         int i;
675         struct sldns_buffer *buffer;
676
677         rep.c = (struct comm_point*)arg;
678         log_assert(rep.c->type == comm_udp);
679
680         if(!(event&UB_EV_READ))
681                 return;
682         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
683         ub_comm_base_now(rep.c->ev->base);
684         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
685                 sldns_buffer_clear(rep.c->buffer);
686                 rep.addrlen = (socklen_t)sizeof(rep.addr);
687                 log_assert(fd != -1);
688                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
689                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
690                         sldns_buffer_remaining(rep.c->buffer), 0, 
691                         (struct sockaddr*)&rep.addr, &rep.addrlen);
692                 if(rcv == -1) {
693 #ifndef USE_WINSOCK
694                         if(errno != EAGAIN && errno != EINTR)
695                                 log_err("recvfrom %d failed: %s", 
696                                         fd, strerror(errno));
697 #else
698                         if(WSAGetLastError() != WSAEINPROGRESS &&
699                                 WSAGetLastError() != WSAECONNRESET &&
700                                 WSAGetLastError()!= WSAEWOULDBLOCK)
701                                 log_err("recvfrom failed: %s",
702                                         wsa_strerror(WSAGetLastError()));
703 #endif
704                         return;
705                 }
706                 sldns_buffer_skip(rep.c->buffer, rcv);
707                 sldns_buffer_flip(rep.c->buffer);
708                 rep.srctype = 0;
709                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
710                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
711                         /* send back immediate reply */
712 #ifdef USE_DNSCRYPT
713                         buffer = rep.c->dnscrypt_buffer;
714 #else
715                         buffer = rep.c->buffer;
716 #endif
717                         (void)comm_point_send_udp_msg(rep.c, buffer,
718                                 (struct sockaddr*)&rep.addr, rep.addrlen);
719                 }
720                 if(!rep.c || rep.c->fd != fd) /* commpoint closed to -1 or reused for
721                 another UDP port. Note rep.c cannot be reused with TCP fd. */
722                         break;
723         }
724 }
725
726 /** Use a new tcp handler for new query fd, set to read query */
727 static void
728 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 
729 {
730         log_assert(c->type == comm_tcp);
731         log_assert(c->fd == -1);
732         sldns_buffer_clear(c->buffer);
733 #ifdef USE_DNSCRYPT
734         if (c->dnscrypt)
735                 sldns_buffer_clear(c->dnscrypt_buffer);
736 #endif
737         c->tcp_is_reading = 1;
738         c->tcp_byte_count = 0;
739         c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
740         /* if more than half the tcp handlers are in use, use a shorter
741          * timeout for this TCP connection, we need to make space for
742          * other connections to be able to get attention */
743         if(cur > max/2)
744                 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT_FAST;
745         comm_point_start_listening(c, fd, c->tcp_timeout_msec);
746 }
747
748 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
749         short ATTR_UNUSED(event), void* arg)
750 {
751         struct comm_base* b = (struct comm_base*)arg;
752         /* timeout for the slow accept, re-enable accepts again */
753         if(b->start_accept) {
754                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
755                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
756                 (*b->start_accept)(b->cb_arg);
757                 b->eb->slow_accept_enabled = 0;
758         }
759 }
760
761 int comm_point_perform_accept(struct comm_point* c,
762         struct sockaddr_storage* addr, socklen_t* addrlen)
763 {
764         int new_fd;
765         *addrlen = (socklen_t)sizeof(*addr);
766         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
767         if(new_fd == -1) {
768 #ifndef USE_WINSOCK
769                 /* EINTR is signal interrupt. others are closed connection. */
770                 if(     errno == EINTR || errno == EAGAIN
771 #ifdef EWOULDBLOCK
772                         || errno == EWOULDBLOCK 
773 #endif
774 #ifdef ECONNABORTED
775                         || errno == ECONNABORTED 
776 #endif
777 #ifdef EPROTO
778                         || errno == EPROTO
779 #endif /* EPROTO */
780                         )
781                         return -1;
782 #if defined(ENFILE) && defined(EMFILE)
783                 if(errno == ENFILE || errno == EMFILE) {
784                         /* out of file descriptors, likely outside of our
785                          * control. stop accept() calls for some time */
786                         if(c->ev->base->stop_accept) {
787                                 struct comm_base* b = c->ev->base;
788                                 struct timeval tv;
789                                 verbose(VERB_ALGO, "out of file descriptors: "
790                                         "slow accept");
791                                 b->eb->slow_accept_enabled = 1;
792                                 fptr_ok(fptr_whitelist_stop_accept(
793                                         b->stop_accept));
794                                 (*b->stop_accept)(b->cb_arg);
795                                 /* set timeout, no mallocs */
796                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
797                                 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
798                                 b->eb->slow_accept = ub_event_new(b->eb->base,
799                                         -1, UB_EV_TIMEOUT,
800                                         comm_base_handle_slow_accept, b);
801                                 if(b->eb->slow_accept == NULL) {
802                                         /* we do not want to log here, because
803                                          * that would spam the logfiles.
804                                          * error: "event_base_set failed." */
805                                 }
806                                 else if(ub_event_add(b->eb->slow_accept, &tv)
807                                         != 0) {
808                                         /* we do not want to log here,
809                                          * error: "event_add failed." */
810                                 }
811                         }
812                         return -1;
813                 }
814 #endif
815                 log_err_addr("accept failed", strerror(errno), addr, *addrlen);
816 #else /* USE_WINSOCK */
817                 if(WSAGetLastError() == WSAEINPROGRESS ||
818                         WSAGetLastError() == WSAECONNRESET)
819                         return -1;
820                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
821                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
822                         return -1;
823                 }
824                 log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
825                         addr, *addrlen);
826 #endif
827                 return -1;
828         }
829         fd_set_nonblock(new_fd);
830         return new_fd;
831 }
832
833 #ifdef USE_WINSOCK
834 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
835         int ATTR_UNUSED(argi), long argl, long retvalue)
836 {
837         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
838                 (oper&BIO_CB_RETURN)?"return":"before",
839                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
840                 WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
841         /* on windows, check if previous operation caused EWOULDBLOCK */
842         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
843                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
844                 if(WSAGetLastError() == WSAEWOULDBLOCK)
845                         ub_winsock_tcp_wouldblock((struct ub_event*)
846                                 BIO_get_callback_arg(b), UB_EV_READ);
847         }
848         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
849                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
850                 if(WSAGetLastError() == WSAEWOULDBLOCK)
851                         ub_winsock_tcp_wouldblock((struct ub_event*)
852                                 BIO_get_callback_arg(b), UB_EV_WRITE);
853         }
854         /* return original return value */
855         return retvalue;
856 }
857
858 /** set win bio callbacks for nonblocking operations */
859 void
860 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
861 {
862         SSL* ssl = (SSL*)thessl;
863         /* set them both just in case, but usually they are the same BIO */
864         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
865         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
866         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
867         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
868 }
869 #endif
870
871 void 
872 comm_point_tcp_accept_callback(int fd, short event, void* arg)
873 {
874         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
875         int new_fd;
876         log_assert(c->type == comm_tcp_accept);
877         if(!(event & UB_EV_READ)) {
878                 log_info("ignoring tcp accept event %d", (int)event);
879                 return;
880         }
881         ub_comm_base_now(c->ev->base);
882         /* find free tcp handler. */
883         if(!c->tcp_free) {
884                 log_warn("accepted too many tcp, connections full");
885                 return;
886         }
887         /* accept incoming connection. */
888         c_hdl = c->tcp_free;
889         log_assert(fd != -1);
890         (void)fd;
891         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
892                 &c_hdl->repinfo.addrlen);
893         if(new_fd == -1)
894                 return;
895         if(c->ssl) {
896                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
897                 if(!c_hdl->ssl) {
898                         c_hdl->fd = new_fd;
899                         comm_point_close(c_hdl);
900                         return;
901                 }
902                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
903 #ifdef USE_WINSOCK
904                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
905 #endif
906         }
907
908         /* grab the tcp handler buffers */
909         c->cur_tcp_count++;
910         c->tcp_free = c_hdl->tcp_free;
911         if(!c->tcp_free) {
912                 /* stop accepting incoming queries for now. */
913                 comm_point_stop_listening(c);
914         }
915         setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
916 }
917
918 /** Make tcp handler free for next assignment */
919 static void
920 reclaim_tcp_handler(struct comm_point* c)
921 {
922         log_assert(c->type == comm_tcp);
923         if(c->ssl) {
924 #ifdef HAVE_SSL
925                 SSL_shutdown(c->ssl);
926                 SSL_free(c->ssl);
927                 c->ssl = NULL;
928 #endif
929         }
930         comm_point_close(c);
931         if(c->tcp_parent) {
932                 c->tcp_parent->cur_tcp_count--;
933                 c->tcp_free = c->tcp_parent->tcp_free;
934                 c->tcp_parent->tcp_free = c;
935                 if(!c->tcp_free) {
936                         /* re-enable listening on accept socket */
937                         comm_point_start_listening(c->tcp_parent, -1, -1);
938                 }
939         }
940 }
941
942 /** do the callback when writing is done */
943 static void
944 tcp_callback_writer(struct comm_point* c)
945 {
946         log_assert(c->type == comm_tcp);
947         sldns_buffer_clear(c->buffer);
948         if(c->tcp_do_toggle_rw)
949                 c->tcp_is_reading = 1;
950         c->tcp_byte_count = 0;
951         /* switch from listening(write) to listening(read) */
952         comm_point_stop_listening(c);
953         comm_point_start_listening(c, -1, -1);
954 }
955
956 /** do the callback when reading is done */
957 static void
958 tcp_callback_reader(struct comm_point* c)
959 {
960         log_assert(c->type == comm_tcp || c->type == comm_local);
961         sldns_buffer_flip(c->buffer);
962         if(c->tcp_do_toggle_rw)
963                 c->tcp_is_reading = 0;
964         c->tcp_byte_count = 0;
965         if(c->type == comm_tcp)
966                 comm_point_stop_listening(c);
967         fptr_ok(fptr_whitelist_comm_point(c->callback));
968         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
969                 comm_point_start_listening(c, -1, c->tcp_timeout_msec);
970         }
971 }
972
#ifdef HAVE_SSL
/**
 * Log certificate details.
 * Prints the certificate into a memory BIO and writes the text to the log
 * with verbose(). Does nothing when verbosity is below level or when the
 * BIO cannot be created.
 * @param level: verbosity level at which to log.
 * @param str: text that precedes the certificate in the log line.
 * @param cert: the certificate to print.
 */
static void
log_cert(unsigned level, const char* str, X509* cert)
{
	/* these NO_* bits are cleared in the print flags below, every
	 * other flag bit is set */
	const unsigned long cleared = X509_FLAG_NO_SUBJECT
		|X509_FLAG_NO_ISSUER|X509_FLAG_NO_VALIDITY
		|X509_FLAG_NO_EXTENSIONS|X509_FLAG_NO_AUX
		|X509_FLAG_NO_ATTRIBUTES;
	char terminator = 0;
	char* text = NULL;
	long textlen;
	BIO* mem;
	if(verbosity < level) return;
	mem = BIO_new(BIO_s_mem());
	if(!mem) return;
	X509_print_ex(mem, cert, 0, (unsigned long)-1 ^ cleared);
	/* append a zero byte so the BIO contents can be used as a string */
	BIO_write(mem, &terminator, (int)sizeof(terminator));
	textlen = BIO_get_mem_data(mem, &text);
	if(textlen != 0 && text) {
		verbose(level, "%s: \n%s", str, text);
	}
	BIO_free(mem);
}
#endif /* HAVE_SSL */
998
#ifdef HAVE_SSL
/**
 * Log the result of peer verification after a completed handshake.
 * @param c: comm point whose c->ssl finished SSL_do_handshake.
 * @return 0 if peer verification was requested and the peer failed it or
 *	presented no certificate; 1 otherwise.
 */
static int
ssl_handshake_check_peer(struct comm_point* c)
{
	int verified;
	X509* peer;
	if(!(SSL_get_verify_mode(c->ssl)&SSL_VERIFY_PEER)) {
		/* unauthenticated, the verify peer flag was not set
		 * in c->ssl when the ssl object was created from ssl_ctx */
		log_addr(VERB_ALGO, "SSL connection", &c->repinfo.addr,
			c->repinfo.addrlen);
		return 1;
	}
	verified = (SSL_get_verify_result(c->ssl) == X509_V_OK);
	peer = SSL_get_peer_certificate(c->ssl);
	if(!verified) {
		if(peer) {
			log_cert(VERB_ALGO, "peer certificate", peer);
			X509_free(peer);
		}
		log_addr(VERB_ALGO, "SSL connection failed: "
			"failed to authenticate",
			&c->repinfo.addr, c->repinfo.addrlen);
		return 0;
	}
	if(!peer) {
		log_addr(VERB_ALGO, "SSL connection failed: "
			"no certificate",
			&c->repinfo.addr, c->repinfo.addrlen);
		return 0;
	}
	log_cert(VERB_ALGO, "peer certificate", peer);
#ifdef HAVE_SSL_GET0_PEERNAME
	if(SSL_get0_peername(c->ssl)) {
		char buf[255];
		snprintf(buf, sizeof(buf), "SSL connection "
			"to %s authenticated",
			SSL_get0_peername(c->ssl));
		log_addr(VERB_ALGO, buf, &c->repinfo.addr,
			c->repinfo.addrlen);
	} else
#endif
	{
		log_addr(VERB_ALGO, "SSL connection "
			"authenticated", &c->repinfo.addr,
			c->repinfo.addrlen);
	}
	X509_free(peer);
	return 1;
}

/**
 * Continue ssl handshake.
 * If the state records that an SSL_write waited for readability
 * (comm_ssl_shake_hs_read) or an SSL_read waited for writability
 * (comm_ssl_shake_hs_write), the event is re-armed and the state cleared.
 * Otherwise SSL_do_handshake is run; while it wants more I/O the state
 * and the event direction are updated to match.
 * @param c: comm point with c->ssl set.
 * @return 0 if the connection must be closed, 1 otherwise. On return 1
 *	the handshake is finished only if ssl_shake_state is
 *	comm_ssl_shake_none.
 */
static int
ssl_handshake(struct comm_point* c)
{
	int rc;
	switch(c->ssl_shake_state) {
	case comm_ssl_shake_hs_read:
		/* read condition satisfied back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	case comm_ssl_shake_hs_write:
		/* write condition satisfied, back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	default:
		break;
	}

	ERR_clear_error();
	rc = SSL_do_handshake(c->ssl);
	if(rc != 1) {
		int want = SSL_get_error(c->ssl, rc);
		if(want == SSL_ERROR_WANT_READ) {
			/* only re-arm the event when the direction changes */
			if(c->ssl_shake_state != comm_ssl_shake_read) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
			}
			return 1;
		}
		if(want == SSL_ERROR_WANT_WRITE) {
			if(c->ssl_shake_state != comm_ssl_shake_write) {
				c->ssl_shake_state = comm_ssl_shake_write;
				comm_point_listen_for_rw(c, 0, 1);
			}
			return 1;
		}
		if(rc == 0)
			return 0; /* closed */
		if(want == SSL_ERROR_SYSCALL) {
			/* SYSCALL and errno==0 means closed uncleanly */
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err("ssl handshake failed");
		log_addr(1, "ssl handshake failed", &c->repinfo.addr,
			c->repinfo.addrlen);
		return 0;
	}

	/* this is where peer verification could take place */
	if(!ssl_handshake_check_peer(c))
		return 0;

	/* setup listen rw correctly */
	if(!c->tcp_is_reading) {
		comm_point_listen_for_rw(c, 1, 1);
	} else if(c->ssl_shake_state != comm_ssl_shake_read) {
		comm_point_listen_for_rw(c, 1, 0);
	}
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
1107
#ifdef HAVE_SSL
/**
 * Decide what the read handler returns after SSL_read gave r <= 0.
 * @param c: comm point that was being read.
 * @param r: the result of SSL_read.
 * @return 1 to keep the connection and wait for more events, 0 to close.
 */
static int
ssl_handle_read_failure(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* the read needs the socket to become writable first */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Continues a pending handshake first. Then reads the two byte length
 * prefix, checks it against the buffer capacity and LDNS_HEADER_SIZE,
 * and reads message bytes until the buffer limit is reached, at which
 * point tcp_callback_reader is called.
 * @param c: comm point with ssl.
 * @return 0 if the connection must be closed, 1 otherwise. Without
 *	HAVE_SSL always 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int got;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		size_t msglen;
		/* read length bytes */
		ERR_clear_error();
		got = SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
			c->tcp_byte_count), (int)(sizeof(uint16_t) -
			c->tcp_byte_count));
		if(got <= 0)
			return ssl_handle_read_failure(c, got);
		c->tcp_byte_count += got;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		msglen = sldns_buffer_read_u16_at(c->buffer, 0);
		if(msglen > sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer, msglen);
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	got = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(got <= 0)
		return ssl_handle_read_failure(c, got);
	sldns_buffer_skip(c->buffer, (ssize_t)got);
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		/* the whole message has arrived */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1194
#ifdef HAVE_SSL
/**
 * Decide what the write handler returns after SSL_write gave r <= 0.
 * When the write needs the socket to become readable first, the state is
 * set to comm_ssl_shake_hs_read, the state that ssl_handshake() resolves
 * by going back to writing. Using comm_ssl_shake_read here would make the
 * next event rerun the initial handshake path instead.
 * @param c: comm point that was being written.
 * @param r: the result of SSL_write.
 * @return 1 to keep the connection and wait for more events, 0 to close.
 */
static int
ssl_handle_write_failure(struct comm_point* c, int r)
{
	int want = SSL_get_error(c->ssl, r);
	if(want == SSL_ERROR_ZERO_RETURN) {
		return 0; /* closed */
	} else if(want == SSL_ERROR_WANT_READ) {
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	} else if(want == SSL_ERROR_WANT_WRITE) {
		return 1; /* write more later */
	} else if(want == SSL_ERROR_SYSCALL) {
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("could not SSL_write");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Continues a pending handshake first. Then writes the two byte length
 * prefix (the buffer limit in network byte order) followed by the buffer
 * contents. Partial writes are allowed; once nothing remains in the
 * buffer, tcp_callback_writer is called.
 * @param c: comm point with ssl, its buffer holds the message to send.
 * @return 0 if the connection must be closed, 1 otherwise. Without
 *	HAVE_SSL always 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		uint16_t len = htons((uint16_t)sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		/* send the part of the length prefix not yet written */
		r = SSL_write(c->ssl,
			(void*)(((uint8_t*)&len)+c->tcp_byte_count),
			(int)(sizeof(uint16_t)-c->tcp_byte_count));
		if(r <= 0)
			return ssl_handle_write_failure(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_handle_write_failure(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1278
1279 /** handle ssl tcp connection with dns contents */
1280 static int
1281 ssl_handle_it(struct comm_point* c)
1282 {
1283         if(c->tcp_is_reading)
1284                 return ssl_handle_read(c);
1285         return ssl_handle_write(c);
1286 }
1287
1288 /** Handle tcp reading callback. 
1289  * @param fd: file descriptor of socket.
1290  * @param c: comm point to read from into buffer.
1291  * @param short_ok: if true, very short packets are OK (for comm_local).
1292  * @return: 0 on error 
1293  */
1294 static int
1295 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1296 {
1297         ssize_t r;
1298         log_assert(c->type == comm_tcp || c->type == comm_local);
1299         if(c->ssl)
1300                 return ssl_handle_it(c);
1301         if(!c->tcp_is_reading)
1302                 return 0;
1303
1304         log_assert(fd != -1);
1305         if(c->tcp_byte_count < sizeof(uint16_t)) {
1306                 /* read length bytes */
1307                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1308                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1309                 if(r == 0)
1310                         return 0;
1311                 else if(r == -1) {
1312 #ifndef USE_WINSOCK
1313                         if(errno == EINTR || errno == EAGAIN)
1314                                 return 1;
1315 #ifdef ECONNRESET
1316                         if(errno == ECONNRESET && verbosity < 2)
1317                                 return 0; /* silence reset by peer */
1318 #endif
1319                         log_err_addr("read (in tcp s)", strerror(errno),
1320                                 &c->repinfo.addr, c->repinfo.addrlen);
1321 #else /* USE_WINSOCK */
1322                         if(WSAGetLastError() == WSAECONNRESET)
1323                                 return 0;
1324                         if(WSAGetLastError() == WSAEINPROGRESS)
1325                                 return 1;
1326                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1327                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1328                                         UB_EV_READ);
1329                                 return 1;
1330                         }
1331                         log_err_addr("read (in tcp s)", 
1332                                 wsa_strerror(WSAGetLastError()),
1333                                 &c->repinfo.addr, c->repinfo.addrlen);
1334 #endif
1335                         return 0;
1336                 } 
1337                 c->tcp_byte_count += r;
1338                 if(c->tcp_byte_count != sizeof(uint16_t))
1339                         return 1;
1340                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1341                         sldns_buffer_capacity(c->buffer)) {
1342                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1343                         return 0;
1344                 }
1345                 sldns_buffer_set_limit(c->buffer, 
1346                         sldns_buffer_read_u16_at(c->buffer, 0));
1347                 if(!short_ok && 
1348                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1349                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1350                         return 0;
1351                 }
1352                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1353                         (int)sldns_buffer_limit(c->buffer));
1354         }
1355
1356         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1357         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1358                 sldns_buffer_remaining(c->buffer), 0);
1359         if(r == 0) {
1360                 return 0;
1361         } else if(r == -1) {
1362 #ifndef USE_WINSOCK
1363                 if(errno == EINTR || errno == EAGAIN)
1364                         return 1;
1365                 log_err_addr("read (in tcp r)", strerror(errno),
1366                         &c->repinfo.addr, c->repinfo.addrlen);
1367 #else /* USE_WINSOCK */
1368                 if(WSAGetLastError() == WSAECONNRESET)
1369                         return 0;
1370                 if(WSAGetLastError() == WSAEINPROGRESS)
1371                         return 1;
1372                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1373                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1374                         return 1;
1375                 }
1376                 log_err_addr("read (in tcp r)",
1377                         wsa_strerror(WSAGetLastError()),
1378                         &c->repinfo.addr, c->repinfo.addrlen);
1379 #endif
1380                 return 0;
1381         }
1382         sldns_buffer_skip(c->buffer, r);
1383         if(sldns_buffer_remaining(c->buffer) <= 0) {
1384                 tcp_callback_reader(c);
1385         }
1386         return 1;
1387 }
1388
1389 /** 
1390  * Handle tcp writing callback. 
1391  * @param fd: file descriptor of socket.
1392  * @param c: comm point to write buffer out of.
1393  * @return: 0 on error
1394  */
1395 static int
1396 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1397 {
1398         ssize_t r;
1399         struct sldns_buffer *buffer;
1400         log_assert(c->type == comm_tcp);
1401 #ifdef USE_DNSCRYPT
1402         buffer = c->dnscrypt_buffer;
1403 #else
1404         buffer = c->buffer;
1405 #endif
1406         if(c->tcp_is_reading && !c->ssl)
1407                 return 0;
1408         log_assert(fd != -1);
1409         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1410                 /* check for pending error from nonblocking connect */
1411                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1412                 int error = 0;
1413                 socklen_t len = (socklen_t)sizeof(error);
1414                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1415                         &len) < 0){
1416 #ifndef USE_WINSOCK
1417                         error = errno; /* on solaris errno is error */
1418 #else /* USE_WINSOCK */
1419                         error = WSAGetLastError();
1420 #endif
1421                 }
1422 #ifndef USE_WINSOCK
1423 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1424                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1425                         return 1; /* try again later */
1426                 else
1427 #endif
1428                 if(error != 0 && verbosity < 2)
1429                         return 0; /* silence lots of chatter in the logs */
1430                 else if(error != 0) {
1431                         log_err_addr("tcp connect", strerror(error),
1432                                 &c->repinfo.addr, c->repinfo.addrlen);
1433 #else /* USE_WINSOCK */
1434                 /* examine error */
1435                 if(error == WSAEINPROGRESS)
1436                         return 1;
1437                 else if(error == WSAEWOULDBLOCK) {
1438                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1439                         return 1;
1440                 } else if(error != 0 && verbosity < 2)
1441                         return 0;
1442                 else if(error != 0) {
1443                         log_err_addr("tcp connect", wsa_strerror(error),
1444                                 &c->repinfo.addr, c->repinfo.addrlen);
1445 #endif /* USE_WINSOCK */
1446                         return 0;
1447                 }
1448         }
1449         if(c->ssl)
1450                 return ssl_handle_it(c);
1451
1452 #ifdef USE_MSG_FASTOPEN
1453         /* Only try this on first use of a connection that uses tfo, 
1454            otherwise fall through to normal write */
1455         /* Also, TFO support on WINDOWS not implemented at the moment */
1456         if(c->tcp_do_fastopen == 1) {
1457                 /* this form of sendmsg() does both a connect() and send() so need to
1458                    look for various flavours of error*/
1459                 uint16_t len = htons(sldns_buffer_limit(buffer));
1460                 struct msghdr msg;
1461                 struct iovec iov[2];
1462                 c->tcp_do_fastopen = 0;
1463                 memset(&msg, 0, sizeof(msg));
1464                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1465                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1466                 iov[1].iov_base = sldns_buffer_begin(buffer);
1467                 iov[1].iov_len = sldns_buffer_limit(buffer);
1468                 log_assert(iov[0].iov_len > 0);
1469                 log_assert(iov[1].iov_len > 0);
1470                 msg.msg_name = &c->repinfo.addr;
1471                 msg.msg_namelen = c->repinfo.addrlen;
1472                 msg.msg_iov = iov;
1473                 msg.msg_iovlen = 2;
1474                 r = sendmsg(fd, &msg, MSG_FASTOPEN);
1475                 if (r == -1) {
1476 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1477                         /* Handshake is underway, maybe because no TFO cookie available.
1478                            Come back to write the message*/
1479                         if(errno == EINPROGRESS || errno == EWOULDBLOCK)
1480                                 return 1;
1481 #endif
1482                         if(errno == EINTR || errno == EAGAIN)
1483                                 return 1;
1484                         /* Not handling EISCONN here as shouldn't ever hit that case.*/
1485                         if(errno != EPIPE && errno != 0 && verbosity < 2)
1486                                 return 0; /* silence lots of chatter in the logs */
1487                         if(errno != EPIPE && errno != 0) {
1488                                 log_err_addr("tcp sendmsg", strerror(errno),
1489                                         &c->repinfo.addr, c->repinfo.addrlen);
1490                                 return 0;
1491                         }
1492                         /* fallthrough to nonFASTOPEN
1493                          * (MSG_FASTOPEN on Linux 3 produces EPIPE)
1494                          * we need to perform connect() */
1495                         if(connect(fd, (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen) == -1) {
1496 #ifdef EINPROGRESS
1497                                 if(errno == EINPROGRESS)
1498                                         return 1; /* wait until connect done*/
1499 #endif
1500 #ifdef USE_WINSOCK
1501                                 if(WSAGetLastError() == WSAEINPROGRESS ||
1502                                         WSAGetLastError() == WSAEWOULDBLOCK)
1503                                         return 1; /* wait until connect done*/
1504 #endif
1505                                 if(tcp_connect_errno_needs_log(
1506                                         (struct sockaddr *)&c->repinfo.addr, c->repinfo.addrlen)) {
1507                                         log_err_addr("outgoing tcp: connect after EPIPE for fastopen",
1508                                                 strerror(errno), &c->repinfo.addr, c->repinfo.addrlen);
1509                                 }
1510                                 return 0;
1511                         }
1512
1513                 } else {
1514                         c->tcp_byte_count += r;
1515                         if(c->tcp_byte_count < sizeof(uint16_t))
1516                                 return 1;
1517                         sldns_buffer_set_position(buffer, c->tcp_byte_count - 
1518                                 sizeof(uint16_t));
1519                         if(sldns_buffer_remaining(buffer) == 0) {
1520                                 tcp_callback_writer(c);
1521                                 return 1;
1522                         }
1523                 }
1524         }
1525 #endif /* USE_MSG_FASTOPEN */
1526
1527         if(c->tcp_byte_count < sizeof(uint16_t)) {
1528                 uint16_t len = htons(sldns_buffer_limit(buffer));
1529 #ifdef HAVE_WRITEV
1530                 struct iovec iov[2];
1531                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1532                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1533                 iov[1].iov_base = sldns_buffer_begin(buffer);
1534                 iov[1].iov_len = sldns_buffer_limit(buffer);
1535                 log_assert(iov[0].iov_len > 0);
1536                 log_assert(iov[1].iov_len > 0);
1537                 r = writev(fd, iov, 2);
1538 #else /* HAVE_WRITEV */
1539                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1540                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1541 #endif /* HAVE_WRITEV */
1542                 if(r == -1) {
1543 #ifndef USE_WINSOCK
1544 #  ifdef EPIPE
1545                         if(errno == EPIPE && verbosity < 2)
1546                                 return 0; /* silence 'broken pipe' */
1547   #endif
1548                         if(errno == EINTR || errno == EAGAIN)
1549                                 return 1;
1550 #  ifdef HAVE_WRITEV
1551                         log_err_addr("tcp writev", strerror(errno),
1552                                 &c->repinfo.addr, c->repinfo.addrlen);
1553 #  else /* HAVE_WRITEV */
1554                         log_err_addr("tcp send s", strerror(errno),
1555                                 &c->repinfo.addr, c->repinfo.addrlen);
1556 #  endif /* HAVE_WRITEV */
1557 #else
1558                         if(WSAGetLastError() == WSAENOTCONN)
1559                                 return 1;
1560                         if(WSAGetLastError() == WSAEINPROGRESS)
1561                                 return 1;
1562                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1563                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1564                                         UB_EV_WRITE);
1565                                 return 1; 
1566                         }
1567                         log_err_addr("tcp send s",
1568                                 wsa_strerror(WSAGetLastError()),
1569                                 &c->repinfo.addr, c->repinfo.addrlen);
1570 #endif
1571                         return 0;
1572                 }
1573                 c->tcp_byte_count += r;
1574                 if(c->tcp_byte_count < sizeof(uint16_t))
1575                         return 1;
1576                 sldns_buffer_set_position(buffer, c->tcp_byte_count - 
1577                         sizeof(uint16_t));
1578                 if(sldns_buffer_remaining(buffer) == 0) {
1579                         tcp_callback_writer(c);
1580                         return 1;
1581                 }
1582         }
1583         log_assert(sldns_buffer_remaining(buffer) > 0);
1584         r = send(fd, (void*)sldns_buffer_current(buffer), 
1585                 sldns_buffer_remaining(buffer), 0);
1586         if(r == -1) {
1587 #ifndef USE_WINSOCK
1588                 if(errno == EINTR || errno == EAGAIN)
1589                         return 1;
1590                 log_err_addr("tcp send r", strerror(errno),
1591                         &c->repinfo.addr, c->repinfo.addrlen);
1592 #else
1593                 if(WSAGetLastError() == WSAEINPROGRESS)
1594                         return 1;
1595                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1596                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1597                         return 1; 
1598                 }
1599                 log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
1600                         &c->repinfo.addr, c->repinfo.addrlen);
1601 #endif
1602                 return 0;
1603         }
1604         sldns_buffer_skip(buffer, r);
1605
1606         if(sldns_buffer_remaining(buffer) == 0) {
1607                 tcp_callback_writer(c);
1608         }
1609         
1610         return 1;
1611 }
1612
1613 void 
1614 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1615 {
1616         struct comm_point* c = (struct comm_point*)arg;
1617         log_assert(c->type == comm_tcp);
1618         ub_comm_base_now(c->ev->base);
1619
1620 #ifdef USE_DNSCRYPT
1621         /* Initialize if this is a dnscrypt socket */
1622         if(c->tcp_parent) {
1623                 c->dnscrypt = c->tcp_parent->dnscrypt;
1624         }
1625         if(c->dnscrypt && c->dnscrypt_buffer == c->buffer) {
1626                 c->dnscrypt_buffer = sldns_buffer_new(sldns_buffer_capacity(c->buffer));
1627                 if(!c->dnscrypt_buffer) {
1628                         log_err("Could not allocate dnscrypt buffer");
1629                         reclaim_tcp_handler(c);
1630                         if(!c->tcp_do_close) {
1631                                 fptr_ok(fptr_whitelist_comm_point(
1632                                         c->callback));
1633                                 (void)(*c->callback)(c, c->cb_arg, 
1634                                         NETEVENT_CLOSED, NULL);
1635                         }
1636                         return;
1637                 }
1638         }
1639 #endif
1640
1641         if(event&UB_EV_READ) {
1642                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1643                         reclaim_tcp_handler(c);
1644                         if(!c->tcp_do_close) {
1645                                 fptr_ok(fptr_whitelist_comm_point(
1646                                         c->callback));
1647                                 (void)(*c->callback)(c, c->cb_arg, 
1648                                         NETEVENT_CLOSED, NULL);
1649                         }
1650                 }
1651                 return;
1652         }
1653         if(event&UB_EV_WRITE) {
1654                 if(!comm_point_tcp_handle_write(fd, c)) {
1655                         reclaim_tcp_handler(c);
1656                         if(!c->tcp_do_close) {
1657                                 fptr_ok(fptr_whitelist_comm_point(
1658                                         c->callback));
1659                                 (void)(*c->callback)(c, c->cb_arg, 
1660                                         NETEVENT_CLOSED, NULL);
1661                         }
1662                 }
1663                 return;
1664         }
1665         if(event&UB_EV_TIMEOUT) {
1666                 verbose(VERB_QUERY, "tcp took too long, dropped");
1667                 reclaim_tcp_handler(c);
1668                 if(!c->tcp_do_close) {
1669                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1670                         (void)(*c->callback)(c, c->cb_arg,
1671                                 NETEVENT_TIMEOUT, NULL);
1672                 }
1673                 return;
1674         }
1675         log_err("Ignored event %d for tcphdl.", event);
1676 }
1677
1678 /** Make http handler free for next assignment */
1679 static void
1680 reclaim_http_handler(struct comm_point* c)
1681 {
1682         log_assert(c->type == comm_http);
1683         if(c->ssl) {
1684 #ifdef HAVE_SSL
1685                 SSL_shutdown(c->ssl);
1686                 SSL_free(c->ssl);
1687                 c->ssl = NULL;
1688 #endif
1689         }
1690         comm_point_close(c);
1691         if(c->tcp_parent) {
1692                 c->tcp_parent->cur_tcp_count--;
1693                 c->tcp_free = c->tcp_parent->tcp_free;
1694                 c->tcp_parent->tcp_free = c;
1695                 if(!c->tcp_free) {
1696                         /* re-enable listening on accept socket */
1697                         comm_point_start_listening(c->tcp_parent, -1, -1);
1698                 }
1699         }
1700 }
1701
/** read more data for http over ssl into c->buffer.
 * @return 0 on close or error, 1 if data was read or the read has to be
 *	retried later. Without HAVE_SSL always 0. */
static int
ssl_http_read_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int got;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	got = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(got > 0) {
		/* account for the bytes that were stored */
		sldns_buffer_skip(c->buffer, (ssize_t)got);
		return 1;
	}
	switch(SSL_get_error(c->ssl, got)) {
	case SSL_ERROR_ZERO_RETURN:
		/* shutdown, closed */
		return 0;
	case SSL_ERROR_WANT_READ:
		/* read more later */
		return 1;
	case SSL_ERROR_WANT_WRITE:
		/* ssl wants to write first; wait for writability */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1738
1739 /** read more data for http */
1740 static int
1741 http_read_more(int fd, struct comm_point* c)
1742 {
1743         ssize_t r;
1744         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1745         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1746                 sldns_buffer_remaining(c->buffer), 0);
1747         if(r == 0) {
1748                 return 0;
1749         } else if(r == -1) {
1750 #ifndef USE_WINSOCK
1751                 if(errno == EINTR || errno == EAGAIN)
1752                         return 1;
1753                 log_err_addr("read (in http r)", strerror(errno),
1754                         &c->repinfo.addr, c->repinfo.addrlen);
1755 #else /* USE_WINSOCK */
1756                 if(WSAGetLastError() == WSAECONNRESET)
1757                         return 0;
1758                 if(WSAGetLastError() == WSAEINPROGRESS)
1759                         return 1;
1760                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1761                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1762                         return 1;
1763                 }
1764                 log_err_addr("read (in http r)",
1765                         wsa_strerror(WSAGetLastError()),
1766                         &c->repinfo.addr, c->repinfo.addrlen);
1767 #endif
1768                 return 0;
1769         }
1770         sldns_buffer_skip(c->buffer, r);
1771         return 1;
1772 }
1773
1774 /** return true if http header has been read (one line complete) */
1775 static int
1776 http_header_done(sldns_buffer* buf)
1777 {
1778         size_t i;
1779         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
1780                 /* there was a \r before the \n, but we ignore that */
1781                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n')
1782                         return 1;
1783         }
1784         return 0;
1785 }
1786
1787 /** return character string into buffer for header line, moves buffer
1788  * past that line and puts zero terminator into linefeed-newline */
1789 static char*
1790 http_header_line(sldns_buffer* buf)
1791 {
1792         char* result = (char*)sldns_buffer_current(buf);
1793         size_t i;
1794         for(i=sldns_buffer_position(buf); i<sldns_buffer_limit(buf); i++) {
1795                 /* terminate the string on the \r */
1796                 if((char)sldns_buffer_read_u8_at(buf, i) == '\r')
1797                         sldns_buffer_write_u8_at(buf, i, 0);
1798                 /* terminate on the \n and skip past the it and done */
1799                 if((char)sldns_buffer_read_u8_at(buf, i) == '\n') {
1800                         sldns_buffer_write_u8_at(buf, i, 0);
1801                         sldns_buffer_set_position(buf, i+1);
1802                         return result;
1803                 }
1804         }
1805         return NULL;
1806 }
1807
1808 /** move unread buffer to start and clear rest for putting the rest into it */
1809 static void
1810 http_moveover_buffer(sldns_buffer* buf)
1811 {
1812         size_t pos = sldns_buffer_position(buf);
1813         size_t len = sldns_buffer_remaining(buf);
1814         sldns_buffer_clear(buf);
1815         memmove(sldns_buffer_begin(buf), sldns_buffer_at(buf, pos), len);
1816         sldns_buffer_set_position(buf, len);
1817 }
1818
1819 /** a http header is complete, process it */
1820 static int
1821 http_process_initial_header(struct comm_point* c)
1822 {
1823         char* line = http_header_line(c->buffer);
1824         if(!line) return 1;
1825         verbose(VERB_ALGO, "http header: %s", line);
1826         if(strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
1827                 /* check returncode */
1828                 if(line[9] != '2') {
1829                         verbose(VERB_ALGO, "http bad status %s", line+9);
1830                         return 0;
1831                 }
1832         } else if(strncasecmp(line, "Content-Length: ", 16) == 0) {
1833                 if(!c->http_is_chunked)
1834                         c->tcp_byte_count = (size_t)atoi(line+16);
1835         } else if(strncasecmp(line, "Transfer-Encoding: chunked", 19+7) == 0) {
1836                 c->tcp_byte_count = 0;
1837                 c->http_is_chunked = 1;
1838         } else if(line[0] == 0) {
1839                 /* end of initial headers */
1840                 c->http_in_headers = 0;
1841                 if(c->http_is_chunked)
1842                         c->http_in_chunk_headers = 1;
1843                 /* remove header text from front of buffer
1844                  * the buffer is going to be used to return the data segment
1845                  * itself and we don't want the header to get returned
1846                  * prepended with it */
1847                 http_moveover_buffer(c->buffer);
1848                 sldns_buffer_flip(c->buffer);
1849                 return 1;
1850         }
1851         /* ignore other headers */
1852         return 1;
1853 }
1854
1855 /** a chunk header is complete, process it, return 0=fail, 1=continue next
1856  * header line, 2=done with chunked transfer*/
1857 static int
1858 http_process_chunk_header(struct comm_point* c)
1859 {
1860         char* line = http_header_line(c->buffer);
1861         if(!line) return 1;
1862         if(c->http_in_chunk_headers == 3) {
1863                 verbose(VERB_ALGO, "http chunk trailer: %s", line);
1864                 /* are we done ? */
1865                 if(line[0] == 0 && c->tcp_byte_count == 0) {
1866                         /* callback of http reader when NETEVENT_DONE,
1867                          * end of data, with no data in buffer */
1868                         sldns_buffer_set_position(c->buffer, 0);
1869                         sldns_buffer_set_limit(c->buffer, 0);
1870                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1871                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
1872                         /* return that we are done */
1873                         return 2;
1874                 }
1875                 if(line[0] == 0) {
1876                         /* continue with header of the next chunk */
1877                         c->http_in_chunk_headers = 1;
1878                         /* remove header text from front of buffer */
1879                         http_moveover_buffer(c->buffer);
1880                         sldns_buffer_flip(c->buffer);
1881                         return 1;
1882                 }
1883                 /* ignore further trail headers */
1884                 return 1;
1885         }
1886         verbose(VERB_ALGO, "http chunk header: %s", line);
1887         if(c->http_in_chunk_headers == 1) {
1888                 /* read chunked start line */
1889                 char* end = NULL;
1890                 c->tcp_byte_count = (size_t)strtol(line, &end, 16);
1891                 if(end == line)
1892                         return 0;
1893                 c->http_in_chunk_headers = 0;
1894                 /* remove header text from front of buffer */
1895                 http_moveover_buffer(c->buffer);
1896                 sldns_buffer_flip(c->buffer);
1897                 if(c->tcp_byte_count == 0) {
1898                         /* done with chunks, process chunk_trailer lines */
1899                         c->http_in_chunk_headers = 3;
1900                 }
1901                 return 1;
1902         }
1903         /* ignore other headers */
1904         return 1;
1905 }
1906
1907 /** handle nonchunked data segment */
1908 static int
1909 http_nonchunk_segment(struct comm_point* c)
1910 {
1911         /* c->buffer at position..limit has new data we read in.
1912          * the buffer itself is full of nonchunked data.
1913          * we are looking to read tcp_byte_count more data
1914          * and then the transfer is done. */
1915         size_t remainbufferlen;
1916         size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
1917         if(c->tcp_byte_count <= got_now) {
1918                 /* done, this is the last data fragment */
1919                 c->http_stored = 0;
1920                 sldns_buffer_set_position(c->buffer, 0);
1921                 fptr_ok(fptr_whitelist_comm_point(c->callback));
1922                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_DONE, NULL);
1923                 return 1;
1924         }
1925         c->tcp_byte_count -= got_now;
1926         /* if we have the buffer space,
1927          * read more data collected into the buffer */
1928         remainbufferlen = sldns_buffer_capacity(c->buffer) -
1929                 sldns_buffer_limit(c->buffer);
1930         if(remainbufferlen >= c->tcp_byte_count ||
1931                 remainbufferlen >= 2048) {
1932                 size_t total = sldns_buffer_limit(c->buffer);
1933                 sldns_buffer_clear(c->buffer);
1934                 sldns_buffer_set_position(c->buffer, total);
1935                 c->http_stored = total;
1936                 /* return and wait to read more */
1937                 return 1;
1938         }
1939         /* call callback with this data amount, then
1940          * wait for more */
1941         c->http_stored = 0;
1942         sldns_buffer_set_position(c->buffer, 0);
1943         fptr_ok(fptr_whitelist_comm_point(c->callback));
1944         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
1945         /* c->callback has to buffer_clear(c->buffer). */
1946         /* return and wait to read more */
1947         return 1;
1948 }
1949
1950 /** handle nonchunked data segment, return 0=fail, 1=wait, 2=process more */
1951 static int
1952 http_chunked_segment(struct comm_point* c)
1953 {
1954         /* the c->buffer has from position..limit new data we read. */
1955         /* the current chunk has length tcp_byte_count.
1956          * once we read that read more chunk headers.
1957          */
1958         size_t remainbufferlen;
1959         size_t got_now = sldns_buffer_limit(c->buffer) - c->http_stored;
1960         if(c->tcp_byte_count <= got_now) {
1961                 /* the chunk has completed (with perhaps some extra data
1962                  * from next chunk header and next chunk) */
1963                 /* save too much info into temp buffer */
1964                 size_t fraglen;
1965                 struct comm_reply repinfo;
1966                 c->http_stored = 0;
1967                 sldns_buffer_skip(c->buffer, (ssize_t)c->tcp_byte_count);
1968                 sldns_buffer_clear(c->http_temp);
1969                 sldns_buffer_write(c->http_temp,
1970                         sldns_buffer_current(c->buffer),
1971                         sldns_buffer_remaining(c->buffer));
1972                 sldns_buffer_flip(c->http_temp);
1973
1974                 /* callback with this fragment */
1975                 fraglen = sldns_buffer_position(c->buffer);
1976                 sldns_buffer_set_position(c->buffer, 0);
1977                 sldns_buffer_set_limit(c->buffer, fraglen);
1978                 repinfo = c->repinfo;
1979                 fptr_ok(fptr_whitelist_comm_point(c->callback));
1980                 (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &repinfo);
1981                 /* c->callback has to buffer_clear(). */
1982
1983                 /* is commpoint deleted? */
1984                 if(!repinfo.c) {
1985                         return 1;
1986                 }
1987                 /* copy waiting info */
1988                 sldns_buffer_clear(c->buffer);
1989                 sldns_buffer_write(c->buffer,
1990                         sldns_buffer_begin(c->http_temp),
1991                         sldns_buffer_remaining(c->http_temp));
1992                 sldns_buffer_flip(c->buffer);
1993                 /* process end of chunk trailer header lines, until
1994                  * an empty line */
1995                 c->http_in_chunk_headers = 3;
1996                 /* process more data in buffer (if any) */
1997                 return 2;
1998         }
1999         c->tcp_byte_count -= got_now;
2000
2001         /* if we have the buffer space,
2002          * read more data collected into the buffer */
2003         remainbufferlen = sldns_buffer_capacity(c->buffer) -
2004                 sldns_buffer_limit(c->buffer);
2005         if(remainbufferlen >= c->tcp_byte_count ||
2006                 remainbufferlen >= 2048) {
2007                 size_t total = sldns_buffer_limit(c->buffer);
2008                 sldns_buffer_clear(c->buffer);
2009                 sldns_buffer_set_position(c->buffer, total);
2010                 c->http_stored = total;
2011                 /* return and wait to read more */
2012                 return 1;
2013         }
2014         
2015         /* callback of http reader for a new part of the data */
2016         c->http_stored = 0;
2017         sldns_buffer_set_position(c->buffer, 0);
2018         fptr_ok(fptr_whitelist_comm_point(c->callback));
2019         (void)(*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, NULL);
2020         /* c->callback has to buffer_clear(c->buffer). */
2021         /* return and wait to read more */
2022         return 1;
2023 }
2024
2025 /**
2026  * Handle http reading callback. 
2027  * @param fd: file descriptor of socket.
2028  * @param c: comm point to read from into buffer.
2029  * @return: 0 on error 
2030  */
2031 static int
2032 comm_point_http_handle_read(int fd, struct comm_point* c)
2033 {
2034         log_assert(c->type == comm_http);
2035         log_assert(fd != -1);
2036
2037         /* if we are in ssl handshake, handle SSL handshake */
2038 #ifdef HAVE_SSL
2039         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2040                 if(!ssl_handshake(c))
2041                         return 0;
2042                 if(c->ssl_shake_state != comm_ssl_shake_none)
2043                         return 1;
2044         }
2045 #endif /* HAVE_SSL */
2046
2047         if(!c->tcp_is_reading)
2048                 return 1;
2049         /* read more data */
2050         if(c->ssl) {
2051                 if(!ssl_http_read_more(c))
2052                         return 0;
2053         } else {
2054                 if(!http_read_more(fd, c))
2055                         return 0;
2056         }
2057
2058         sldns_buffer_flip(c->buffer);
2059         while(sldns_buffer_remaining(c->buffer) > 0) {
2060                 /* if we are reading headers, read more headers */
2061                 if(c->http_in_headers || c->http_in_chunk_headers) {
2062                         /* if header is done, process the header */
2063                         if(!http_header_done(c->buffer)) {
2064                                 /* copy remaining data to front of buffer
2065                                  * and set rest for writing into it */
2066                                 http_moveover_buffer(c->buffer);
2067                                 /* return and wait to read more */
2068                                 return 1;
2069                         }
2070                         if(!c->http_in_chunk_headers) {
2071                                 /* process initial headers */
2072                                 if(!http_process_initial_header(c))
2073                                         return 0;
2074                         } else {
2075                                 /* process chunk headers */
2076                                 int r = http_process_chunk_header(c);
2077                                 if(r == 0) return 0;
2078                                 if(r == 2) return 1; /* done */
2079                                 /* r == 1, continue */
2080                         }
2081                         /* see if we have more to process */
2082                         continue;
2083                 }
2084
2085                 if(!c->http_is_chunked) {
2086                         /* if we are reading nonchunks, process that*/
2087                         return http_nonchunk_segment(c);
2088                 } else {
2089                         /* if we are reading chunks, read the chunk */
2090                         int r = http_chunked_segment(c);
2091                         if(r == 0) return 0;
2092                         if(r == 1) return 1;
2093                         continue;
2094                 }
2095         }
2096         /* broke out of the loop; could not process header instead need
2097          * to read more */
2098         /* moveover any remaining data and read more data */
2099         http_moveover_buffer(c->buffer);
2100         /* return and wait to read more */
2101         return 1;
2102 }
2103
2104 /** check pending connect for http */
2105 static int
2106 http_check_connect(int fd, struct comm_point* c)
2107 {
2108         /* check for pending error from nonblocking connect */
2109         /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
2110         int error = 0;
2111         socklen_t len = (socklen_t)sizeof(error);
2112         if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
2113                 &len) < 0){
2114 #ifndef USE_WINSOCK
2115                 error = errno; /* on solaris errno is error */
2116 #else /* USE_WINSOCK */
2117                 error = WSAGetLastError();
2118 #endif
2119         }
2120 #ifndef USE_WINSOCK
2121 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
2122         if(error == EINPROGRESS || error == EWOULDBLOCK)
2123                 return 1; /* try again later */
2124         else
2125 #endif
2126         if(error != 0 && verbosity < 2)
2127                 return 0; /* silence lots of chatter in the logs */
2128         else if(error != 0) {
2129                 log_err_addr("http connect", strerror(error),
2130                         &c->repinfo.addr, c->repinfo.addrlen);
2131 #else /* USE_WINSOCK */
2132         /* examine error */
2133         if(error == WSAEINPROGRESS)
2134                 return 1;
2135         else if(error == WSAEWOULDBLOCK) {
2136                 ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2137                 return 1;
2138         } else if(error != 0 && verbosity < 2)
2139                 return 0;
2140         else if(error != 0) {
2141                 log_err_addr("http connect", wsa_strerror(error),
2142                         &c->repinfo.addr, c->repinfo.addrlen);
2143 #endif /* USE_WINSOCK */
2144                 return 0;
2145         }
2146         /* keep on processing this socket */
2147         return 2;
2148 }
2149
/** write more data for http over ssl from c->buffer.
 * @return 0 on close or error, 1 if data was written or the write has to
 *	be retried later. Without HAVE_SSL always 0. */
static int
ssl_http_write_more(struct comm_point* c)
{
#ifdef HAVE_SSL
	int sent;
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	sent = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(sent > 0) {
		/* account for the bytes that went out */
		sldns_buffer_skip(c->buffer, (ssize_t)sent);
		return 1;
	}
	switch(SSL_get_error(c->ssl, sent)) {
	case SSL_ERROR_ZERO_RETURN:
		/* closed */
		return 0;
	case SSL_ERROR_WANT_READ:
		/* ssl wants to read first; wait for read condition */
		c->ssl_shake_state = comm_ssl_shake_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1;
	case SSL_ERROR_WANT_WRITE:
		/* write more later */
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
2186
2187 /** write more data for http */
2188 static int
2189 http_write_more(int fd, struct comm_point* c)
2190 {
2191         ssize_t r;
2192         log_assert(sldns_buffer_remaining(c->buffer) > 0);
2193         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
2194                 sldns_buffer_remaining(c->buffer), 0);
2195         if(r == -1) {
2196 #ifndef USE_WINSOCK
2197                 if(errno == EINTR || errno == EAGAIN)
2198                         return 1;
2199                 log_err_addr("http send r", strerror(errno),
2200                         &c->repinfo.addr, c->repinfo.addrlen);
2201 #else
2202                 if(WSAGetLastError() == WSAEINPROGRESS)
2203                         return 1;
2204                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
2205                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
2206                         return 1; 
2207                 }
2208                 log_err_addr("http send r", wsa_strerror(WSAGetLastError()),
2209                         &c->repinfo.addr, c->repinfo.addrlen);
2210 #endif
2211                 return 0;
2212         }
2213         sldns_buffer_skip(c->buffer, r);
2214         return 1;
2215 }
2216
2217 /** 
2218  * Handle http writing callback. 
2219  * @param fd: file descriptor of socket.
2220  * @param c: comm point to write buffer out of.
2221  * @return: 0 on error
2222  */
2223 static int
2224 comm_point_http_handle_write(int fd, struct comm_point* c)
2225 {
2226         log_assert(c->type == comm_http);
2227         log_assert(fd != -1);
2228
2229         /* check pending connect errors, if that fails, we wait for more,
2230          * or we can continue to write contents */
2231         if(c->tcp_check_nb_connect) {
2232                 int r = http_check_connect(fd, c);
2233                 if(r == 0) return 0;
2234                 if(r == 1) return 1;
2235                 c->tcp_check_nb_connect = 0;
2236         }
2237         /* if we are in ssl handshake, handle SSL handshake */
2238 #ifdef HAVE_SSL
2239         if(c->ssl && c->ssl_shake_state != comm_ssl_shake_none) {
2240                 if(!ssl_handshake(c))
2241                         return 0;
2242                 if(c->ssl_shake_state != comm_ssl_shake_none)
2243                         return 1;
2244         }
2245 #endif /* HAVE_SSL */
2246         if(c->tcp_is_reading)
2247                 return 1;
2248         /* if we are writing, write more */
2249         if(c->ssl) {
2250                 if(!ssl_http_write_more(c))
2251                         return 0;
2252         } else {
2253                 if(!http_write_more(fd, c))
2254                         return 0;
2255         }
2256
2257         /* we write a single buffer contents, that can contain
2258          * the http request, and then flip to read the results */
2259         /* see if write is done */
2260         if(sldns_buffer_remaining(c->buffer) == 0) {
2261                 sldns_buffer_clear(c->buffer);
2262                 if(c->tcp_do_toggle_rw)
2263                         c->tcp_is_reading = 1;
2264                 c->tcp_byte_count = 0;
2265                 /* switch from listening(write) to listening(read) */
2266                 comm_point_stop_listening(c);
2267                 comm_point_start_listening(c, -1, -1);
2268         }
2269         return 1;
2270 }
2271
2272 void 
2273 comm_point_http_handle_callback(int fd, short event, void* arg)
2274 {
2275         struct comm_point* c = (struct comm_point*)arg;
2276         log_assert(c->type == comm_http);
2277         ub_comm_base_now(c->ev->base);
2278
2279         if(event&UB_EV_READ) {
2280                 if(!comm_point_http_handle_read(fd, c)) {
2281                         reclaim_http_handler(c);
2282                         if(!c->tcp_do_close) {
2283                                 fptr_ok(fptr_whitelist_comm_point(
2284                                         c->callback));
2285                                 (void)(*c->callback)(c, c->cb_arg, 
2286                                         NETEVENT_CLOSED, NULL);
2287                         }
2288                 }
2289                 return;
2290         }
2291         if(event&UB_EV_WRITE) {
2292                 if(!comm_point_http_handle_write(fd, c)) {
2293                         reclaim_http_handler(c);
2294                         if(!c->tcp_do_close) {
2295                                 fptr_ok(fptr_whitelist_comm_point(
2296                                         c->callback));
2297                                 (void)(*c->callback)(c, c->cb_arg, 
2298                                         NETEVENT_CLOSED, NULL);
2299                         }
2300                 }
2301                 return;
2302         }
2303         if(event&UB_EV_TIMEOUT) {
2304                 verbose(VERB_QUERY, "http took too long, dropped");
2305                 reclaim_http_handler(c);
2306                 if(!c->tcp_do_close) {
2307                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2308                         (void)(*c->callback)(c, c->cb_arg,
2309                                 NETEVENT_TIMEOUT, NULL);
2310                 }
2311                 return;
2312         }
2313         log_err("Ignored event %d for httphdl.", event);
2314 }
2315
2316 void comm_point_local_handle_callback(int fd, short event, void* arg)
2317 {
2318         struct comm_point* c = (struct comm_point*)arg;
2319         log_assert(c->type == comm_local);
2320         ub_comm_base_now(c->ev->base);
2321
2322         if(event&UB_EV_READ) {
2323                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
2324                         fptr_ok(fptr_whitelist_comm_point(c->callback));
2325                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
2326                                 NULL);
2327                 }
2328                 return;
2329         }
2330         log_err("Ignored event %d for localhdl.", event);
2331 }
2332
2333 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
2334         short event, void* arg)
2335 {
2336         struct comm_point* c = (struct comm_point*)arg;
2337         int err = NETEVENT_NOERROR;
2338         log_assert(c->type == comm_raw);
2339         ub_comm_base_now(c->ev->base);
2340         
2341         if(event&UB_EV_TIMEOUT)
2342                 err = NETEVENT_TIMEOUT;
2343         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
2344         (void)(*c->callback)(c, c->cb_arg, err, NULL);
2345 }
2346
2347 struct comm_point* 
2348 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
2349         comm_point_callback_type* callback, void* callback_arg)
2350 {
2351         struct comm_point* c = (struct comm_point*)calloc(1,
2352                 sizeof(struct comm_point));
2353         short evbits;
2354         if(!c)
2355                 return NULL;
2356         c->ev = (struct internal_event*)calloc(1,
2357                 sizeof(struct internal_event));
2358         if(!c->ev) {
2359                 free(c);
2360                 return NULL;
2361         }
2362         c->ev->base = base;
2363         c->fd = fd;
2364         c->buffer = buffer;
2365         c->timeout = NULL;
2366         c->tcp_is_reading = 0;
2367         c->tcp_byte_count = 0;
2368         c->tcp_parent = NULL;
2369         c->max_tcp_count = 0;
2370         c->cur_tcp_count = 0;
2371         c->tcp_handlers = NULL;
2372         c->tcp_free = NULL;
2373         c->type = comm_udp;
2374         c->tcp_do_close = 0;
2375         c->do_not_close = 0;
2376         c->tcp_do_toggle_rw = 0;
2377         c->tcp_check_nb_connect = 0;
2378 #ifdef USE_MSG_FASTOPEN
2379         c->tcp_do_fastopen = 0;
2380 #endif
2381 #ifdef USE_DNSCRYPT
2382         c->dnscrypt = 0;
2383         c->dnscrypt_buffer = buffer;
2384 #endif
2385         c->inuse = 0;
2386         c->callback = callback;
2387         c->cb_arg = callback_arg;
2388         evbits = UB_EV_READ | UB_EV_PERSIST;
2389         /* ub_event stuff */
2390         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2391                 comm_point_udp_callback, c);
2392         if(c->ev->ev == NULL) {
2393                 log_err("could not baseset udp event");
2394                 comm_point_delete(c);
2395                 return NULL;
2396         }
2397         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
2398                 log_err("could not add udp event");
2399                 comm_point_delete(c);
2400                 return NULL;
2401         }
2402         return c;
2403 }
2404
2405 struct comm_point* 
2406 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
2407         sldns_buffer* buffer, 
2408         comm_point_callback_type* callback, void* callback_arg)
2409 {
2410         struct comm_point* c = (struct comm_point*)calloc(1,
2411                 sizeof(struct comm_point));
2412         short evbits;
2413         if(!c)
2414                 return NULL;
2415         c->ev = (struct internal_event*)calloc(1,
2416                 sizeof(struct internal_event));
2417         if(!c->ev) {
2418                 free(c);
2419                 return NULL;
2420         }
2421         c->ev->base = base;
2422         c->fd = fd;
2423         c->buffer = buffer;
2424         c->timeout = NULL;
2425         c->tcp_is_reading = 0;
2426         c->tcp_byte_count = 0;
2427         c->tcp_parent = NULL;
2428         c->max_tcp_count = 0;
2429         c->cur_tcp_count = 0;
2430         c->tcp_handlers = NULL;
2431         c->tcp_free = NULL;
2432         c->type = comm_udp;
2433         c->tcp_do_close = 0;
2434         c->do_not_close = 0;
2435 #ifdef USE_DNSCRYPT
2436         c->dnscrypt = 0;
2437         c->dnscrypt_buffer = buffer;
2438 #endif
2439         c->inuse = 0;
2440         c->tcp_do_toggle_rw = 0;
2441         c->tcp_check_nb_connect = 0;
2442 #ifdef USE_MSG_FASTOPEN
2443         c->tcp_do_fastopen = 0;
2444 #endif
2445         c->callback = callback;
2446         c->cb_arg = callback_arg;
2447         evbits = UB_EV_READ | UB_EV_PERSIST;
2448         /* ub_event stuff */
2449         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2450                 comm_point_udp_ancil_callback, c);
2451         if(c->ev->ev == NULL) {
2452                 log_err("could not baseset udp event");
2453                 comm_point_delete(c);
2454                 return NULL;
2455         }
2456         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
2457                 log_err("could not add udp event");
2458                 comm_point_delete(c);
2459                 return NULL;
2460         }
2461         return c;
2462 }
2463
2464 static struct comm_point* 
2465 comm_point_create_tcp_handler(struct comm_base *base, 
2466         struct comm_point* parent, size_t bufsize,
2467         comm_point_callback_type* callback, void* callback_arg)
2468 {
2469         struct comm_point* c = (struct comm_point*)calloc(1,
2470                 sizeof(struct comm_point));
2471         short evbits;
2472         if(!c)
2473                 return NULL;
2474         c->ev = (struct internal_event*)calloc(1,
2475                 sizeof(struct internal_event));
2476         if(!c->ev) {
2477                 free(c);
2478                 return NULL;
2479         }
2480         c->ev->base = base;
2481         c->fd = -1;
2482         c->buffer = sldns_buffer_new(bufsize);
2483         if(!c->buffer) {
2484                 free(c->ev);
2485                 free(c);
2486                 return NULL;
2487         }
2488         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
2489         if(!c->timeout) {
2490                 sldns_buffer_free(c->buffer);
2491                 free(c->ev);
2492                 free(c);
2493                 return NULL;
2494         }
2495         c->tcp_is_reading = 0;
2496         c->tcp_byte_count = 0;
2497         c->tcp_parent = parent;
2498         c->max_tcp_count = 0;
2499         c->cur_tcp_count = 0;
2500         c->tcp_handlers = NULL;
2501         c->tcp_free = NULL;
2502         c->type = comm_tcp;
2503         c->tcp_do_close = 0;
2504         c->do_not_close = 0;
2505         c->tcp_do_toggle_rw = 1;
2506         c->tcp_check_nb_connect = 0;
2507 #ifdef USE_MSG_FASTOPEN
2508         c->tcp_do_fastopen = 0;
2509 #endif
2510 #ifdef USE_DNSCRYPT
2511         c->dnscrypt = 0;
2512         /* We don't know just yet if this is a dnscrypt channel. Allocation
2513          * will be done when handling the callback. */
2514         c->dnscrypt_buffer = c->buffer;
2515 #endif
2516         c->repinfo.c = c;
2517         c->callback = callback;
2518         c->cb_arg = callback_arg;
2519         /* add to parent free list */
2520         c->tcp_free = parent->tcp_free;
2521         parent->tcp_free = c;
2522         /* ub_event stuff */
2523         evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
2524         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2525                 comm_point_tcp_handle_callback, c);
2526         if(c->ev->ev == NULL)
2527         {
2528                 log_err("could not basetset tcphdl event");
2529                 parent->tcp_free = c->tcp_free;
2530                 free(c->ev);
2531                 free(c);
2532                 return NULL;
2533         }
2534         return c;
2535 }
2536
2537 struct comm_point* 
2538 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
2539         comm_point_callback_type* callback, void* callback_arg)
2540 {
2541         struct comm_point* c = (struct comm_point*)calloc(1,
2542                 sizeof(struct comm_point));
2543         short evbits;
2544         int i;
2545         /* first allocate the TCP accept listener */
2546         if(!c)
2547                 return NULL;
2548         c->ev = (struct internal_event*)calloc(1,
2549                 sizeof(struct internal_event));
2550         if(!c->ev) {
2551                 free(c);
2552                 return NULL;
2553         }
2554         c->ev->base = base;
2555         c->fd = fd;
2556         c->buffer = NULL;
2557         c->timeout = NULL;
2558         c->tcp_is_reading = 0;
2559         c->tcp_byte_count = 0;
2560         c->tcp_parent = NULL;
2561         c->max_tcp_count = num;
2562         c->cur_tcp_count = 0;
2563         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
2564                 sizeof(struct comm_point*));
2565         if(!c->tcp_handlers) {
2566                 free(c->ev);
2567                 free(c);
2568                 return NULL;
2569         }
2570         c->tcp_free = NULL;
2571         c->type = comm_tcp_accept;
2572         c->tcp_do_close = 0;
2573         c->do_not_close = 0;
2574         c->tcp_do_toggle_rw = 0;
2575         c->tcp_check_nb_connect = 0;
2576 #ifdef USE_MSG_FASTOPEN
2577         c->tcp_do_fastopen = 0;
2578 #endif
2579 #ifdef USE_DNSCRYPT
2580         c->dnscrypt = 0;
2581         c->dnscrypt_buffer = NULL;
2582 #endif
2583         c->callback = NULL;
2584         c->cb_arg = NULL;
2585         evbits = UB_EV_READ | UB_EV_PERSIST;
2586         /* ub_event stuff */
2587         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2588                 comm_point_tcp_accept_callback, c);
2589         if(c->ev->ev == NULL) {
2590                 log_err("could not baseset tcpacc event");
2591                 comm_point_delete(c);
2592                 return NULL;
2593         }
2594         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2595                 log_err("could not add tcpacc event");
2596                 comm_point_delete(c);
2597                 return NULL;
2598         }
2599         /* now prealloc the tcp handlers */
2600         for(i=0; i<num; i++) {
2601                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
2602                         c, bufsize, callback, callback_arg);
2603                 if(!c->tcp_handlers[i]) {
2604                         comm_point_delete(c);
2605                         return NULL;
2606                 }
2607         }
2608         
2609         return c;
2610 }
2611
2612 struct comm_point* 
2613 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
2614         comm_point_callback_type* callback, void* callback_arg)
2615 {
2616         struct comm_point* c = (struct comm_point*)calloc(1,
2617                 sizeof(struct comm_point));
2618         short evbits;
2619         if(!c)
2620                 return NULL;
2621         c->ev = (struct internal_event*)calloc(1,
2622                 sizeof(struct internal_event));
2623         if(!c->ev) {
2624                 free(c);
2625                 return NULL;
2626         }
2627         c->ev->base = base;
2628         c->fd = -1;
2629         c->buffer = sldns_buffer_new(bufsize);
2630         if(!c->buffer) {
2631                 free(c->ev);
2632                 free(c);
2633                 return NULL;
2634         }
2635         c->timeout = NULL;
2636         c->tcp_is_reading = 0;
2637         c->tcp_byte_count = 0;
2638         c->tcp_parent = NULL;
2639         c->max_tcp_count = 0;
2640         c->cur_tcp_count = 0;
2641         c->tcp_handlers = NULL;
2642         c->tcp_free = NULL;
2643         c->type = comm_tcp;
2644         c->tcp_do_close = 0;
2645         c->do_not_close = 0;
2646         c->tcp_do_toggle_rw = 1;
2647         c->tcp_check_nb_connect = 1;
2648 #ifdef USE_MSG_FASTOPEN
2649         c->tcp_do_fastopen = 1;
2650 #endif
2651 #ifdef USE_DNSCRYPT
2652         c->dnscrypt = 0;
2653         c->dnscrypt_buffer = c->buffer;
2654 #endif
2655         c->repinfo.c = c;
2656         c->callback = callback;
2657         c->cb_arg = callback_arg;
2658         evbits = UB_EV_PERSIST | UB_EV_WRITE;
2659         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2660                 comm_point_tcp_handle_callback, c);
2661         if(c->ev->ev == NULL)
2662         {
2663                 log_err("could not baseset tcpout event");
2664                 sldns_buffer_free(c->buffer);
2665                 free(c->ev);
2666                 free(c);
2667                 return NULL;
2668         }
2669
2670         return c;
2671 }
2672
2673 struct comm_point* 
2674 comm_point_create_http_out(struct comm_base *base, size_t bufsize,
2675         comm_point_callback_type* callback, void* callback_arg,
2676         sldns_buffer* temp)
2677 {
2678         struct comm_point* c = (struct comm_point*)calloc(1,
2679                 sizeof(struct comm_point));
2680         short evbits;
2681         if(!c)
2682                 return NULL;
2683         c->ev = (struct internal_event*)calloc(1,
2684                 sizeof(struct internal_event));
2685         if(!c->ev) {
2686                 free(c);
2687                 return NULL;
2688         }
2689         c->ev->base = base;
2690         c->fd = -1;
2691         c->buffer = sldns_buffer_new(bufsize);
2692         if(!c->buffer) {
2693                 free(c->ev);
2694                 free(c);
2695                 return NULL;
2696         }
2697         c->timeout = NULL;
2698         c->tcp_is_reading = 0;
2699         c->tcp_byte_count = 0;
2700         c->tcp_parent = NULL;
2701         c->max_tcp_count = 0;
2702         c->cur_tcp_count = 0;
2703         c->tcp_handlers = NULL;
2704         c->tcp_free = NULL;
2705         c->type = comm_http;
2706         c->tcp_do_close = 0;
2707         c->do_not_close = 0;
2708         c->tcp_do_toggle_rw = 1;
2709         c->tcp_check_nb_connect = 1;
2710         c->http_in_headers = 1;
2711         c->http_in_chunk_headers = 0;
2712         c->http_is_chunked = 0;
2713         c->http_temp = temp;
2714 #ifdef USE_MSG_FASTOPEN
2715         c->tcp_do_fastopen = 1;
2716 #endif
2717 #ifdef USE_DNSCRYPT
2718         c->dnscrypt = 0;
2719         c->dnscrypt_buffer = c->buffer;
2720 #endif
2721         c->repinfo.c = c;
2722         c->callback = callback;
2723         c->cb_arg = callback_arg;
2724         evbits = UB_EV_PERSIST | UB_EV_WRITE;
2725         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2726                 comm_point_http_handle_callback, c);
2727         if(c->ev->ev == NULL)
2728         {
2729                 log_err("could not baseset tcpout event");
2730 #ifdef HAVE_SSL
2731                 SSL_free(c->ssl);
2732 #endif
2733                 sldns_buffer_free(c->buffer);
2734                 free(c->ev);
2735                 free(c);
2736                 return NULL;
2737         }
2738
2739         return c;
2740 }
2741
2742 struct comm_point* 
2743 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
2744         comm_point_callback_type* callback, void* callback_arg)
2745 {
2746         struct comm_point* c = (struct comm_point*)calloc(1,
2747                 sizeof(struct comm_point));
2748         short evbits;
2749         if(!c)
2750                 return NULL;
2751         c->ev = (struct internal_event*)calloc(1,
2752                 sizeof(struct internal_event));
2753         if(!c->ev) {
2754                 free(c);
2755                 return NULL;
2756         }
2757         c->ev->base = base;
2758         c->fd = fd;
2759         c->buffer = sldns_buffer_new(bufsize);
2760         if(!c->buffer) {
2761                 free(c->ev);
2762                 free(c);
2763                 return NULL;
2764         }
2765         c->timeout = NULL;
2766         c->tcp_is_reading = 1;
2767         c->tcp_byte_count = 0;
2768         c->tcp_parent = NULL;
2769         c->max_tcp_count = 0;
2770         c->cur_tcp_count = 0;
2771         c->tcp_handlers = NULL;
2772         c->tcp_free = NULL;
2773         c->type = comm_local;
2774         c->tcp_do_close = 0;
2775         c->do_not_close = 1;
2776         c->tcp_do_toggle_rw = 0;
2777         c->tcp_check_nb_connect = 0;
2778 #ifdef USE_MSG_FASTOPEN
2779         c->tcp_do_fastopen = 0;
2780 #endif
2781 #ifdef USE_DNSCRYPT
2782         c->dnscrypt = 0;
2783         c->dnscrypt_buffer = c->buffer;
2784 #endif
2785         c->callback = callback;
2786         c->cb_arg = callback_arg;
2787         /* ub_event stuff */
2788         evbits = UB_EV_PERSIST | UB_EV_READ;
2789         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2790                 comm_point_local_handle_callback, c);
2791         if(c->ev->ev == NULL) {
2792                 log_err("could not baseset localhdl event");
2793                 free(c->ev);
2794                 free(c);
2795                 return NULL;
2796         }
2797         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2798                 log_err("could not add localhdl event");
2799                 ub_event_free(c->ev->ev);
2800                 free(c->ev);
2801                 free(c);
2802                 return NULL;
2803         }
2804         return c;
2805 }
2806
2807 struct comm_point* 
2808 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
2809         comm_point_callback_type* callback, void* callback_arg)
2810 {
2811         struct comm_point* c = (struct comm_point*)calloc(1,
2812                 sizeof(struct comm_point));
2813         short evbits;
2814         if(!c)
2815                 return NULL;
2816         c->ev = (struct internal_event*)calloc(1,
2817                 sizeof(struct internal_event));
2818         if(!c->ev) {
2819                 free(c);
2820                 return NULL;
2821         }
2822         c->ev->base = base;
2823         c->fd = fd;
2824         c->buffer = NULL;
2825         c->timeout = NULL;
2826         c->tcp_is_reading = 0;
2827         c->tcp_byte_count = 0;
2828         c->tcp_parent = NULL;
2829         c->max_tcp_count = 0;
2830         c->cur_tcp_count = 0;
2831         c->tcp_handlers = NULL;
2832         c->tcp_free = NULL;
2833         c->type = comm_raw;
2834         c->tcp_do_close = 0;
2835         c->do_not_close = 1;
2836         c->tcp_do_toggle_rw = 0;
2837         c->tcp_check_nb_connect = 0;
2838 #ifdef USE_MSG_FASTOPEN
2839         c->tcp_do_fastopen = 0;
2840 #endif
2841 #ifdef USE_DNSCRYPT
2842         c->dnscrypt = 0;
2843         c->dnscrypt_buffer = c->buffer;
2844 #endif
2845         c->callback = callback;
2846         c->cb_arg = callback_arg;
2847         /* ub_event stuff */
2848         if(writing)
2849                 evbits = UB_EV_PERSIST | UB_EV_WRITE;
2850         else    evbits = UB_EV_PERSIST | UB_EV_READ;
2851         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
2852                 comm_point_raw_handle_callback, c);
2853         if(c->ev->ev == NULL) {
2854                 log_err("could not baseset rawhdl event");
2855                 free(c->ev);
2856                 free(c);
2857                 return NULL;
2858         }
2859         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
2860                 log_err("could not add rawhdl event");
2861                 ub_event_free(c->ev->ev);
2862                 free(c->ev);
2863                 free(c);
2864                 return NULL;
2865         }
2866         return c;
2867 }
2868
2869 void 
2870 comm_point_close(struct comm_point* c)
2871 {
2872         if(!c)
2873                 return;
2874         if(c->fd != -1)
2875                 if(ub_event_del(c->ev->ev) != 0) {
2876                         log_err("could not event_del on close");
2877                 }
2878         /* close fd after removing from event lists, or epoll.. is messed up */
2879         if(c->fd != -1 && !c->do_not_close) {
2880                 verbose(VERB_ALGO, "close fd %d", c->fd);
2881 #ifndef USE_WINSOCK
2882                 close(c->fd);
2883 #else
2884                 closesocket(c->fd);
2885 #endif
2886         }
2887         c->fd = -1;
2888 }
2889
2890 void 
2891 comm_point_delete(struct comm_point* c)
2892 {
2893         if(!c) 
2894                 return;
2895         if((c->type == comm_tcp || c->type == comm_http) && c->ssl) {
2896 #ifdef HAVE_SSL
2897                 SSL_shutdown(c->ssl);
2898                 SSL_free(c->ssl);
2899 #endif
2900         }
2901         comm_point_close(c);
2902         if(c->tcp_handlers) {
2903                 int i;
2904                 for(i=0; i<c->max_tcp_count; i++)
2905                         comm_point_delete(c->tcp_handlers[i]);
2906                 free(c->tcp_handlers);
2907         }
2908         free(c->timeout);
2909         if(c->type == comm_tcp || c->type == comm_local || c->type == comm_http) {
2910                 sldns_buffer_free(c->buffer);
2911 #ifdef USE_DNSCRYPT
2912                 if(c->dnscrypt && c->dnscrypt_buffer != c->buffer) {
2913                         sldns_buffer_free(c->dnscrypt_buffer);
2914                 }
2915 #endif
2916         }
2917         ub_event_free(c->ev->ev);
2918         free(c->ev);
2919         free(c);
2920 }
2921
2922 void 
2923 comm_point_send_reply(struct comm_reply *repinfo)
2924 {
2925         struct sldns_buffer* buffer;
2926         log_assert(repinfo && repinfo->c);
2927 #ifdef USE_DNSCRYPT
2928         buffer = repinfo->c->dnscrypt_buffer;
2929         if(!dnsc_handle_uncurved_request(repinfo)) {
2930                 return;
2931         }
2932 #else
2933         buffer = repinfo->c->buffer;
2934 #endif
2935         if(repinfo->c->type == comm_udp) {
2936                 if(repinfo->srctype)
2937                         comm_point_send_udp_msg_if(repinfo->c, 
2938                         buffer, (struct sockaddr*)&repinfo->addr, 
2939                         repinfo->addrlen, repinfo);
2940                 else
2941                         comm_point_send_udp_msg(repinfo->c, buffer,
2942                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
2943 #ifdef USE_DNSTAP
2944                 if(repinfo->c->dtenv != NULL &&
2945                    repinfo->c->dtenv->log_client_response_messages)
2946                         dt_msg_send_client_response(repinfo->c->dtenv,
2947                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
2948 #endif
2949         } else {
2950 #ifdef USE_DNSTAP
2951                 if(repinfo->c->tcp_parent->dtenv != NULL &&
2952                    repinfo->c->tcp_parent->dtenv->log_client_response_messages)
2953                         dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
2954                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
2955 #endif
2956                 comm_point_start_listening(repinfo->c, -1,
2957                         repinfo->c->tcp_timeout_msec);
2958         }
2959 }
2960
2961 void 
2962 comm_point_drop_reply(struct comm_reply* repinfo)
2963 {
2964         if(!repinfo)
2965                 return;
2966         log_assert(repinfo && repinfo->c);
2967         log_assert(repinfo->c->type != comm_tcp_accept);
2968         if(repinfo->c->type == comm_udp)
2969                 return;
2970         reclaim_tcp_handler(repinfo->c);
2971 }
2972
2973 void 
2974 comm_point_stop_listening(struct comm_point* c)
2975 {
2976         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
2977         if(ub_event_del(c->ev->ev) != 0) {
2978                 log_err("event_del error to stoplisten");
2979         }
2980 }
2981
2982 void 
2983 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
2984 {
2985         verbose(VERB_ALGO, "comm point start listening %d", 
2986                 c->fd==-1?newfd:c->fd);
2987         if(c->type == comm_tcp_accept && !c->tcp_free) {
2988                 /* no use to start listening no free slots. */
2989                 return;
2990         }
2991         if(msec != -1 && msec != 0) {
2992                 if(!c->timeout) {
2993                         c->timeout = (struct timeval*)malloc(sizeof(
2994                                 struct timeval));
2995                         if(!c->timeout) {
2996                                 log_err("cpsl: malloc failed. No net read.");
2997                                 return;
2998                         }
2999                 }
3000                 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
3001 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
3002                 c->timeout->tv_sec = msec/1000;
3003                 c->timeout->tv_usec = (msec%1000)*1000;
3004 #endif /* S_SPLINT_S */
3005         }
3006         if(c->type == comm_tcp || c->type == comm_http) {
3007                 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
3008                 if(c->tcp_is_reading)
3009                         ub_event_add_bits(c->ev->ev, UB_EV_READ);
3010                 else    ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
3011         }
3012         if(newfd != -1) {
3013                 if(c->fd != -1) {
3014 #ifndef USE_WINSOCK
3015                         close(c->fd);
3016 #else
3017                         closesocket(c->fd);
3018 #endif
3019                 }
3020                 c->fd = newfd;
3021                 ub_event_set_fd(c->ev->ev, c->fd);
3022         }
3023         if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
3024                 log_err("event_add failed. in cpsl.");
3025         }
3026 }
3027
3028 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
3029 {
3030         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
3031         if(ub_event_del(c->ev->ev) != 0) {
3032                 log_err("event_del error to cplf");
3033         }
3034         ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
3035         if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
3036         if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
3037         if(ub_event_add(c->ev->ev, c->timeout) != 0) {
3038                 log_err("event_add failed. in cplf.");
3039         }
3040 }
3041
3042 size_t comm_point_get_mem(struct comm_point* c)
3043 {
3044         size_t s;
3045         if(!c) 
3046                 return 0;
3047         s = sizeof(*c) + sizeof(*c->ev);
3048         if(c->timeout) 
3049                 s += sizeof(*c->timeout);
3050         if(c->type == comm_tcp || c->type == comm_local) {
3051                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
3052 #ifdef USE_DNSCRYPT
3053                 s += sizeof(*c->dnscrypt_buffer);
3054                 if(c->buffer != c->dnscrypt_buffer) {
3055                         s += sldns_buffer_capacity(c->dnscrypt_buffer);
3056                 }
3057 #endif
3058         }
3059         if(c->type == comm_tcp_accept) {
3060                 int i;
3061                 for(i=0; i<c->max_tcp_count; i++)
3062                         s += comm_point_get_mem(c->tcp_handlers[i]);
3063         }
3064         return s;
3065 }
3066
3067 struct comm_timer* 
3068 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
3069 {
3070         struct internal_timer *tm = (struct internal_timer*)calloc(1,
3071                 sizeof(struct internal_timer));
3072         if(!tm) {
3073                 log_err("malloc failed");
3074                 return NULL;
3075         }
3076         tm->super.ev_timer = tm;
3077         tm->base = base;
3078         tm->super.callback = cb;
3079         tm->super.cb_arg = cb_arg;
3080         tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 
3081                 comm_timer_callback, &tm->super);
3082         if(tm->ev == NULL) {
3083                 log_err("timer_create: event_base_set failed.");
3084                 free(tm);
3085                 return NULL;
3086         }
3087         return &tm->super;
3088 }
3089
3090 void 
3091 comm_timer_disable(struct comm_timer* timer)
3092 {
3093         if(!timer)
3094                 return;
3095         ub_timer_del(timer->ev_timer->ev);
3096         timer->ev_timer->enabled = 0;
3097 }
3098
3099 void 
3100 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
3101 {
3102         log_assert(tv);
3103         if(timer->ev_timer->enabled)
3104                 comm_timer_disable(timer);
3105         if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
3106                 comm_timer_callback, timer, tv) != 0)
3107                 log_err("comm_timer_set: evtimer_add failed.");
3108         timer->ev_timer->enabled = 1;
3109 }
3110
3111 void 
3112 comm_timer_delete(struct comm_timer* timer)
3113 {
3114         if(!timer)
3115                 return;
3116         comm_timer_disable(timer);
3117         /* Free the sub struct timer->ev_timer derived from the super struct timer.
3118          * i.e. assert(timer == timer->ev_timer)
3119          */
3120         ub_event_free(timer->ev_timer->ev);
3121         free(timer->ev_timer);
3122 }
3123
3124 void 
3125 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
3126 {
3127         struct comm_timer* tm = (struct comm_timer*)arg;
3128         if(!(event&UB_EV_TIMEOUT))
3129                 return;
3130         ub_comm_base_now(tm->ev_timer->base);
3131         tm->ev_timer->enabled = 0;
3132         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
3133         (*tm->callback)(tm->cb_arg);
3134 }
3135
3136 int 
3137 comm_timer_is_set(struct comm_timer* timer)
3138 {
3139         return (int)timer->ev_timer->enabled;
3140 }
3141
3142 size_t 
3143 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer))
3144 {
3145         return sizeof(struct internal_timer);
3146 }
3147
3148 struct comm_signal* 
3149 comm_signal_create(struct comm_base* base,
3150         void (*callback)(int, void*), void* cb_arg)
3151 {
3152         struct comm_signal* com = (struct comm_signal*)malloc(
3153                 sizeof(struct comm_signal));
3154         if(!com) {
3155                 log_err("malloc failed");
3156                 return NULL;
3157         }
3158         com->base = base;
3159         com->callback = callback;
3160         com->cb_arg = cb_arg;
3161         com->ev_signal = NULL;
3162         return com;
3163 }
3164
3165 void 
3166 comm_signal_callback(int sig, short event, void* arg)
3167 {
3168         struct comm_signal* comsig = (struct comm_signal*)arg;
3169         if(!(event & UB_EV_SIGNAL))
3170                 return;
3171         ub_comm_base_now(comsig->base);
3172         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
3173         (*comsig->callback)(sig, comsig->cb_arg);
3174 }
3175
3176 int 
3177 comm_signal_bind(struct comm_signal* comsig, int sig)
3178 {
3179         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
3180                 sizeof(struct internal_signal));
3181         if(!entry) {
3182                 log_err("malloc failed");
3183                 return 0;
3184         }
3185         log_assert(comsig);
3186         /* add signal event */
3187         entry->ev = ub_signal_new(comsig->base->eb->base, sig,
3188                 comm_signal_callback, comsig);
3189         if(entry->ev == NULL) {
3190                 log_err("Could not create signal event");
3191                 free(entry);
3192                 return 0;
3193         }
3194         if(ub_signal_add(entry->ev, NULL) != 0) {
3195                 log_err("Could not add signal handler");
3196                 ub_event_free(entry->ev);
3197                 free(entry);
3198                 return 0;
3199         }
3200         /* link into list */
3201         entry->next = comsig->ev_signal;
3202         comsig->ev_signal = entry;
3203         return 1;
3204 }
3205
3206 void 
3207 comm_signal_delete(struct comm_signal* comsig)
3208 {
3209         struct internal_signal* p, *np;
3210         if(!comsig)
3211                 return;
3212         p=comsig->ev_signal;
3213         while(p) {
3214                 np = p->next;
3215                 ub_signal_del(p->ev);
3216                 ub_event_free(p->ev);
3217                 free(p);
3218                 p = np;
3219         }
3220         free(comsig);
3221 }