]> CyberLeo.Net >> Repos - FreeBSD/releng/10.2.git/blob - contrib/unbound/util/netevent.c
- Copy stable/10@285827 to releng/10.2 in preparation for 10.2-RC1
[FreeBSD/releng/10.2.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/log.h"
44 #include "util/net_help.h"
45 #include "util/fptr_wlist.h"
46 #include "ldns/pkthdr.h"
47 #include "ldns/sbuffer.h"
48 #include "dnstap/dnstap.h"
49 #ifdef HAVE_OPENSSL_SSL_H
50 #include <openssl/ssl.h>
51 #endif
52 #ifdef HAVE_OPENSSL_ERR_H
53 #include <openssl/err.h>
54 #endif
55
56 /* -------- Start of local definitions -------- */
/**
 * Fallback for platforms whose socket headers lack CMSG_ALIGN: reuse
 * _CMSG_DATA_ALIGN when present, otherwise round up to sizeof(long).
 */
#if !defined(CMSG_ALIGN)
#  if defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/** Fallback for platforms whose socket headers lack CMSG_LEN */
#if !defined(CMSG_LEN)
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/**
 * Fallback for platforms whose socket headers lack CMSG_SPACE; the header
 * part is aligned with _CMSG_HDR_ALIGN when that macro is available.
 */
#if !defined(CMSG_SPACE)
#  if defined(_CMSG_HDR_ALIGN)
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
79
/** Timeout, in seconds, applied to reading or writing a TCP query */
#define TCP_QUERY_TIMEOUT 120

/** number of UDP reads to perform per read indication from select */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
89
90 /* We define libevent structures here to hide the libevent stuff. */
91
92 #ifdef USE_MINI_EVENT
93 #  ifdef USE_WINSOCK
94 #    include "util/winsock_event.h"
95 #  else
96 #    include "util/mini_event.h"
97 #  endif /* USE_WINSOCK */
98 #else /* USE_MINI_EVENT */
99    /* we use libevent */
100 #  ifdef HAVE_EVENT_H
101 #    include <event.h>
102 #  else
103 #    include "event2/event.h"
104 #    include "event2/event_struct.h"
105 #    include "event2/event_compat.h"
106 #  endif
107 #endif /* USE_MINI_EVENT */
108
109 /**
110  * The internal event structure for keeping libevent info for the event.
111  * Possibly other structures (list, tree) this is part of.
112  */
113 struct internal_event {
114         /** the comm base */
115         struct comm_base* base;
116         /** libevent event type, alloced here */
117         struct event ev;
118 };
119
120 /**
121  * Internal base structure, so that every thread has its own events.
122  */
123 struct internal_base {
124         /** libevent event_base type. */
125         struct event_base* base;
126         /** seconds time pointer points here */
127         time_t secs;
128         /** timeval with current time */
129         struct timeval now;
130         /** the event used for slow_accept timeouts */
131         struct event slow_accept;
132         /** true if slow_accept is enabled */
133         int slow_accept_enabled;
134 };
135
136 /**
137  * Internal timer structure, to store timer event in.
138  */
139 struct internal_timer {
140         /** the comm base */
141         struct comm_base* base;
142         /** libevent event type, alloced here */
143         struct event ev;
144         /** is timer enabled */
145         uint8_t enabled;
146 };
147
148 /**
149  * Internal signal structure, to store signal event in.
150  */
151 struct internal_signal {
152         /** libevent event type, alloced here */
153         struct event ev;
154         /** next in signal list */
155         struct internal_signal* next;
156 };
157
158 /** create a tcp handler with a parent */
159 static struct comm_point* comm_point_create_tcp_handler(
160         struct comm_base *base, struct comm_point* parent, size_t bufsize,
161         comm_point_callback_t* callback, void* callback_arg);
162
163 /* -------- End of local definitions -------- */
164
165 #ifdef USE_MINI_EVENT
166 /** minievent updates the time when it blocks. */
167 #define comm_base_now(x) /* nothing to do */
168 #else /* !USE_MINI_EVENT */
169 /** fillup the time values in the event base */
170 static void
171 comm_base_now(struct comm_base* b)
172 {
173         if(gettimeofday(&b->eb->now, NULL) < 0) {
174                 log_err("gettimeofday: %s", strerror(errno));
175         }
176         b->eb->secs = (time_t)b->eb->now.tv_sec;
177 }
178 #endif /* USE_MINI_EVENT */
179
180 struct comm_base* 
181 comm_base_create(int sigs)
182 {
183         struct comm_base* b = (struct comm_base*)calloc(1,
184                 sizeof(struct comm_base));
185         if(!b)
186                 return NULL;
187         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
188         if(!b->eb) {
189                 free(b);
190                 return NULL;
191         }
192 #ifdef USE_MINI_EVENT
193         (void)sigs;
194         /* use mini event time-sharing feature */
195         b->eb->base = event_init(&b->eb->secs, &b->eb->now);
196 #else
197 #  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
198         /* libev */
199         if(sigs)
200                 b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
201         else
202                 b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
203 #  else
204         (void)sigs;
205 #    ifdef HAVE_EVENT_BASE_NEW
206         b->eb->base = event_base_new();
207 #    else
208         b->eb->base = event_init();
209 #    endif
210 #  endif
211 #endif
212         if(!b->eb->base) {
213                 free(b->eb);
214                 free(b);
215                 return NULL;
216         }
217         comm_base_now(b);
218         /* avoid event_get_method call which causes crashes even when
219          * not printing, because its result is passed */
220         verbose(VERB_ALGO, 
221 #if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
222                 "libev"
223 #elif defined(USE_MINI_EVENT)
224                 "event "
225 #else
226                 "libevent "
227 #endif
228                 "%s uses %s method.", 
229                 event_get_version(), 
230 #ifdef HAVE_EVENT_BASE_GET_METHOD
231                 event_base_get_method(b->eb->base)
232 #else
233                 "not_obtainable"
234 #endif
235         );
236         return b;
237 }
238
239 struct comm_base*
240 comm_base_create_event(struct event_base* base)
241 {
242         struct comm_base* b = (struct comm_base*)calloc(1,
243                 sizeof(struct comm_base));
244         if(!b)
245                 return NULL;
246         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
247         if(!b->eb) {
248                 free(b);
249                 return NULL;
250         }
251         b->eb->base = base;
252         comm_base_now(b);
253         return b;
254 }
255
256 void 
257 comm_base_delete(struct comm_base* b)
258 {
259         if(!b)
260                 return;
261         if(b->eb->slow_accept_enabled) {
262                 if(event_del(&b->eb->slow_accept) != 0) {
263                         log_err("could not event_del slow_accept");
264                 }
265         }
266 #ifdef USE_MINI_EVENT
267         event_base_free(b->eb->base);
268 #elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
269         /* only libevent 1.2+ has it, but in 1.2 it is broken - 
270            assertion fails on signal handling ev that is not deleted
271            in libevent 1.3c (event_base_once appears) this is fixed. */
272         event_base_free(b->eb->base);
273 #endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
274         b->eb->base = NULL;
275         free(b->eb);
276         free(b);
277 }
278
279 void 
280 comm_base_delete_no_base(struct comm_base* b)
281 {
282         if(!b)
283                 return;
284         if(b->eb->slow_accept_enabled) {
285                 if(event_del(&b->eb->slow_accept) != 0) {
286                         log_err("could not event_del slow_accept");
287                 }
288         }
289         b->eb->base = NULL;
290         free(b->eb);
291         free(b);
292 }
293
294 void 
295 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
296 {
297         *tt = &b->eb->secs;
298         *tv = &b->eb->now;
299 }
300
301 void 
302 comm_base_dispatch(struct comm_base* b)
303 {
304         int retval;
305         retval = event_base_dispatch(b->eb->base);
306         if(retval != 0) {
307                 fatal_exit("event_dispatch returned error %d, "
308                         "errno is %s", retval, strerror(errno));
309         }
310 }
311
312 void comm_base_exit(struct comm_base* b)
313 {
314         if(event_base_loopexit(b->eb->base, NULL) != 0) {
315                 log_err("Could not loopexit");
316         }
317 }
318
319 void comm_base_set_slow_accept_handlers(struct comm_base* b,
320         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
321 {
322         b->stop_accept = stop_acc;
323         b->start_accept = start_acc;
324         b->cb_arg = arg;
325 }
326
327 struct event_base* comm_base_internal(struct comm_base* b)
328 {
329         return b->eb->base;
330 }
331
332 /** see if errno for udp has to be logged or not uses globals */
333 static int
334 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
335 {
336         /* do not log transient errors (unless high verbosity) */
337 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
338         switch(errno) {
339 #  ifdef ENETUNREACH
340                 case ENETUNREACH:
341 #  endif
342 #  ifdef EHOSTDOWN
343                 case EHOSTDOWN:
344 #  endif
345 #  ifdef EHOSTUNREACH
346                 case EHOSTUNREACH:
347 #  endif
348 #  ifdef ENETDOWN
349                 case ENETDOWN:
350 #  endif
351                         if(verbosity < VERB_ALGO)
352                                 return 0;
353                 default:
354                         break;
355         }
356 #endif
357         /* permission denied is gotten for every send if the
358          * network is disconnected (on some OS), squelch it */
359         if(errno == EPERM && verbosity < VERB_DETAIL)
360                 return 0;
361         /* squelch errors where people deploy AAAA ::ffff:bla for
362          * authority servers, which we try for intranets. */
363         if(errno == EINVAL && addr_is_ip4mapped(
364                 (struct sockaddr_storage*)addr, addrlen) &&
365                 verbosity < VERB_DETAIL)
366                 return 0;
367         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
368          * but a dns cache does not need it. */
369         if(errno == EACCES && addr_is_broadcast(
370                 (struct sockaddr_storage*)addr, addrlen) &&
371                 verbosity < VERB_DETAIL)
372                 return 0;
373         return 1;
374 }
375
/**
 * Decide whether the errno of a failed tcp connect needs logging;
 * applies the same rules as for failed udp sends.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);
	return needs_log;
}
380
381 /* send a UDP reply */
382 int
383 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
384         struct sockaddr* addr, socklen_t addrlen) 
385 {
386         ssize_t sent;
387         log_assert(c->fd != -1);
388 #ifdef UNBOUND_DEBUG
389         if(sldns_buffer_remaining(packet) == 0)
390                 log_err("error: send empty UDP packet");
391 #endif
392         log_assert(addr && addrlen > 0);
393         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
394                 sldns_buffer_remaining(packet), 0,
395                 addr, addrlen);
396         if(sent == -1) {
397                 if(!udp_send_errno_needs_log(addr, addrlen))
398                         return 0;
399 #ifndef USE_WINSOCK
400                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
401 #else
402                 verbose(VERB_OPS, "sendto failed: %s", 
403                         wsa_strerror(WSAGetLastError()));
404 #endif
405                 log_addr(VERB_OPS, "remote address is", 
406                         (struct sockaddr_storage*)addr, addrlen);
407                 return 0;
408         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
409                 log_err("sent %d in place of %d bytes", 
410                         (int)sent, (int)sldns_buffer_remaining(packet));
411                 return 0;
412         }
413         return 1;
414 }
415
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Log the ancillary (packet info) data stored in a reply, for debugging.
 * @param str: prefix for the log line.
 * @param r: reply whose srctype selects the IPv4 or IPv6 info to print.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	switch(r->srctype) {
	case 6: {
		char buf[1024];
		if(!inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			buf, (socklen_t)sizeof(buf))) {
			(void)strlcpy(buf, "(inet_ntop error)", sizeof(buf));
		}
		buf[sizeof(buf)-1]=0;
		log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex);
		break;
	}
	case 4: {
#ifdef IP_PKTINFO
		char buf1[1024], buf2[1024];
		if(!inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
			buf1, (socklen_t)sizeof(buf1))) {
			(void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
		}
		buf1[sizeof(buf1)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(!inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			buf2, (socklen_t)sizeof(buf2))) {
			(void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2));
		}
		buf2[sizeof(buf2)-1]=0;
#else
		buf2[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			buf1, buf2);
#elif defined(IP_RECVDSTADDR)
		char buf1[1024];
		if(!inet_ntop(AF_INET, &r->pktinfo.v4addr,
			buf1, (socklen_t)sizeof(buf1))) {
			(void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
		}
		buf1[sizeof(buf1)-1]=0;
		log_info("%s: %s", str, buf1);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		break;
	}
	default:
		log_info("%s: unknown srctype %d", str, r->srctype);
		break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
463
464 /** send a UDP reply over specified interface*/
465 static int
466 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
467         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
468 {
469 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
470         ssize_t sent;
471         struct msghdr msg;
472         struct iovec iov[1];
473         char control[256];
474 #ifndef S_SPLINT_S
475         struct cmsghdr *cmsg;
476 #endif /* S_SPLINT_S */
477
478         log_assert(c->fd != -1);
479 #ifdef UNBOUND_DEBUG
480         if(sldns_buffer_remaining(packet) == 0)
481                 log_err("error: send empty UDP packet");
482 #endif
483         log_assert(addr && addrlen > 0);
484
485         msg.msg_name = addr;
486         msg.msg_namelen = addrlen;
487         iov[0].iov_base = sldns_buffer_begin(packet);
488         iov[0].iov_len = sldns_buffer_remaining(packet);
489         msg.msg_iov = iov;
490         msg.msg_iovlen = 1;
491         msg.msg_control = control;
492 #ifndef S_SPLINT_S
493         msg.msg_controllen = sizeof(control);
494 #endif /* S_SPLINT_S */
495         msg.msg_flags = 0;
496
497 #ifndef S_SPLINT_S
498         cmsg = CMSG_FIRSTHDR(&msg);
499         if(r->srctype == 4) {
500 #ifdef IP_PKTINFO
501                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
502                 log_assert(msg.msg_controllen <= sizeof(control));
503                 cmsg->cmsg_level = IPPROTO_IP;
504                 cmsg->cmsg_type = IP_PKTINFO;
505                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
506                         sizeof(struct in_pktinfo));
507                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
508 #elif defined(IP_SENDSRCADDR)
509                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
510                 log_assert(msg.msg_controllen <= sizeof(control));
511                 cmsg->cmsg_level = IPPROTO_IP;
512                 cmsg->cmsg_type = IP_SENDSRCADDR;
513                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
514                         sizeof(struct in_addr));
515                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
516 #else
517                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
518                 msg.msg_control = NULL;
519 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
520         } else if(r->srctype == 6) {
521                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
522                 log_assert(msg.msg_controllen <= sizeof(control));
523                 cmsg->cmsg_level = IPPROTO_IPV6;
524                 cmsg->cmsg_type = IPV6_PKTINFO;
525                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
526                         sizeof(struct in6_pktinfo));
527                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
528         } else {
529                 /* try to pass all 0 to use default route */
530                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
531                 log_assert(msg.msg_controllen <= sizeof(control));
532                 cmsg->cmsg_level = IPPROTO_IPV6;
533                 cmsg->cmsg_type = IPV6_PKTINFO;
534                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
535                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
536         }
537 #endif /* S_SPLINT_S */
538         if(verbosity >= VERB_ALGO)
539                 p_ancil("send_udp over interface", r);
540         sent = sendmsg(c->fd, &msg, 0);
541         if(sent == -1) {
542                 if(!udp_send_errno_needs_log(addr, addrlen))
543                         return 0;
544                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
545                 log_addr(VERB_OPS, "remote address is", 
546                         (struct sockaddr_storage*)addr, addrlen);
547                 return 0;
548         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
549                 log_err("sent %d in place of %d bytes", 
550                         (int)sent, (int)sldns_buffer_remaining(packet));
551                 return 0;
552         }
553         return 1;
554 #else
555         (void)c;
556         (void)packet;
557         (void)addr;
558         (void)addrlen;
559         (void)r;
560         log_err("sendmsg: IPV6_PKTINFO not supported");
561         return 0;
562 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
563 }
564
/**
 * Event callback for UDP comm points that need ancillary data: reads up
 * to NUM_UDP_PER_SELECT datagrams with recvmsg, records the packet info
 * of each in the reply, and invokes the comm point callback. When that
 * callback returns nonzero, the buffer is sent back right away over the
 * same interface.
 * @param fd: file descriptor that became readable.
 * @param event: event bits; nothing is done unless EV_READ is set.
 * @param arg: the comm point (struct comm_point*).
 * Without IPV6_PKTINFO/recvmsg support this calls fatal_exit.
 */
void
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* the union gives the byte buffer the alignment of struct cmsghdr,
	 * which a plain char array does not guarantee */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			/* EAGAIN and EINTR are not logged */
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		/* srctype stays 0 if no packet info is found below */
		rep.srctype = 0;
#ifndef S_SPLINT_S
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		/* stop when the callback closed the commpoint (fd is -1) or
		 * it now uses another fd; same test as in
		 * comm_point_udp_callback, so the old fd is not read again */
		if(rep.c->fd != fd)
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
659
660 void 
661 comm_point_udp_callback(int fd, short event, void* arg)
662 {
663         struct comm_reply rep;
664         ssize_t rcv;
665         int i;
666
667         rep.c = (struct comm_point*)arg;
668         log_assert(rep.c->type == comm_udp);
669
670         if(!(event&EV_READ))
671                 return;
672         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
673         comm_base_now(rep.c->ev->base);
674         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
675                 sldns_buffer_clear(rep.c->buffer);
676                 rep.addrlen = (socklen_t)sizeof(rep.addr);
677                 log_assert(fd != -1);
678                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
679                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
680                         sldns_buffer_remaining(rep.c->buffer), 0, 
681                         (struct sockaddr*)&rep.addr, &rep.addrlen);
682                 if(rcv == -1) {
683 #ifndef USE_WINSOCK
684                         if(errno != EAGAIN && errno != EINTR)
685                                 log_err("recvfrom %d failed: %s", 
686                                         fd, strerror(errno));
687 #else
688                         if(WSAGetLastError() != WSAEINPROGRESS &&
689                                 WSAGetLastError() != WSAECONNRESET &&
690                                 WSAGetLastError()!= WSAEWOULDBLOCK)
691                                 log_err("recvfrom failed: %s",
692                                         wsa_strerror(WSAGetLastError()));
693 #endif
694                         return;
695                 }
696                 sldns_buffer_skip(rep.c->buffer, rcv);
697                 sldns_buffer_flip(rep.c->buffer);
698                 rep.srctype = 0;
699                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
700                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
701                         /* send back immediate reply */
702                         (void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
703                                 (struct sockaddr*)&rep.addr, rep.addrlen);
704                 }
705                 if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
706                 another UDP port. Note rep.c cannot be reused with TCP fd. */
707                         break;
708         }
709 }
710
711 /** Use a new tcp handler for new query fd, set to read query */
712 static void
713 setup_tcp_handler(struct comm_point* c, int fd) 
714 {
715         log_assert(c->type == comm_tcp);
716         log_assert(c->fd == -1);
717         sldns_buffer_clear(c->buffer);
718         c->tcp_is_reading = 1;
719         c->tcp_byte_count = 0;
720         comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
721 }
722
723 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
724         short ATTR_UNUSED(event), void* arg)
725 {
726         struct comm_base* b = (struct comm_base*)arg;
727         /* timeout for the slow accept, re-enable accepts again */
728         if(b->start_accept) {
729                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
730                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
731                 (*b->start_accept)(b->cb_arg);
732                 b->eb->slow_accept_enabled = 0;
733         }
734 }
735
736 int comm_point_perform_accept(struct comm_point* c,
737         struct sockaddr_storage* addr, socklen_t* addrlen)
738 {
739         int new_fd;
740         *addrlen = (socklen_t)sizeof(*addr);
741         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
742         if(new_fd == -1) {
743 #ifndef USE_WINSOCK
744                 /* EINTR is signal interrupt. others are closed connection. */
745                 if(     errno == EINTR || errno == EAGAIN
746 #ifdef EWOULDBLOCK
747                         || errno == EWOULDBLOCK 
748 #endif
749 #ifdef ECONNABORTED
750                         || errno == ECONNABORTED 
751 #endif
752 #ifdef EPROTO
753                         || errno == EPROTO
754 #endif /* EPROTO */
755                         )
756                         return -1;
757 #if defined(ENFILE) && defined(EMFILE)
758                 if(errno == ENFILE || errno == EMFILE) {
759                         /* out of file descriptors, likely outside of our
760                          * control. stop accept() calls for some time */
761                         if(c->ev->base->stop_accept) {
762                                 struct comm_base* b = c->ev->base;
763                                 struct timeval tv;
764                                 verbose(VERB_ALGO, "out of file descriptors: "
765                                         "slow accept");
766                                 b->eb->slow_accept_enabled = 1;
767                                 fptr_ok(fptr_whitelist_stop_accept(
768                                         b->stop_accept));
769                                 (*b->stop_accept)(b->cb_arg);
770                                 /* set timeout, no mallocs */
771                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
772                                 tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
773                                 event_set(&b->eb->slow_accept, -1, EV_TIMEOUT, 
774                                         comm_base_handle_slow_accept, b);
775                                 if(event_base_set(b->eb->base,
776                                         &b->eb->slow_accept) != 0) {
777                                         /* we do not want to log here, because
778                                          * that would spam the logfiles.
779                                          * error: "event_base_set failed." */
780                                 }
781                                 if(event_add(&b->eb->slow_accept, &tv) != 0) {
782                                         /* we do not want to log here,
783                                          * error: "event_add failed." */
784                                 }
785                         }
786                         return -1;
787                 }
788 #endif
789                 log_err_addr("accept failed", strerror(errno), addr, *addrlen);
790 #else /* USE_WINSOCK */
791                 if(WSAGetLastError() == WSAEINPROGRESS ||
792                         WSAGetLastError() == WSAECONNRESET)
793                         return -1;
794                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
795                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
796                         return -1;
797                 }
798                 log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
799                         addr, *addrlen);
800 #endif
801                 return -1;
802         }
803         fd_set_nonblock(new_fd);
804         return new_fd;
805 }
806
807 #ifdef USE_WINSOCK
808 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
809         int ATTR_UNUSED(argi), long argl, long retvalue)
810 {
811         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
812                 (oper&BIO_CB_RETURN)?"return":"before",
813                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
814                 WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
815         /* on windows, check if previous operation caused EWOULDBLOCK */
816         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
817                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
818                 if(WSAGetLastError() == WSAEWOULDBLOCK)
819                         winsock_tcp_wouldblock((struct event*)
820                                 BIO_get_callback_arg(b), EV_READ);
821         }
822         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
823                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
824                 if(WSAGetLastError() == WSAEWOULDBLOCK)
825                         winsock_tcp_wouldblock((struct event*)
826                                 BIO_get_callback_arg(b), EV_WRITE);
827         }
828         /* return original return value */
829         return retvalue;
830 }
831
832 /** set win bio callbacks for nonblocking operations */
833 void
834 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
835 {
836         SSL* ssl = (SSL*)thessl;
837         /* set them both just in case, but usually they are the same BIO */
838         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
839         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)&c->ev->ev);
840         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
841         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)&c->ev->ev);
842 }
843 #endif
844
845 void 
846 comm_point_tcp_accept_callback(int fd, short event, void* arg)
847 {
848         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
849         int new_fd;
850         log_assert(c->type == comm_tcp_accept);
851         if(!(event & EV_READ)) {
852                 log_info("ignoring tcp accept event %d", (int)event);
853                 return;
854         }
855         comm_base_now(c->ev->base);
856         /* find free tcp handler. */
857         if(!c->tcp_free) {
858                 log_warn("accepted too many tcp, connections full");
859                 return;
860         }
861         /* accept incoming connection. */
862         c_hdl = c->tcp_free;
863         log_assert(fd != -1);
864         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
865                 &c_hdl->repinfo.addrlen);
866         if(new_fd == -1)
867                 return;
868         if(c->ssl) {
869                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
870                 if(!c_hdl->ssl) {
871                         c_hdl->fd = new_fd;
872                         comm_point_close(c_hdl);
873                         return;
874                 }
875                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
876 #ifdef USE_WINSOCK
877                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
878 #endif
879         }
880
881         /* grab the tcp handler buffers */
882         c->tcp_free = c_hdl->tcp_free;
883         if(!c->tcp_free) {
884                 /* stop accepting incoming queries for now. */
885                 comm_point_stop_listening(c);
886         }
887         /* addr is dropped. Not needed for tcp reply. */
888         setup_tcp_handler(c_hdl, new_fd);
889 }
890
891 /** Make tcp handler free for next assignment */
892 static void
893 reclaim_tcp_handler(struct comm_point* c)
894 {
895         log_assert(c->type == comm_tcp);
896         if(c->ssl) {
897 #ifdef HAVE_SSL
898                 SSL_shutdown(c->ssl);
899                 SSL_free(c->ssl);
900                 c->ssl = NULL;
901 #endif
902         }
903         comm_point_close(c);
904         if(c->tcp_parent) {
905                 c->tcp_free = c->tcp_parent->tcp_free;
906                 c->tcp_parent->tcp_free = c;
907                 if(!c->tcp_free) {
908                         /* re-enable listening on accept socket */
909                         comm_point_start_listening(c->tcp_parent, -1, -1);
910                 }
911         }
912 }
913
914 /** do the callback when writing is done */
915 static void
916 tcp_callback_writer(struct comm_point* c)
917 {
918         log_assert(c->type == comm_tcp);
919         sldns_buffer_clear(c->buffer);
920         if(c->tcp_do_toggle_rw)
921                 c->tcp_is_reading = 1;
922         c->tcp_byte_count = 0;
923         /* switch from listening(write) to listening(read) */
924         comm_point_stop_listening(c);
925         comm_point_start_listening(c, -1, -1);
926 }
927
928 /** do the callback when reading is done */
929 static void
930 tcp_callback_reader(struct comm_point* c)
931 {
932         log_assert(c->type == comm_tcp || c->type == comm_local);
933         sldns_buffer_flip(c->buffer);
934         if(c->tcp_do_toggle_rw)
935                 c->tcp_is_reading = 0;
936         c->tcp_byte_count = 0;
937         if(c->type == comm_tcp)
938                 comm_point_stop_listening(c);
939         fptr_ok(fptr_whitelist_comm_point(c->callback));
940         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
941                 comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
942         }
943 }
944
/**
 * Continue ssl handshake.
 * The hs_read and hs_write states only restore the listening direction
 * and clear the state. Otherwise SSL_do_handshake is called and the comm
 * point is set to wait for whatever the handshake wants next.
 * @param c: comm point with an SSL connection.
 * @return 0 if the connection must be closed, 1 to continue (the caller
 *	checks ssl_shake_state to see if the handshake has finished).
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int rc;
	int want;

	switch(c->ssl_shake_state) {
	case comm_ssl_shake_hs_read:
		/* read condition satisfied back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	case comm_ssl_shake_hs_write:
		/* write condition satisfied, back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	default:
		break;
	}

	ERR_clear_error();
	rc = SSL_do_handshake(c->ssl);
	if(rc == 1) {
		/* this is where peer verification could take place */
		log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
			c->repinfo.addrlen);

		/* setup listen rw correctly */
		if(!c->tcp_is_reading) {
			comm_point_listen_for_rw(c, 1, 1);
		} else if(c->ssl_shake_state != comm_ssl_shake_read) {
			comm_point_listen_for_rw(c, 1, 0);
		}
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	/* handshake did not complete; see what it needs */
	want = SSL_get_error(c->ssl, rc);
	if(want == SSL_ERROR_WANT_READ) {
		/* only change the listen setup if not already waiting
		 * for read */
		if(c->ssl_shake_state != comm_ssl_shake_read) {
			c->ssl_shake_state = comm_ssl_shake_read;
			comm_point_listen_for_rw(c, 1, 0);
		}
		return 1;
	}
	if(want == SSL_ERROR_WANT_WRITE) {
		/* only change the listen setup if not already waiting
		 * for write */
		if(c->ssl_shake_state != comm_ssl_shake_write) {
			c->ssl_shake_state = comm_ssl_shake_write;
			comm_point_listen_for_rw(c, 0, 1);
		}
		return 1;
	}
	if(rc == 0) {
		return 0; /* closed */
	}
	if(want == SSL_ERROR_SYSCALL) {
		/* SYSCALL and errno==0 means closed uncleanly */
		if(errno != 0)
			log_err("SSL_handshake syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("ssl handshake failed");
	log_addr(1, "ssl handshake failed", &c->repinfo.addr,
		c->repinfo.addrlen);
	return 0;
}
#endif /* HAVE_SSL */
1010
#ifdef HAVE_SSL
/**
 * Deal with an SSL_read call that returned <= 0.
 * @param c: comm point that was read from.
 * @param r: the return value of SSL_read.
 * @return 0 to close the connection, 1 to keep it and try again later.
 */
static int
ssl_read_failed(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* wait for write, ssl_handshake then restores reading */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Continues a pending handshake first. Then reads the two byte length
 * prefix (if not complete yet), validates it against the buffer capacity
 * and LDNS_HEADER_SIZE, and reads message data. When the buffer has no
 * remaining space up to its limit, tcp_callback_reader is called.
 * @param c: comm point with an SSL connection.
 * @return 0 to close the connection, 1 otherwise. Without HAVE_SSL it
 *	always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int got;

	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake not finished yet, wait for next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}

	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read length bytes */
		ERR_clear_error();
		got = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(got <= 0)
			return ssl_read_failed(c, got);
		c->tcp_byte_count += got;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1; /* length prefix still incomplete */

		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		/* the length prefix becomes the buffer limit */
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}

	/* read message data */
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	got = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(got <= 0)
		return ssl_read_failed(c, got);

	sldns_buffer_skip(c->buffer, (ssize_t)got);
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1097
#ifdef HAVE_SSL
/**
 * Deal with an SSL_write call that returned <= 0.
 * @param c: comm point that was written to.
 * @param r: the return value of SSL_write.
 * @return 0 to close the connection, 1 to keep it and try again later.
 */
static int
ssl_write_failed(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		/* The write must be retried once the socket is readable.
		 * Use the hs_read state, so that ssl_handshake only
		 * restores the listen setup for writing and the write is
		 * retried, instead of starting SSL_do_handshake. This
		 * mirrors the hs_write state used by the read path. */
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	case SSL_ERROR_WANT_WRITE:
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Continues a pending handshake first. Then writes the two byte length
 * prefix (if not fully sent yet) followed by the buffer contents. When
 * nothing remains in the buffer, tcp_callback_writer is called.
 * @param c: comm point with an SSL connection.
 * @return 0 to close the connection, 1 otherwise. Without HAVE_SSL it
 *	always returns 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake not finished yet, wait for next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* send (the rest of) the length prefix, network order */
		uint16_t len = htons(sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		r = SSL_write(c->ssl,
			(void*)(((uint8_t*)&len)+c->tcp_byte_count),
			(int)(sizeof(uint16_t)-c->tcp_byte_count));
		if(r <= 0)
			return ssl_write_failed(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1; /* length prefix still incomplete */
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	/* send message data */
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_write_failed(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1181
1182 /** handle ssl tcp connection with dns contents */
1183 static int
1184 ssl_handle_it(struct comm_point* c)
1185 {
1186         if(c->tcp_is_reading)
1187                 return ssl_handle_read(c);
1188         return ssl_handle_write(c);
1189 }
1190
1191 /** Handle tcp reading callback. 
1192  * @param fd: file descriptor of socket.
1193  * @param c: comm point to read from into buffer.
1194  * @param short_ok: if true, very short packets are OK (for comm_local).
1195  * @return: 0 on error 
1196  */
1197 static int
1198 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1199 {
1200         ssize_t r;
1201         log_assert(c->type == comm_tcp || c->type == comm_local);
1202         if(c->ssl)
1203                 return ssl_handle_it(c);
1204         if(!c->tcp_is_reading)
1205                 return 0;
1206
1207         log_assert(fd != -1);
1208         if(c->tcp_byte_count < sizeof(uint16_t)) {
1209                 /* read length bytes */
1210                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1211                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1212                 if(r == 0)
1213                         return 0;
1214                 else if(r == -1) {
1215 #ifndef USE_WINSOCK
1216                         if(errno == EINTR || errno == EAGAIN)
1217                                 return 1;
1218 #ifdef ECONNRESET
1219                         if(errno == ECONNRESET && verbosity < 2)
1220                                 return 0; /* silence reset by peer */
1221 #endif
1222                         log_err_addr("read (in tcp s)", strerror(errno),
1223                                 &c->repinfo.addr, c->repinfo.addrlen);
1224 #else /* USE_WINSOCK */
1225                         if(WSAGetLastError() == WSAECONNRESET)
1226                                 return 0;
1227                         if(WSAGetLastError() == WSAEINPROGRESS)
1228                                 return 1;
1229                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1230                                 winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1231                                 return 1;
1232                         }
1233                         log_err_addr("read (in tcp s)", 
1234                                 wsa_strerror(WSAGetLastError()),
1235                                 &c->repinfo.addr, c->repinfo.addrlen);
1236 #endif
1237                         return 0;
1238                 } 
1239                 c->tcp_byte_count += r;
1240                 if(c->tcp_byte_count != sizeof(uint16_t))
1241                         return 1;
1242                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1243                         sldns_buffer_capacity(c->buffer)) {
1244                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1245                         return 0;
1246                 }
1247                 sldns_buffer_set_limit(c->buffer, 
1248                         sldns_buffer_read_u16_at(c->buffer, 0));
1249                 if(!short_ok && 
1250                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1251                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1252                         return 0;
1253                 }
1254                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1255                         (int)sldns_buffer_limit(c->buffer));
1256         }
1257
1258         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1259         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1260                 sldns_buffer_remaining(c->buffer), 0);
1261         if(r == 0) {
1262                 return 0;
1263         } else if(r == -1) {
1264 #ifndef USE_WINSOCK
1265                 if(errno == EINTR || errno == EAGAIN)
1266                         return 1;
1267                 log_err_addr("read (in tcp r)", strerror(errno),
1268                         &c->repinfo.addr, c->repinfo.addrlen);
1269 #else /* USE_WINSOCK */
1270                 if(WSAGetLastError() == WSAECONNRESET)
1271                         return 0;
1272                 if(WSAGetLastError() == WSAEINPROGRESS)
1273                         return 1;
1274                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1275                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1276                         return 1;
1277                 }
1278                 log_err_addr("read (in tcp r)",
1279                         wsa_strerror(WSAGetLastError()),
1280                         &c->repinfo.addr, c->repinfo.addrlen);
1281 #endif
1282                 return 0;
1283         }
1284         sldns_buffer_skip(c->buffer, r);
1285         if(sldns_buffer_remaining(c->buffer) <= 0) {
1286                 tcp_callback_reader(c);
1287         }
1288         return 1;
1289 }
1290
1291 /** 
1292  * Handle tcp writing callback. 
1293  * @param fd: file descriptor of socket.
1294  * @param c: comm point to write buffer out of.
1295  * @return: 0 on error
1296  */
1297 static int
1298 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1299 {
1300         ssize_t r;
1301         log_assert(c->type == comm_tcp);
1302         if(c->tcp_is_reading && !c->ssl)
1303                 return 0;
1304         log_assert(fd != -1);
1305         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1306                 /* check for pending error from nonblocking connect */
1307                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1308                 int error = 0;
1309                 socklen_t len = (socklen_t)sizeof(error);
1310                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1311                         &len) < 0){
1312 #ifndef USE_WINSOCK
1313                         error = errno; /* on solaris errno is error */
1314 #else /* USE_WINSOCK */
1315                         error = WSAGetLastError();
1316 #endif
1317                 }
1318 #ifndef USE_WINSOCK
1319 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1320                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1321                         return 1; /* try again later */
1322                 else
1323 #endif
1324                 if(error != 0 && verbosity < 2)
1325                         return 0; /* silence lots of chatter in the logs */
1326                 else if(error != 0) {
1327                         log_err_addr("tcp connect", strerror(error),
1328                                 &c->repinfo.addr, c->repinfo.addrlen);
1329 #else /* USE_WINSOCK */
1330                 /* examine error */
1331                 if(error == WSAEINPROGRESS)
1332                         return 1;
1333                 else if(error == WSAEWOULDBLOCK) {
1334                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1335                         return 1;
1336                 } else if(error != 0 && verbosity < 2)
1337                         return 0;
1338                 else if(error != 0) {
1339                         log_err_addr("tcp connect", wsa_strerror(error),
1340                                 &c->repinfo.addr, c->repinfo.addrlen);
1341 #endif /* USE_WINSOCK */
1342                         return 0;
1343                 }
1344         }
1345         if(c->ssl)
1346                 return ssl_handle_it(c);
1347
1348         if(c->tcp_byte_count < sizeof(uint16_t)) {
1349                 uint16_t len = htons(sldns_buffer_limit(c->buffer));
1350 #ifdef HAVE_WRITEV
1351                 struct iovec iov[2];
1352                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1353                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1354                 iov[1].iov_base = sldns_buffer_begin(c->buffer);
1355                 iov[1].iov_len = sldns_buffer_limit(c->buffer);
1356                 log_assert(iov[0].iov_len > 0);
1357                 log_assert(iov[1].iov_len > 0);
1358                 r = writev(fd, iov, 2);
1359 #else /* HAVE_WRITEV */
1360                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1361                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1362 #endif /* HAVE_WRITEV */
1363                 if(r == -1) {
1364 #ifndef USE_WINSOCK
1365 #  ifdef EPIPE
1366                         if(errno == EPIPE && verbosity < 2)
1367                                 return 0; /* silence 'broken pipe' */
1368   #endif
1369                         if(errno == EINTR || errno == EAGAIN)
1370                                 return 1;
1371 #  ifdef HAVE_WRITEV
1372                         log_err_addr("tcp writev", strerror(errno),
1373                                 &c->repinfo.addr, c->repinfo.addrlen);
1374 #  else /* HAVE_WRITEV */
1375                         log_err_addr("tcp send s", strerror(errno),
1376                                 &c->repinfo.addr, c->repinfo.addrlen);
1377 #  endif /* HAVE_WRITEV */
1378 #else
1379                         if(WSAGetLastError() == WSAENOTCONN)
1380                                 return 1;
1381                         if(WSAGetLastError() == WSAEINPROGRESS)
1382                                 return 1;
1383                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1384                                 winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1385                                 return 1; 
1386                         }
1387                         log_err_addr("tcp send s",
1388                                 wsa_strerror(WSAGetLastError()),
1389                                 &c->repinfo.addr, c->repinfo.addrlen);
1390 #endif
1391                         return 0;
1392                 }
1393                 c->tcp_byte_count += r;
1394                 if(c->tcp_byte_count < sizeof(uint16_t))
1395                         return 1;
1396                 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 
1397                         sizeof(uint16_t));
1398                 if(sldns_buffer_remaining(c->buffer) == 0) {
1399                         tcp_callback_writer(c);
1400                         return 1;
1401                 }
1402         }
1403         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1404         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
1405                 sldns_buffer_remaining(c->buffer), 0);
1406         if(r == -1) {
1407 #ifndef USE_WINSOCK
1408                 if(errno == EINTR || errno == EAGAIN)
1409                         return 1;
1410                 log_err_addr("tcp send r", strerror(errno),
1411                         &c->repinfo.addr, c->repinfo.addrlen);
1412 #else
1413                 if(WSAGetLastError() == WSAEINPROGRESS)
1414                         return 1;
1415                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1416                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1417                         return 1; 
1418                 }
1419                 log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
1420                         &c->repinfo.addr, c->repinfo.addrlen);
1421 #endif
1422                 return 0;
1423         }
1424         sldns_buffer_skip(c->buffer, r);
1425
1426         if(sldns_buffer_remaining(c->buffer) == 0) {
1427                 tcp_callback_writer(c);
1428         }
1429         
1430         return 1;
1431 }
1432
1433 void 
1434 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1435 {
1436         struct comm_point* c = (struct comm_point*)arg;
1437         log_assert(c->type == comm_tcp);
1438         comm_base_now(c->ev->base);
1439
1440         if(event&EV_READ) {
1441                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1442                         reclaim_tcp_handler(c);
1443                         if(!c->tcp_do_close) {
1444                                 fptr_ok(fptr_whitelist_comm_point(
1445                                         c->callback));
1446                                 (void)(*c->callback)(c, c->cb_arg, 
1447                                         NETEVENT_CLOSED, NULL);
1448                         }
1449                 }
1450                 return;
1451         }
1452         if(event&EV_WRITE) {
1453                 if(!comm_point_tcp_handle_write(fd, c)) {
1454                         reclaim_tcp_handler(c);
1455                         if(!c->tcp_do_close) {
1456                                 fptr_ok(fptr_whitelist_comm_point(
1457                                         c->callback));
1458                                 (void)(*c->callback)(c, c->cb_arg, 
1459                                         NETEVENT_CLOSED, NULL);
1460                         }
1461                 }
1462                 return;
1463         }
1464         if(event&EV_TIMEOUT) {
1465                 verbose(VERB_QUERY, "tcp took too long, dropped");
1466                 reclaim_tcp_handler(c);
1467                 if(!c->tcp_do_close) {
1468                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1469                         (void)(*c->callback)(c, c->cb_arg,
1470                                 NETEVENT_TIMEOUT, NULL);
1471                 }
1472                 return;
1473         }
1474         log_err("Ignored event %d for tcphdl.", event);
1475 }
1476
1477 void comm_point_local_handle_callback(int fd, short event, void* arg)
1478 {
1479         struct comm_point* c = (struct comm_point*)arg;
1480         log_assert(c->type == comm_local);
1481         comm_base_now(c->ev->base);
1482
1483         if(event&EV_READ) {
1484                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
1485                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1486                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
1487                                 NULL);
1488                 }
1489                 return;
1490         }
1491         log_err("Ignored event %d for localhdl.", event);
1492 }
1493
1494 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
1495         short event, void* arg)
1496 {
1497         struct comm_point* c = (struct comm_point*)arg;
1498         int err = NETEVENT_NOERROR;
1499         log_assert(c->type == comm_raw);
1500         comm_base_now(c->ev->base);
1501         
1502         if(event&EV_TIMEOUT)
1503                 err = NETEVENT_TIMEOUT;
1504         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1505         (void)(*c->callback)(c, c->cb_arg, err, NULL);
1506 }
1507
1508 struct comm_point* 
1509 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
1510         comm_point_callback_t* callback, void* callback_arg)
1511 {
1512         struct comm_point* c = (struct comm_point*)calloc(1,
1513                 sizeof(struct comm_point));
1514         short evbits;
1515         if(!c)
1516                 return NULL;
1517         c->ev = (struct internal_event*)calloc(1,
1518                 sizeof(struct internal_event));
1519         if(!c->ev) {
1520                 free(c);
1521                 return NULL;
1522         }
1523         c->ev->base = base;
1524         c->fd = fd;
1525         c->buffer = buffer;
1526         c->timeout = NULL;
1527         c->tcp_is_reading = 0;
1528         c->tcp_byte_count = 0;
1529         c->tcp_parent = NULL;
1530         c->max_tcp_count = 0;
1531         c->tcp_handlers = NULL;
1532         c->tcp_free = NULL;
1533         c->type = comm_udp;
1534         c->tcp_do_close = 0;
1535         c->do_not_close = 0;
1536         c->tcp_do_toggle_rw = 0;
1537         c->tcp_check_nb_connect = 0;
1538         c->inuse = 0;
1539         c->callback = callback;
1540         c->cb_arg = callback_arg;
1541         evbits = EV_READ | EV_PERSIST;
1542         /* libevent stuff */
1543         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1544         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1545                 log_err("could not baseset udp event");
1546                 comm_point_delete(c);
1547                 return NULL;
1548         }
1549         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1550                 log_err("could not add udp event");
1551                 comm_point_delete(c);
1552                 return NULL;
1553         }
1554         return c;
1555 }
1556
1557 struct comm_point* 
1558 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
1559         sldns_buffer* buffer, 
1560         comm_point_callback_t* callback, void* callback_arg)
1561 {
1562         struct comm_point* c = (struct comm_point*)calloc(1,
1563                 sizeof(struct comm_point));
1564         short evbits;
1565         if(!c)
1566                 return NULL;
1567         c->ev = (struct internal_event*)calloc(1,
1568                 sizeof(struct internal_event));
1569         if(!c->ev) {
1570                 free(c);
1571                 return NULL;
1572         }
1573         c->ev->base = base;
1574         c->fd = fd;
1575         c->buffer = buffer;
1576         c->timeout = NULL;
1577         c->tcp_is_reading = 0;
1578         c->tcp_byte_count = 0;
1579         c->tcp_parent = NULL;
1580         c->max_tcp_count = 0;
1581         c->tcp_handlers = NULL;
1582         c->tcp_free = NULL;
1583         c->type = comm_udp;
1584         c->tcp_do_close = 0;
1585         c->do_not_close = 0;
1586         c->inuse = 0;
1587         c->tcp_do_toggle_rw = 0;
1588         c->tcp_check_nb_connect = 0;
1589         c->callback = callback;
1590         c->cb_arg = callback_arg;
1591         evbits = EV_READ | EV_PERSIST;
1592         /* libevent stuff */
1593         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1594         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1595                 log_err("could not baseset udp event");
1596                 comm_point_delete(c);
1597                 return NULL;
1598         }
1599         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1600                 log_err("could not add udp event");
1601                 comm_point_delete(c);
1602                 return NULL;
1603         }
1604         return c;
1605 }
1606
1607 static struct comm_point* 
1608 comm_point_create_tcp_handler(struct comm_base *base, 
1609         struct comm_point* parent, size_t bufsize,
1610         comm_point_callback_t* callback, void* callback_arg)
1611 {
1612         struct comm_point* c = (struct comm_point*)calloc(1,
1613                 sizeof(struct comm_point));
1614         short evbits;
1615         if(!c)
1616                 return NULL;
1617         c->ev = (struct internal_event*)calloc(1,
1618                 sizeof(struct internal_event));
1619         if(!c->ev) {
1620                 free(c);
1621                 return NULL;
1622         }
1623         c->ev->base = base;
1624         c->fd = -1;
1625         c->buffer = sldns_buffer_new(bufsize);
1626         if(!c->buffer) {
1627                 free(c->ev);
1628                 free(c);
1629                 return NULL;
1630         }
1631         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1632         if(!c->timeout) {
1633                 sldns_buffer_free(c->buffer);
1634                 free(c->ev);
1635                 free(c);
1636                 return NULL;
1637         }
1638         c->tcp_is_reading = 0;
1639         c->tcp_byte_count = 0;
1640         c->tcp_parent = parent;
1641         c->max_tcp_count = 0;
1642         c->tcp_handlers = NULL;
1643         c->tcp_free = NULL;
1644         c->type = comm_tcp;
1645         c->tcp_do_close = 0;
1646         c->do_not_close = 0;
1647         c->tcp_do_toggle_rw = 1;
1648         c->tcp_check_nb_connect = 0;
1649         c->repinfo.c = c;
1650         c->callback = callback;
1651         c->cb_arg = callback_arg;
1652         /* add to parent free list */
1653         c->tcp_free = parent->tcp_free;
1654         parent->tcp_free = c;
1655         /* libevent stuff */
1656         evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1657         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1658         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1659         {
1660                 log_err("could not basetset tcphdl event");
1661                 parent->tcp_free = c->tcp_free;
1662                 free(c->ev);
1663                 free(c);
1664                 return NULL;
1665         }
1666         return c;
1667 }
1668
1669 struct comm_point* 
1670 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1671         comm_point_callback_t* callback, void* callback_arg)
1672 {
1673         struct comm_point* c = (struct comm_point*)calloc(1,
1674                 sizeof(struct comm_point));
1675         short evbits;
1676         int i;
1677         /* first allocate the TCP accept listener */
1678         if(!c)
1679                 return NULL;
1680         c->ev = (struct internal_event*)calloc(1,
1681                 sizeof(struct internal_event));
1682         if(!c->ev) {
1683                 free(c);
1684                 return NULL;
1685         }
1686         c->ev->base = base;
1687         c->fd = fd;
1688         c->buffer = NULL;
1689         c->timeout = NULL;
1690         c->tcp_is_reading = 0;
1691         c->tcp_byte_count = 0;
1692         c->tcp_parent = NULL;
1693         c->max_tcp_count = num;
1694         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1695                 sizeof(struct comm_point*));
1696         if(!c->tcp_handlers) {
1697                 free(c->ev);
1698                 free(c);
1699                 return NULL;
1700         }
1701         c->tcp_free = NULL;
1702         c->type = comm_tcp_accept;
1703         c->tcp_do_close = 0;
1704         c->do_not_close = 0;
1705         c->tcp_do_toggle_rw = 0;
1706         c->tcp_check_nb_connect = 0;
1707         c->callback = NULL;
1708         c->cb_arg = NULL;
1709         evbits = EV_READ | EV_PERSIST;
1710         /* libevent stuff */
1711         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1712         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1713                 event_add(&c->ev->ev, c->timeout) != 0 )
1714         {
1715                 log_err("could not add tcpacc event");
1716                 comm_point_delete(c);
1717                 return NULL;
1718         }
1719
1720         /* now prealloc the tcp handlers */
1721         for(i=0; i<num; i++) {
1722                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1723                         c, bufsize, callback, callback_arg);
1724                 if(!c->tcp_handlers[i]) {
1725                         comm_point_delete(c);
1726                         return NULL;
1727                 }
1728         }
1729         
1730         return c;
1731 }
1732
1733 struct comm_point* 
1734 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1735         comm_point_callback_t* callback, void* callback_arg)
1736 {
1737         struct comm_point* c = (struct comm_point*)calloc(1,
1738                 sizeof(struct comm_point));
1739         short evbits;
1740         if(!c)
1741                 return NULL;
1742         c->ev = (struct internal_event*)calloc(1,
1743                 sizeof(struct internal_event));
1744         if(!c->ev) {
1745                 free(c);
1746                 return NULL;
1747         }
1748         c->ev->base = base;
1749         c->fd = -1;
1750         c->buffer = sldns_buffer_new(bufsize);
1751         if(!c->buffer) {
1752                 free(c->ev);
1753                 free(c);
1754                 return NULL;
1755         }
1756         c->timeout = NULL;
1757         c->tcp_is_reading = 0;
1758         c->tcp_byte_count = 0;
1759         c->tcp_parent = NULL;
1760         c->max_tcp_count = 0;
1761         c->tcp_handlers = NULL;
1762         c->tcp_free = NULL;
1763         c->type = comm_tcp;
1764         c->tcp_do_close = 0;
1765         c->do_not_close = 0;
1766         c->tcp_do_toggle_rw = 1;
1767         c->tcp_check_nb_connect = 1;
1768         c->repinfo.c = c;
1769         c->callback = callback;
1770         c->cb_arg = callback_arg;
1771         evbits = EV_PERSIST | EV_WRITE;
1772         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1773         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1774         {
1775                 log_err("could not basetset tcpout event");
1776                 sldns_buffer_free(c->buffer);
1777                 free(c->ev);
1778                 free(c);
1779                 return NULL;
1780         }
1781
1782         return c;
1783 }
1784
1785 struct comm_point* 
1786 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1787         comm_point_callback_t* callback, void* callback_arg)
1788 {
1789         struct comm_point* c = (struct comm_point*)calloc(1,
1790                 sizeof(struct comm_point));
1791         short evbits;
1792         if(!c)
1793                 return NULL;
1794         c->ev = (struct internal_event*)calloc(1,
1795                 sizeof(struct internal_event));
1796         if(!c->ev) {
1797                 free(c);
1798                 return NULL;
1799         }
1800         c->ev->base = base;
1801         c->fd = fd;
1802         c->buffer = sldns_buffer_new(bufsize);
1803         if(!c->buffer) {
1804                 free(c->ev);
1805                 free(c);
1806                 return NULL;
1807         }
1808         c->timeout = NULL;
1809         c->tcp_is_reading = 1;
1810         c->tcp_byte_count = 0;
1811         c->tcp_parent = NULL;
1812         c->max_tcp_count = 0;
1813         c->tcp_handlers = NULL;
1814         c->tcp_free = NULL;
1815         c->type = comm_local;
1816         c->tcp_do_close = 0;
1817         c->do_not_close = 1;
1818         c->tcp_do_toggle_rw = 0;
1819         c->tcp_check_nb_connect = 0;
1820         c->callback = callback;
1821         c->cb_arg = callback_arg;
1822         /* libevent stuff */
1823         evbits = EV_PERSIST | EV_READ;
1824         event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback, 
1825                 c);
1826         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1827                 event_add(&c->ev->ev, c->timeout) != 0 )
1828         {
1829                 log_err("could not add localhdl event");
1830                 free(c->ev);
1831                 free(c);
1832                 return NULL;
1833         }
1834         return c;
1835 }
1836
1837 struct comm_point* 
1838 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
1839         comm_point_callback_t* callback, void* callback_arg)
1840 {
1841         struct comm_point* c = (struct comm_point*)calloc(1,
1842                 sizeof(struct comm_point));
1843         short evbits;
1844         if(!c)
1845                 return NULL;
1846         c->ev = (struct internal_event*)calloc(1,
1847                 sizeof(struct internal_event));
1848         if(!c->ev) {
1849                 free(c);
1850                 return NULL;
1851         }
1852         c->ev->base = base;
1853         c->fd = fd;
1854         c->buffer = NULL;
1855         c->timeout = NULL;
1856         c->tcp_is_reading = 0;
1857         c->tcp_byte_count = 0;
1858         c->tcp_parent = NULL;
1859         c->max_tcp_count = 0;
1860         c->tcp_handlers = NULL;
1861         c->tcp_free = NULL;
1862         c->type = comm_raw;
1863         c->tcp_do_close = 0;
1864         c->do_not_close = 1;
1865         c->tcp_do_toggle_rw = 0;
1866         c->tcp_check_nb_connect = 0;
1867         c->callback = callback;
1868         c->cb_arg = callback_arg;
1869         /* libevent stuff */
1870         if(writing)
1871                 evbits = EV_PERSIST | EV_WRITE;
1872         else    evbits = EV_PERSIST | EV_READ;
1873         event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback, 
1874                 c);
1875         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1876                 event_add(&c->ev->ev, c->timeout) != 0 )
1877         {
1878                 log_err("could not add rawhdl event");
1879                 free(c->ev);
1880                 free(c);
1881                 return NULL;
1882         }
1883         return c;
1884 }
1885
1886 void 
1887 comm_point_close(struct comm_point* c)
1888 {
1889         if(!c)
1890                 return;
1891         if(c->fd != -1)
1892                 if(event_del(&c->ev->ev) != 0) {
1893                         log_err("could not event_del on close");
1894                 }
1895         /* close fd after removing from event lists, or epoll.. is messed up */
1896         if(c->fd != -1 && !c->do_not_close) {
1897                 verbose(VERB_ALGO, "close fd %d", c->fd);
1898 #ifndef USE_WINSOCK
1899                 close(c->fd);
1900 #else
1901                 closesocket(c->fd);
1902 #endif
1903         }
1904         c->fd = -1;
1905 }
1906
1907 void 
1908 comm_point_delete(struct comm_point* c)
1909 {
1910         if(!c) 
1911                 return;
1912         if(c->type == comm_tcp && c->ssl) {
1913 #ifdef HAVE_SSL
1914                 SSL_shutdown(c->ssl);
1915                 SSL_free(c->ssl);
1916 #endif
1917         }
1918         comm_point_close(c);
1919         if(c->tcp_handlers) {
1920                 int i;
1921                 for(i=0; i<c->max_tcp_count; i++)
1922                         comm_point_delete(c->tcp_handlers[i]);
1923                 free(c->tcp_handlers);
1924         }
1925         free(c->timeout);
1926         if(c->type == comm_tcp || c->type == comm_local)
1927                 sldns_buffer_free(c->buffer);
1928         free(c->ev);
1929         free(c);
1930 }
1931
1932 void 
1933 comm_point_send_reply(struct comm_reply *repinfo)
1934 {
1935         log_assert(repinfo && repinfo->c);
1936         if(repinfo->c->type == comm_udp) {
1937                 if(repinfo->srctype)
1938                         comm_point_send_udp_msg_if(repinfo->c, 
1939                         repinfo->c->buffer, (struct sockaddr*)&repinfo->addr, 
1940                         repinfo->addrlen, repinfo);
1941                 else
1942                         comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
1943                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
1944 #ifdef USE_DNSTAP
1945                 if(repinfo->c->dtenv != NULL &&
1946                    repinfo->c->dtenv->log_client_response_messages)
1947                         dt_msg_send_client_response(repinfo->c->dtenv,
1948                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
1949 #endif
1950         } else {
1951 #ifdef USE_DNSTAP
1952                 if(repinfo->c->tcp_parent->dtenv != NULL &&
1953                    repinfo->c->tcp_parent->dtenv->log_client_response_messages)
1954                         dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
1955                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
1956 #endif
1957                 comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
1958         }
1959 }
1960
1961 void 
1962 comm_point_drop_reply(struct comm_reply* repinfo)
1963 {
1964         if(!repinfo)
1965                 return;
1966         log_assert(repinfo && repinfo->c);
1967         log_assert(repinfo->c->type != comm_tcp_accept);
1968         if(repinfo->c->type == comm_udp)
1969                 return;
1970         reclaim_tcp_handler(repinfo->c);
1971 }
1972
1973 void 
1974 comm_point_stop_listening(struct comm_point* c)
1975 {
1976         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
1977         if(event_del(&c->ev->ev) != 0) {
1978                 log_err("event_del error to stoplisten");
1979         }
1980 }
1981
/*
 * (Re)start listening on a comm point by adding its event.
 *
 * @param c: the comm point.
 * @param newfd: if not -1, replaces c->fd; an already open c->fd is
 *      closed first.
 * @param sec: timeout in seconds. 0 adds the event without a timeout,
 *      -1 adds it with whatever c->timeout currently holds, any other
 *      value stores sec in c->timeout and enables EV_TIMEOUT.
 * Errors are logged; nothing is returned.
 */
void 
comm_point_start_listening(struct comm_point* c, int newfd, int sec)
{
        verbose(VERB_ALGO, "comm point start listening %d", 
                c->fd==-1?newfd:c->fd);
        if(c->type == comm_tcp_accept && !c->tcp_free) {
                /* no use to start listening no free slots. */
                return;
        }
        if(sec != -1 && sec != 0) {
                /* allocate the timeout storage on first use */
                if(!c->timeout) {
                        c->timeout = (struct timeval*)malloc(sizeof(
                                struct timeval));
                        if(!c->timeout) {
                                log_err("cpsl: malloc failed. No net read.");
                                return;
                        }
                }
                c->ev->ev.ev_events |= EV_TIMEOUT;
#ifndef S_SPLINT_S /* splint fails on struct timeval. */
                c->timeout->tv_sec = sec;
                c->timeout->tv_usec = 0;
#endif /* S_SPLINT_S */
        }
        if(c->type == comm_tcp) {
                /* listen for exactly one direction, picked by
                 * tcp_is_reading */
                c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
                if(c->tcp_is_reading)
                        c->ev->ev.ev_events |= EV_READ;
                else    c->ev->ev.ev_events |= EV_WRITE;
        }
        if(newfd != -1) {
                /* switch over to the new fd, closing the old one */
                if(c->fd != -1) {
#ifndef USE_WINSOCK
                        close(c->fd);
#else
                        closesocket(c->fd);
#endif
                }
                c->fd = newfd;
                c->ev->ev.ev_fd = c->fd;
        }
        /* sec of 0 means no timeout is passed to event_add */
        if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
                log_err("event_add failed. in cpsl.");
        }
}
2027
2028 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
2029 {
2030         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
2031         if(event_del(&c->ev->ev) != 0) {
2032                 log_err("event_del error to cplf");
2033         }
2034         c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
2035         if(rd) c->ev->ev.ev_events |= EV_READ;
2036         if(wr) c->ev->ev.ev_events |= EV_WRITE;
2037         if(event_add(&c->ev->ev, c->timeout) != 0) {
2038                 log_err("event_add failed. in cplf.");
2039         }
2040 }
2041
2042 size_t comm_point_get_mem(struct comm_point* c)
2043 {
2044         size_t s;
2045         if(!c) 
2046                 return 0;
2047         s = sizeof(*c) + sizeof(*c->ev);
2048         if(c->timeout) 
2049                 s += sizeof(*c->timeout);
2050         if(c->type == comm_tcp || c->type == comm_local)
2051                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
2052         if(c->type == comm_tcp_accept) {
2053                 int i;
2054                 for(i=0; i<c->max_tcp_count; i++)
2055                         s += comm_point_get_mem(c->tcp_handlers[i]);
2056         }
2057         return s;
2058 }
2059
2060 struct comm_timer* 
2061 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2062 {
2063         struct comm_timer *tm = (struct comm_timer*)calloc(1,
2064                 sizeof(struct comm_timer));
2065         if(!tm)
2066                 return NULL;
2067         tm->ev_timer = (struct internal_timer*)calloc(1,
2068                 sizeof(struct internal_timer));
2069         if(!tm->ev_timer) {
2070                 log_err("malloc failed");
2071                 free(tm);
2072                 return NULL;
2073         }
2074         tm->ev_timer->base = base;
2075         tm->callback = cb;
2076         tm->cb_arg = cb_arg;
2077         event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT, 
2078                 comm_timer_callback, tm);
2079         if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2080                 log_err("timer_create: event_base_set failed.");
2081                 free(tm->ev_timer);
2082                 free(tm);
2083                 return NULL;
2084         }
2085         return tm;
2086 }
2087
2088 void 
2089 comm_timer_disable(struct comm_timer* timer)
2090 {
2091         if(!timer)
2092                 return;
2093         evtimer_del(&timer->ev_timer->ev);
2094         timer->ev_timer->enabled = 0;
2095 }
2096
2097 void 
2098 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2099 {
2100         log_assert(tv);
2101         if(timer->ev_timer->enabled)
2102                 comm_timer_disable(timer);
2103         event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2104                 comm_timer_callback, timer);
2105         if(event_base_set(timer->ev_timer->base->eb->base, 
2106                 &timer->ev_timer->ev) != 0)
2107                 log_err("comm_timer_set: set_base failed.");
2108         if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2109                 log_err("comm_timer_set: evtimer_add failed.");
2110         timer->ev_timer->enabled = 1;
2111 }
2112
2113 void 
2114 comm_timer_delete(struct comm_timer* timer)
2115 {
2116         if(!timer)
2117                 return;
2118         comm_timer_disable(timer);
2119         free(timer->ev_timer);
2120         free(timer);
2121 }
2122
2123 void 
2124 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2125 {
2126         struct comm_timer* tm = (struct comm_timer*)arg;
2127         if(!(event&EV_TIMEOUT))
2128                 return;
2129         comm_base_now(tm->ev_timer->base);
2130         tm->ev_timer->enabled = 0;
2131         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2132         (*tm->callback)(tm->cb_arg);
2133 }
2134
2135 int 
2136 comm_timer_is_set(struct comm_timer* timer)
2137 {
2138         return (int)timer->ev_timer->enabled;
2139 }
2140
2141 size_t 
2142 comm_timer_get_mem(struct comm_timer* timer)
2143 {
2144         return sizeof(*timer) + sizeof(struct internal_timer);
2145 }
2146
2147 struct comm_signal* 
2148 comm_signal_create(struct comm_base* base,
2149         void (*callback)(int, void*), void* cb_arg)
2150 {
2151         struct comm_signal* com = (struct comm_signal*)malloc(
2152                 sizeof(struct comm_signal));
2153         if(!com) {
2154                 log_err("malloc failed");
2155                 return NULL;
2156         }
2157         com->base = base;
2158         com->callback = callback;
2159         com->cb_arg = cb_arg;
2160         com->ev_signal = NULL;
2161         return com;
2162 }
2163
2164 void 
2165 comm_signal_callback(int sig, short event, void* arg)
2166 {
2167         struct comm_signal* comsig = (struct comm_signal*)arg;
2168         if(!(event & EV_SIGNAL))
2169                 return;
2170         comm_base_now(comsig->base);
2171         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2172         (*comsig->callback)(sig, comsig->cb_arg);
2173 }
2174
2175 int 
2176 comm_signal_bind(struct comm_signal* comsig, int sig)
2177 {
2178         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
2179                 sizeof(struct internal_signal));
2180         if(!entry) {
2181                 log_err("malloc failed");
2182                 return 0;
2183         }
2184         log_assert(comsig);
2185         /* add signal event */
2186         signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2187         if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2188                 log_err("Could not set signal base");
2189                 free(entry);
2190                 return 0;
2191         }
2192         if(signal_add(&entry->ev, NULL) != 0) {
2193                 log_err("Could not add signal handler");
2194                 free(entry);
2195                 return 0;
2196         }
2197         /* link into list */
2198         entry->next = comsig->ev_signal;
2199         comsig->ev_signal = entry;
2200         return 1;
2201 }
2202
2203 void 
2204 comm_signal_delete(struct comm_signal* comsig)
2205 {
2206         struct internal_signal* p, *np;
2207         if(!comsig)
2208                 return;
2209         p=comsig->ev_signal;
2210         while(p) {
2211                 np = p->next;
2212                 signal_del(&p->ev);
2213                 free(p);
2214                 p = np;
2215         }
2216         free(comsig);
2217 }