]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/util/netevent.c
Bring down 0.4.5 vendor files and other catchups with the distribution tarball.
[FreeBSD/FreeBSD.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/log.h"
44 #include "util/net_help.h"
45 #include "util/fptr_wlist.h"
46 #include "sldns/pkthdr.h"
47 #include "sldns/sbuffer.h"
48 #include "dnstap/dnstap.h"
49 #ifdef HAVE_OPENSSL_SSL_H
50 #include <openssl/ssl.h>
51 #endif
52 #ifdef HAVE_OPENSSL_ERR_H
53 #include <openssl/err.h>
54 #endif
55
56 /* -------- Start of local definitions -------- */
/**
 * Fallback for platforms whose socket headers do not provide CMSG_ALIGN.
 * Uses the platform's own internal alignment macro when one is visible and
 * otherwise rounds up to a multiple of sizeof(long).
 */
#ifndef CMSG_ALIGN
#  if defined(__CMSG_ALIGN)
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
     /* test for the very macro that is expanded below, so that this
      * branch is only taken when that macro really exists */
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif
67
/** Supply CMSG_LEN when the platform lacks it: the aligned control
 * message header followed by len bytes of data */
#if !defined(CMSG_LEN)
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/** Supply CMSG_SPACE when the platform lacks it: the aligned data length
 * plus the aligned header, using the platform header alignment macro
 * if that one is available */
#if !defined(CMSG_SPACE)
#  if defined(_CMSG_HDR_ALIGN)
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
81
/** Timeout, in seconds, for reading or writing a TCP query */
#define TCP_QUERY_TIMEOUT 120 

/** How many UDP reads are performed for one read indication from select;
 * only a single read when nonblocking is broken on the platform */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
91
92 /* We define libevent structures here to hide the libevent stuff. */
93
94 #ifdef USE_MINI_EVENT
95 #  ifdef USE_WINSOCK
96 #    include "util/winsock_event.h"
97 #  else
98 #    include "util/mini_event.h"
99 #  endif /* USE_WINSOCK */
100 #else /* USE_MINI_EVENT */
101    /* we use libevent */
102 #  ifdef HAVE_EVENT_H
103 #    include <event.h>
104 #  else
105 #    include "event2/event.h"
106 #    include "event2/event_struct.h"
107 #    include "event2/event_compat.h"
108 #  endif
109 #endif /* USE_MINI_EVENT */
110
111 /**
112  * The internal event structure for keeping libevent info for the event.
113  * Possibly other structures (list, tree) this is part of.
114  */
115 struct internal_event {
116         /** the comm base */
117         struct comm_base* base;
118         /** libevent event type, alloced here */
119         struct event ev;
120 };
121
122 /**
123  * Internal base structure, so that every thread has its own events.
124  */
125 struct internal_base {
126         /** libevent event_base type. */
127         struct event_base* base;
128         /** seconds time pointer points here */
129         time_t secs;
130         /** timeval with current time */
131         struct timeval now;
132         /** the event used for slow_accept timeouts */
133         struct event slow_accept;
134         /** true if slow_accept is enabled */
135         int slow_accept_enabled;
136 };
137
138 /**
139  * Internal timer structure, to store timer event in.
140  */
141 struct internal_timer {
142         /** the comm base */
143         struct comm_base* base;
144         /** libevent event type, alloced here */
145         struct event ev;
146         /** is timer enabled */
147         uint8_t enabled;
148 };
149
150 /**
151  * Internal signal structure, to store signal event in.
152  */
153 struct internal_signal {
154         /** libevent event type, alloced here */
155         struct event ev;
156         /** next in signal list */
157         struct internal_signal* next;
158 };
159
160 /** create a tcp handler with a parent */
161 static struct comm_point* comm_point_create_tcp_handler(
162         struct comm_base *base, struct comm_point* parent, size_t bufsize,
163         comm_point_callback_t* callback, void* callback_arg);
164
165 /* -------- End of local definitions -------- */
166
167 #ifdef USE_MINI_EVENT
168 /** minievent updates the time when it blocks. */
169 #define comm_base_now(x) /* nothing to do */
170 #else /* !USE_MINI_EVENT */
171 /** fillup the time values in the event base */
172 static void
173 comm_base_now(struct comm_base* b)
174 {
175         if(gettimeofday(&b->eb->now, NULL) < 0) {
176                 log_err("gettimeofday: %s", strerror(errno));
177         }
178         b->eb->secs = (time_t)b->eb->now.tv_sec;
179 }
180 #endif /* USE_MINI_EVENT */
181
182 struct comm_base* 
183 comm_base_create(int sigs)
184 {
185         struct comm_base* b = (struct comm_base*)calloc(1,
186                 sizeof(struct comm_base));
187         if(!b)
188                 return NULL;
189         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
190         if(!b->eb) {
191                 free(b);
192                 return NULL;
193         }
194 #ifdef USE_MINI_EVENT
195         (void)sigs;
196         /* use mini event time-sharing feature */
197         b->eb->base = event_init(&b->eb->secs, &b->eb->now);
198 #else
199 #  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
200         /* libev */
201         if(sigs)
202                 b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
203         else
204                 b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
205 #  else
206         (void)sigs;
207 #    ifdef HAVE_EVENT_BASE_NEW
208         b->eb->base = event_base_new();
209 #    else
210         b->eb->base = event_init();
211 #    endif
212 #  endif
213 #endif
214         if(!b->eb->base) {
215                 free(b->eb);
216                 free(b);
217                 return NULL;
218         }
219         comm_base_now(b);
220         /* avoid event_get_method call which causes crashes even when
221          * not printing, because its result is passed */
222         verbose(VERB_ALGO, 
223 #if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
224                 "libev"
225 #elif defined(USE_MINI_EVENT)
226                 "event "
227 #else
228                 "libevent "
229 #endif
230                 "%s uses %s method.", 
231                 event_get_version(), 
232 #ifdef HAVE_EVENT_BASE_GET_METHOD
233                 event_base_get_method(b->eb->base)
234 #else
235                 "not_obtainable"
236 #endif
237         );
238         return b;
239 }
240
241 struct comm_base*
242 comm_base_create_event(struct event_base* base)
243 {
244         struct comm_base* b = (struct comm_base*)calloc(1,
245                 sizeof(struct comm_base));
246         if(!b)
247                 return NULL;
248         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
249         if(!b->eb) {
250                 free(b);
251                 return NULL;
252         }
253         b->eb->base = base;
254         comm_base_now(b);
255         return b;
256 }
257
258 void 
259 comm_base_delete(struct comm_base* b)
260 {
261         if(!b)
262                 return;
263         if(b->eb->slow_accept_enabled) {
264                 if(event_del(&b->eb->slow_accept) != 0) {
265                         log_err("could not event_del slow_accept");
266                 }
267         }
268 #ifdef USE_MINI_EVENT
269         event_base_free(b->eb->base);
270 #elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
271         /* only libevent 1.2+ has it, but in 1.2 it is broken - 
272            assertion fails on signal handling ev that is not deleted
273            in libevent 1.3c (event_base_once appears) this is fixed. */
274         event_base_free(b->eb->base);
275 #endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
276         b->eb->base = NULL;
277         free(b->eb);
278         free(b);
279 }
280
281 void 
282 comm_base_delete_no_base(struct comm_base* b)
283 {
284         if(!b)
285                 return;
286         if(b->eb->slow_accept_enabled) {
287                 if(event_del(&b->eb->slow_accept) != 0) {
288                         log_err("could not event_del slow_accept");
289                 }
290         }
291         b->eb->base = NULL;
292         free(b->eb);
293         free(b);
294 }
295
296 void 
297 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
298 {
299         *tt = &b->eb->secs;
300         *tv = &b->eb->now;
301 }
302
303 void 
304 comm_base_dispatch(struct comm_base* b)
305 {
306         int retval;
307         retval = event_base_dispatch(b->eb->base);
308         if(retval != 0) {
309                 fatal_exit("event_dispatch returned error %d, "
310                         "errno is %s", retval, strerror(errno));
311         }
312 }
313
314 void comm_base_exit(struct comm_base* b)
315 {
316         if(event_base_loopexit(b->eb->base, NULL) != 0) {
317                 log_err("Could not loopexit");
318         }
319 }
320
321 void comm_base_set_slow_accept_handlers(struct comm_base* b,
322         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
323 {
324         b->stop_accept = stop_acc;
325         b->start_accept = start_acc;
326         b->cb_arg = arg;
327 }
328
329 struct event_base* comm_base_internal(struct comm_base* b)
330 {
331         return b->eb->base;
332 }
333
334 /** see if errno for udp has to be logged or not uses globals */
335 static int
336 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
337 {
338         /* do not log transient errors (unless high verbosity) */
339 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
340         switch(errno) {
341 #  ifdef ENETUNREACH
342                 case ENETUNREACH:
343 #  endif
344 #  ifdef EHOSTDOWN
345                 case EHOSTDOWN:
346 #  endif
347 #  ifdef EHOSTUNREACH
348                 case EHOSTUNREACH:
349 #  endif
350 #  ifdef ENETDOWN
351                 case ENETDOWN:
352 #  endif
353                         if(verbosity < VERB_ALGO)
354                                 return 0;
355                 default:
356                         break;
357         }
358 #endif
359         /* permission denied is gotten for every send if the
360          * network is disconnected (on some OS), squelch it */
361         if( ((errno == EPERM)
362 #  ifdef EADDRNOTAVAIL
363                 /* 'Cannot assign requested address' also when disconnected */
364                 || (errno == EADDRNOTAVAIL)
365 #  endif
366                 ) && verbosity < VERB_DETAIL)
367                 return 0;
368         /* squelch errors where people deploy AAAA ::ffff:bla for
369          * authority servers, which we try for intranets. */
370         if(errno == EINVAL && addr_is_ip4mapped(
371                 (struct sockaddr_storage*)addr, addrlen) &&
372                 verbosity < VERB_DETAIL)
373                 return 0;
374         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
375          * but a dns cache does not need it. */
376         if(errno == EACCES && addr_is_broadcast(
377                 (struct sockaddr_storage*)addr, addrlen) &&
378                 verbosity < VERB_DETAIL)
379                 return 0;
380         return 1;
381 }
382
/** For a failed tcp connect, apply the same errno logging decision that
 * is used for a failed udp send. */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);
	return needs_log;
}
387
388 /* send a UDP reply */
389 int
390 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
391         struct sockaddr* addr, socklen_t addrlen) 
392 {
393         ssize_t sent;
394         log_assert(c->fd != -1);
395 #ifdef UNBOUND_DEBUG
396         if(sldns_buffer_remaining(packet) == 0)
397                 log_err("error: send empty UDP packet");
398 #endif
399         log_assert(addr && addrlen > 0);
400         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
401                 sldns_buffer_remaining(packet), 0,
402                 addr, addrlen);
403         if(sent == -1) {
404                 /* try again and block, waiting for IO to complete,
405                  * we want to send the answer, and we will wait for
406                  * the ethernet interface buffer to have space. */
407 #ifndef USE_WINSOCK
408                 if(errno == EAGAIN || 
409 #  ifdef EWOULDBLOCK
410                         errno == EWOULDBLOCK ||
411 #  endif
412                         errno == ENOBUFS) {
413 #else
414                 if(WSAGetLastError() == WSAEINPROGRESS ||
415                         WSAGetLastError() == WSAENOBUFS ||
416                         WSAGetLastError() == WSAEWOULDBLOCK) {
417 #endif
418                         int e;
419                         fd_set_block(c->fd);
420                         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
421                                 sldns_buffer_remaining(packet), 0,
422                                 addr, addrlen);
423                         e = errno;
424                         fd_set_nonblock(c->fd);
425                         errno = e;
426                 }
427         }
428         if(sent == -1) {
429                 if(!udp_send_errno_needs_log(addr, addrlen))
430                         return 0;
431 #ifndef USE_WINSOCK
432                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
433 #else
434                 verbose(VERB_OPS, "sendto failed: %s", 
435                         wsa_strerror(WSAGetLastError()));
436 #endif
437                 log_addr(VERB_OPS, "remote address is", 
438                         (struct sockaddr_storage*)addr, addrlen);
439                 return 0;
440         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
441                 log_err("sent %d in place of %d bytes", 
442                         (int)sent, (int)sldns_buffer_remaining(packet));
443                 return 0;
444         }
445         return 1;
446 }
447
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Log the ancillary packet info of a reply, for debugging.
 * @param str: text that is printed in front of the info.
 * @param r: reply; its srctype (4 or 6) selects which pktinfo is printed,
 *	any other srctype is reported as unknown.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	if(r->srctype == 6) {
		char addr6[1024];
		if(!inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 
			addr6, (socklen_t)sizeof(addr6))) {
			(void)strlcpy(addr6, "(inet_ntop error)", sizeof(addr6));
		}
		addr6[sizeof(addr6)-1]=0;
		log_info("%s: %s %d", str, addr6, r->pktinfo.v6info.ipi6_ifindex);
	} else if(r->srctype == 4) {
#ifdef IP_PKTINFO
		char addr4[1024], spec4[1024];
		if(!inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 
			addr4, (socklen_t)sizeof(addr4))) {
			(void)strlcpy(addr4, "(inet_ntop error)", sizeof(addr4));
		}
		addr4[sizeof(addr4)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(!inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 
			spec4, (socklen_t)sizeof(spec4))) {
			(void)strlcpy(spec4, "(inet_ntop error)", sizeof(spec4));
		}
		spec4[sizeof(spec4)-1]=0;
#else
		/* no ipi_spec_dst member, print an empty string for it */
		spec4[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			addr4, spec4);
#elif defined(IP_RECVDSTADDR)
		char addr4[1024];
		if(!inet_ntop(AF_INET, &r->pktinfo.v4addr, 
			addr4, (socklen_t)sizeof(addr4))) {
			(void)strlcpy(addr4, "(inet_ntop error)", sizeof(addr4));
		}
		addr4[sizeof(addr4)-1]=0;
		log_info("%s: %s", str, addr4);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
	} else {
		log_info("%s: unknown srctype %d", str, r->srctype);
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
495
496 /** send a UDP reply over specified interface*/
497 static int
498 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
499         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
500 {
501 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
502         ssize_t sent;
503         struct msghdr msg;
504         struct iovec iov[1];
505         char control[256];
506 #ifndef S_SPLINT_S
507         struct cmsghdr *cmsg;
508 #endif /* S_SPLINT_S */
509
510         log_assert(c->fd != -1);
511 #ifdef UNBOUND_DEBUG
512         if(sldns_buffer_remaining(packet) == 0)
513                 log_err("error: send empty UDP packet");
514 #endif
515         log_assert(addr && addrlen > 0);
516
517         msg.msg_name = addr;
518         msg.msg_namelen = addrlen;
519         iov[0].iov_base = sldns_buffer_begin(packet);
520         iov[0].iov_len = sldns_buffer_remaining(packet);
521         msg.msg_iov = iov;
522         msg.msg_iovlen = 1;
523         msg.msg_control = control;
524 #ifndef S_SPLINT_S
525         msg.msg_controllen = sizeof(control);
526 #endif /* S_SPLINT_S */
527         msg.msg_flags = 0;
528
529 #ifndef S_SPLINT_S
530         cmsg = CMSG_FIRSTHDR(&msg);
531         if(r->srctype == 4) {
532 #ifdef IP_PKTINFO
533                 void* cmsg_data;
534                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
535                 log_assert(msg.msg_controllen <= sizeof(control));
536                 cmsg->cmsg_level = IPPROTO_IP;
537                 cmsg->cmsg_type = IP_PKTINFO;
538                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
539                         sizeof(struct in_pktinfo));
540                 /* unset the ifindex to not bypass the routing tables */
541                 cmsg_data = CMSG_DATA(cmsg);
542                 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
543                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
544 #elif defined(IP_SENDSRCADDR)
545                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
546                 log_assert(msg.msg_controllen <= sizeof(control));
547                 cmsg->cmsg_level = IPPROTO_IP;
548                 cmsg->cmsg_type = IP_SENDSRCADDR;
549                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
550                         sizeof(struct in_addr));
551                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
552 #else
553                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
554                 msg.msg_control = NULL;
555 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
556         } else if(r->srctype == 6) {
557                 void* cmsg_data;
558                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
559                 log_assert(msg.msg_controllen <= sizeof(control));
560                 cmsg->cmsg_level = IPPROTO_IPV6;
561                 cmsg->cmsg_type = IPV6_PKTINFO;
562                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
563                         sizeof(struct in6_pktinfo));
564                 /* unset the ifindex to not bypass the routing tables */
565                 cmsg_data = CMSG_DATA(cmsg);
566                 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
567                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
568         } else {
569                 /* try to pass all 0 to use default route */
570                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
571                 log_assert(msg.msg_controllen <= sizeof(control));
572                 cmsg->cmsg_level = IPPROTO_IPV6;
573                 cmsg->cmsg_type = IPV6_PKTINFO;
574                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
575                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
576         }
577 #endif /* S_SPLINT_S */
578         if(verbosity >= VERB_ALGO)
579                 p_ancil("send_udp over interface", r);
580         sent = sendmsg(c->fd, &msg, 0);
581         if(sent == -1) {
582                 /* try again and block, waiting for IO to complete,
583                  * we want to send the answer, and we will wait for
584                  * the ethernet interface buffer to have space. */
585 #ifndef USE_WINSOCK
586                 if(errno == EAGAIN || 
587 #  ifdef EWOULDBLOCK
588                         errno == EWOULDBLOCK ||
589 #  endif
590                         errno == ENOBUFS) {
591 #else
592                 if(WSAGetLastError() == WSAEINPROGRESS ||
593                         WSAGetLastError() == WSAENOBUFS ||
594                         WSAGetLastError() == WSAEWOULDBLOCK) {
595 #endif
596                         int e;
597                         fd_set_block(c->fd);
598                         sent = sendmsg(c->fd, &msg, 0);
599                         e = errno;
600                         fd_set_nonblock(c->fd);
601                         errno = e;
602                 }
603         }
604         if(sent == -1) {
605                 if(!udp_send_errno_needs_log(addr, addrlen))
606                         return 0;
607                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
608                 log_addr(VERB_OPS, "remote address is", 
609                         (struct sockaddr_storage*)addr, addrlen);
610 #ifdef __NetBSD__
611                 /* netbsd 7 has IP_PKTINFO for recv but not send */
612                 if(errno == EINVAL && r->srctype == 4)
613                         log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
614                                 "Please disable interface-automatic");
615 #endif
616                 return 0;
617         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
618                 log_err("sent %d in place of %d bytes", 
619                         (int)sent, (int)sldns_buffer_remaining(packet));
620                 return 0;
621         }
622         return 1;
623 #else
624         (void)c;
625         (void)packet;
626         (void)addr;
627         (void)addrlen;
628         (void)r;
629         log_err("sendmsg: IPV6_PKTINFO not supported");
630         return 0;
631 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
632 }
633
/**
 * Event callback for a UDP commpoint that uses ancillary data. Reads up to
 * NUM_UDP_PER_SELECT datagrams with recvmsg, records the IPV6_PKTINFO,
 * IP_PKTINFO or IP_RECVDSTADDR control message in the reply, and calls the
 * commpoint callback for every datagram. When that callback returns
 * nonzero the buffer is sent back at once over the same interface.
 * Without recvmsg and IPV6_PKTINFO support this function exits fatally.
 * @param fd: the file descriptor that is readable.
 * @param event: event flags; nothing is done unless EV_READ is set.
 * @param arg: the commpoint.
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	char ancil[256];
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		rep.srctype = 0;
#ifndef S_SPLINT_S
		/* take the first control message that carries packet info */
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		/* stop when the callback closed the commpoint (fd is -1) or
		 * moved it to another descriptor, so that the stale fd is
		 * not read again; same test as comm_point_udp_callback */
		if(rep.c->fd != fd)
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
728
729 void 
730 comm_point_udp_callback(int fd, short event, void* arg)
731 {
732         struct comm_reply rep;
733         ssize_t rcv;
734         int i;
735
736         rep.c = (struct comm_point*)arg;
737         log_assert(rep.c->type == comm_udp);
738
739         if(!(event&EV_READ))
740                 return;
741         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
742         comm_base_now(rep.c->ev->base);
743         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
744                 sldns_buffer_clear(rep.c->buffer);
745                 rep.addrlen = (socklen_t)sizeof(rep.addr);
746                 log_assert(fd != -1);
747                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
748                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
749                         sldns_buffer_remaining(rep.c->buffer), 0, 
750                         (struct sockaddr*)&rep.addr, &rep.addrlen);
751                 if(rcv == -1) {
752 #ifndef USE_WINSOCK
753                         if(errno != EAGAIN && errno != EINTR)
754                                 log_err("recvfrom %d failed: %s", 
755                                         fd, strerror(errno));
756 #else
757                         if(WSAGetLastError() != WSAEINPROGRESS &&
758                                 WSAGetLastError() != WSAECONNRESET &&
759                                 WSAGetLastError()!= WSAEWOULDBLOCK)
760                                 log_err("recvfrom failed: %s",
761                                         wsa_strerror(WSAGetLastError()));
762 #endif
763                         return;
764                 }
765                 sldns_buffer_skip(rep.c->buffer, rcv);
766                 sldns_buffer_flip(rep.c->buffer);
767                 rep.srctype = 0;
768                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
769                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
770                         /* send back immediate reply */
771                         (void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
772                                 (struct sockaddr*)&rep.addr, rep.addrlen);
773                 }
774                 if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
775                 another UDP port. Note rep.c cannot be reused with TCP fd. */
776                         break;
777         }
778 }
779
780 /** Use a new tcp handler for new query fd, set to read query */
781 static void
782 setup_tcp_handler(struct comm_point* c, int fd) 
783 {
784         log_assert(c->type == comm_tcp);
785         log_assert(c->fd == -1);
786         sldns_buffer_clear(c->buffer);
787         c->tcp_is_reading = 1;
788         c->tcp_byte_count = 0;
789         comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
790 }
791
792 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
793         short ATTR_UNUSED(event), void* arg)
794 {
795         struct comm_base* b = (struct comm_base*)arg;
796         /* timeout for the slow accept, re-enable accepts again */
797         if(b->start_accept) {
798                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
799                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
800                 (*b->start_accept)(b->cb_arg);
801                 b->eb->slow_accept_enabled = 0;
802         }
803 }
804
805 int comm_point_perform_accept(struct comm_point* c,
806         struct sockaddr_storage* addr, socklen_t* addrlen)
807 {
808         int new_fd;
809         *addrlen = (socklen_t)sizeof(*addr);
810         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
811         if(new_fd == -1) {
812 #ifndef USE_WINSOCK
813                 /* EINTR is signal interrupt. others are closed connection. */
814                 if(     errno == EINTR || errno == EAGAIN
815 #ifdef EWOULDBLOCK
816                         || errno == EWOULDBLOCK 
817 #endif
818 #ifdef ECONNABORTED
819                         || errno == ECONNABORTED 
820 #endif
821 #ifdef EPROTO
822                         || errno == EPROTO
823 #endif /* EPROTO */
824                         )
825                         return -1;
826 #if defined(ENFILE) && defined(EMFILE)
827                 if(errno == ENFILE || errno == EMFILE) {
828                         /* out of file descriptors, likely outside of our
829                          * control. stop accept() calls for some time */
830                         if(c->ev->base->stop_accept) {
831                                 struct comm_base* b = c->ev->base;
832                                 struct timeval tv;
833                                 verbose(VERB_ALGO, "out of file descriptors: "
834                                         "slow accept");
835                                 b->eb->slow_accept_enabled = 1;
836                                 fptr_ok(fptr_whitelist_stop_accept(
837                                         b->stop_accept));
838                                 (*b->stop_accept)(b->cb_arg);
839                                 /* set timeout, no mallocs */
840                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
841                                 tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
842                                 event_set(&b->eb->slow_accept, -1, EV_TIMEOUT, 
843                                         comm_base_handle_slow_accept, b);
844                                 if(event_base_set(b->eb->base,
845                                         &b->eb->slow_accept) != 0) {
846                                         /* we do not want to log here, because
847                                          * that would spam the logfiles.
848                                          * error: "event_base_set failed." */
849                                 }
850                                 if(event_add(&b->eb->slow_accept, &tv) != 0) {
851                                         /* we do not want to log here,
852                                          * error: "event_add failed." */
853                                 }
854                         }
855                         return -1;
856                 }
857 #endif
858                 log_err_addr("accept failed", strerror(errno), addr, *addrlen);
859 #else /* USE_WINSOCK */
860                 if(WSAGetLastError() == WSAEINPROGRESS ||
861                         WSAGetLastError() == WSAECONNRESET)
862                         return -1;
863                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
864                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
865                         return -1;
866                 }
867                 log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
868                         addr, *addrlen);
869 #endif
870                 return -1;
871         }
872         fd_set_nonblock(new_fd);
873         return new_fd;
874 }
875
#ifdef USE_WINSOCK
/**
 * BIO callback for winsock.  After a read/gets or write/puts operation has
 * returned with argl == 0, check whether the last winsock error was
 * WSAEWOULDBLOCK and, if so, report that to the event layer through
 * winsock_tcp_wouldblock for the event stored as the BIO callback argument.
 * @param b: the BIO the operation was performed on.
 * @param oper: BIO_CB_* operation code.
 * @param argp: unused.
 * @param argi: unused.
 * @param argl: compared against 0 for the return-phase checks.
 * @param retvalue: passed back unchanged.
 * @return retvalue.
 */
static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
	int ATTR_UNUSED(argi), long argl, long retvalue)
{
	const char* phase = (oper&BIO_CB_RETURN)?"return":"before";
	const char* what = "other";
	if(oper&BIO_CB_READ)
		what = "read";
	else if(oper&BIO_CB_WRITE)
		what = "write";
	verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper, phase, what,
		WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
	/* on windows, check if previous operation caused EWOULDBLOCK */
	if(argl == 0) {
		if(oper == (BIO_CB_READ|BIO_CB_RETURN) ||
			oper == (BIO_CB_GETS|BIO_CB_RETURN)) {
			if(WSAGetLastError() == WSAEWOULDBLOCK)
				winsock_tcp_wouldblock((struct event*)
					BIO_get_callback_arg(b), EV_READ);
		}
		if(oper == (BIO_CB_WRITE|BIO_CB_RETURN) ||
			oper == (BIO_CB_PUTS|BIO_CB_RETURN)) {
			if(WSAGetLastError() == WSAEWOULDBLOCK)
				winsock_tcp_wouldblock((struct event*)
					BIO_get_callback_arg(b), EV_WRITE);
		}
	}
	/* hand the original result back to the caller */
	return retvalue;
}

/**
 * Install win_bio_cb on the read and write BIO of an SSL connection, with
 * the comm point's event as callback argument.
 * @param c: comm point whose event is passed to the callback.
 * @param thessl: the SSL* of the connection, passed as void*.
 */
void
comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
{
	SSL* ssl = (SSL*)thessl;
	char* evarg = (char*)&c->ev->ev;
	/* both directions are set; they are usually one and the same BIO */
	BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
	BIO_set_callback_arg(SSL_get_rbio(ssl), evarg);
	BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
	BIO_set_callback_arg(SSL_get_wbio(ssl), evarg);
}
#endif
913
914 void 
915 comm_point_tcp_accept_callback(int fd, short event, void* arg)
916 {
917         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
918         int new_fd;
919         log_assert(c->type == comm_tcp_accept);
920         if(!(event & EV_READ)) {
921                 log_info("ignoring tcp accept event %d", (int)event);
922                 return;
923         }
924         comm_base_now(c->ev->base);
925         /* find free tcp handler. */
926         if(!c->tcp_free) {
927                 log_warn("accepted too many tcp, connections full");
928                 return;
929         }
930         /* accept incoming connection. */
931         c_hdl = c->tcp_free;
932         log_assert(fd != -1);
933         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
934                 &c_hdl->repinfo.addrlen);
935         if(new_fd == -1)
936                 return;
937         if(c->ssl) {
938                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
939                 if(!c_hdl->ssl) {
940                         c_hdl->fd = new_fd;
941                         comm_point_close(c_hdl);
942                         return;
943                 }
944                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
945 #ifdef USE_WINSOCK
946                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
947 #endif
948         }
949
950         /* grab the tcp handler buffers */
951         c->cur_tcp_count++;
952         c->tcp_free = c_hdl->tcp_free;
953         if(!c->tcp_free) {
954                 /* stop accepting incoming queries for now. */
955                 comm_point_stop_listening(c);
956         }
957         setup_tcp_handler(c_hdl, new_fd);
958 }
959
960 /** Make tcp handler free for next assignment */
961 static void
962 reclaim_tcp_handler(struct comm_point* c)
963 {
964         log_assert(c->type == comm_tcp);
965         if(c->ssl) {
966 #ifdef HAVE_SSL
967                 SSL_shutdown(c->ssl);
968                 SSL_free(c->ssl);
969                 c->ssl = NULL;
970 #endif
971         }
972         comm_point_close(c);
973         if(c->tcp_parent) {
974                 c->tcp_parent->cur_tcp_count--;
975                 c->tcp_free = c->tcp_parent->tcp_free;
976                 c->tcp_parent->tcp_free = c;
977                 if(!c->tcp_free) {
978                         /* re-enable listening on accept socket */
979                         comm_point_start_listening(c->tcp_parent, -1, -1);
980                 }
981         }
982 }
983
984 /** do the callback when writing is done */
985 static void
986 tcp_callback_writer(struct comm_point* c)
987 {
988         log_assert(c->type == comm_tcp);
989         sldns_buffer_clear(c->buffer);
990         if(c->tcp_do_toggle_rw)
991                 c->tcp_is_reading = 1;
992         c->tcp_byte_count = 0;
993         /* switch from listening(write) to listening(read) */
994         comm_point_stop_listening(c);
995         comm_point_start_listening(c, -1, -1);
996 }
997
998 /** do the callback when reading is done */
999 static void
1000 tcp_callback_reader(struct comm_point* c)
1001 {
1002         log_assert(c->type == comm_tcp || c->type == comm_local);
1003         sldns_buffer_flip(c->buffer);
1004         if(c->tcp_do_toggle_rw)
1005                 c->tcp_is_reading = 0;
1006         c->tcp_byte_count = 0;
1007         if(c->type == comm_tcp)
1008                 comm_point_stop_listening(c);
1009         fptr_ok(fptr_whitelist_comm_point(c->callback));
1010         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
1011                 comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
1012         }
1013 }
1014
/**
 * Continue the SSL handshake on a tcp connection.
 * In the hs_read/hs_write states (set by the ssl read and write handlers
 * when SSL asked for the opposite direction) only the event registration
 * is restored.  Otherwise SSL_do_handshake is called and the comm point is
 * set to wait for whatever SSL needs next.
 * @param c: comm point with an SSL session.
 * @return 0 if the connection failed or was closed, 1 if the handshake
 *	finished or has to wait for more I/O (ssl_shake_state tells which).
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int r, want;
	if(c->ssl_shake_state == comm_ssl_shake_hs_read ||
		c->ssl_shake_state == comm_ssl_shake_hs_write) {
		/* the awaited condition was satisfied; after hs_read go back
		 * to writing (rd+wr), after hs_write back to reading (rd) */
		int wr = (c->ssl_shake_state == comm_ssl_shake_hs_read);
		comm_point_listen_for_rw(c, 1, wr);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	ERR_clear_error();
	r = SSL_do_handshake(c->ssl);
	if(r == 1) {
		/* this is where peer verification could take place */
		log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
			c->repinfo.addrlen);

		/* setup listen rw correctly */
		if(!c->tcp_is_reading)
			comm_point_listen_for_rw(c, 1, 1);
		else if(c->ssl_shake_state != comm_ssl_shake_read)
			comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	want = SSL_get_error(c->ssl, r);
	switch(want) {
	case SSL_ERROR_WANT_READ:
		/* re-register only if not already waiting for read */
		if(c->ssl_shake_state != comm_ssl_shake_read) {
			c->ssl_shake_state = comm_ssl_shake_read;
			comm_point_listen_for_rw(c, 1, 0);
		}
		return 1;
	case SSL_ERROR_WANT_WRITE:
		/* re-register only if not already waiting for write */
		if(c->ssl_shake_state != comm_ssl_shake_write) {
			c->ssl_shake_state = comm_ssl_shake_write;
			comm_point_listen_for_rw(c, 0, 1);
		}
		return 1;
	default:
		break;
	}
	if(r == 0)
		return 0; /* closed */
	if(want == SSL_ERROR_SYSCALL) {
		/* SYSCALL and errno==0 means closed uncleanly */
		if(errno != 0)
			log_err("SSL_handshake syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("ssl handshake failed");
	log_addr(1, "ssl handshake failed", &c->repinfo.addr,
		c->repinfo.addrlen);
	return 0;
}
#endif /* HAVE_SSL */
1080
/**
 * Read a length-prefixed message from an SSL tcp connection.
 * A pending handshake is continued first.  Then the two length bytes are
 * read, the length is checked against the buffer capacity and
 * LDNS_HEADER_SIZE, and the message body is read; tcp_callback_reader is
 * called once the body is complete.
 * @param c: comm point with an SSL session.
 * @return 0 on error or close, 1 otherwise (also when more data has to be
 *	waited for).  Without HAVE_SSL this always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* read (the rest of) the length bytes */
		ERR_clear_error();
		r = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(r <= 0) {
			switch(SSL_get_error(c->ssl, r)) {
			case SSL_ERROR_ZERO_RETURN:
				return 0; /* shutdown, closed */
			case SSL_ERROR_WANT_READ:
				return 1; /* read more later */
			case SSL_ERROR_WANT_WRITE:
				/* SSL has to write before it can read */
				c->ssl_shake_state = comm_ssl_shake_hs_write;
				comm_point_listen_for_rw(c, 0, 1);
				return 1;
			case SSL_ERROR_SYSCALL:
				if(errno != 0)
					log_err("SSL_read syscall: %s",
						strerror(errno));
				return 0;
			default:
				break;
			}
			log_crypto_err("could not SSL_read");
			return 0;
		}
		c->tcp_byte_count += r;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		/* length is complete, validate it */
		if(sldns_buffer_read_u16_at(c->buffer, 0) >
			sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer,
			sldns_buffer_read_u16_at(c->buffer, 0));
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	/* read (more of) the message body */
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0) {
		switch(SSL_get_error(c->ssl, r)) {
		case SSL_ERROR_ZERO_RETURN:
			return 0; /* shutdown, closed */
		case SSL_ERROR_WANT_READ:
			return 1; /* read more later */
		case SSL_ERROR_WANT_WRITE:
			c->ssl_shake_state = comm_ssl_shake_hs_write;
			comm_point_listen_for_rw(c, 0, 1);
			return 1;
		case SSL_ERROR_SYSCALL:
			if(errno != 0)
				log_err("SSL_read syscall: %s",
					strerror(errno));
			return 0;
		default:
			break;
		}
		log_crypto_err("could not SSL_read");
		return 0;
	}
	sldns_buffer_skip(c->buffer, (ssize_t)r);
	if(sldns_buffer_remaining(c->buffer) <= 0) {
		/* whole message received */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1167
/**
 * Write a length-prefixed message to an SSL tcp connection.
 * A pending handshake is continued first.  Then the two length bytes (the
 * buffer limit in network byte order) are written, followed by the buffer
 * contents; tcp_callback_writer is called once everything has been sent.
 * @param c: comm point with an SSL session.
 * @return 0 on error or close, 1 otherwise (also when the write has to be
 *	continued later).  Without HAVE_SSL this always returns 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* send (the rest of) the length prefix */
		uint16_t netlen = htons(sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		r = SSL_write(c->ssl,
			(void*)(((uint8_t*)&netlen)+c->tcp_byte_count),
			(int)(sizeof(uint16_t)-c->tcp_byte_count));
		if(r <= 0) {
			switch(SSL_get_error(c->ssl, r)) {
			case SSL_ERROR_ZERO_RETURN:
				return 0; /* closed */
			case SSL_ERROR_WANT_READ:
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
				return 1; /* wait for read condition */
			case SSL_ERROR_WANT_WRITE:
				return 1; /* write more later */
			case SSL_ERROR_SYSCALL:
				if(errno != 0)
					log_err("SSL_write syscall: %s",
						strerror(errno));
				return 0;
			default:
				break;
			}
			log_crypto_err("could not SSL_write");
			return 0;
		}
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		/* prefix is out; position the buffer at the body */
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	/* send (more of) the message body */
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0) {
		switch(SSL_get_error(c->ssl, r)) {
		case SSL_ERROR_ZERO_RETURN:
			return 0; /* closed */
		case SSL_ERROR_WANT_READ:
			c->ssl_shake_state = comm_ssl_shake_read;
			comm_point_listen_for_rw(c, 1, 0);
			return 1; /* wait for read condition */
		case SSL_ERROR_WANT_WRITE:
			return 1; /* write more later */
		case SSL_ERROR_SYSCALL:
			if(errno != 0)
				log_err("SSL_write syscall: %s",
					strerror(errno));
			return 0;
		default:
			break;
		}
		log_crypto_err("could not SSL_write");
		return 0;
	}
	sldns_buffer_skip(c->buffer, (ssize_t)r);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		/* whole message sent */
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1251
1252 /** handle ssl tcp connection with dns contents */
1253 static int
1254 ssl_handle_it(struct comm_point* c)
1255 {
1256         if(c->tcp_is_reading)
1257                 return ssl_handle_read(c);
1258         return ssl_handle_write(c);
1259 }
1260
1261 /** Handle tcp reading callback. 
1262  * @param fd: file descriptor of socket.
1263  * @param c: comm point to read from into buffer.
1264  * @param short_ok: if true, very short packets are OK (for comm_local).
1265  * @return: 0 on error 
1266  */
1267 static int
1268 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1269 {
1270         ssize_t r;
1271         log_assert(c->type == comm_tcp || c->type == comm_local);
1272         if(c->ssl)
1273                 return ssl_handle_it(c);
1274         if(!c->tcp_is_reading)
1275                 return 0;
1276
1277         log_assert(fd != -1);
1278         if(c->tcp_byte_count < sizeof(uint16_t)) {
1279                 /* read length bytes */
1280                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1281                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1282                 if(r == 0)
1283                         return 0;
1284                 else if(r == -1) {
1285 #ifndef USE_WINSOCK
1286                         if(errno == EINTR || errno == EAGAIN)
1287                                 return 1;
1288 #ifdef ECONNRESET
1289                         if(errno == ECONNRESET && verbosity < 2)
1290                                 return 0; /* silence reset by peer */
1291 #endif
1292                         log_err_addr("read (in tcp s)", strerror(errno),
1293                                 &c->repinfo.addr, c->repinfo.addrlen);
1294 #else /* USE_WINSOCK */
1295                         if(WSAGetLastError() == WSAECONNRESET)
1296                                 return 0;
1297                         if(WSAGetLastError() == WSAEINPROGRESS)
1298                                 return 1;
1299                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1300                                 winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1301                                 return 1;
1302                         }
1303                         log_err_addr("read (in tcp s)", 
1304                                 wsa_strerror(WSAGetLastError()),
1305                                 &c->repinfo.addr, c->repinfo.addrlen);
1306 #endif
1307                         return 0;
1308                 } 
1309                 c->tcp_byte_count += r;
1310                 if(c->tcp_byte_count != sizeof(uint16_t))
1311                         return 1;
1312                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1313                         sldns_buffer_capacity(c->buffer)) {
1314                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1315                         return 0;
1316                 }
1317                 sldns_buffer_set_limit(c->buffer, 
1318                         sldns_buffer_read_u16_at(c->buffer, 0));
1319                 if(!short_ok && 
1320                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1321                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1322                         return 0;
1323                 }
1324                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1325                         (int)sldns_buffer_limit(c->buffer));
1326         }
1327
1328         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1329         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1330                 sldns_buffer_remaining(c->buffer), 0);
1331         if(r == 0) {
1332                 return 0;
1333         } else if(r == -1) {
1334 #ifndef USE_WINSOCK
1335                 if(errno == EINTR || errno == EAGAIN)
1336                         return 1;
1337                 log_err_addr("read (in tcp r)", strerror(errno),
1338                         &c->repinfo.addr, c->repinfo.addrlen);
1339 #else /* USE_WINSOCK */
1340                 if(WSAGetLastError() == WSAECONNRESET)
1341                         return 0;
1342                 if(WSAGetLastError() == WSAEINPROGRESS)
1343                         return 1;
1344                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1345                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1346                         return 1;
1347                 }
1348                 log_err_addr("read (in tcp r)",
1349                         wsa_strerror(WSAGetLastError()),
1350                         &c->repinfo.addr, c->repinfo.addrlen);
1351 #endif
1352                 return 0;
1353         }
1354         sldns_buffer_skip(c->buffer, r);
1355         if(sldns_buffer_remaining(c->buffer) <= 0) {
1356                 tcp_callback_reader(c);
1357         }
1358         return 1;
1359 }
1360
1361 /** 
1362  * Handle tcp writing callback. 
1363  * @param fd: file descriptor of socket.
1364  * @param c: comm point to write buffer out of.
1365  * @return: 0 on error
1366  */
1367 static int
1368 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1369 {
1370         ssize_t r;
1371         log_assert(c->type == comm_tcp);
1372         if(c->tcp_is_reading && !c->ssl)
1373                 return 0;
1374         log_assert(fd != -1);
1375         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1376                 /* check for pending error from nonblocking connect */
1377                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1378                 int error = 0;
1379                 socklen_t len = (socklen_t)sizeof(error);
1380                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1381                         &len) < 0){
1382 #ifndef USE_WINSOCK
1383                         error = errno; /* on solaris errno is error */
1384 #else /* USE_WINSOCK */
1385                         error = WSAGetLastError();
1386 #endif
1387                 }
1388 #ifndef USE_WINSOCK
1389 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1390                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1391                         return 1; /* try again later */
1392                 else
1393 #endif
1394                 if(error != 0 && verbosity < 2)
1395                         return 0; /* silence lots of chatter in the logs */
1396                 else if(error != 0) {
1397                         log_err_addr("tcp connect", strerror(error),
1398                                 &c->repinfo.addr, c->repinfo.addrlen);
1399 #else /* USE_WINSOCK */
1400                 /* examine error */
1401                 if(error == WSAEINPROGRESS)
1402                         return 1;
1403                 else if(error == WSAEWOULDBLOCK) {
1404                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1405                         return 1;
1406                 } else if(error != 0 && verbosity < 2)
1407                         return 0;
1408                 else if(error != 0) {
1409                         log_err_addr("tcp connect", wsa_strerror(error),
1410                                 &c->repinfo.addr, c->repinfo.addrlen);
1411 #endif /* USE_WINSOCK */
1412                         return 0;
1413                 }
1414         }
1415         if(c->ssl)
1416                 return ssl_handle_it(c);
1417
1418         if(c->tcp_byte_count < sizeof(uint16_t)) {
1419                 uint16_t len = htons(sldns_buffer_limit(c->buffer));
1420 #ifdef HAVE_WRITEV
1421                 struct iovec iov[2];
1422                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1423                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1424                 iov[1].iov_base = sldns_buffer_begin(c->buffer);
1425                 iov[1].iov_len = sldns_buffer_limit(c->buffer);
1426                 log_assert(iov[0].iov_len > 0);
1427                 log_assert(iov[1].iov_len > 0);
1428                 r = writev(fd, iov, 2);
1429 #else /* HAVE_WRITEV */
1430                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1431                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1432 #endif /* HAVE_WRITEV */
1433                 if(r == -1) {
1434 #ifndef USE_WINSOCK
1435 #  ifdef EPIPE
1436                         if(errno == EPIPE && verbosity < 2)
1437                                 return 0; /* silence 'broken pipe' */
1438   #endif
1439                         if(errno == EINTR || errno == EAGAIN)
1440                                 return 1;
1441 #  ifdef HAVE_WRITEV
1442                         log_err_addr("tcp writev", strerror(errno),
1443                                 &c->repinfo.addr, c->repinfo.addrlen);
1444 #  else /* HAVE_WRITEV */
1445                         log_err_addr("tcp send s", strerror(errno),
1446                                 &c->repinfo.addr, c->repinfo.addrlen);
1447 #  endif /* HAVE_WRITEV */
1448 #else
1449                         if(WSAGetLastError() == WSAENOTCONN)
1450                                 return 1;
1451                         if(WSAGetLastError() == WSAEINPROGRESS)
1452                                 return 1;
1453                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1454                                 winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1455                                 return 1; 
1456                         }
1457                         log_err_addr("tcp send s",
1458                                 wsa_strerror(WSAGetLastError()),
1459                                 &c->repinfo.addr, c->repinfo.addrlen);
1460 #endif
1461                         return 0;
1462                 }
1463                 c->tcp_byte_count += r;
1464                 if(c->tcp_byte_count < sizeof(uint16_t))
1465                         return 1;
1466                 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 
1467                         sizeof(uint16_t));
1468                 if(sldns_buffer_remaining(c->buffer) == 0) {
1469                         tcp_callback_writer(c);
1470                         return 1;
1471                 }
1472         }
1473         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1474         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
1475                 sldns_buffer_remaining(c->buffer), 0);
1476         if(r == -1) {
1477 #ifndef USE_WINSOCK
1478                 if(errno == EINTR || errno == EAGAIN)
1479                         return 1;
1480                 log_err_addr("tcp send r", strerror(errno),
1481                         &c->repinfo.addr, c->repinfo.addrlen);
1482 #else
1483                 if(WSAGetLastError() == WSAEINPROGRESS)
1484                         return 1;
1485                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1486                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1487                         return 1; 
1488                 }
1489                 log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
1490                         &c->repinfo.addr, c->repinfo.addrlen);
1491 #endif
1492                 return 0;
1493         }
1494         sldns_buffer_skip(c->buffer, r);
1495
1496         if(sldns_buffer_remaining(c->buffer) == 0) {
1497                 tcp_callback_writer(c);
1498         }
1499         
1500         return 1;
1501 }
1502
1503 void 
1504 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1505 {
1506         struct comm_point* c = (struct comm_point*)arg;
1507         log_assert(c->type == comm_tcp);
1508         comm_base_now(c->ev->base);
1509
1510         if(event&EV_READ) {
1511                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1512                         reclaim_tcp_handler(c);
1513                         if(!c->tcp_do_close) {
1514                                 fptr_ok(fptr_whitelist_comm_point(
1515                                         c->callback));
1516                                 (void)(*c->callback)(c, c->cb_arg, 
1517                                         NETEVENT_CLOSED, NULL);
1518                         }
1519                 }
1520                 return;
1521         }
1522         if(event&EV_WRITE) {
1523                 if(!comm_point_tcp_handle_write(fd, c)) {
1524                         reclaim_tcp_handler(c);
1525                         if(!c->tcp_do_close) {
1526                                 fptr_ok(fptr_whitelist_comm_point(
1527                                         c->callback));
1528                                 (void)(*c->callback)(c, c->cb_arg, 
1529                                         NETEVENT_CLOSED, NULL);
1530                         }
1531                 }
1532                 return;
1533         }
1534         if(event&EV_TIMEOUT) {
1535                 verbose(VERB_QUERY, "tcp took too long, dropped");
1536                 reclaim_tcp_handler(c);
1537                 if(!c->tcp_do_close) {
1538                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1539                         (void)(*c->callback)(c, c->cb_arg,
1540                                 NETEVENT_TIMEOUT, NULL);
1541                 }
1542                 return;
1543         }
1544         log_err("Ignored event %d for tcphdl.", event);
1545 }
1546
1547 void comm_point_local_handle_callback(int fd, short event, void* arg)
1548 {
1549         struct comm_point* c = (struct comm_point*)arg;
1550         log_assert(c->type == comm_local);
1551         comm_base_now(c->ev->base);
1552
1553         if(event&EV_READ) {
1554                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
1555                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1556                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
1557                                 NULL);
1558                 }
1559                 return;
1560         }
1561         log_err("Ignored event %d for localhdl.", event);
1562 }
1563
1564 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
1565         short event, void* arg)
1566 {
1567         struct comm_point* c = (struct comm_point*)arg;
1568         int err = NETEVENT_NOERROR;
1569         log_assert(c->type == comm_raw);
1570         comm_base_now(c->ev->base);
1571         
1572         if(event&EV_TIMEOUT)
1573                 err = NETEVENT_TIMEOUT;
1574         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1575         (void)(*c->callback)(c, c->cb_arg, err, NULL);
1576 }
1577
1578 struct comm_point* 
1579 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
1580         comm_point_callback_t* callback, void* callback_arg)
1581 {
1582         struct comm_point* c = (struct comm_point*)calloc(1,
1583                 sizeof(struct comm_point));
1584         short evbits;
1585         if(!c)
1586                 return NULL;
1587         c->ev = (struct internal_event*)calloc(1,
1588                 sizeof(struct internal_event));
1589         if(!c->ev) {
1590                 free(c);
1591                 return NULL;
1592         }
1593         c->ev->base = base;
1594         c->fd = fd;
1595         c->buffer = buffer;
1596         c->timeout = NULL;
1597         c->tcp_is_reading = 0;
1598         c->tcp_byte_count = 0;
1599         c->tcp_parent = NULL;
1600         c->max_tcp_count = 0;
1601         c->cur_tcp_count = 0;
1602         c->tcp_handlers = NULL;
1603         c->tcp_free = NULL;
1604         c->type = comm_udp;
1605         c->tcp_do_close = 0;
1606         c->do_not_close = 0;
1607         c->tcp_do_toggle_rw = 0;
1608         c->tcp_check_nb_connect = 0;
1609         c->inuse = 0;
1610         c->callback = callback;
1611         c->cb_arg = callback_arg;
1612         evbits = EV_READ | EV_PERSIST;
1613         /* libevent stuff */
1614         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1615         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1616                 log_err("could not baseset udp event");
1617                 comm_point_delete(c);
1618                 return NULL;
1619         }
1620         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1621                 log_err("could not add udp event");
1622                 comm_point_delete(c);
1623                 return NULL;
1624         }
1625         return c;
1626 }
1627
1628 struct comm_point* 
1629 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
1630         sldns_buffer* buffer, 
1631         comm_point_callback_t* callback, void* callback_arg)
1632 {
1633         struct comm_point* c = (struct comm_point*)calloc(1,
1634                 sizeof(struct comm_point));
1635         short evbits;
1636         if(!c)
1637                 return NULL;
1638         c->ev = (struct internal_event*)calloc(1,
1639                 sizeof(struct internal_event));
1640         if(!c->ev) {
1641                 free(c);
1642                 return NULL;
1643         }
1644         c->ev->base = base;
1645         c->fd = fd;
1646         c->buffer = buffer;
1647         c->timeout = NULL;
1648         c->tcp_is_reading = 0;
1649         c->tcp_byte_count = 0;
1650         c->tcp_parent = NULL;
1651         c->max_tcp_count = 0;
1652         c->cur_tcp_count = 0;
1653         c->tcp_handlers = NULL;
1654         c->tcp_free = NULL;
1655         c->type = comm_udp;
1656         c->tcp_do_close = 0;
1657         c->do_not_close = 0;
1658         c->inuse = 0;
1659         c->tcp_do_toggle_rw = 0;
1660         c->tcp_check_nb_connect = 0;
1661         c->callback = callback;
1662         c->cb_arg = callback_arg;
1663         evbits = EV_READ | EV_PERSIST;
1664         /* libevent stuff */
1665         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1666         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1667                 log_err("could not baseset udp event");
1668                 comm_point_delete(c);
1669                 return NULL;
1670         }
1671         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1672                 log_err("could not add udp event");
1673                 comm_point_delete(c);
1674                 return NULL;
1675         }
1676         return c;
1677 }
1678
1679 static struct comm_point* 
1680 comm_point_create_tcp_handler(struct comm_base *base, 
1681         struct comm_point* parent, size_t bufsize,
1682         comm_point_callback_t* callback, void* callback_arg)
1683 {
1684         struct comm_point* c = (struct comm_point*)calloc(1,
1685                 sizeof(struct comm_point));
1686         short evbits;
1687         if(!c)
1688                 return NULL;
1689         c->ev = (struct internal_event*)calloc(1,
1690                 sizeof(struct internal_event));
1691         if(!c->ev) {
1692                 free(c);
1693                 return NULL;
1694         }
1695         c->ev->base = base;
1696         c->fd = -1;
1697         c->buffer = sldns_buffer_new(bufsize);
1698         if(!c->buffer) {
1699                 free(c->ev);
1700                 free(c);
1701                 return NULL;
1702         }
1703         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1704         if(!c->timeout) {
1705                 sldns_buffer_free(c->buffer);
1706                 free(c->ev);
1707                 free(c);
1708                 return NULL;
1709         }
1710         c->tcp_is_reading = 0;
1711         c->tcp_byte_count = 0;
1712         c->tcp_parent = parent;
1713         c->max_tcp_count = 0;
1714         c->cur_tcp_count = 0;
1715         c->tcp_handlers = NULL;
1716         c->tcp_free = NULL;
1717         c->type = comm_tcp;
1718         c->tcp_do_close = 0;
1719         c->do_not_close = 0;
1720         c->tcp_do_toggle_rw = 1;
1721         c->tcp_check_nb_connect = 0;
1722         c->repinfo.c = c;
1723         c->callback = callback;
1724         c->cb_arg = callback_arg;
1725         /* add to parent free list */
1726         c->tcp_free = parent->tcp_free;
1727         parent->tcp_free = c;
1728         /* libevent stuff */
1729         evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1730         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1731         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1732         {
1733                 log_err("could not basetset tcphdl event");
1734                 parent->tcp_free = c->tcp_free;
1735                 free(c->ev);
1736                 free(c);
1737                 return NULL;
1738         }
1739         return c;
1740 }
1741
1742 struct comm_point* 
1743 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1744         comm_point_callback_t* callback, void* callback_arg)
1745 {
1746         struct comm_point* c = (struct comm_point*)calloc(1,
1747                 sizeof(struct comm_point));
1748         short evbits;
1749         int i;
1750         /* first allocate the TCP accept listener */
1751         if(!c)
1752                 return NULL;
1753         c->ev = (struct internal_event*)calloc(1,
1754                 sizeof(struct internal_event));
1755         if(!c->ev) {
1756                 free(c);
1757                 return NULL;
1758         }
1759         c->ev->base = base;
1760         c->fd = fd;
1761         c->buffer = NULL;
1762         c->timeout = NULL;
1763         c->tcp_is_reading = 0;
1764         c->tcp_byte_count = 0;
1765         c->tcp_parent = NULL;
1766         c->max_tcp_count = num;
1767         c->cur_tcp_count = 0;
1768         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1769                 sizeof(struct comm_point*));
1770         if(!c->tcp_handlers) {
1771                 free(c->ev);
1772                 free(c);
1773                 return NULL;
1774         }
1775         c->tcp_free = NULL;
1776         c->type = comm_tcp_accept;
1777         c->tcp_do_close = 0;
1778         c->do_not_close = 0;
1779         c->tcp_do_toggle_rw = 0;
1780         c->tcp_check_nb_connect = 0;
1781         c->callback = NULL;
1782         c->cb_arg = NULL;
1783         evbits = EV_READ | EV_PERSIST;
1784         /* libevent stuff */
1785         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1786         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1787                 event_add(&c->ev->ev, c->timeout) != 0 )
1788         {
1789                 log_err("could not add tcpacc event");
1790                 comm_point_delete(c);
1791                 return NULL;
1792         }
1793
1794         /* now prealloc the tcp handlers */
1795         for(i=0; i<num; i++) {
1796                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1797                         c, bufsize, callback, callback_arg);
1798                 if(!c->tcp_handlers[i]) {
1799                         comm_point_delete(c);
1800                         return NULL;
1801                 }
1802         }
1803         
1804         return c;
1805 }
1806
1807 struct comm_point* 
1808 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1809         comm_point_callback_t* callback, void* callback_arg)
1810 {
1811         struct comm_point* c = (struct comm_point*)calloc(1,
1812                 sizeof(struct comm_point));
1813         short evbits;
1814         if(!c)
1815                 return NULL;
1816         c->ev = (struct internal_event*)calloc(1,
1817                 sizeof(struct internal_event));
1818         if(!c->ev) {
1819                 free(c);
1820                 return NULL;
1821         }
1822         c->ev->base = base;
1823         c->fd = -1;
1824         c->buffer = sldns_buffer_new(bufsize);
1825         if(!c->buffer) {
1826                 free(c->ev);
1827                 free(c);
1828                 return NULL;
1829         }
1830         c->timeout = NULL;
1831         c->tcp_is_reading = 0;
1832         c->tcp_byte_count = 0;
1833         c->tcp_parent = NULL;
1834         c->max_tcp_count = 0;
1835         c->cur_tcp_count = 0;
1836         c->tcp_handlers = NULL;
1837         c->tcp_free = NULL;
1838         c->type = comm_tcp;
1839         c->tcp_do_close = 0;
1840         c->do_not_close = 0;
1841         c->tcp_do_toggle_rw = 1;
1842         c->tcp_check_nb_connect = 1;
1843         c->repinfo.c = c;
1844         c->callback = callback;
1845         c->cb_arg = callback_arg;
1846         evbits = EV_PERSIST | EV_WRITE;
1847         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1848         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1849         {
1850                 log_err("could not basetset tcpout event");
1851                 sldns_buffer_free(c->buffer);
1852                 free(c->ev);
1853                 free(c);
1854                 return NULL;
1855         }
1856
1857         return c;
1858 }
1859
1860 struct comm_point* 
1861 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1862         comm_point_callback_t* callback, void* callback_arg)
1863 {
1864         struct comm_point* c = (struct comm_point*)calloc(1,
1865                 sizeof(struct comm_point));
1866         short evbits;
1867         if(!c)
1868                 return NULL;
1869         c->ev = (struct internal_event*)calloc(1,
1870                 sizeof(struct internal_event));
1871         if(!c->ev) {
1872                 free(c);
1873                 return NULL;
1874         }
1875         c->ev->base = base;
1876         c->fd = fd;
1877         c->buffer = sldns_buffer_new(bufsize);
1878         if(!c->buffer) {
1879                 free(c->ev);
1880                 free(c);
1881                 return NULL;
1882         }
1883         c->timeout = NULL;
1884         c->tcp_is_reading = 1;
1885         c->tcp_byte_count = 0;
1886         c->tcp_parent = NULL;
1887         c->max_tcp_count = 0;
1888         c->cur_tcp_count = 0;
1889         c->tcp_handlers = NULL;
1890         c->tcp_free = NULL;
1891         c->type = comm_local;
1892         c->tcp_do_close = 0;
1893         c->do_not_close = 1;
1894         c->tcp_do_toggle_rw = 0;
1895         c->tcp_check_nb_connect = 0;
1896         c->callback = callback;
1897         c->cb_arg = callback_arg;
1898         /* libevent stuff */
1899         evbits = EV_PERSIST | EV_READ;
1900         event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback, 
1901                 c);
1902         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1903                 event_add(&c->ev->ev, c->timeout) != 0 )
1904         {
1905                 log_err("could not add localhdl event");
1906                 free(c->ev);
1907                 free(c);
1908                 return NULL;
1909         }
1910         return c;
1911 }
1912
1913 struct comm_point* 
1914 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
1915         comm_point_callback_t* callback, void* callback_arg)
1916 {
1917         struct comm_point* c = (struct comm_point*)calloc(1,
1918                 sizeof(struct comm_point));
1919         short evbits;
1920         if(!c)
1921                 return NULL;
1922         c->ev = (struct internal_event*)calloc(1,
1923                 sizeof(struct internal_event));
1924         if(!c->ev) {
1925                 free(c);
1926                 return NULL;
1927         }
1928         c->ev->base = base;
1929         c->fd = fd;
1930         c->buffer = NULL;
1931         c->timeout = NULL;
1932         c->tcp_is_reading = 0;
1933         c->tcp_byte_count = 0;
1934         c->tcp_parent = NULL;
1935         c->max_tcp_count = 0;
1936         c->cur_tcp_count = 0;
1937         c->tcp_handlers = NULL;
1938         c->tcp_free = NULL;
1939         c->type = comm_raw;
1940         c->tcp_do_close = 0;
1941         c->do_not_close = 1;
1942         c->tcp_do_toggle_rw = 0;
1943         c->tcp_check_nb_connect = 0;
1944         c->callback = callback;
1945         c->cb_arg = callback_arg;
1946         /* libevent stuff */
1947         if(writing)
1948                 evbits = EV_PERSIST | EV_WRITE;
1949         else    evbits = EV_PERSIST | EV_READ;
1950         event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback, 
1951                 c);
1952         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1953                 event_add(&c->ev->ev, c->timeout) != 0 )
1954         {
1955                 log_err("could not add rawhdl event");
1956                 free(c->ev);
1957                 free(c);
1958                 return NULL;
1959         }
1960         return c;
1961 }
1962
1963 void 
1964 comm_point_close(struct comm_point* c)
1965 {
1966         if(!c)
1967                 return;
1968         if(c->fd != -1)
1969                 if(event_del(&c->ev->ev) != 0) {
1970                         log_err("could not event_del on close");
1971                 }
1972         /* close fd after removing from event lists, or epoll.. is messed up */
1973         if(c->fd != -1 && !c->do_not_close) {
1974                 verbose(VERB_ALGO, "close fd %d", c->fd);
1975 #ifndef USE_WINSOCK
1976                 close(c->fd);
1977 #else
1978                 closesocket(c->fd);
1979 #endif
1980         }
1981         c->fd = -1;
1982 }
1983
1984 void 
1985 comm_point_delete(struct comm_point* c)
1986 {
1987         if(!c) 
1988                 return;
1989         if(c->type == comm_tcp && c->ssl) {
1990 #ifdef HAVE_SSL
1991                 SSL_shutdown(c->ssl);
1992                 SSL_free(c->ssl);
1993 #endif
1994         }
1995         comm_point_close(c);
1996         if(c->tcp_handlers) {
1997                 int i;
1998                 for(i=0; i<c->max_tcp_count; i++)
1999                         comm_point_delete(c->tcp_handlers[i]);
2000                 free(c->tcp_handlers);
2001         }
2002         free(c->timeout);
2003         if(c->type == comm_tcp || c->type == comm_local)
2004                 sldns_buffer_free(c->buffer);
2005         free(c->ev);
2006         free(c);
2007 }
2008
2009 void 
2010 comm_point_send_reply(struct comm_reply *repinfo)
2011 {
2012         log_assert(repinfo && repinfo->c);
2013         if(repinfo->c->type == comm_udp) {
2014                 if(repinfo->srctype)
2015                         comm_point_send_udp_msg_if(repinfo->c, 
2016                         repinfo->c->buffer, (struct sockaddr*)&repinfo->addr, 
2017                         repinfo->addrlen, repinfo);
2018                 else
2019                         comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
2020                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
2021 #ifdef USE_DNSTAP
2022                 if(repinfo->c->dtenv != NULL &&
2023                    repinfo->c->dtenv->log_client_response_messages)
2024                         dt_msg_send_client_response(repinfo->c->dtenv,
2025                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
2026 #endif
2027         } else {
2028 #ifdef USE_DNSTAP
2029                 if(repinfo->c->tcp_parent->dtenv != NULL &&
2030                    repinfo->c->tcp_parent->dtenv->log_client_response_messages)
2031                         dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
2032                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
2033 #endif
2034                 comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
2035         }
2036 }
2037
2038 void 
2039 comm_point_drop_reply(struct comm_reply* repinfo)
2040 {
2041         if(!repinfo)
2042                 return;
2043         log_assert(repinfo && repinfo->c);
2044         log_assert(repinfo->c->type != comm_tcp_accept);
2045         if(repinfo->c->type == comm_udp)
2046                 return;
2047         reclaim_tcp_handler(repinfo->c);
2048 }
2049
2050 void 
2051 comm_point_stop_listening(struct comm_point* c)
2052 {
2053         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
2054         if(event_del(&c->ev->ev) != 0) {
2055                 log_err("event_del error to stoplisten");
2056         }
2057 }
2058
2059 void 
2060 comm_point_start_listening(struct comm_point* c, int newfd, int sec)
2061 {
2062         verbose(VERB_ALGO, "comm point start listening %d", 
2063                 c->fd==-1?newfd:c->fd);
2064         if(c->type == comm_tcp_accept && !c->tcp_free) {
2065                 /* no use to start listening no free slots. */
2066                 return;
2067         }
2068         if(sec != -1 && sec != 0) {
2069                 if(!c->timeout) {
2070                         c->timeout = (struct timeval*)malloc(sizeof(
2071                                 struct timeval));
2072                         if(!c->timeout) {
2073                                 log_err("cpsl: malloc failed. No net read.");
2074                                 return;
2075                         }
2076                 }
2077                 c->ev->ev.ev_events |= EV_TIMEOUT;
2078 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
2079                 c->timeout->tv_sec = sec;
2080                 c->timeout->tv_usec = 0;
2081 #endif /* S_SPLINT_S */
2082         }
2083         if(c->type == comm_tcp) {
2084                 c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
2085                 if(c->tcp_is_reading)
2086                         c->ev->ev.ev_events |= EV_READ;
2087                 else    c->ev->ev.ev_events |= EV_WRITE;
2088         }
2089         if(newfd != -1) {
2090                 if(c->fd != -1) {
2091 #ifndef USE_WINSOCK
2092                         close(c->fd);
2093 #else
2094                         closesocket(c->fd);
2095 #endif
2096                 }
2097                 c->fd = newfd;
2098                 c->ev->ev.ev_fd = c->fd;
2099         }
2100         if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
2101                 log_err("event_add failed. in cpsl.");
2102         }
2103 }
2104
2105 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
2106 {
2107         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
2108         if(event_del(&c->ev->ev) != 0) {
2109                 log_err("event_del error to cplf");
2110         }
2111         c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
2112         if(rd) c->ev->ev.ev_events |= EV_READ;
2113         if(wr) c->ev->ev.ev_events |= EV_WRITE;
2114         if(event_add(&c->ev->ev, c->timeout) != 0) {
2115                 log_err("event_add failed. in cplf.");
2116         }
2117 }
2118
2119 size_t comm_point_get_mem(struct comm_point* c)
2120 {
2121         size_t s;
2122         if(!c) 
2123                 return 0;
2124         s = sizeof(*c) + sizeof(*c->ev);
2125         if(c->timeout) 
2126                 s += sizeof(*c->timeout);
2127         if(c->type == comm_tcp || c->type == comm_local)
2128                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
2129         if(c->type == comm_tcp_accept) {
2130                 int i;
2131                 for(i=0; i<c->max_tcp_count; i++)
2132                         s += comm_point_get_mem(c->tcp_handlers[i]);
2133         }
2134         return s;
2135 }
2136
2137 struct comm_timer* 
2138 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2139 {
2140         struct comm_timer *tm = (struct comm_timer*)calloc(1,
2141                 sizeof(struct comm_timer));
2142         if(!tm)
2143                 return NULL;
2144         tm->ev_timer = (struct internal_timer*)calloc(1,
2145                 sizeof(struct internal_timer));
2146         if(!tm->ev_timer) {
2147                 log_err("malloc failed");
2148                 free(tm);
2149                 return NULL;
2150         }
2151         tm->ev_timer->base = base;
2152         tm->callback = cb;
2153         tm->cb_arg = cb_arg;
2154         event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT, 
2155                 comm_timer_callback, tm);
2156         if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2157                 log_err("timer_create: event_base_set failed.");
2158                 free(tm->ev_timer);
2159                 free(tm);
2160                 return NULL;
2161         }
2162         return tm;
2163 }
2164
2165 void 
2166 comm_timer_disable(struct comm_timer* timer)
2167 {
2168         if(!timer)
2169                 return;
2170         evtimer_del(&timer->ev_timer->ev);
2171         timer->ev_timer->enabled = 0;
2172 }
2173
2174 void 
2175 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2176 {
2177         log_assert(tv);
2178         if(timer->ev_timer->enabled)
2179                 comm_timer_disable(timer);
2180         event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2181                 comm_timer_callback, timer);
2182         if(event_base_set(timer->ev_timer->base->eb->base, 
2183                 &timer->ev_timer->ev) != 0)
2184                 log_err("comm_timer_set: set_base failed.");
2185         if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2186                 log_err("comm_timer_set: evtimer_add failed.");
2187         timer->ev_timer->enabled = 1;
2188 }
2189
2190 void 
2191 comm_timer_delete(struct comm_timer* timer)
2192 {
2193         if(!timer)
2194                 return;
2195         comm_timer_disable(timer);
2196         free(timer->ev_timer);
2197         free(timer);
2198 }
2199
2200 void 
2201 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2202 {
2203         struct comm_timer* tm = (struct comm_timer*)arg;
2204         if(!(event&EV_TIMEOUT))
2205                 return;
2206         comm_base_now(tm->ev_timer->base);
2207         tm->ev_timer->enabled = 0;
2208         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2209         (*tm->callback)(tm->cb_arg);
2210 }
2211
2212 int 
2213 comm_timer_is_set(struct comm_timer* timer)
2214 {
2215         return (int)timer->ev_timer->enabled;
2216 }
2217
2218 size_t 
2219 comm_timer_get_mem(struct comm_timer* timer)
2220 {
2221         return sizeof(*timer) + sizeof(struct internal_timer);
2222 }
2223
2224 struct comm_signal* 
2225 comm_signal_create(struct comm_base* base,
2226         void (*callback)(int, void*), void* cb_arg)
2227 {
2228         struct comm_signal* com = (struct comm_signal*)malloc(
2229                 sizeof(struct comm_signal));
2230         if(!com) {
2231                 log_err("malloc failed");
2232                 return NULL;
2233         }
2234         com->base = base;
2235         com->callback = callback;
2236         com->cb_arg = cb_arg;
2237         com->ev_signal = NULL;
2238         return com;
2239 }
2240
2241 void 
2242 comm_signal_callback(int sig, short event, void* arg)
2243 {
2244         struct comm_signal* comsig = (struct comm_signal*)arg;
2245         if(!(event & EV_SIGNAL))
2246                 return;
2247         comm_base_now(comsig->base);
2248         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2249         (*comsig->callback)(sig, comsig->cb_arg);
2250 }
2251
2252 int 
2253 comm_signal_bind(struct comm_signal* comsig, int sig)
2254 {
2255         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
2256                 sizeof(struct internal_signal));
2257         if(!entry) {
2258                 log_err("malloc failed");
2259                 return 0;
2260         }
2261         log_assert(comsig);
2262         /* add signal event */
2263         signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2264         if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2265                 log_err("Could not set signal base");
2266                 free(entry);
2267                 return 0;
2268         }
2269         if(signal_add(&entry->ev, NULL) != 0) {
2270                 log_err("Could not add signal handler");
2271                 free(entry);
2272                 return 0;
2273         }
2274         /* link into list */
2275         entry->next = comsig->ev_signal;
2276         comsig->ev_signal = entry;
2277         return 1;
2278 }
2279
2280 void 
2281 comm_signal_delete(struct comm_signal* comsig)
2282 {
2283         struct internal_signal* p, *np;
2284         if(!comsig)
2285                 return;
2286         p=comsig->ev_signal;
2287         while(p) {
2288                 np = p->next;
2289                 signal_del(&p->ev);
2290                 free(p);
2291                 p = np;
2292         }
2293         free(comsig);
2294 }