]> CyberLeo.Net >> Repos - FreeBSD/releng/10.3.git/blob - contrib/unbound/util/netevent.c
- Copy stable/10@296371 to releng/10.3 in preparation for 10.3-RC1
[FreeBSD/releng/10.3.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/log.h"
44 #include "util/net_help.h"
45 #include "util/fptr_wlist.h"
46 #include "sldns/pkthdr.h"
47 #include "sldns/sbuffer.h"
48 #include "dnstap/dnstap.h"
49 #ifdef HAVE_OPENSSL_SSL_H
50 #include <openssl/ssl.h>
51 #endif
52 #ifdef HAVE_OPENSSL_ERR_H
53 #include <openssl/err.h>
54 #endif
55
56 /* -------- Start of local definitions -------- */
/**
 * if CMSG_ALIGN is not defined on this platform, a workaround.
 * Prefers the platform's _CMSG_DATA_ALIGN when that exists, otherwise
 * rounds len up to the next multiple of sizeof(long).
 */
#ifndef CMSG_ALIGN
#  ifdef _CMSG_DATA_ALIGN
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/**
 * if CMSG_LEN is not defined on this platform, a workaround.
 * Computed as the aligned size of the cmsghdr plus the payload length.
 */
#ifndef CMSG_LEN
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/**
 * if CMSG_SPACE is not defined on this platform, a workaround.
 * Computed as the aligned payload length plus the aligned cmsghdr size;
 * the header is aligned with _CMSG_HDR_ALIGN when the platform has it.
 */
#ifndef CMSG_SPACE
#  ifdef _CMSG_HDR_ALIGN
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif

/** The TCP reading or writing query timeout in seconds */
#define TCP_QUERY_TIMEOUT 120 

#ifndef NONBLOCKING_IS_BROKEN
/** number of UDP reads to perform per read indication from select */
#define NUM_UDP_PER_SELECT 100
#else
/** with NONBLOCKING_IS_BROKEN only one UDP read is done per indication */
#define NUM_UDP_PER_SELECT 1
#endif
89
90 /* We define libevent structures here to hide the libevent stuff. */
91
92 #ifdef USE_MINI_EVENT
93 #  ifdef USE_WINSOCK
94 #    include "util/winsock_event.h"
95 #  else
96 #    include "util/mini_event.h"
97 #  endif /* USE_WINSOCK */
98 #else /* USE_MINI_EVENT */
99    /* we use libevent */
100 #  ifdef HAVE_EVENT_H
101 #    include <event.h>
102 #  else
103 #    include "event2/event.h"
104 #    include "event2/event_struct.h"
105 #    include "event2/event_compat.h"
106 #  endif
107 #endif /* USE_MINI_EVENT */
108
/**
 * The internal event structure for keeping libevent info for the event.
 * Possibly other structures (list, tree) this is part of.
 * The comm point callbacks in this file reach the comm base through it
 * (c->ev->base).
 */
struct internal_event {
        /** the comm base */
        struct comm_base* base;
        /** libevent event type, alloced here (embedded, not a pointer) */
        struct event ev;
};
119
/**
 * Internal base structure, so that every thread has its own events.
 * Allocated zeroed with calloc by comm_base_create and
 * comm_base_create_event.
 */
struct internal_base {
        /** libevent event_base type. */
        struct event_base* base;
        /** seconds time pointer points here; comm_base_timept hands out
         * the address of this member */
        time_t secs;
        /** timeval with current time; comm_base_timept hands out the
         * address of this member */
        struct timeval now;
        /** the event used for slow_accept timeouts */
        struct event slow_accept;
        /** true if slow_accept is enabled; set in comm_point_perform_accept
         * and cleared again in comm_base_handle_slow_accept */
        int slow_accept_enabled;
};
135
/**
 * Internal timer structure, to store timer event in.
 */
struct internal_timer {
        /** the comm base */
        struct comm_base* base;
        /** libevent event type, alloced here (embedded, not a pointer) */
        struct event ev;
        /** is timer enabled */
        uint8_t enabled;
};
147
/**
 * Internal signal structure, to store signal event in.
 * Entries are chained through the next pointer into a singly linked list.
 */
struct internal_signal {
        /** libevent event type, alloced here (embedded, not a pointer) */
        struct event ev;
        /** next in signal list */
        struct internal_signal* next;
};
157
158 /** create a tcp handler with a parent */
159 static struct comm_point* comm_point_create_tcp_handler(
160         struct comm_base *base, struct comm_point* parent, size_t bufsize,
161         comm_point_callback_t* callback, void* callback_arg);
162
163 /* -------- End of local definitions -------- */
164
165 #ifdef USE_MINI_EVENT
166 /** minievent updates the time when it blocks. */
167 #define comm_base_now(x) /* nothing to do */
168 #else /* !USE_MINI_EVENT */
169 /** fillup the time values in the event base */
170 static void
171 comm_base_now(struct comm_base* b)
172 {
173         if(gettimeofday(&b->eb->now, NULL) < 0) {
174                 log_err("gettimeofday: %s", strerror(errno));
175         }
176         b->eb->secs = (time_t)b->eb->now.tv_sec;
177 }
178 #endif /* USE_MINI_EVENT */
179
180 struct comm_base* 
181 comm_base_create(int sigs)
182 {
183         struct comm_base* b = (struct comm_base*)calloc(1,
184                 sizeof(struct comm_base));
185         if(!b)
186                 return NULL;
187         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
188         if(!b->eb) {
189                 free(b);
190                 return NULL;
191         }
192 #ifdef USE_MINI_EVENT
193         (void)sigs;
194         /* use mini event time-sharing feature */
195         b->eb->base = event_init(&b->eb->secs, &b->eb->now);
196 #else
197 #  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
198         /* libev */
199         if(sigs)
200                 b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
201         else
202                 b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
203 #  else
204         (void)sigs;
205 #    ifdef HAVE_EVENT_BASE_NEW
206         b->eb->base = event_base_new();
207 #    else
208         b->eb->base = event_init();
209 #    endif
210 #  endif
211 #endif
212         if(!b->eb->base) {
213                 free(b->eb);
214                 free(b);
215                 return NULL;
216         }
217         comm_base_now(b);
218         /* avoid event_get_method call which causes crashes even when
219          * not printing, because its result is passed */
220         verbose(VERB_ALGO, 
221 #if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
222                 "libev"
223 #elif defined(USE_MINI_EVENT)
224                 "event "
225 #else
226                 "libevent "
227 #endif
228                 "%s uses %s method.", 
229                 event_get_version(), 
230 #ifdef HAVE_EVENT_BASE_GET_METHOD
231                 event_base_get_method(b->eb->base)
232 #else
233                 "not_obtainable"
234 #endif
235         );
236         return b;
237 }
238
239 struct comm_base*
240 comm_base_create_event(struct event_base* base)
241 {
242         struct comm_base* b = (struct comm_base*)calloc(1,
243                 sizeof(struct comm_base));
244         if(!b)
245                 return NULL;
246         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
247         if(!b->eb) {
248                 free(b);
249                 return NULL;
250         }
251         b->eb->base = base;
252         comm_base_now(b);
253         return b;
254 }
255
256 void 
257 comm_base_delete(struct comm_base* b)
258 {
259         if(!b)
260                 return;
261         if(b->eb->slow_accept_enabled) {
262                 if(event_del(&b->eb->slow_accept) != 0) {
263                         log_err("could not event_del slow_accept");
264                 }
265         }
266 #ifdef USE_MINI_EVENT
267         event_base_free(b->eb->base);
268 #elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
269         /* only libevent 1.2+ has it, but in 1.2 it is broken - 
270            assertion fails on signal handling ev that is not deleted
271            in libevent 1.3c (event_base_once appears) this is fixed. */
272         event_base_free(b->eb->base);
273 #endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
274         b->eb->base = NULL;
275         free(b->eb);
276         free(b);
277 }
278
279 void 
280 comm_base_delete_no_base(struct comm_base* b)
281 {
282         if(!b)
283                 return;
284         if(b->eb->slow_accept_enabled) {
285                 if(event_del(&b->eb->slow_accept) != 0) {
286                         log_err("could not event_del slow_accept");
287                 }
288         }
289         b->eb->base = NULL;
290         free(b->eb);
291         free(b);
292 }
293
294 void 
295 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
296 {
297         *tt = &b->eb->secs;
298         *tv = &b->eb->now;
299 }
300
301 void 
302 comm_base_dispatch(struct comm_base* b)
303 {
304         int retval;
305         retval = event_base_dispatch(b->eb->base);
306         if(retval != 0) {
307                 fatal_exit("event_dispatch returned error %d, "
308                         "errno is %s", retval, strerror(errno));
309         }
310 }
311
312 void comm_base_exit(struct comm_base* b)
313 {
314         if(event_base_loopexit(b->eb->base, NULL) != 0) {
315                 log_err("Could not loopexit");
316         }
317 }
318
319 void comm_base_set_slow_accept_handlers(struct comm_base* b,
320         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
321 {
322         b->stop_accept = stop_acc;
323         b->start_accept = start_acc;
324         b->cb_arg = arg;
325 }
326
327 struct event_base* comm_base_internal(struct comm_base* b)
328 {
329         return b->eb->base;
330 }
331
332 /** see if errno for udp has to be logged or not uses globals */
333 static int
334 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
335 {
336         /* do not log transient errors (unless high verbosity) */
337 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
338         switch(errno) {
339 #  ifdef ENETUNREACH
340                 case ENETUNREACH:
341 #  endif
342 #  ifdef EHOSTDOWN
343                 case EHOSTDOWN:
344 #  endif
345 #  ifdef EHOSTUNREACH
346                 case EHOSTUNREACH:
347 #  endif
348 #  ifdef ENETDOWN
349                 case ENETDOWN:
350 #  endif
351                         if(verbosity < VERB_ALGO)
352                                 return 0;
353                 default:
354                         break;
355         }
356 #endif
357         /* permission denied is gotten for every send if the
358          * network is disconnected (on some OS), squelch it */
359         if(errno == EPERM && verbosity < VERB_DETAIL)
360                 return 0;
361         /* squelch errors where people deploy AAAA ::ffff:bla for
362          * authority servers, which we try for intranets. */
363         if(errno == EINVAL && addr_is_ip4mapped(
364                 (struct sockaddr_storage*)addr, addrlen) &&
365                 verbosity < VERB_DETAIL)
366                 return 0;
367         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
368          * but a dns cache does not need it. */
369         if(errno == EACCES && addr_is_broadcast(
370                 (struct sockaddr_storage*)addr, addrlen) &&
371                 verbosity < VERB_DETAIL)
372                 return 0;
373         return 1;
374 }
375
/**
 * See if the errno of a failed tcp connect has to be logged; applies the
 * same rules as for a failed udp send.
 * @param addr: destination address.
 * @param addrlen: length of addr.
 * @return 0 if the error should stay silent, 1 if it should be logged.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
        int needs_log = udp_send_errno_needs_log(addr, addrlen);

        return needs_log;
}
380
381 /* send a UDP reply */
382 int
383 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
384         struct sockaddr* addr, socklen_t addrlen) 
385 {
386         ssize_t sent;
387         log_assert(c->fd != -1);
388 #ifdef UNBOUND_DEBUG
389         if(sldns_buffer_remaining(packet) == 0)
390                 log_err("error: send empty UDP packet");
391 #endif
392         log_assert(addr && addrlen > 0);
393         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
394                 sldns_buffer_remaining(packet), 0,
395                 addr, addrlen);
396         if(sent == -1) {
397                 if(!udp_send_errno_needs_log(addr, addrlen))
398                         return 0;
399 #ifndef USE_WINSOCK
400                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
401 #else
402                 verbose(VERB_OPS, "sendto failed: %s", 
403                         wsa_strerror(WSAGetLastError()));
404 #endif
405                 log_addr(VERB_OPS, "remote address is", 
406                         (struct sockaddr_storage*)addr, addrlen);
407                 return 0;
408         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
409                 log_err("sent %d in place of %d bytes", 
410                         (int)sent, (int)sldns_buffer_remaining(packet));
411                 return 0;
412         }
413         return 1;
414 }
415
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Print debug ancillary info.
 * @param str: prefix for the log line.
 * @param r: reply whose srctype and pktinfo are printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
        switch(r->srctype) {
        case 6: {
                char buf[1024];
                if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 
                        buf, (socklen_t)sizeof(buf)) == 0) {
                        (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf));
                }
                buf[sizeof(buf)-1]=0;
                log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex);
                break;
        }
        case 4: {
#ifdef IP_PKTINFO
                char buf1[1024], buf2[1024];
                if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 
                        buf1, (socklen_t)sizeof(buf1)) == 0) {
                        (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
                }
                buf1[sizeof(buf1)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
                if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 
                        buf2, (socklen_t)sizeof(buf2)) == 0) {
                        (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2));
                }
                buf2[sizeof(buf2)-1]=0;
#else
                /* no spec_dst member to print */
                buf2[0]=0;
#endif
                log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
                        buf1, buf2);
#elif defined(IP_RECVDSTADDR)
                char buf1[1024];
                if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 
                        buf1, (socklen_t)sizeof(buf1)) == 0) {
                        (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
                }
                buf1[sizeof(buf1)-1]=0;
                log_info("%s: %s", str, buf1);
#endif /* IP_PKTINFO or PI_RECVDSTDADDR */
                break;
        }
        default:
                log_info("%s: unknown srctype %d", str, r->srctype);
                break;
        }
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
463
464 /** send a UDP reply over specified interface*/
465 static int
466 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
467         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
468 {
469 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
470         ssize_t sent;
471         struct msghdr msg;
472         struct iovec iov[1];
473         char control[256];
474 #ifndef S_SPLINT_S
475         struct cmsghdr *cmsg;
476 #endif /* S_SPLINT_S */
477
478         log_assert(c->fd != -1);
479 #ifdef UNBOUND_DEBUG
480         if(sldns_buffer_remaining(packet) == 0)
481                 log_err("error: send empty UDP packet");
482 #endif
483         log_assert(addr && addrlen > 0);
484
485         msg.msg_name = addr;
486         msg.msg_namelen = addrlen;
487         iov[0].iov_base = sldns_buffer_begin(packet);
488         iov[0].iov_len = sldns_buffer_remaining(packet);
489         msg.msg_iov = iov;
490         msg.msg_iovlen = 1;
491         msg.msg_control = control;
492 #ifndef S_SPLINT_S
493         msg.msg_controllen = sizeof(control);
494 #endif /* S_SPLINT_S */
495         msg.msg_flags = 0;
496
497 #ifndef S_SPLINT_S
498         cmsg = CMSG_FIRSTHDR(&msg);
499         if(r->srctype == 4) {
500 #ifdef IP_PKTINFO
501                 void* cmsg_data;
502                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
503                 log_assert(msg.msg_controllen <= sizeof(control));
504                 cmsg->cmsg_level = IPPROTO_IP;
505                 cmsg->cmsg_type = IP_PKTINFO;
506                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
507                         sizeof(struct in_pktinfo));
508                 /* unset the ifindex to not bypass the routing tables */
509                 cmsg_data = CMSG_DATA(cmsg);
510                 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
511                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
512 #elif defined(IP_SENDSRCADDR)
513                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
514                 log_assert(msg.msg_controllen <= sizeof(control));
515                 cmsg->cmsg_level = IPPROTO_IP;
516                 cmsg->cmsg_type = IP_SENDSRCADDR;
517                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
518                         sizeof(struct in_addr));
519                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
520 #else
521                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
522                 msg.msg_control = NULL;
523 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
524         } else if(r->srctype == 6) {
525                 void* cmsg_data;
526                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
527                 log_assert(msg.msg_controllen <= sizeof(control));
528                 cmsg->cmsg_level = IPPROTO_IPV6;
529                 cmsg->cmsg_type = IPV6_PKTINFO;
530                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
531                         sizeof(struct in6_pktinfo));
532                 /* unset the ifindex to not bypass the routing tables */
533                 cmsg_data = CMSG_DATA(cmsg);
534                 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
535                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
536         } else {
537                 /* try to pass all 0 to use default route */
538                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
539                 log_assert(msg.msg_controllen <= sizeof(control));
540                 cmsg->cmsg_level = IPPROTO_IPV6;
541                 cmsg->cmsg_type = IPV6_PKTINFO;
542                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
543                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
544         }
545 #endif /* S_SPLINT_S */
546         if(verbosity >= VERB_ALGO)
547                 p_ancil("send_udp over interface", r);
548         sent = sendmsg(c->fd, &msg, 0);
549         if(sent == -1) {
550                 if(!udp_send_errno_needs_log(addr, addrlen))
551                         return 0;
552                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
553                 log_addr(VERB_OPS, "remote address is", 
554                         (struct sockaddr_storage*)addr, addrlen);
555                 return 0;
556         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
557                 log_err("sent %d in place of %d bytes", 
558                         (int)sent, (int)sldns_buffer_remaining(packet));
559                 return 0;
560         }
561         return 1;
562 #else
563         (void)c;
564         (void)packet;
565         (void)addr;
566         (void)addrlen;
567         (void)r;
568         log_err("sendmsg: IPV6_PKTINFO not supported");
569         return 0;
570 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
571 }
572
/**
 * Event callback for a UDP comm point that reads with recvmsg, so the
 * destination address of each packet is picked up from the ancillary
 * data. Up to NUM_UDP_PER_SELECT packets are handled per call. When the
 * comm point callback returns nonzero, the buffer is sent back as an
 * immediate reply with the recorded packet info.
 * @param fd: file descriptor that is readable.
 * @param event: event bits; nothing is done unless EV_READ is set.
 * @param arg: the comm point (struct comm_point*).
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
        struct comm_reply rep;
        struct msghdr msg;
        struct iovec iov[1];
        ssize_t rcv;
        /* the union makes the buffer suitably aligned for the
         * struct cmsghdr entries that the CMSG_ macros walk over */
        union {
                struct cmsghdr hdr;
                char buf[256];
        } ancil;
        int i;
#ifndef S_SPLINT_S
        struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

        rep.c = (struct comm_point*)arg;
        log_assert(rep.c->type == comm_udp);

        if(!(event&EV_READ))
                return;
        log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
        comm_base_now(rep.c->ev->base);
        for(i=0; i<NUM_UDP_PER_SELECT; i++) {
                sldns_buffer_clear(rep.c->buffer);
                rep.addrlen = (socklen_t)sizeof(rep.addr);
                log_assert(fd != -1);
                log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
                msg.msg_name = &rep.addr;
                msg.msg_namelen = (socklen_t)sizeof(rep.addr);
                iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
                iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
                msg.msg_iov = iov;
                msg.msg_iovlen = 1;
                msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
                msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
                msg.msg_flags = 0;
                rcv = recvmsg(fd, &msg, 0);
                if(rcv == -1) {
                        /* EAGAIN and EINTR are not reported */
                        if(errno != EAGAIN && errno != EINTR) {
                                log_err("recvmsg failed: %s", strerror(errno));
                        }
                        return;
                }
                rep.addrlen = msg.msg_namelen;
                sldns_buffer_skip(rep.c->buffer, rcv);
                sldns_buffer_flip(rep.c->buffer);
                /* stays 0 if no packet info is found in the ancillary data */
                rep.srctype = 0;
#ifndef S_SPLINT_S
                for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
                        cmsg = CMSG_NXTHDR(&msg, cmsg)) {
                        if( cmsg->cmsg_level == IPPROTO_IPV6 &&
                                cmsg->cmsg_type == IPV6_PKTINFO) {
                                rep.srctype = 6;
                                memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
                                        sizeof(struct in6_pktinfo));
                                break;
#ifdef IP_PKTINFO
                        } else if( cmsg->cmsg_level == IPPROTO_IP &&
                                cmsg->cmsg_type == IP_PKTINFO) {
                                rep.srctype = 4;
                                memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
                                        sizeof(struct in_pktinfo));
                                break;
#elif defined(IP_RECVDSTADDR)
                        } else if( cmsg->cmsg_level == IPPROTO_IP &&
                                cmsg->cmsg_type == IP_RECVDSTADDR) {
                                rep.srctype = 4;
                                memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
                                        sizeof(struct in_addr));
                                break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
                        }
                }
                if(verbosity >= VERB_ALGO)
                        p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
                fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
                if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
                        /* send back immediate reply */
                        (void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
                                (struct sockaddr*)&rep.addr, rep.addrlen, &rep);
                }
                if(rep.c->fd == -1) /* commpoint closed */
                        break;
        }
#else
        (void)fd;
        (void)event;
        (void)arg;
        fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
                "Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
667
668 void 
669 comm_point_udp_callback(int fd, short event, void* arg)
670 {
671         struct comm_reply rep;
672         ssize_t rcv;
673         int i;
674
675         rep.c = (struct comm_point*)arg;
676         log_assert(rep.c->type == comm_udp);
677
678         if(!(event&EV_READ))
679                 return;
680         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
681         comm_base_now(rep.c->ev->base);
682         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
683                 sldns_buffer_clear(rep.c->buffer);
684                 rep.addrlen = (socklen_t)sizeof(rep.addr);
685                 log_assert(fd != -1);
686                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
687                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
688                         sldns_buffer_remaining(rep.c->buffer), 0, 
689                         (struct sockaddr*)&rep.addr, &rep.addrlen);
690                 if(rcv == -1) {
691 #ifndef USE_WINSOCK
692                         if(errno != EAGAIN && errno != EINTR)
693                                 log_err("recvfrom %d failed: %s", 
694                                         fd, strerror(errno));
695 #else
696                         if(WSAGetLastError() != WSAEINPROGRESS &&
697                                 WSAGetLastError() != WSAECONNRESET &&
698                                 WSAGetLastError()!= WSAEWOULDBLOCK)
699                                 log_err("recvfrom failed: %s",
700                                         wsa_strerror(WSAGetLastError()));
701 #endif
702                         return;
703                 }
704                 sldns_buffer_skip(rep.c->buffer, rcv);
705                 sldns_buffer_flip(rep.c->buffer);
706                 rep.srctype = 0;
707                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
708                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
709                         /* send back immediate reply */
710                         (void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
711                                 (struct sockaddr*)&rep.addr, rep.addrlen);
712                 }
713                 if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
714                 another UDP port. Note rep.c cannot be reused with TCP fd. */
715                         break;
716         }
717 }
718
719 /** Use a new tcp handler for new query fd, set to read query */
720 static void
721 setup_tcp_handler(struct comm_point* c, int fd) 
722 {
723         log_assert(c->type == comm_tcp);
724         log_assert(c->fd == -1);
725         sldns_buffer_clear(c->buffer);
726         c->tcp_is_reading = 1;
727         c->tcp_byte_count = 0;
728         comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
729 }
730
731 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
732         short ATTR_UNUSED(event), void* arg)
733 {
734         struct comm_base* b = (struct comm_base*)arg;
735         /* timeout for the slow accept, re-enable accepts again */
736         if(b->start_accept) {
737                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
738                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
739                 (*b->start_accept)(b->cb_arg);
740                 b->eb->slow_accept_enabled = 0;
741         }
742 }
743
744 int comm_point_perform_accept(struct comm_point* c,
745         struct sockaddr_storage* addr, socklen_t* addrlen)
746 {
747         int new_fd;
748         *addrlen = (socklen_t)sizeof(*addr);
749         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
750         if(new_fd == -1) {
751 #ifndef USE_WINSOCK
752                 /* EINTR is signal interrupt. others are closed connection. */
753                 if(     errno == EINTR || errno == EAGAIN
754 #ifdef EWOULDBLOCK
755                         || errno == EWOULDBLOCK 
756 #endif
757 #ifdef ECONNABORTED
758                         || errno == ECONNABORTED 
759 #endif
760 #ifdef EPROTO
761                         || errno == EPROTO
762 #endif /* EPROTO */
763                         )
764                         return -1;
765 #if defined(ENFILE) && defined(EMFILE)
766                 if(errno == ENFILE || errno == EMFILE) {
767                         /* out of file descriptors, likely outside of our
768                          * control. stop accept() calls for some time */
769                         if(c->ev->base->stop_accept) {
770                                 struct comm_base* b = c->ev->base;
771                                 struct timeval tv;
772                                 verbose(VERB_ALGO, "out of file descriptors: "
773                                         "slow accept");
774                                 b->eb->slow_accept_enabled = 1;
775                                 fptr_ok(fptr_whitelist_stop_accept(
776                                         b->stop_accept));
777                                 (*b->stop_accept)(b->cb_arg);
778                                 /* set timeout, no mallocs */
779                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
780                                 tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
781                                 event_set(&b->eb->slow_accept, -1, EV_TIMEOUT, 
782                                         comm_base_handle_slow_accept, b);
783                                 if(event_base_set(b->eb->base,
784                                         &b->eb->slow_accept) != 0) {
785                                         /* we do not want to log here, because
786                                          * that would spam the logfiles.
787                                          * error: "event_base_set failed." */
788                                 }
789                                 if(event_add(&b->eb->slow_accept, &tv) != 0) {
790                                         /* we do not want to log here,
791                                          * error: "event_add failed." */
792                                 }
793                         }
794                         return -1;
795                 }
796 #endif
797                 log_err_addr("accept failed", strerror(errno), addr, *addrlen);
798 #else /* USE_WINSOCK */
799                 if(WSAGetLastError() == WSAEINPROGRESS ||
800                         WSAGetLastError() == WSAECONNRESET)
801                         return -1;
802                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
803                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
804                         return -1;
805                 }
806                 log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
807                         addr, *addrlen);
808 #endif
809                 return -1;
810         }
811         fd_set_nonblock(new_fd);
812         return new_fd;
813 }
814
815 #ifdef USE_WINSOCK
816 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
817         int ATTR_UNUSED(argi), long argl, long retvalue)
818 {
819         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
820                 (oper&BIO_CB_RETURN)?"return":"before",
821                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
822                 WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
823         /* on windows, check if previous operation caused EWOULDBLOCK */
824         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
825                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
826                 if(WSAGetLastError() == WSAEWOULDBLOCK)
827                         winsock_tcp_wouldblock((struct event*)
828                                 BIO_get_callback_arg(b), EV_READ);
829         }
830         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
831                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
832                 if(WSAGetLastError() == WSAEWOULDBLOCK)
833                         winsock_tcp_wouldblock((struct event*)
834                                 BIO_get_callback_arg(b), EV_WRITE);
835         }
836         /* return original return value */
837         return retvalue;
838 }
839
840 /** set win bio callbacks for nonblocking operations */
841 void
842 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
843 {
844         SSL* ssl = (SSL*)thessl;
845         /* set them both just in case, but usually they are the same BIO */
846         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
847         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)&c->ev->ev);
848         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
849         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)&c->ev->ev);
850 }
851 #endif
852
853 void 
854 comm_point_tcp_accept_callback(int fd, short event, void* arg)
855 {
856         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
857         int new_fd;
858         log_assert(c->type == comm_tcp_accept);
859         if(!(event & EV_READ)) {
860                 log_info("ignoring tcp accept event %d", (int)event);
861                 return;
862         }
863         comm_base_now(c->ev->base);
864         /* find free tcp handler. */
865         if(!c->tcp_free) {
866                 log_warn("accepted too many tcp, connections full");
867                 return;
868         }
869         /* accept incoming connection. */
870         c_hdl = c->tcp_free;
871         log_assert(fd != -1);
872         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
873                 &c_hdl->repinfo.addrlen);
874         if(new_fd == -1)
875                 return;
876         if(c->ssl) {
877                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
878                 if(!c_hdl->ssl) {
879                         c_hdl->fd = new_fd;
880                         comm_point_close(c_hdl);
881                         return;
882                 }
883                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
884 #ifdef USE_WINSOCK
885                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
886 #endif
887         }
888
889         /* grab the tcp handler buffers */
890         c->cur_tcp_count++;
891         c->tcp_free = c_hdl->tcp_free;
892         if(!c->tcp_free) {
893                 /* stop accepting incoming queries for now. */
894                 comm_point_stop_listening(c);
895         }
896         setup_tcp_handler(c_hdl, new_fd);
897 }
898
899 /** Make tcp handler free for next assignment */
900 static void
901 reclaim_tcp_handler(struct comm_point* c)
902 {
903         log_assert(c->type == comm_tcp);
904         if(c->ssl) {
905 #ifdef HAVE_SSL
906                 SSL_shutdown(c->ssl);
907                 SSL_free(c->ssl);
908                 c->ssl = NULL;
909 #endif
910         }
911         comm_point_close(c);
912         if(c->tcp_parent) {
913                 c->tcp_parent->cur_tcp_count--;
914                 c->tcp_free = c->tcp_parent->tcp_free;
915                 c->tcp_parent->tcp_free = c;
916                 if(!c->tcp_free) {
917                         /* re-enable listening on accept socket */
918                         comm_point_start_listening(c->tcp_parent, -1, -1);
919                 }
920         }
921 }
922
923 /** do the callback when writing is done */
924 static void
925 tcp_callback_writer(struct comm_point* c)
926 {
927         log_assert(c->type == comm_tcp);
928         sldns_buffer_clear(c->buffer);
929         if(c->tcp_do_toggle_rw)
930                 c->tcp_is_reading = 1;
931         c->tcp_byte_count = 0;
932         /* switch from listening(write) to listening(read) */
933         comm_point_stop_listening(c);
934         comm_point_start_listening(c, -1, -1);
935 }
936
937 /** do the callback when reading is done */
938 static void
939 tcp_callback_reader(struct comm_point* c)
940 {
941         log_assert(c->type == comm_tcp || c->type == comm_local);
942         sldns_buffer_flip(c->buffer);
943         if(c->tcp_do_toggle_rw)
944                 c->tcp_is_reading = 0;
945         c->tcp_byte_count = 0;
946         if(c->type == comm_tcp)
947                 comm_point_stop_listening(c);
948         fptr_ok(fptr_whitelist_comm_point(c->callback));
949         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
950                 comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
951         }
952 }
953
/**
 * Continue the ssl handshake, or resume after an SSL_read/SSL_write that
 * had to wait for the opposite I/O direction.
 * @param c: comm point with c->ssl set.
 * @return 0 if the connection is closed or failed, 1 otherwise. On 1 the
 *      caller checks c->ssl_shake_state: anything other than
 *      comm_ssl_shake_none means more handshake I/O is pending.
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
        int rc, sslerr;

        switch(c->ssl_shake_state) {
        case comm_ssl_shake_hs_read:
                /* read condition satisfied back to writing */
                comm_point_listen_for_rw(c, 1, 1);
                c->ssl_shake_state = comm_ssl_shake_none;
                return 1;
        case comm_ssl_shake_hs_write:
                /* write condition satisfied, back to reading */
                comm_point_listen_for_rw(c, 1, 0);
                c->ssl_shake_state = comm_ssl_shake_none;
                return 1;
        default:
                break;
        }

        ERR_clear_error();
        rc = SSL_do_handshake(c->ssl);
        if(rc == 1) {
                /* this is where peer verification could take place */
                log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
                        c->repinfo.addrlen);

                /* setup listen rw correctly */
                if(!c->tcp_is_reading) {
                        comm_point_listen_for_rw(c, 1, 1);
                } else if(c->ssl_shake_state != comm_ssl_shake_read) {
                        comm_point_listen_for_rw(c, 1, 0);
                }
                c->ssl_shake_state = comm_ssl_shake_none;
                return 1;
        }

        sslerr = SSL_get_error(c->ssl, rc);
        switch(sslerr) {
        case SSL_ERROR_WANT_READ:
                /* only change the event registration on a state change */
                if(c->ssl_shake_state != comm_ssl_shake_read) {
                        c->ssl_shake_state = comm_ssl_shake_read;
                        comm_point_listen_for_rw(c, 1, 0);
                }
                return 1;
        case SSL_ERROR_WANT_WRITE:
                if(c->ssl_shake_state != comm_ssl_shake_write) {
                        c->ssl_shake_state = comm_ssl_shake_write;
                        comm_point_listen_for_rw(c, 0, 1);
                }
                return 1;
        default:
                break;
        }

        if(rc == 0)
                return 0; /* closed */
        if(sslerr == SSL_ERROR_SYSCALL) {
                /* SYSCALL and errno==0 means closed uncleanly */
                if(errno != 0)
                        log_err("SSL_handshake syscall: %s",
                                strerror(errno));
                return 0;
        }
        log_crypto_err("ssl handshake failed");
        log_addr(1, "ssl handshake failed", &c->repinfo.addr,
                c->repinfo.addrlen);
        return 0;
}
#endif /* HAVE_SSL */
1019
#ifdef HAVE_SSL
/**
 * Handle an SSL_read that returned r <= 0.
 * @param c: comm point with c->ssl set.
 * @param r: the SSL_read return value.
 * @return 1 to wait for more I/O, 0 if the connection is closed or failed.
 */
static int
ssl_read_failed(struct comm_point* c, int r)
{
        switch(SSL_get_error(c->ssl, r)) {
        case SSL_ERROR_ZERO_RETURN:
                return 0; /* shutdown, closed */
        case SSL_ERROR_WANT_READ:
                return 1; /* read more later */
        case SSL_ERROR_WANT_WRITE:
                /* the read can only continue once the socket is writable */
                c->ssl_shake_state = comm_ssl_shake_hs_write;
                comm_point_listen_for_rw(c, 0, 1);
                return 1;
        case SSL_ERROR_SYSCALL:
                if(errno != 0)
                        log_err("SSL_read syscall: %s",
                                strerror(errno));
                return 0;
        default:
                break;
        }
        log_crypto_err("could not SSL_read");
        return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Finishes a pending handshake first, then reads the two byte length
 * prefix and after that the message itself into c->buffer. When the
 * message is complete tcp_callback_reader is called.
 * @param c: comm point with c->ssl set.
 * @return 0 on close or error, 1 otherwise. Without HAVE_SSL always 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
        int got;

        if(c->ssl_shake_state != comm_ssl_shake_none) {
                if(!ssl_handshake(c))
                        return 0;
                if(c->ssl_shake_state != comm_ssl_shake_none)
                        return 1;
        }

        if(c->tcp_byte_count < sizeof(uint16_t)) {
                uint16_t msglen;

                /* read length bytes */
                ERR_clear_error();
                got = SSL_read(c->ssl, (void*)sldns_buffer_at(c->buffer,
                        c->tcp_byte_count), (int)(sizeof(uint16_t) -
                        c->tcp_byte_count));
                if(got <= 0)
                        return ssl_read_failed(c, got);
                c->tcp_byte_count += got;
                if(c->tcp_byte_count != sizeof(uint16_t))
                        return 1;

                /* length prefix complete, validate it */
                msglen = sldns_buffer_read_u16_at(c->buffer, 0);
                if(msglen > sldns_buffer_capacity(c->buffer)) {
                        verbose(VERB_QUERY, "ssl: dropped larger than buffer");
                        return 0;
                }
                sldns_buffer_set_limit(c->buffer, msglen);
                if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
                        verbose(VERB_QUERY, "ssl: dropped bogus too short.");
                        return 0;
                }
                verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
                        (int)sldns_buffer_limit(c->buffer));
        }

        log_assert(sldns_buffer_remaining(c->buffer) > 0);
        ERR_clear_error();
        got = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
                (int)sldns_buffer_remaining(c->buffer));
        if(got <= 0)
                return ssl_read_failed(c, got);

        sldns_buffer_skip(c->buffer, (ssize_t)got);
        if(sldns_buffer_remaining(c->buffer) == 0) {
                /* whole message is in, hand it to the callback */
                tcp_callback_reader(c);
        }
        return 1;
#else
        (void)c;
        return 0;
#endif /* HAVE_SSL */
}
1106
#ifdef HAVE_SSL
/**
 * Handle an SSL_write that returned r <= 0.
 * @param c: comm point with c->ssl set.
 * @param r: the SSL_write return value.
 * @return 1 to wait for more I/O, 0 if the connection is closed or failed.
 */
static int
ssl_write_failed(struct comm_point* c, int r)
{
        int want = SSL_get_error(c->ssl, r);

        if(want == SSL_ERROR_WANT_WRITE)
                return 1; /* write more later */
        if(want == SSL_ERROR_WANT_READ) {
                /* The write waits for a read condition. Use the hs_read
                 * state, which ssl_handshake resolves by going back to
                 * writing, and not comm_ssl_shake_read, which would run
                 * SSL_do_handshake again on the next event. This mirrors
                 * the comm_ssl_shake_hs_write use in ssl_handle_read. */
                c->ssl_shake_state = comm_ssl_shake_hs_read;
                comm_point_listen_for_rw(c, 1, 0);
                return 1; /* wait for read condition */
        }
        if(want == SSL_ERROR_ZERO_RETURN)
                return 0; /* closed */
        if(want == SSL_ERROR_SYSCALL) {
                if(errno != 0)
                        log_err("SSL_write syscall: %s",
                                strerror(errno));
                return 0;
        }
        log_crypto_err("could not SSL_write");
        return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Finishes a pending handshake first, then writes the two byte length
 * prefix and after that the contents of c->buffer. When the buffer has
 * been written completely tcp_callback_writer is called.
 * @param c: comm point with c->ssl set.
 * @return 0 on close or error, 1 otherwise. Without HAVE_SSL always 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
        int r;
        if(c->ssl_shake_state != comm_ssl_shake_none) {
                if(!ssl_handshake(c))
                        return 0;
                if(c->ssl_shake_state != comm_ssl_shake_none)
                        return 1;
        }
        /* ignore return, if fails we may simply block */
        (void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
        if(c->tcp_byte_count < sizeof(uint16_t)) {
                /* length prefix in network byte order; tcp_byte_count
                 * tells how many of its bytes went out already */
                uint16_t len = htons(sldns_buffer_limit(c->buffer));
                ERR_clear_error();
                r = SSL_write(c->ssl,
                        (void*)(((uint8_t*)&len)+c->tcp_byte_count),
                        (int)(sizeof(uint16_t)-c->tcp_byte_count));
                if(r <= 0)
                        return ssl_write_failed(c, r);
                c->tcp_byte_count += r;
                if(c->tcp_byte_count < sizeof(uint16_t))
                        return 1;
                sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
                        sizeof(uint16_t));
                if(sldns_buffer_remaining(c->buffer) == 0) {
                        tcp_callback_writer(c);
                        return 1;
                }
        }
        log_assert(sldns_buffer_remaining(c->buffer) > 0);
        ERR_clear_error();
        r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
                (int)sldns_buffer_remaining(c->buffer));
        if(r <= 0)
                return ssl_write_failed(c, r);
        sldns_buffer_skip(c->buffer, (ssize_t)r);

        if(sldns_buffer_remaining(c->buffer) == 0) {
                tcp_callback_writer(c);
        }
        return 1;
#else
        (void)c;
        return 0;
#endif /* HAVE_SSL */
}
1190
1191 /** handle ssl tcp connection with dns contents */
1192 static int
1193 ssl_handle_it(struct comm_point* c)
1194 {
1195         if(c->tcp_is_reading)
1196                 return ssl_handle_read(c);
1197         return ssl_handle_write(c);
1198 }
1199
1200 /** Handle tcp reading callback. 
1201  * @param fd: file descriptor of socket.
1202  * @param c: comm point to read from into buffer.
1203  * @param short_ok: if true, very short packets are OK (for comm_local).
1204  * @return: 0 on error 
1205  */
1206 static int
1207 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1208 {
1209         ssize_t r;
1210         log_assert(c->type == comm_tcp || c->type == comm_local);
1211         if(c->ssl)
1212                 return ssl_handle_it(c);
1213         if(!c->tcp_is_reading)
1214                 return 0;
1215
1216         log_assert(fd != -1);
1217         if(c->tcp_byte_count < sizeof(uint16_t)) {
1218                 /* read length bytes */
1219                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1220                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1221                 if(r == 0)
1222                         return 0;
1223                 else if(r == -1) {
1224 #ifndef USE_WINSOCK
1225                         if(errno == EINTR || errno == EAGAIN)
1226                                 return 1;
1227 #ifdef ECONNRESET
1228                         if(errno == ECONNRESET && verbosity < 2)
1229                                 return 0; /* silence reset by peer */
1230 #endif
1231                         log_err_addr("read (in tcp s)", strerror(errno),
1232                                 &c->repinfo.addr, c->repinfo.addrlen);
1233 #else /* USE_WINSOCK */
1234                         if(WSAGetLastError() == WSAECONNRESET)
1235                                 return 0;
1236                         if(WSAGetLastError() == WSAEINPROGRESS)
1237                                 return 1;
1238                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1239                                 winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1240                                 return 1;
1241                         }
1242                         log_err_addr("read (in tcp s)", 
1243                                 wsa_strerror(WSAGetLastError()),
1244                                 &c->repinfo.addr, c->repinfo.addrlen);
1245 #endif
1246                         return 0;
1247                 } 
1248                 c->tcp_byte_count += r;
1249                 if(c->tcp_byte_count != sizeof(uint16_t))
1250                         return 1;
1251                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1252                         sldns_buffer_capacity(c->buffer)) {
1253                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1254                         return 0;
1255                 }
1256                 sldns_buffer_set_limit(c->buffer, 
1257                         sldns_buffer_read_u16_at(c->buffer, 0));
1258                 if(!short_ok && 
1259                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1260                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1261                         return 0;
1262                 }
1263                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1264                         (int)sldns_buffer_limit(c->buffer));
1265         }
1266
1267         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1268         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1269                 sldns_buffer_remaining(c->buffer), 0);
1270         if(r == 0) {
1271                 return 0;
1272         } else if(r == -1) {
1273 #ifndef USE_WINSOCK
1274                 if(errno == EINTR || errno == EAGAIN)
1275                         return 1;
1276                 log_err_addr("read (in tcp r)", strerror(errno),
1277                         &c->repinfo.addr, c->repinfo.addrlen);
1278 #else /* USE_WINSOCK */
1279                 if(WSAGetLastError() == WSAECONNRESET)
1280                         return 0;
1281                 if(WSAGetLastError() == WSAEINPROGRESS)
1282                         return 1;
1283                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1284                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1285                         return 1;
1286                 }
1287                 log_err_addr("read (in tcp r)",
1288                         wsa_strerror(WSAGetLastError()),
1289                         &c->repinfo.addr, c->repinfo.addrlen);
1290 #endif
1291                 return 0;
1292         }
1293         sldns_buffer_skip(c->buffer, r);
1294         if(sldns_buffer_remaining(c->buffer) <= 0) {
1295                 tcp_callback_reader(c);
1296         }
1297         return 1;
1298 }
1299
1300 /** 
1301  * Handle tcp writing callback. 
1302  * @param fd: file descriptor of socket.
1303  * @param c: comm point to write buffer out of.
1304  * @return: 0 on error
1305  */
1306 static int
1307 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1308 {
1309         ssize_t r;
1310         log_assert(c->type == comm_tcp);
1311         if(c->tcp_is_reading && !c->ssl)
1312                 return 0;
1313         log_assert(fd != -1);
1314         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1315                 /* check for pending error from nonblocking connect */
1316                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1317                 int error = 0;
1318                 socklen_t len = (socklen_t)sizeof(error);
1319                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1320                         &len) < 0){
1321 #ifndef USE_WINSOCK
1322                         error = errno; /* on solaris errno is error */
1323 #else /* USE_WINSOCK */
1324                         error = WSAGetLastError();
1325 #endif
1326                 }
1327 #ifndef USE_WINSOCK
1328 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1329                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1330                         return 1; /* try again later */
1331                 else
1332 #endif
1333                 if(error != 0 && verbosity < 2)
1334                         return 0; /* silence lots of chatter in the logs */
1335                 else if(error != 0) {
1336                         log_err_addr("tcp connect", strerror(error),
1337                                 &c->repinfo.addr, c->repinfo.addrlen);
1338 #else /* USE_WINSOCK */
1339                 /* examine error */
1340                 if(error == WSAEINPROGRESS)
1341                         return 1;
1342                 else if(error == WSAEWOULDBLOCK) {
1343                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1344                         return 1;
1345                 } else if(error != 0 && verbosity < 2)
1346                         return 0;
1347                 else if(error != 0) {
1348                         log_err_addr("tcp connect", wsa_strerror(error),
1349                                 &c->repinfo.addr, c->repinfo.addrlen);
1350 #endif /* USE_WINSOCK */
1351                         return 0;
1352                 }
1353         }
1354         if(c->ssl)
1355                 return ssl_handle_it(c);
1356
1357         if(c->tcp_byte_count < sizeof(uint16_t)) {
1358                 uint16_t len = htons(sldns_buffer_limit(c->buffer));
1359 #ifdef HAVE_WRITEV
1360                 struct iovec iov[2];
1361                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1362                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1363                 iov[1].iov_base = sldns_buffer_begin(c->buffer);
1364                 iov[1].iov_len = sldns_buffer_limit(c->buffer);
1365                 log_assert(iov[0].iov_len > 0);
1366                 log_assert(iov[1].iov_len > 0);
1367                 r = writev(fd, iov, 2);
1368 #else /* HAVE_WRITEV */
1369                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1370                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1371 #endif /* HAVE_WRITEV */
1372                 if(r == -1) {
1373 #ifndef USE_WINSOCK
1374 #  ifdef EPIPE
1375                         if(errno == EPIPE && verbosity < 2)
1376                                 return 0; /* silence 'broken pipe' */
1377   #endif
1378                         if(errno == EINTR || errno == EAGAIN)
1379                                 return 1;
1380 #  ifdef HAVE_WRITEV
1381                         log_err_addr("tcp writev", strerror(errno),
1382                                 &c->repinfo.addr, c->repinfo.addrlen);
1383 #  else /* HAVE_WRITEV */
1384                         log_err_addr("tcp send s", strerror(errno),
1385                                 &c->repinfo.addr, c->repinfo.addrlen);
1386 #  endif /* HAVE_WRITEV */
1387 #else
1388                         if(WSAGetLastError() == WSAENOTCONN)
1389                                 return 1;
1390                         if(WSAGetLastError() == WSAEINPROGRESS)
1391                                 return 1;
1392                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1393                                 winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1394                                 return 1; 
1395                         }
1396                         log_err_addr("tcp send s",
1397                                 wsa_strerror(WSAGetLastError()),
1398                                 &c->repinfo.addr, c->repinfo.addrlen);
1399 #endif
1400                         return 0;
1401                 }
1402                 c->tcp_byte_count += r;
1403                 if(c->tcp_byte_count < sizeof(uint16_t))
1404                         return 1;
1405                 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 
1406                         sizeof(uint16_t));
1407                 if(sldns_buffer_remaining(c->buffer) == 0) {
1408                         tcp_callback_writer(c);
1409                         return 1;
1410                 }
1411         }
1412         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1413         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
1414                 sldns_buffer_remaining(c->buffer), 0);
1415         if(r == -1) {
1416 #ifndef USE_WINSOCK
1417                 if(errno == EINTR || errno == EAGAIN)
1418                         return 1;
1419                 log_err_addr("tcp send r", strerror(errno),
1420                         &c->repinfo.addr, c->repinfo.addrlen);
1421 #else
1422                 if(WSAGetLastError() == WSAEINPROGRESS)
1423                         return 1;
1424                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1425                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1426                         return 1; 
1427                 }
1428                 log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
1429                         &c->repinfo.addr, c->repinfo.addrlen);
1430 #endif
1431                 return 0;
1432         }
1433         sldns_buffer_skip(c->buffer, r);
1434
1435         if(sldns_buffer_remaining(c->buffer) == 0) {
1436                 tcp_callback_writer(c);
1437         }
1438         
1439         return 1;
1440 }
1441
1442 void 
1443 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1444 {
1445         struct comm_point* c = (struct comm_point*)arg;
1446         log_assert(c->type == comm_tcp);
1447         comm_base_now(c->ev->base);
1448
1449         if(event&EV_READ) {
1450                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1451                         reclaim_tcp_handler(c);
1452                         if(!c->tcp_do_close) {
1453                                 fptr_ok(fptr_whitelist_comm_point(
1454                                         c->callback));
1455                                 (void)(*c->callback)(c, c->cb_arg, 
1456                                         NETEVENT_CLOSED, NULL);
1457                         }
1458                 }
1459                 return;
1460         }
1461         if(event&EV_WRITE) {
1462                 if(!comm_point_tcp_handle_write(fd, c)) {
1463                         reclaim_tcp_handler(c);
1464                         if(!c->tcp_do_close) {
1465                                 fptr_ok(fptr_whitelist_comm_point(
1466                                         c->callback));
1467                                 (void)(*c->callback)(c, c->cb_arg, 
1468                                         NETEVENT_CLOSED, NULL);
1469                         }
1470                 }
1471                 return;
1472         }
1473         if(event&EV_TIMEOUT) {
1474                 verbose(VERB_QUERY, "tcp took too long, dropped");
1475                 reclaim_tcp_handler(c);
1476                 if(!c->tcp_do_close) {
1477                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1478                         (void)(*c->callback)(c, c->cb_arg,
1479                                 NETEVENT_TIMEOUT, NULL);
1480                 }
1481                 return;
1482         }
1483         log_err("Ignored event %d for tcphdl.", event);
1484 }
1485
1486 void comm_point_local_handle_callback(int fd, short event, void* arg)
1487 {
1488         struct comm_point* c = (struct comm_point*)arg;
1489         log_assert(c->type == comm_local);
1490         comm_base_now(c->ev->base);
1491
1492         if(event&EV_READ) {
1493                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
1494                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1495                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
1496                                 NULL);
1497                 }
1498                 return;
1499         }
1500         log_err("Ignored event %d for localhdl.", event);
1501 }
1502
1503 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
1504         short event, void* arg)
1505 {
1506         struct comm_point* c = (struct comm_point*)arg;
1507         int err = NETEVENT_NOERROR;
1508         log_assert(c->type == comm_raw);
1509         comm_base_now(c->ev->base);
1510         
1511         if(event&EV_TIMEOUT)
1512                 err = NETEVENT_TIMEOUT;
1513         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1514         (void)(*c->callback)(c, c->cb_arg, err, NULL);
1515 }
1516
1517 struct comm_point* 
1518 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
1519         comm_point_callback_t* callback, void* callback_arg)
1520 {
1521         struct comm_point* c = (struct comm_point*)calloc(1,
1522                 sizeof(struct comm_point));
1523         short evbits;
1524         if(!c)
1525                 return NULL;
1526         c->ev = (struct internal_event*)calloc(1,
1527                 sizeof(struct internal_event));
1528         if(!c->ev) {
1529                 free(c);
1530                 return NULL;
1531         }
1532         c->ev->base = base;
1533         c->fd = fd;
1534         c->buffer = buffer;
1535         c->timeout = NULL;
1536         c->tcp_is_reading = 0;
1537         c->tcp_byte_count = 0;
1538         c->tcp_parent = NULL;
1539         c->max_tcp_count = 0;
1540         c->cur_tcp_count = 0;
1541         c->tcp_handlers = NULL;
1542         c->tcp_free = NULL;
1543         c->type = comm_udp;
1544         c->tcp_do_close = 0;
1545         c->do_not_close = 0;
1546         c->tcp_do_toggle_rw = 0;
1547         c->tcp_check_nb_connect = 0;
1548         c->inuse = 0;
1549         c->callback = callback;
1550         c->cb_arg = callback_arg;
1551         evbits = EV_READ | EV_PERSIST;
1552         /* libevent stuff */
1553         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1554         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1555                 log_err("could not baseset udp event");
1556                 comm_point_delete(c);
1557                 return NULL;
1558         }
1559         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1560                 log_err("could not add udp event");
1561                 comm_point_delete(c);
1562                 return NULL;
1563         }
1564         return c;
1565 }
1566
1567 struct comm_point* 
1568 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
1569         sldns_buffer* buffer, 
1570         comm_point_callback_t* callback, void* callback_arg)
1571 {
1572         struct comm_point* c = (struct comm_point*)calloc(1,
1573                 sizeof(struct comm_point));
1574         short evbits;
1575         if(!c)
1576                 return NULL;
1577         c->ev = (struct internal_event*)calloc(1,
1578                 sizeof(struct internal_event));
1579         if(!c->ev) {
1580                 free(c);
1581                 return NULL;
1582         }
1583         c->ev->base = base;
1584         c->fd = fd;
1585         c->buffer = buffer;
1586         c->timeout = NULL;
1587         c->tcp_is_reading = 0;
1588         c->tcp_byte_count = 0;
1589         c->tcp_parent = NULL;
1590         c->max_tcp_count = 0;
1591         c->cur_tcp_count = 0;
1592         c->tcp_handlers = NULL;
1593         c->tcp_free = NULL;
1594         c->type = comm_udp;
1595         c->tcp_do_close = 0;
1596         c->do_not_close = 0;
1597         c->inuse = 0;
1598         c->tcp_do_toggle_rw = 0;
1599         c->tcp_check_nb_connect = 0;
1600         c->callback = callback;
1601         c->cb_arg = callback_arg;
1602         evbits = EV_READ | EV_PERSIST;
1603         /* libevent stuff */
1604         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1605         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1606                 log_err("could not baseset udp event");
1607                 comm_point_delete(c);
1608                 return NULL;
1609         }
1610         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1611                 log_err("could not add udp event");
1612                 comm_point_delete(c);
1613                 return NULL;
1614         }
1615         return c;
1616 }
1617
1618 static struct comm_point* 
1619 comm_point_create_tcp_handler(struct comm_base *base, 
1620         struct comm_point* parent, size_t bufsize,
1621         comm_point_callback_t* callback, void* callback_arg)
1622 {
1623         struct comm_point* c = (struct comm_point*)calloc(1,
1624                 sizeof(struct comm_point));
1625         short evbits;
1626         if(!c)
1627                 return NULL;
1628         c->ev = (struct internal_event*)calloc(1,
1629                 sizeof(struct internal_event));
1630         if(!c->ev) {
1631                 free(c);
1632                 return NULL;
1633         }
1634         c->ev->base = base;
1635         c->fd = -1;
1636         c->buffer = sldns_buffer_new(bufsize);
1637         if(!c->buffer) {
1638                 free(c->ev);
1639                 free(c);
1640                 return NULL;
1641         }
1642         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1643         if(!c->timeout) {
1644                 sldns_buffer_free(c->buffer);
1645                 free(c->ev);
1646                 free(c);
1647                 return NULL;
1648         }
1649         c->tcp_is_reading = 0;
1650         c->tcp_byte_count = 0;
1651         c->tcp_parent = parent;
1652         c->max_tcp_count = 0;
1653         c->cur_tcp_count = 0;
1654         c->tcp_handlers = NULL;
1655         c->tcp_free = NULL;
1656         c->type = comm_tcp;
1657         c->tcp_do_close = 0;
1658         c->do_not_close = 0;
1659         c->tcp_do_toggle_rw = 1;
1660         c->tcp_check_nb_connect = 0;
1661         c->repinfo.c = c;
1662         c->callback = callback;
1663         c->cb_arg = callback_arg;
1664         /* add to parent free list */
1665         c->tcp_free = parent->tcp_free;
1666         parent->tcp_free = c;
1667         /* libevent stuff */
1668         evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1669         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1670         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1671         {
1672                 log_err("could not basetset tcphdl event");
1673                 parent->tcp_free = c->tcp_free;
1674                 free(c->ev);
1675                 free(c);
1676                 return NULL;
1677         }
1678         return c;
1679 }
1680
1681 struct comm_point* 
1682 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1683         comm_point_callback_t* callback, void* callback_arg)
1684 {
1685         struct comm_point* c = (struct comm_point*)calloc(1,
1686                 sizeof(struct comm_point));
1687         short evbits;
1688         int i;
1689         /* first allocate the TCP accept listener */
1690         if(!c)
1691                 return NULL;
1692         c->ev = (struct internal_event*)calloc(1,
1693                 sizeof(struct internal_event));
1694         if(!c->ev) {
1695                 free(c);
1696                 return NULL;
1697         }
1698         c->ev->base = base;
1699         c->fd = fd;
1700         c->buffer = NULL;
1701         c->timeout = NULL;
1702         c->tcp_is_reading = 0;
1703         c->tcp_byte_count = 0;
1704         c->tcp_parent = NULL;
1705         c->max_tcp_count = num;
1706         c->cur_tcp_count = 0;
1707         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1708                 sizeof(struct comm_point*));
1709         if(!c->tcp_handlers) {
1710                 free(c->ev);
1711                 free(c);
1712                 return NULL;
1713         }
1714         c->tcp_free = NULL;
1715         c->type = comm_tcp_accept;
1716         c->tcp_do_close = 0;
1717         c->do_not_close = 0;
1718         c->tcp_do_toggle_rw = 0;
1719         c->tcp_check_nb_connect = 0;
1720         c->callback = NULL;
1721         c->cb_arg = NULL;
1722         evbits = EV_READ | EV_PERSIST;
1723         /* libevent stuff */
1724         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1725         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1726                 event_add(&c->ev->ev, c->timeout) != 0 )
1727         {
1728                 log_err("could not add tcpacc event");
1729                 comm_point_delete(c);
1730                 return NULL;
1731         }
1732
1733         /* now prealloc the tcp handlers */
1734         for(i=0; i<num; i++) {
1735                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1736                         c, bufsize, callback, callback_arg);
1737                 if(!c->tcp_handlers[i]) {
1738                         comm_point_delete(c);
1739                         return NULL;
1740                 }
1741         }
1742         
1743         return c;
1744 }
1745
1746 struct comm_point* 
1747 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1748         comm_point_callback_t* callback, void* callback_arg)
1749 {
1750         struct comm_point* c = (struct comm_point*)calloc(1,
1751                 sizeof(struct comm_point));
1752         short evbits;
1753         if(!c)
1754                 return NULL;
1755         c->ev = (struct internal_event*)calloc(1,
1756                 sizeof(struct internal_event));
1757         if(!c->ev) {
1758                 free(c);
1759                 return NULL;
1760         }
1761         c->ev->base = base;
1762         c->fd = -1;
1763         c->buffer = sldns_buffer_new(bufsize);
1764         if(!c->buffer) {
1765                 free(c->ev);
1766                 free(c);
1767                 return NULL;
1768         }
1769         c->timeout = NULL;
1770         c->tcp_is_reading = 0;
1771         c->tcp_byte_count = 0;
1772         c->tcp_parent = NULL;
1773         c->max_tcp_count = 0;
1774         c->cur_tcp_count = 0;
1775         c->tcp_handlers = NULL;
1776         c->tcp_free = NULL;
1777         c->type = comm_tcp;
1778         c->tcp_do_close = 0;
1779         c->do_not_close = 0;
1780         c->tcp_do_toggle_rw = 1;
1781         c->tcp_check_nb_connect = 1;
1782         c->repinfo.c = c;
1783         c->callback = callback;
1784         c->cb_arg = callback_arg;
1785         evbits = EV_PERSIST | EV_WRITE;
1786         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1787         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1788         {
1789                 log_err("could not basetset tcpout event");
1790                 sldns_buffer_free(c->buffer);
1791                 free(c->ev);
1792                 free(c);
1793                 return NULL;
1794         }
1795
1796         return c;
1797 }
1798
1799 struct comm_point* 
1800 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1801         comm_point_callback_t* callback, void* callback_arg)
1802 {
1803         struct comm_point* c = (struct comm_point*)calloc(1,
1804                 sizeof(struct comm_point));
1805         short evbits;
1806         if(!c)
1807                 return NULL;
1808         c->ev = (struct internal_event*)calloc(1,
1809                 sizeof(struct internal_event));
1810         if(!c->ev) {
1811                 free(c);
1812                 return NULL;
1813         }
1814         c->ev->base = base;
1815         c->fd = fd;
1816         c->buffer = sldns_buffer_new(bufsize);
1817         if(!c->buffer) {
1818                 free(c->ev);
1819                 free(c);
1820                 return NULL;
1821         }
1822         c->timeout = NULL;
1823         c->tcp_is_reading = 1;
1824         c->tcp_byte_count = 0;
1825         c->tcp_parent = NULL;
1826         c->max_tcp_count = 0;
1827         c->cur_tcp_count = 0;
1828         c->tcp_handlers = NULL;
1829         c->tcp_free = NULL;
1830         c->type = comm_local;
1831         c->tcp_do_close = 0;
1832         c->do_not_close = 1;
1833         c->tcp_do_toggle_rw = 0;
1834         c->tcp_check_nb_connect = 0;
1835         c->callback = callback;
1836         c->cb_arg = callback_arg;
1837         /* libevent stuff */
1838         evbits = EV_PERSIST | EV_READ;
1839         event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback, 
1840                 c);
1841         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1842                 event_add(&c->ev->ev, c->timeout) != 0 )
1843         {
1844                 log_err("could not add localhdl event");
1845                 free(c->ev);
1846                 free(c);
1847                 return NULL;
1848         }
1849         return c;
1850 }
1851
1852 struct comm_point* 
1853 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
1854         comm_point_callback_t* callback, void* callback_arg)
1855 {
1856         struct comm_point* c = (struct comm_point*)calloc(1,
1857                 sizeof(struct comm_point));
1858         short evbits;
1859         if(!c)
1860                 return NULL;
1861         c->ev = (struct internal_event*)calloc(1,
1862                 sizeof(struct internal_event));
1863         if(!c->ev) {
1864                 free(c);
1865                 return NULL;
1866         }
1867         c->ev->base = base;
1868         c->fd = fd;
1869         c->buffer = NULL;
1870         c->timeout = NULL;
1871         c->tcp_is_reading = 0;
1872         c->tcp_byte_count = 0;
1873         c->tcp_parent = NULL;
1874         c->max_tcp_count = 0;
1875         c->cur_tcp_count = 0;
1876         c->tcp_handlers = NULL;
1877         c->tcp_free = NULL;
1878         c->type = comm_raw;
1879         c->tcp_do_close = 0;
1880         c->do_not_close = 1;
1881         c->tcp_do_toggle_rw = 0;
1882         c->tcp_check_nb_connect = 0;
1883         c->callback = callback;
1884         c->cb_arg = callback_arg;
1885         /* libevent stuff */
1886         if(writing)
1887                 evbits = EV_PERSIST | EV_WRITE;
1888         else    evbits = EV_PERSIST | EV_READ;
1889         event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback, 
1890                 c);
1891         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1892                 event_add(&c->ev->ev, c->timeout) != 0 )
1893         {
1894                 log_err("could not add rawhdl event");
1895                 free(c->ev);
1896                 free(c);
1897                 return NULL;
1898         }
1899         return c;
1900 }
1901
1902 void 
1903 comm_point_close(struct comm_point* c)
1904 {
1905         if(!c)
1906                 return;
1907         if(c->fd != -1)
1908                 if(event_del(&c->ev->ev) != 0) {
1909                         log_err("could not event_del on close");
1910                 }
1911         /* close fd after removing from event lists, or epoll.. is messed up */
1912         if(c->fd != -1 && !c->do_not_close) {
1913                 verbose(VERB_ALGO, "close fd %d", c->fd);
1914 #ifndef USE_WINSOCK
1915                 close(c->fd);
1916 #else
1917                 closesocket(c->fd);
1918 #endif
1919         }
1920         c->fd = -1;
1921 }
1922
1923 void 
1924 comm_point_delete(struct comm_point* c)
1925 {
1926         if(!c) 
1927                 return;
1928         if(c->type == comm_tcp && c->ssl) {
1929 #ifdef HAVE_SSL
1930                 SSL_shutdown(c->ssl);
1931                 SSL_free(c->ssl);
1932 #endif
1933         }
1934         comm_point_close(c);
1935         if(c->tcp_handlers) {
1936                 int i;
1937                 for(i=0; i<c->max_tcp_count; i++)
1938                         comm_point_delete(c->tcp_handlers[i]);
1939                 free(c->tcp_handlers);
1940         }
1941         free(c->timeout);
1942         if(c->type == comm_tcp || c->type == comm_local)
1943                 sldns_buffer_free(c->buffer);
1944         free(c->ev);
1945         free(c);
1946 }
1947
1948 void 
1949 comm_point_send_reply(struct comm_reply *repinfo)
1950 {
1951         log_assert(repinfo && repinfo->c);
1952         if(repinfo->c->type == comm_udp) {
1953                 if(repinfo->srctype)
1954                         comm_point_send_udp_msg_if(repinfo->c, 
1955                         repinfo->c->buffer, (struct sockaddr*)&repinfo->addr, 
1956                         repinfo->addrlen, repinfo);
1957                 else
1958                         comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
1959                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
1960 #ifdef USE_DNSTAP
1961                 if(repinfo->c->dtenv != NULL &&
1962                    repinfo->c->dtenv->log_client_response_messages)
1963                         dt_msg_send_client_response(repinfo->c->dtenv,
1964                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
1965 #endif
1966         } else {
1967 #ifdef USE_DNSTAP
1968                 if(repinfo->c->tcp_parent->dtenv != NULL &&
1969                    repinfo->c->tcp_parent->dtenv->log_client_response_messages)
1970                         dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
1971                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
1972 #endif
1973                 comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
1974         }
1975 }
1976
1977 void 
1978 comm_point_drop_reply(struct comm_reply* repinfo)
1979 {
1980         if(!repinfo)
1981                 return;
1982         log_assert(repinfo && repinfo->c);
1983         log_assert(repinfo->c->type != comm_tcp_accept);
1984         if(repinfo->c->type == comm_udp)
1985                 return;
1986         reclaim_tcp_handler(repinfo->c);
1987 }
1988
1989 void 
1990 comm_point_stop_listening(struct comm_point* c)
1991 {
1992         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
1993         if(event_del(&c->ev->ev) != 0) {
1994                 log_err("event_del error to stoplisten");
1995         }
1996 }
1997
1998 void 
1999 comm_point_start_listening(struct comm_point* c, int newfd, int sec)
2000 {
2001         verbose(VERB_ALGO, "comm point start listening %d", 
2002                 c->fd==-1?newfd:c->fd);
2003         if(c->type == comm_tcp_accept && !c->tcp_free) {
2004                 /* no use to start listening no free slots. */
2005                 return;
2006         }
2007         if(sec != -1 && sec != 0) {
2008                 if(!c->timeout) {
2009                         c->timeout = (struct timeval*)malloc(sizeof(
2010                                 struct timeval));
2011                         if(!c->timeout) {
2012                                 log_err("cpsl: malloc failed. No net read.");
2013                                 return;
2014                         }
2015                 }
2016                 c->ev->ev.ev_events |= EV_TIMEOUT;
2017 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
2018                 c->timeout->tv_sec = sec;
2019                 c->timeout->tv_usec = 0;
2020 #endif /* S_SPLINT_S */
2021         }
2022         if(c->type == comm_tcp) {
2023                 c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
2024                 if(c->tcp_is_reading)
2025                         c->ev->ev.ev_events |= EV_READ;
2026                 else    c->ev->ev.ev_events |= EV_WRITE;
2027         }
2028         if(newfd != -1) {
2029                 if(c->fd != -1) {
2030 #ifndef USE_WINSOCK
2031                         close(c->fd);
2032 #else
2033                         closesocket(c->fd);
2034 #endif
2035                 }
2036                 c->fd = newfd;
2037                 c->ev->ev.ev_fd = c->fd;
2038         }
2039         if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
2040                 log_err("event_add failed. in cpsl.");
2041         }
2042 }
2043
2044 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
2045 {
2046         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
2047         if(event_del(&c->ev->ev) != 0) {
2048                 log_err("event_del error to cplf");
2049         }
2050         c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
2051         if(rd) c->ev->ev.ev_events |= EV_READ;
2052         if(wr) c->ev->ev.ev_events |= EV_WRITE;
2053         if(event_add(&c->ev->ev, c->timeout) != 0) {
2054                 log_err("event_add failed. in cplf.");
2055         }
2056 }
2057
2058 size_t comm_point_get_mem(struct comm_point* c)
2059 {
2060         size_t s;
2061         if(!c) 
2062                 return 0;
2063         s = sizeof(*c) + sizeof(*c->ev);
2064         if(c->timeout) 
2065                 s += sizeof(*c->timeout);
2066         if(c->type == comm_tcp || c->type == comm_local)
2067                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
2068         if(c->type == comm_tcp_accept) {
2069                 int i;
2070                 for(i=0; i<c->max_tcp_count; i++)
2071                         s += comm_point_get_mem(c->tcp_handlers[i]);
2072         }
2073         return s;
2074 }
2075
2076 struct comm_timer* 
2077 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2078 {
2079         struct comm_timer *tm = (struct comm_timer*)calloc(1,
2080                 sizeof(struct comm_timer));
2081         if(!tm)
2082                 return NULL;
2083         tm->ev_timer = (struct internal_timer*)calloc(1,
2084                 sizeof(struct internal_timer));
2085         if(!tm->ev_timer) {
2086                 log_err("malloc failed");
2087                 free(tm);
2088                 return NULL;
2089         }
2090         tm->ev_timer->base = base;
2091         tm->callback = cb;
2092         tm->cb_arg = cb_arg;
2093         event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT, 
2094                 comm_timer_callback, tm);
2095         if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2096                 log_err("timer_create: event_base_set failed.");
2097                 free(tm->ev_timer);
2098                 free(tm);
2099                 return NULL;
2100         }
2101         return tm;
2102 }
2103
2104 void 
2105 comm_timer_disable(struct comm_timer* timer)
2106 {
2107         if(!timer)
2108                 return;
2109         evtimer_del(&timer->ev_timer->ev);
2110         timer->ev_timer->enabled = 0;
2111 }
2112
2113 void 
2114 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2115 {
2116         log_assert(tv);
2117         if(timer->ev_timer->enabled)
2118                 comm_timer_disable(timer);
2119         event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2120                 comm_timer_callback, timer);
2121         if(event_base_set(timer->ev_timer->base->eb->base, 
2122                 &timer->ev_timer->ev) != 0)
2123                 log_err("comm_timer_set: set_base failed.");
2124         if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2125                 log_err("comm_timer_set: evtimer_add failed.");
2126         timer->ev_timer->enabled = 1;
2127 }
2128
2129 void 
2130 comm_timer_delete(struct comm_timer* timer)
2131 {
2132         if(!timer)
2133                 return;
2134         comm_timer_disable(timer);
2135         free(timer->ev_timer);
2136         free(timer);
2137 }
2138
2139 void 
2140 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2141 {
2142         struct comm_timer* tm = (struct comm_timer*)arg;
2143         if(!(event&EV_TIMEOUT))
2144                 return;
2145         comm_base_now(tm->ev_timer->base);
2146         tm->ev_timer->enabled = 0;
2147         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2148         (*tm->callback)(tm->cb_arg);
2149 }
2150
2151 int 
2152 comm_timer_is_set(struct comm_timer* timer)
2153 {
2154         return (int)timer->ev_timer->enabled;
2155 }
2156
2157 size_t 
2158 comm_timer_get_mem(struct comm_timer* timer)
2159 {
2160         return sizeof(*timer) + sizeof(struct internal_timer);
2161 }
2162
2163 struct comm_signal* 
2164 comm_signal_create(struct comm_base* base,
2165         void (*callback)(int, void*), void* cb_arg)
2166 {
2167         struct comm_signal* com = (struct comm_signal*)malloc(
2168                 sizeof(struct comm_signal));
2169         if(!com) {
2170                 log_err("malloc failed");
2171                 return NULL;
2172         }
2173         com->base = base;
2174         com->callback = callback;
2175         com->cb_arg = cb_arg;
2176         com->ev_signal = NULL;
2177         return com;
2178 }
2179
2180 void 
2181 comm_signal_callback(int sig, short event, void* arg)
2182 {
2183         struct comm_signal* comsig = (struct comm_signal*)arg;
2184         if(!(event & EV_SIGNAL))
2185                 return;
2186         comm_base_now(comsig->base);
2187         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2188         (*comsig->callback)(sig, comsig->cb_arg);
2189 }
2190
2191 int 
2192 comm_signal_bind(struct comm_signal* comsig, int sig)
2193 {
2194         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
2195                 sizeof(struct internal_signal));
2196         if(!entry) {
2197                 log_err("malloc failed");
2198                 return 0;
2199         }
2200         log_assert(comsig);
2201         /* add signal event */
2202         signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2203         if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2204                 log_err("Could not set signal base");
2205                 free(entry);
2206                 return 0;
2207         }
2208         if(signal_add(&entry->ev, NULL) != 0) {
2209                 log_err("Could not add signal handler");
2210                 free(entry);
2211                 return 0;
2212         }
2213         /* link into list */
2214         entry->next = comsig->ev_signal;
2215         comsig->ev_signal = entry;
2216         return 1;
2217 }
2218
2219 void 
2220 comm_signal_delete(struct comm_signal* comsig)
2221 {
2222         struct internal_signal* p, *np;
2223         if(!comsig)
2224                 return;
2225         p=comsig->ev_signal;
2226         while(p) {
2227                 np = p->next;
2228                 signal_del(&p->ev);
2229                 free(p);
2230                 p = np;
2231         }
2232         free(comsig);
2233 }