]> CyberLeo.Net >> Repos - FreeBSD/releng/10.1.git/blob - contrib/unbound/util/netevent.c
Copy stable/10@r272459 to releng/10.1 as part of
[FreeBSD/releng/10.1.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/log.h"
44 #include "util/net_help.h"
45 #include "util/fptr_wlist.h"
46 #include "ldns/pkthdr.h"
47 #include "ldns/sbuffer.h"
48 #ifdef HAVE_OPENSSL_SSL_H
49 #include <openssl/ssl.h>
50 #endif
51 #ifdef HAVE_OPENSSL_ERR_H
52 #include <openssl/err.h>
53 #endif
54
55 /* -------- Start of local definitions -------- */
/** fallback for platforms whose headers lack CMSG_ALIGN */
#if !defined(CMSG_ALIGN)
#  if defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif /* CMSG_ALIGN */

/** fallback for platforms whose headers lack CMSG_LEN:
 * aligned header size plus the payload length */
#if !defined(CMSG_LEN)
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif /* CMSG_LEN */

/** fallback for platforms whose headers lack CMSG_SPACE:
 * aligned payload size plus aligned header size */
#if !defined(CMSG_SPACE)
#  if defined(_CMSG_HDR_ALIGN)
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif /* CMSG_SPACE */
78
/** Timeout, in seconds, for reading or writing a query over TCP */
#define TCP_QUERY_TIMEOUT 120

/** number of UDP reads to perform per read indication from select;
 * limited to a single read when NONBLOCKING_IS_BROKEN is defined */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif /* NONBLOCKING_IS_BROKEN */
88
89 /* We define libevent structures here to hide the libevent stuff. */
90
91 #ifdef USE_MINI_EVENT
92 #  ifdef USE_WINSOCK
93 #    include "util/winsock_event.h"
94 #  else
95 #    include "util/mini_event.h"
96 #  endif /* USE_WINSOCK */
97 #else /* USE_MINI_EVENT */
98    /* we use libevent */
99 #  ifdef HAVE_EVENT_H
100 #    include <event.h>
101 #  else
102 #    include "event2/event.h"
103 #    include "event2/event_struct.h"
104 #    include "event2/event_compat.h"
105 #  endif
106 #endif /* USE_MINI_EVENT */
107
108 /**
109  * The internal event structure for keeping libevent info for the event.
110  * Possibly other structures (list, tree) this is part of.
111  */
112 struct internal_event {
113         /** the comm base */
114         struct comm_base* base;
115         /** libevent event type, alloced here */
116         struct event ev;
117 };
118
119 /**
120  * Internal base structure, so that every thread has its own events.
121  */
122 struct internal_base {
123         /** libevent event_base type. */
124         struct event_base* base;
125         /** seconds time pointer points here */
126         time_t secs;
127         /** timeval with current time */
128         struct timeval now;
129         /** the event used for slow_accept timeouts */
130         struct event slow_accept;
131         /** true if slow_accept is enabled */
132         int slow_accept_enabled;
133 };
134
135 /**
136  * Internal timer structure, to store timer event in.
137  */
138 struct internal_timer {
139         /** the comm base */
140         struct comm_base* base;
141         /** libevent event type, alloced here */
142         struct event ev;
143         /** is timer enabled */
144         uint8_t enabled;
145 };
146
147 /**
148  * Internal signal structure, to store signal event in.
149  */
150 struct internal_signal {
151         /** libevent event type, alloced here */
152         struct event ev;
153         /** next in signal list */
154         struct internal_signal* next;
155 };
156
157 /** create a tcp handler with a parent */
158 static struct comm_point* comm_point_create_tcp_handler(
159         struct comm_base *base, struct comm_point* parent, size_t bufsize,
160         comm_point_callback_t* callback, void* callback_arg);
161
162 /* -------- End of local definitions -------- */
163
164 #ifdef USE_MINI_EVENT
165 /** minievent updates the time when it blocks. */
166 #define comm_base_now(x) /* nothing to do */
167 #else /* !USE_MINI_EVENT */
168 /** fillup the time values in the event base */
169 static void
170 comm_base_now(struct comm_base* b)
171 {
172         if(gettimeofday(&b->eb->now, NULL) < 0) {
173                 log_err("gettimeofday: %s", strerror(errno));
174         }
175         b->eb->secs = (time_t)b->eb->now.tv_sec;
176 }
177 #endif /* USE_MINI_EVENT */
178
179 struct comm_base* 
180 comm_base_create(int sigs)
181 {
182         struct comm_base* b = (struct comm_base*)calloc(1,
183                 sizeof(struct comm_base));
184         if(!b)
185                 return NULL;
186         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
187         if(!b->eb) {
188                 free(b);
189                 return NULL;
190         }
191 #ifdef USE_MINI_EVENT
192         (void)sigs;
193         /* use mini event time-sharing feature */
194         b->eb->base = event_init(&b->eb->secs, &b->eb->now);
195 #else
196 #  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
197         /* libev */
198         if(sigs)
199                 b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
200         else
201                 b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
202 #  else
203         (void)sigs;
204 #    ifdef HAVE_EVENT_BASE_NEW
205         b->eb->base = event_base_new();
206 #    else
207         b->eb->base = event_init();
208 #    endif
209 #  endif
210 #endif
211         if(!b->eb->base) {
212                 free(b->eb);
213                 free(b);
214                 return NULL;
215         }
216         comm_base_now(b);
217         /* avoid event_get_method call which causes crashes even when
218          * not printing, because its result is passed */
219         verbose(VERB_ALGO, 
220 #if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
221                 "libev"
222 #elif defined(USE_MINI_EVENT)
223                 "event "
224 #else
225                 "libevent "
226 #endif
227                 "%s uses %s method.", 
228                 event_get_version(), 
229 #ifdef HAVE_EVENT_BASE_GET_METHOD
230                 event_base_get_method(b->eb->base)
231 #else
232                 "not_obtainable"
233 #endif
234         );
235         return b;
236 }
237
238 struct comm_base*
239 comm_base_create_event(struct event_base* base)
240 {
241         struct comm_base* b = (struct comm_base*)calloc(1,
242                 sizeof(struct comm_base));
243         if(!b)
244                 return NULL;
245         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
246         if(!b->eb) {
247                 free(b);
248                 return NULL;
249         }
250         b->eb->base = base;
251         comm_base_now(b);
252         return b;
253 }
254
255 void 
256 comm_base_delete(struct comm_base* b)
257 {
258         if(!b)
259                 return;
260         if(b->eb->slow_accept_enabled) {
261                 if(event_del(&b->eb->slow_accept) != 0) {
262                         log_err("could not event_del slow_accept");
263                 }
264         }
265 #ifdef USE_MINI_EVENT
266         event_base_free(b->eb->base);
267 #elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
268         /* only libevent 1.2+ has it, but in 1.2 it is broken - 
269            assertion fails on signal handling ev that is not deleted
270            in libevent 1.3c (event_base_once appears) this is fixed. */
271         event_base_free(b->eb->base);
272 #endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
273         b->eb->base = NULL;
274         free(b->eb);
275         free(b);
276 }
277
278 void 
279 comm_base_delete_no_base(struct comm_base* b)
280 {
281         if(!b)
282                 return;
283         if(b->eb->slow_accept_enabled) {
284                 if(event_del(&b->eb->slow_accept) != 0) {
285                         log_err("could not event_del slow_accept");
286                 }
287         }
288         b->eb->base = NULL;
289         free(b->eb);
290         free(b);
291 }
292
293 void 
294 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
295 {
296         *tt = &b->eb->secs;
297         *tv = &b->eb->now;
298 }
299
300 void 
301 comm_base_dispatch(struct comm_base* b)
302 {
303         int retval;
304         retval = event_base_dispatch(b->eb->base);
305         if(retval != 0) {
306                 fatal_exit("event_dispatch returned error %d, "
307                         "errno is %s", retval, strerror(errno));
308         }
309 }
310
311 void comm_base_exit(struct comm_base* b)
312 {
313         if(event_base_loopexit(b->eb->base, NULL) != 0) {
314                 log_err("Could not loopexit");
315         }
316 }
317
318 void comm_base_set_slow_accept_handlers(struct comm_base* b,
319         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
320 {
321         b->stop_accept = stop_acc;
322         b->start_accept = start_acc;
323         b->cb_arg = arg;
324 }
325
326 struct event_base* comm_base_internal(struct comm_base* b)
327 {
328         return b->eb->base;
329 }
330
331 /** see if errno for udp has to be logged or not uses globals */
332 static int
333 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
334 {
335         /* do not log transient errors (unless high verbosity) */
336 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
337         switch(errno) {
338 #  ifdef ENETUNREACH
339                 case ENETUNREACH:
340 #  endif
341 #  ifdef EHOSTDOWN
342                 case EHOSTDOWN:
343 #  endif
344 #  ifdef EHOSTUNREACH
345                 case EHOSTUNREACH:
346 #  endif
347 #  ifdef ENETDOWN
348                 case ENETDOWN:
349 #  endif
350                         if(verbosity < VERB_ALGO)
351                                 return 0;
352                 default:
353                         break;
354         }
355 #endif
356         /* permission denied is gotten for every send if the
357          * network is disconnected (on some OS), squelch it */
358         if(errno == EPERM && verbosity < VERB_DETAIL)
359                 return 0;
360         /* squelch errors where people deploy AAAA ::ffff:bla for
361          * authority servers, which we try for intranets. */
362         if(errno == EINVAL && addr_is_ip4mapped(
363                 (struct sockaddr_storage*)addr, addrlen) &&
364                 verbosity < VERB_DETAIL)
365                 return 0;
366         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
367          * but a dns cache does not need it. */
368         if(errno == EACCES && addr_is_broadcast(
369                 (struct sockaddr_storage*)addr, addrlen) &&
370                 verbosity < VERB_DETAIL)
371                 return 0;
372         return 1;
373 }
374
/** a failed tcp connect uses the same errno filter as a failed udp send */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
        int needs_log = udp_send_errno_needs_log(addr, addrlen);
        return needs_log;
}
379
380 /* send a UDP reply */
381 int
382 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
383         struct sockaddr* addr, socklen_t addrlen) 
384 {
385         ssize_t sent;
386         log_assert(c->fd != -1);
387 #ifdef UNBOUND_DEBUG
388         if(sldns_buffer_remaining(packet) == 0)
389                 log_err("error: send empty UDP packet");
390 #endif
391         log_assert(addr && addrlen > 0);
392         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
393                 sldns_buffer_remaining(packet), 0,
394                 addr, addrlen);
395         if(sent == -1) {
396                 if(!udp_send_errno_needs_log(addr, addrlen))
397                         return 0;
398 #ifndef USE_WINSOCK
399                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
400 #else
401                 verbose(VERB_OPS, "sendto failed: %s", 
402                         wsa_strerror(WSAGetLastError()));
403 #endif
404                 log_addr(VERB_OPS, "remote address is", 
405                         (struct sockaddr_storage*)addr, addrlen);
406                 return 0;
407         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
408                 log_err("sent %d in place of %d bytes", 
409                         (int)sent, (int)sldns_buffer_remaining(packet));
410                 return 0;
411         }
412         return 1;
413 }
414
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Print the ancillary (packet info) data of a reply for debugging.
 * @param str: prefix for the log line.
 * @param r: reply whose srctype and pktinfo are printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
        switch(r->srctype) {
        case 6: {
                char buf[1024];
                if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
                        buf, (socklen_t)sizeof(buf)) == 0) {
                        (void)strlcpy(buf, "(inet_ntop error)", sizeof(buf));
                }
                buf[sizeof(buf)-1]=0;
                log_info("%s: %s %d", str, buf, r->pktinfo.v6info.ipi6_ifindex);
                break;
        }
        case 4: {
#ifdef IP_PKTINFO
                char buf1[1024], buf2[1024];
                if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr,
                        buf1, (socklen_t)sizeof(buf1)) == 0) {
                        (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
                }
                buf1[sizeof(buf1)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
                if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
                        buf2, (socklen_t)sizeof(buf2)) == 0) {
                        (void)strlcpy(buf2, "(inet_ntop error)", sizeof(buf2));
                }
                buf2[sizeof(buf2)-1]=0;
#else
                /* no ipi_spec_dst member to print */
                buf2[0]=0;
#endif
                log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
                        buf1, buf2);
#elif defined(IP_RECVDSTADDR)
                char buf1[1024];
                if(inet_ntop(AF_INET, &r->pktinfo.v4addr,
                        buf1, (socklen_t)sizeof(buf1)) == 0) {
                        (void)strlcpy(buf1, "(inet_ntop error)", sizeof(buf1));
                }
                buf1[sizeof(buf1)-1]=0;
                log_info("%s: %s", str, buf1);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
                break;
        }
        default:
                log_info("%s: unknown srctype %d", str, r->srctype);
                break;
        }
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
462
463 /** send a UDP reply over specified interface*/
464 static int
465 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
466         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
467 {
468 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
469         ssize_t sent;
470         struct msghdr msg;
471         struct iovec iov[1];
472         char control[256];
473 #ifndef S_SPLINT_S
474         struct cmsghdr *cmsg;
475 #endif /* S_SPLINT_S */
476
477         log_assert(c->fd != -1);
478 #ifdef UNBOUND_DEBUG
479         if(sldns_buffer_remaining(packet) == 0)
480                 log_err("error: send empty UDP packet");
481 #endif
482         log_assert(addr && addrlen > 0);
483
484         msg.msg_name = addr;
485         msg.msg_namelen = addrlen;
486         iov[0].iov_base = sldns_buffer_begin(packet);
487         iov[0].iov_len = sldns_buffer_remaining(packet);
488         msg.msg_iov = iov;
489         msg.msg_iovlen = 1;
490         msg.msg_control = control;
491 #ifndef S_SPLINT_S
492         msg.msg_controllen = sizeof(control);
493 #endif /* S_SPLINT_S */
494         msg.msg_flags = 0;
495
496 #ifndef S_SPLINT_S
497         cmsg = CMSG_FIRSTHDR(&msg);
498         if(r->srctype == 4) {
499 #ifdef IP_PKTINFO
500                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
501                 log_assert(msg.msg_controllen <= sizeof(control));
502                 cmsg->cmsg_level = IPPROTO_IP;
503                 cmsg->cmsg_type = IP_PKTINFO;
504                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
505                         sizeof(struct in_pktinfo));
506                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
507 #elif defined(IP_SENDSRCADDR)
508                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
509                 log_assert(msg.msg_controllen <= sizeof(control));
510                 cmsg->cmsg_level = IPPROTO_IP;
511                 cmsg->cmsg_type = IP_SENDSRCADDR;
512                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
513                         sizeof(struct in_addr));
514                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
515 #else
516                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
517                 msg.msg_control = NULL;
518 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
519         } else if(r->srctype == 6) {
520                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
521                 log_assert(msg.msg_controllen <= sizeof(control));
522                 cmsg->cmsg_level = IPPROTO_IPV6;
523                 cmsg->cmsg_type = IPV6_PKTINFO;
524                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
525                         sizeof(struct in6_pktinfo));
526                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
527         } else {
528                 /* try to pass all 0 to use default route */
529                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
530                 log_assert(msg.msg_controllen <= sizeof(control));
531                 cmsg->cmsg_level = IPPROTO_IPV6;
532                 cmsg->cmsg_type = IPV6_PKTINFO;
533                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
534                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
535         }
536 #endif /* S_SPLINT_S */
537         if(verbosity >= VERB_ALGO)
538                 p_ancil("send_udp over interface", r);
539         sent = sendmsg(c->fd, &msg, 0);
540         if(sent == -1) {
541                 if(!udp_send_errno_needs_log(addr, addrlen))
542                         return 0;
543                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
544                 log_addr(VERB_OPS, "remote address is", 
545                         (struct sockaddr_storage*)addr, addrlen);
546                 return 0;
547         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
548                 log_err("sent %d in place of %d bytes", 
549                         (int)sent, (int)sldns_buffer_remaining(packet));
550                 return 0;
551         }
552         return 1;
553 #else
554         (void)c;
555         (void)packet;
556         (void)addr;
557         (void)addrlen;
558         (void)r;
559         log_err("sendmsg: IPV6_PKTINFO not supported");
560         return 0;
561 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
562 }
563
/**
 * Event callback for a UDP comm point that reads with recvmsg, so that
 * the packet info (destination address, interface) of every datagram is
 * captured and used again for the reply.
 * Reads up to NUM_UDP_PER_SELECT datagrams per invocation.
 * Exits fatally when the platform lacks IPV6_PKTINFO or recvmsg.
 * @param fd: the socket that is readable.
 * @param event: event flags; nothing is done unless EV_READ is set.
 * @param arg: the comm point (struct comm_point*).
 */
void
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
        struct comm_reply rep;
        struct msghdr msg;
        struct iovec iov[1];
        ssize_t rcv;
        /* the union gives the buffer the alignment of struct cmsghdr,
         * which a plain char array does not guarantee */
        union {
                struct cmsghdr hdr;
                char buf[256];
        } ancil;
        int i;
#ifndef S_SPLINT_S
        struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

        rep.c = (struct comm_point*)arg;
        log_assert(rep.c->type == comm_udp);

        if(!(event&EV_READ))
                return;
        log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
        comm_base_now(rep.c->ev->base);
        for(i=0; i<NUM_UDP_PER_SELECT; i++) {
                sldns_buffer_clear(rep.c->buffer);
                rep.addrlen = (socklen_t)sizeof(rep.addr);
                log_assert(fd != -1);
                log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
                msg.msg_name = &rep.addr;
                msg.msg_namelen = (socklen_t)sizeof(rep.addr);
                iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
                iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
                msg.msg_iov = iov;
                msg.msg_iovlen = 1;
                msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
                msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
                msg.msg_flags = 0;
                rcv = recvmsg(fd, &msg, 0);
                if(rcv == -1) {
                        /* EAGAIN and EINTR are not worth a log line */
                        if(errno != EAGAIN && errno != EINTR) {
                                log_err("recvmsg failed: %s", strerror(errno));
                        }
                        return;
                }
                rep.addrlen = msg.msg_namelen;
                sldns_buffer_skip(rep.c->buffer, rcv);
                sldns_buffer_flip(rep.c->buffer);
                /* stays 0 if no packet info is found below */
                rep.srctype = 0;
#ifndef S_SPLINT_S
                for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
                        cmsg = CMSG_NXTHDR(&msg, cmsg)) {
                        if( cmsg->cmsg_level == IPPROTO_IPV6 &&
                                cmsg->cmsg_type == IPV6_PKTINFO) {
                                rep.srctype = 6;
                                memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
                                        sizeof(struct in6_pktinfo));
                                break;
#ifdef IP_PKTINFO
                        } else if( cmsg->cmsg_level == IPPROTO_IP &&
                                cmsg->cmsg_type == IP_PKTINFO) {
                                rep.srctype = 4;
                                memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
                                        sizeof(struct in_pktinfo));
                                break;
#elif defined(IP_RECVDSTADDR)
                        } else if( cmsg->cmsg_level == IPPROTO_IP &&
                                cmsg->cmsg_type == IP_RECVDSTADDR) {
                                rep.srctype = 4;
                                memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
                                        sizeof(struct in_addr));
                                break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
                        }
                }
                if(verbosity >= VERB_ALGO)
                        p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
                fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
                if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
                        /* send back immediate reply */
                        (void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
                                (struct sockaddr*)&rep.addr, rep.addrlen, &rep);
                }
                /* same test as comm_point_udp_callback: stop when the
                 * commpoint was closed (fd is -1) or now uses another
                 * fd, so that the stale fd is not read again */
                if(rep.c->fd != fd)
                        break;
        }
#else
        (void)fd;
        (void)event;
        (void)arg;
        fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
                "Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
658
659 void 
660 comm_point_udp_callback(int fd, short event, void* arg)
661 {
662         struct comm_reply rep;
663         ssize_t rcv;
664         int i;
665
666         rep.c = (struct comm_point*)arg;
667         log_assert(rep.c->type == comm_udp);
668
669         if(!(event&EV_READ))
670                 return;
671         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
672         comm_base_now(rep.c->ev->base);
673         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
674                 sldns_buffer_clear(rep.c->buffer);
675                 rep.addrlen = (socklen_t)sizeof(rep.addr);
676                 log_assert(fd != -1);
677                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
678                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
679                         sldns_buffer_remaining(rep.c->buffer), 0, 
680                         (struct sockaddr*)&rep.addr, &rep.addrlen);
681                 if(rcv == -1) {
682 #ifndef USE_WINSOCK
683                         if(errno != EAGAIN && errno != EINTR)
684                                 log_err("recvfrom %d failed: %s", 
685                                         fd, strerror(errno));
686 #else
687                         if(WSAGetLastError() != WSAEINPROGRESS &&
688                                 WSAGetLastError() != WSAECONNRESET &&
689                                 WSAGetLastError()!= WSAEWOULDBLOCK)
690                                 log_err("recvfrom failed: %s",
691                                         wsa_strerror(WSAGetLastError()));
692 #endif
693                         return;
694                 }
695                 sldns_buffer_skip(rep.c->buffer, rcv);
696                 sldns_buffer_flip(rep.c->buffer);
697                 rep.srctype = 0;
698                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
699                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
700                         /* send back immediate reply */
701                         (void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
702                                 (struct sockaddr*)&rep.addr, rep.addrlen);
703                 }
704                 if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
705                 another UDP port. Note rep.c cannot be reused with TCP fd. */
706                         break;
707         }
708 }
709
710 /** Use a new tcp handler for new query fd, set to read query */
711 static void
712 setup_tcp_handler(struct comm_point* c, int fd) 
713 {
714         log_assert(c->type == comm_tcp);
715         log_assert(c->fd == -1);
716         sldns_buffer_clear(c->buffer);
717         c->tcp_is_reading = 1;
718         c->tcp_byte_count = 0;
719         comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
720 }
721
722 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
723         short ATTR_UNUSED(event), void* arg)
724 {
725         struct comm_base* b = (struct comm_base*)arg;
726         /* timeout for the slow accept, re-enable accepts again */
727         if(b->start_accept) {
728                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
729                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
730                 (*b->start_accept)(b->cb_arg);
731                 b->eb->slow_accept_enabled = 0;
732         }
733 }
734
735 int comm_point_perform_accept(struct comm_point* c,
736         struct sockaddr_storage* addr, socklen_t* addrlen)
737 {
738         int new_fd;
739         *addrlen = (socklen_t)sizeof(*addr);
740         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
741         if(new_fd == -1) {
742 #ifndef USE_WINSOCK
743                 /* EINTR is signal interrupt. others are closed connection. */
744                 if(     errno == EINTR || errno == EAGAIN
745 #ifdef EWOULDBLOCK
746                         || errno == EWOULDBLOCK 
747 #endif
748 #ifdef ECONNABORTED
749                         || errno == ECONNABORTED 
750 #endif
751 #ifdef EPROTO
752                         || errno == EPROTO
753 #endif /* EPROTO */
754                         )
755                         return -1;
756 #if defined(ENFILE) && defined(EMFILE)
757                 if(errno == ENFILE || errno == EMFILE) {
758                         /* out of file descriptors, likely outside of our
759                          * control. stop accept() calls for some time */
760                         if(c->ev->base->stop_accept) {
761                                 struct comm_base* b = c->ev->base;
762                                 struct timeval tv;
763                                 verbose(VERB_ALGO, "out of file descriptors: "
764                                         "slow accept");
765                                 b->eb->slow_accept_enabled = 1;
766                                 fptr_ok(fptr_whitelist_stop_accept(
767                                         b->stop_accept));
768                                 (*b->stop_accept)(b->cb_arg);
769                                 /* set timeout, no mallocs */
770                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
771                                 tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
772                                 event_set(&b->eb->slow_accept, -1, EV_TIMEOUT, 
773                                         comm_base_handle_slow_accept, b);
774                                 if(event_base_set(b->eb->base,
775                                         &b->eb->slow_accept) != 0) {
776                                         /* we do not want to log here, because
777                                          * that would spam the logfiles.
778                                          * error: "event_base_set failed." */
779                                 }
780                                 if(event_add(&b->eb->slow_accept, &tv) != 0) {
781                                         /* we do not want to log here,
782                                          * error: "event_add failed." */
783                                 }
784                         }
785                         return -1;
786                 }
787 #endif
788                 log_err("accept failed: %s", strerror(errno));
789 #else /* USE_WINSOCK */
790                 if(WSAGetLastError() == WSAEINPROGRESS ||
791                         WSAGetLastError() == WSAECONNRESET)
792                         return -1;
793                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
794                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
795                         return -1;
796                 }
797                 log_err("accept failed: %s", wsa_strerror(WSAGetLastError()));
798 #endif
799                 log_addr(0, "remote address is", addr, *addrlen);
800                 return -1;
801         }
802         fd_set_nonblock(new_fd);
803         return new_fd;
804 }
805
806 #ifdef USE_WINSOCK
807 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
808         int ATTR_UNUSED(argi), long argl, long retvalue)
809 {
810         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
811                 (oper&BIO_CB_RETURN)?"return":"before",
812                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
813                 WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
814         /* on windows, check if previous operation caused EWOULDBLOCK */
815         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
816                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
817                 if(WSAGetLastError() == WSAEWOULDBLOCK)
818                         winsock_tcp_wouldblock((struct event*)
819                                 BIO_get_callback_arg(b), EV_READ);
820         }
821         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
822                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
823                 if(WSAGetLastError() == WSAEWOULDBLOCK)
824                         winsock_tcp_wouldblock((struct event*)
825                                 BIO_get_callback_arg(b), EV_WRITE);
826         }
827         /* return original return value */
828         return retvalue;
829 }
830
831 /** set win bio callbacks for nonblocking operations */
832 void
833 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
834 {
835         SSL* ssl = (SSL*)thessl;
836         /* set them both just in case, but usually they are the same BIO */
837         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
838         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)&c->ev->ev);
839         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
840         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)&c->ev->ev);
841 }
842 #endif
843
844 void 
845 comm_point_tcp_accept_callback(int fd, short event, void* arg)
846 {
847         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
848         int new_fd;
849         log_assert(c->type == comm_tcp_accept);
850         if(!(event & EV_READ)) {
851                 log_info("ignoring tcp accept event %d", (int)event);
852                 return;
853         }
854         comm_base_now(c->ev->base);
855         /* find free tcp handler. */
856         if(!c->tcp_free) {
857                 log_warn("accepted too many tcp, connections full");
858                 return;
859         }
860         /* accept incoming connection. */
861         c_hdl = c->tcp_free;
862         log_assert(fd != -1);
863         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
864                 &c_hdl->repinfo.addrlen);
865         if(new_fd == -1)
866                 return;
867         if(c->ssl) {
868                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
869                 if(!c_hdl->ssl) {
870                         c_hdl->fd = new_fd;
871                         comm_point_close(c_hdl);
872                         return;
873                 }
874                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
875 #ifdef USE_WINSOCK
876                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
877 #endif
878         }
879
880         /* grab the tcp handler buffers */
881         c->tcp_free = c_hdl->tcp_free;
882         if(!c->tcp_free) {
883                 /* stop accepting incoming queries for now. */
884                 comm_point_stop_listening(c);
885         }
886         /* addr is dropped. Not needed for tcp reply. */
887         setup_tcp_handler(c_hdl, new_fd);
888 }
889
890 /** Make tcp handler free for next assignment */
891 static void
892 reclaim_tcp_handler(struct comm_point* c)
893 {
894         log_assert(c->type == comm_tcp);
895         if(c->ssl) {
896 #ifdef HAVE_SSL
897                 SSL_shutdown(c->ssl);
898                 SSL_free(c->ssl);
899                 c->ssl = NULL;
900 #endif
901         }
902         comm_point_close(c);
903         if(c->tcp_parent) {
904                 c->tcp_free = c->tcp_parent->tcp_free;
905                 c->tcp_parent->tcp_free = c;
906                 if(!c->tcp_free) {
907                         /* re-enable listening on accept socket */
908                         comm_point_start_listening(c->tcp_parent, -1, -1);
909                 }
910         }
911 }
912
913 /** do the callback when writing is done */
914 static void
915 tcp_callback_writer(struct comm_point* c)
916 {
917         log_assert(c->type == comm_tcp);
918         sldns_buffer_clear(c->buffer);
919         if(c->tcp_do_toggle_rw)
920                 c->tcp_is_reading = 1;
921         c->tcp_byte_count = 0;
922         /* switch from listening(write) to listening(read) */
923         comm_point_stop_listening(c);
924         comm_point_start_listening(c, -1, -1);
925 }
926
927 /** do the callback when reading is done */
928 static void
929 tcp_callback_reader(struct comm_point* c)
930 {
931         log_assert(c->type == comm_tcp || c->type == comm_local);
932         sldns_buffer_flip(c->buffer);
933         if(c->tcp_do_toggle_rw)
934                 c->tcp_is_reading = 0;
935         c->tcp_byte_count = 0;
936         if(c->type == comm_tcp)
937                 comm_point_stop_listening(c);
938         fptr_ok(fptr_whitelist_comm_point(c->callback));
939         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
940                 comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
941         }
942 }
943
/**
 * continue ssl handshake
 * Also unwinds the comm_ssl_shake_hs_read and comm_ssl_shake_hs_write
 * states, where a read or write had to wait for the opposite direction.
 * @param c: comm point with an ssl connection.
 * @return 0 if the connection has to be closed, 1 otherwise. On return 1
 *	the state is comm_ssl_shake_none once there is nothing left to wait
 *	for, or still a shake state when more socket activity is needed.
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int r;

	switch(c->ssl_shake_state) {
	case comm_ssl_shake_hs_read:
		/* the awaited read condition arrived, resume writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	case comm_ssl_shake_hs_write:
		/* the awaited write condition arrived, resume reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	default:
		break;
	}

	ERR_clear_error();
	r = SSL_do_handshake(c->ssl);
	if(r != 1) {
		int want = SSL_get_error(c->ssl, r);
		switch(want) {
		case SSL_ERROR_WANT_READ:
			/* only touch the event if not waiting for read yet */
			if(c->ssl_shake_state != comm_ssl_shake_read) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
			}
			return 1;
		case SSL_ERROR_WANT_WRITE:
			/* only touch the event if not waiting for write yet */
			if(c->ssl_shake_state != comm_ssl_shake_write) {
				c->ssl_shake_state = comm_ssl_shake_write;
				comm_point_listen_for_rw(c, 0, 1);
			}
			return 1;
		default:
			break;
		}
		if(r == 0) {
			/* closed */
			return 0;
		}
		if(want == SSL_ERROR_SYSCALL) {
			/* with errno 0 this is an unclean close: no log */
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err("ssl handshake failed");
		log_addr(1, "ssl handshake failed", &c->repinfo.addr,
			c->repinfo.addrlen);
		return 0;
	}
	/* handshake done; peer verification could be performed here */
	log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
		c->repinfo.addrlen);

	/* put the event back in the direction the comm point needs */
	if(!c->tcp_is_reading) {
		comm_point_listen_for_rw(c, 1, 1);
	} else if(c->ssl_shake_state != comm_ssl_shake_read) {
		comm_point_listen_for_rw(c, 1, 0);
	}
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
1009
#ifdef HAVE_SSL
/**
 * Decide how to continue after SSL_read returned a value <= 0.
 * @param c: comm point with the ssl connection.
 * @param r: the return value of SSL_read.
 * @return 1 to keep the connection and wait for more events,
 *	0 to close the connection.
 */
static int
ssl_read_failed(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* wait for writability, then come back to reading */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP
 * Finishes a pending handshake first, then reads the two byte length
 * prefix and after that the message itself into the buffer. When the
 * message is complete, tcp_callback_reader is called.
 * @param c: comm point with the ssl connection.
 * @return 0 if the connection must be closed, 1 otherwise.
 *	Without HAVE_SSL it always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		uint16_t msglen;
		/* the length prefix is stored at the start of the buffer */
		ERR_clear_error();
		r = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(r <= 0)
			return ssl_read_failed(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		msglen = sldns_buffer_read_u16_at(c->buffer, 0);
		if(msglen > sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer, msglen);
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	/* read (more of) the message body */
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_read_failed(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);
	if(sldns_buffer_remaining(c->buffer) == 0) {
		/* message complete */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1096
/**
 * ssl write callback on TCP
 * Finishes a pending handshake first, then writes the two byte length
 * prefix followed by the buffer contents. When everything has been
 * written, tcp_callback_writer is called.
 *
 * If SSL_write needs to read first (SSL_ERROR_WANT_READ), the state is set
 * to comm_ssl_shake_hs_read. ssl_handshake() recognises that state, puts
 * the event back to read+write and lets the write be retried. This mirrors
 * the comm_ssl_shake_hs_write state used by ssl_handle_read and avoids
 * running SSL_do_handshake again.
 * @param c: comm point with the ssl connection.
 * @return 0 if the connection must be closed, 1 otherwise.
 *	Without HAVE_SSL it always returns 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* (rest of) the length prefix, in network byte order */
		uint16_t len = htons(sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		r = SSL_write(c->ssl,
			(void*)(((uint8_t*)&len)+c->tcp_byte_count),
			(int)(sizeof(uint16_t)-c->tcp_byte_count));
		if(r <= 0) {
			int want = SSL_get_error(c->ssl, r);
			if(want == SSL_ERROR_ZERO_RETURN) {
				return 0; /* closed */
			} else if(want == SSL_ERROR_WANT_READ) {
				/* wait for readability, then resume the
				 * write; see ssl_handshake() */
				c->ssl_shake_state = comm_ssl_shake_hs_read;
				comm_point_listen_for_rw(c, 1, 0);
				return 1; /* wait for read condition */
			} else if(want == SSL_ERROR_WANT_WRITE) {
				return 1; /* write more later */
			} else if(want == SSL_ERROR_SYSCALL) {
				if(errno != 0)
					log_err("SSL_write syscall: %s",
						strerror(errno));
				return 0;
			}
			log_crypto_err("could not SSL_write");
			return 0;
		}
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	/* write (more of) the message body */
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0) {
		int want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_ZERO_RETURN) {
			return 0; /* closed */
		} else if(want == SSL_ERROR_WANT_READ) {
			/* wait for readability, then resume the write;
			 * see ssl_handshake() */
			c->ssl_shake_state = comm_ssl_shake_hs_read;
			comm_point_listen_for_rw(c, 1, 0);
			return 1; /* wait for read condition */
		} else if(want == SSL_ERROR_WANT_WRITE) {
			return 1; /* write more later */
		} else if(want == SSL_ERROR_SYSCALL) {
			if(errno != 0)
				log_err("SSL_write syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err("could not SSL_write");
		return 0;
	}
	sldns_buffer_skip(c->buffer, (ssize_t)r);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		/* message complete */
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1180
1181 /** handle ssl tcp connection with dns contents */
1182 static int
1183 ssl_handle_it(struct comm_point* c)
1184 {
1185         if(c->tcp_is_reading)
1186                 return ssl_handle_read(c);
1187         return ssl_handle_write(c);
1188 }
1189
1190 /** Handle tcp reading callback. 
1191  * @param fd: file descriptor of socket.
1192  * @param c: comm point to read from into buffer.
1193  * @param short_ok: if true, very short packets are OK (for comm_local).
1194  * @return: 0 on error 
1195  */
1196 static int
1197 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1198 {
1199         ssize_t r;
1200         log_assert(c->type == comm_tcp || c->type == comm_local);
1201         if(c->ssl)
1202                 return ssl_handle_it(c);
1203         if(!c->tcp_is_reading)
1204                 return 0;
1205
1206         log_assert(fd != -1);
1207         if(c->tcp_byte_count < sizeof(uint16_t)) {
1208                 /* read length bytes */
1209                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1210                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1211                 if(r == 0)
1212                         return 0;
1213                 else if(r == -1) {
1214 #ifndef USE_WINSOCK
1215                         if(errno == EINTR || errno == EAGAIN)
1216                                 return 1;
1217 #ifdef ECONNRESET
1218                         if(errno == ECONNRESET && verbosity < 2)
1219                                 return 0; /* silence reset by peer */
1220 #endif
1221                         log_err("read (in tcp s): %s", strerror(errno));
1222 #else /* USE_WINSOCK */
1223                         if(WSAGetLastError() == WSAECONNRESET)
1224                                 return 0;
1225                         if(WSAGetLastError() == WSAEINPROGRESS)
1226                                 return 1;
1227                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1228                                 winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1229                                 return 1;
1230                         }
1231                         log_err("read (in tcp s): %s", 
1232                                 wsa_strerror(WSAGetLastError()));
1233 #endif
1234                         log_addr(0, "remote address is", &c->repinfo.addr,
1235                                 c->repinfo.addrlen);
1236                         return 0;
1237                 } 
1238                 c->tcp_byte_count += r;
1239                 if(c->tcp_byte_count != sizeof(uint16_t))
1240                         return 1;
1241                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1242                         sldns_buffer_capacity(c->buffer)) {
1243                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1244                         return 0;
1245                 }
1246                 sldns_buffer_set_limit(c->buffer, 
1247                         sldns_buffer_read_u16_at(c->buffer, 0));
1248                 if(!short_ok && 
1249                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1250                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1251                         return 0;
1252                 }
1253                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1254                         (int)sldns_buffer_limit(c->buffer));
1255         }
1256
1257         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1258         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1259                 sldns_buffer_remaining(c->buffer), 0);
1260         if(r == 0) {
1261                 return 0;
1262         } else if(r == -1) {
1263 #ifndef USE_WINSOCK
1264                 if(errno == EINTR || errno == EAGAIN)
1265                         return 1;
1266                 log_err("read (in tcp r): %s", strerror(errno));
1267 #else /* USE_WINSOCK */
1268                 if(WSAGetLastError() == WSAECONNRESET)
1269                         return 0;
1270                 if(WSAGetLastError() == WSAEINPROGRESS)
1271                         return 1;
1272                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1273                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1274                         return 1;
1275                 }
1276                 log_err("read (in tcp r): %s", 
1277                         wsa_strerror(WSAGetLastError()));
1278 #endif
1279                 log_addr(0, "remote address is", &c->repinfo.addr,
1280                         c->repinfo.addrlen);
1281                 return 0;
1282         }
1283         sldns_buffer_skip(c->buffer, r);
1284         if(sldns_buffer_remaining(c->buffer) <= 0) {
1285                 tcp_callback_reader(c);
1286         }
1287         return 1;
1288 }
1289
1290 /** 
1291  * Handle tcp writing callback. 
1292  * @param fd: file descriptor of socket.
1293  * @param c: comm point to write buffer out of.
1294  * @return: 0 on error
1295  */
1296 static int
1297 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1298 {
1299         ssize_t r;
1300         log_assert(c->type == comm_tcp);
1301         if(c->tcp_is_reading && !c->ssl)
1302                 return 0;
1303         log_assert(fd != -1);
1304         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1305                 /* check for pending error from nonblocking connect */
1306                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1307                 int error = 0;
1308                 socklen_t len = (socklen_t)sizeof(error);
1309                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1310                         &len) < 0){
1311 #ifndef USE_WINSOCK
1312                         error = errno; /* on solaris errno is error */
1313 #else /* USE_WINSOCK */
1314                         error = WSAGetLastError();
1315 #endif
1316                 }
1317 #ifndef USE_WINSOCK
1318 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1319                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1320                         return 1; /* try again later */
1321                 else
1322 #endif
1323                 if(error != 0 && verbosity < 2)
1324                         return 0; /* silence lots of chatter in the logs */
1325                 else if(error != 0) {
1326                         log_err("tcp connect: %s", strerror(error));
1327 #else /* USE_WINSOCK */
1328                 /* examine error */
1329                 if(error == WSAEINPROGRESS)
1330                         return 1;
1331                 else if(error == WSAEWOULDBLOCK) {
1332                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1333                         return 1;
1334                 } else if(error != 0 && verbosity < 2)
1335                         return 0;
1336                 else if(error != 0) {
1337                         log_err("tcp connect: %s", wsa_strerror(error));
1338 #endif /* USE_WINSOCK */
1339                         log_addr(0, "remote address is", &c->repinfo.addr, 
1340                                 c->repinfo.addrlen);
1341                         return 0;
1342                 }
1343         }
1344         if(c->ssl)
1345                 return ssl_handle_it(c);
1346
1347         if(c->tcp_byte_count < sizeof(uint16_t)) {
1348                 uint16_t len = htons(sldns_buffer_limit(c->buffer));
1349 #ifdef HAVE_WRITEV
1350                 struct iovec iov[2];
1351                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1352                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1353                 iov[1].iov_base = sldns_buffer_begin(c->buffer);
1354                 iov[1].iov_len = sldns_buffer_limit(c->buffer);
1355                 log_assert(iov[0].iov_len > 0);
1356                 log_assert(iov[1].iov_len > 0);
1357                 r = writev(fd, iov, 2);
1358 #else /* HAVE_WRITEV */
1359                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1360                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1361 #endif /* HAVE_WRITEV */
1362                 if(r == -1) {
1363 #ifndef USE_WINSOCK
1364 #ifdef EPIPE
1365                         if(errno == EPIPE && verbosity < 2)
1366                                 return 0; /* silence 'broken pipe' */
1367 #endif
1368                         if(errno == EINTR || errno == EAGAIN)
1369                                 return 1;
1370                         log_err("tcp writev: %s", strerror(errno));
1371 #else
1372                         if(WSAGetLastError() == WSAENOTCONN)
1373                                 return 1;
1374                         if(WSAGetLastError() == WSAEINPROGRESS)
1375                                 return 1;
1376                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1377                                 winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1378                                 return 1; 
1379                         }
1380                         log_err("tcp send s: %s", 
1381                                 wsa_strerror(WSAGetLastError()));
1382 #endif
1383                         log_addr(0, "remote address is", &c->repinfo.addr,
1384                                 c->repinfo.addrlen);
1385                         return 0;
1386                 }
1387                 c->tcp_byte_count += r;
1388                 if(c->tcp_byte_count < sizeof(uint16_t))
1389                         return 1;
1390                 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 
1391                         sizeof(uint16_t));
1392                 if(sldns_buffer_remaining(c->buffer) == 0) {
1393                         tcp_callback_writer(c);
1394                         return 1;
1395                 }
1396         }
1397         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1398         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
1399                 sldns_buffer_remaining(c->buffer), 0);
1400         if(r == -1) {
1401 #ifndef USE_WINSOCK
1402                 if(errno == EINTR || errno == EAGAIN)
1403                         return 1;
1404                 log_err("tcp send r: %s", strerror(errno));
1405 #else
1406                 if(WSAGetLastError() == WSAEINPROGRESS)
1407                         return 1;
1408                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1409                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1410                         return 1; 
1411                 }
1412                 log_err("tcp send r: %s", 
1413                         wsa_strerror(WSAGetLastError()));
1414 #endif
1415                 log_addr(0, "remote address is", &c->repinfo.addr,
1416                         c->repinfo.addrlen);
1417                 return 0;
1418         }
1419         sldns_buffer_skip(c->buffer, r);
1420
1421         if(sldns_buffer_remaining(c->buffer) == 0) {
1422                 tcp_callback_writer(c);
1423         }
1424         
1425         return 1;
1426 }
1427
1428 void 
1429 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1430 {
1431         struct comm_point* c = (struct comm_point*)arg;
1432         log_assert(c->type == comm_tcp);
1433         comm_base_now(c->ev->base);
1434
1435         if(event&EV_READ) {
1436                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1437                         reclaim_tcp_handler(c);
1438                         if(!c->tcp_do_close) {
1439                                 fptr_ok(fptr_whitelist_comm_point(
1440                                         c->callback));
1441                                 (void)(*c->callback)(c, c->cb_arg, 
1442                                         NETEVENT_CLOSED, NULL);
1443                         }
1444                 }
1445                 return;
1446         }
1447         if(event&EV_WRITE) {
1448                 if(!comm_point_tcp_handle_write(fd, c)) {
1449                         reclaim_tcp_handler(c);
1450                         if(!c->tcp_do_close) {
1451                                 fptr_ok(fptr_whitelist_comm_point(
1452                                         c->callback));
1453                                 (void)(*c->callback)(c, c->cb_arg, 
1454                                         NETEVENT_CLOSED, NULL);
1455                         }
1456                 }
1457                 return;
1458         }
1459         if(event&EV_TIMEOUT) {
1460                 verbose(VERB_QUERY, "tcp took too long, dropped");
1461                 reclaim_tcp_handler(c);
1462                 if(!c->tcp_do_close) {
1463                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1464                         (void)(*c->callback)(c, c->cb_arg,
1465                                 NETEVENT_TIMEOUT, NULL);
1466                 }
1467                 return;
1468         }
1469         log_err("Ignored event %d for tcphdl.", event);
1470 }
1471
1472 void comm_point_local_handle_callback(int fd, short event, void* arg)
1473 {
1474         struct comm_point* c = (struct comm_point*)arg;
1475         log_assert(c->type == comm_local);
1476         comm_base_now(c->ev->base);
1477
1478         if(event&EV_READ) {
1479                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
1480                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1481                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
1482                                 NULL);
1483                 }
1484                 return;
1485         }
1486         log_err("Ignored event %d for localhdl.", event);
1487 }
1488
1489 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
1490         short event, void* arg)
1491 {
1492         struct comm_point* c = (struct comm_point*)arg;
1493         int err = NETEVENT_NOERROR;
1494         log_assert(c->type == comm_raw);
1495         comm_base_now(c->ev->base);
1496         
1497         if(event&EV_TIMEOUT)
1498                 err = NETEVENT_TIMEOUT;
1499         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1500         (void)(*c->callback)(c, c->cb_arg, err, NULL);
1501 }
1502
1503 struct comm_point* 
1504 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
1505         comm_point_callback_t* callback, void* callback_arg)
1506 {
1507         struct comm_point* c = (struct comm_point*)calloc(1,
1508                 sizeof(struct comm_point));
1509         short evbits;
1510         if(!c)
1511                 return NULL;
1512         c->ev = (struct internal_event*)calloc(1,
1513                 sizeof(struct internal_event));
1514         if(!c->ev) {
1515                 free(c);
1516                 return NULL;
1517         }
1518         c->ev->base = base;
1519         c->fd = fd;
1520         c->buffer = buffer;
1521         c->timeout = NULL;
1522         c->tcp_is_reading = 0;
1523         c->tcp_byte_count = 0;
1524         c->tcp_parent = NULL;
1525         c->max_tcp_count = 0;
1526         c->tcp_handlers = NULL;
1527         c->tcp_free = NULL;
1528         c->type = comm_udp;
1529         c->tcp_do_close = 0;
1530         c->do_not_close = 0;
1531         c->tcp_do_toggle_rw = 0;
1532         c->tcp_check_nb_connect = 0;
1533         c->inuse = 0;
1534         c->callback = callback;
1535         c->cb_arg = callback_arg;
1536         evbits = EV_READ | EV_PERSIST;
1537         /* libevent stuff */
1538         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1539         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1540                 log_err("could not baseset udp event");
1541                 comm_point_delete(c);
1542                 return NULL;
1543         }
1544         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1545                 log_err("could not add udp event");
1546                 comm_point_delete(c);
1547                 return NULL;
1548         }
1549         return c;
1550 }
1551
1552 struct comm_point* 
1553 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
1554         sldns_buffer* buffer, 
1555         comm_point_callback_t* callback, void* callback_arg)
1556 {
1557         struct comm_point* c = (struct comm_point*)calloc(1,
1558                 sizeof(struct comm_point));
1559         short evbits;
1560         if(!c)
1561                 return NULL;
1562         c->ev = (struct internal_event*)calloc(1,
1563                 sizeof(struct internal_event));
1564         if(!c->ev) {
1565                 free(c);
1566                 return NULL;
1567         }
1568         c->ev->base = base;
1569         c->fd = fd;
1570         c->buffer = buffer;
1571         c->timeout = NULL;
1572         c->tcp_is_reading = 0;
1573         c->tcp_byte_count = 0;
1574         c->tcp_parent = NULL;
1575         c->max_tcp_count = 0;
1576         c->tcp_handlers = NULL;
1577         c->tcp_free = NULL;
1578         c->type = comm_udp;
1579         c->tcp_do_close = 0;
1580         c->do_not_close = 0;
1581         c->inuse = 0;
1582         c->tcp_do_toggle_rw = 0;
1583         c->tcp_check_nb_connect = 0;
1584         c->callback = callback;
1585         c->cb_arg = callback_arg;
1586         evbits = EV_READ | EV_PERSIST;
1587         /* libevent stuff */
1588         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1589         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1590                 log_err("could not baseset udp event");
1591                 comm_point_delete(c);
1592                 return NULL;
1593         }
1594         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1595                 log_err("could not add udp event");
1596                 comm_point_delete(c);
1597                 return NULL;
1598         }
1599         return c;
1600 }
1601
1602 static struct comm_point* 
1603 comm_point_create_tcp_handler(struct comm_base *base, 
1604         struct comm_point* parent, size_t bufsize,
1605         comm_point_callback_t* callback, void* callback_arg)
1606 {
1607         struct comm_point* c = (struct comm_point*)calloc(1,
1608                 sizeof(struct comm_point));
1609         short evbits;
1610         if(!c)
1611                 return NULL;
1612         c->ev = (struct internal_event*)calloc(1,
1613                 sizeof(struct internal_event));
1614         if(!c->ev) {
1615                 free(c);
1616                 return NULL;
1617         }
1618         c->ev->base = base;
1619         c->fd = -1;
1620         c->buffer = sldns_buffer_new(bufsize);
1621         if(!c->buffer) {
1622                 free(c->ev);
1623                 free(c);
1624                 return NULL;
1625         }
1626         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1627         if(!c->timeout) {
1628                 sldns_buffer_free(c->buffer);
1629                 free(c->ev);
1630                 free(c);
1631                 return NULL;
1632         }
1633         c->tcp_is_reading = 0;
1634         c->tcp_byte_count = 0;
1635         c->tcp_parent = parent;
1636         c->max_tcp_count = 0;
1637         c->tcp_handlers = NULL;
1638         c->tcp_free = NULL;
1639         c->type = comm_tcp;
1640         c->tcp_do_close = 0;
1641         c->do_not_close = 0;
1642         c->tcp_do_toggle_rw = 1;
1643         c->tcp_check_nb_connect = 0;
1644         c->repinfo.c = c;
1645         c->callback = callback;
1646         c->cb_arg = callback_arg;
1647         /* add to parent free list */
1648         c->tcp_free = parent->tcp_free;
1649         parent->tcp_free = c;
1650         /* libevent stuff */
1651         evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1652         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1653         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1654         {
1655                 log_err("could not basetset tcphdl event");
1656                 parent->tcp_free = c->tcp_free;
1657                 free(c->ev);
1658                 free(c);
1659                 return NULL;
1660         }
1661         return c;
1662 }
1663
1664 struct comm_point* 
1665 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1666         comm_point_callback_t* callback, void* callback_arg)
1667 {
1668         struct comm_point* c = (struct comm_point*)calloc(1,
1669                 sizeof(struct comm_point));
1670         short evbits;
1671         int i;
1672         /* first allocate the TCP accept listener */
1673         if(!c)
1674                 return NULL;
1675         c->ev = (struct internal_event*)calloc(1,
1676                 sizeof(struct internal_event));
1677         if(!c->ev) {
1678                 free(c);
1679                 return NULL;
1680         }
1681         c->ev->base = base;
1682         c->fd = fd;
1683         c->buffer = NULL;
1684         c->timeout = NULL;
1685         c->tcp_is_reading = 0;
1686         c->tcp_byte_count = 0;
1687         c->tcp_parent = NULL;
1688         c->max_tcp_count = num;
1689         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1690                 sizeof(struct comm_point*));
1691         if(!c->tcp_handlers) {
1692                 free(c->ev);
1693                 free(c);
1694                 return NULL;
1695         }
1696         c->tcp_free = NULL;
1697         c->type = comm_tcp_accept;
1698         c->tcp_do_close = 0;
1699         c->do_not_close = 0;
1700         c->tcp_do_toggle_rw = 0;
1701         c->tcp_check_nb_connect = 0;
1702         c->callback = NULL;
1703         c->cb_arg = NULL;
1704         evbits = EV_READ | EV_PERSIST;
1705         /* libevent stuff */
1706         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1707         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1708                 event_add(&c->ev->ev, c->timeout) != 0 )
1709         {
1710                 log_err("could not add tcpacc event");
1711                 comm_point_delete(c);
1712                 return NULL;
1713         }
1714
1715         /* now prealloc the tcp handlers */
1716         for(i=0; i<num; i++) {
1717                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1718                         c, bufsize, callback, callback_arg);
1719                 if(!c->tcp_handlers[i]) {
1720                         comm_point_delete(c);
1721                         return NULL;
1722                 }
1723         }
1724         
1725         return c;
1726 }
1727
1728 struct comm_point* 
1729 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1730         comm_point_callback_t* callback, void* callback_arg)
1731 {
1732         struct comm_point* c = (struct comm_point*)calloc(1,
1733                 sizeof(struct comm_point));
1734         short evbits;
1735         if(!c)
1736                 return NULL;
1737         c->ev = (struct internal_event*)calloc(1,
1738                 sizeof(struct internal_event));
1739         if(!c->ev) {
1740                 free(c);
1741                 return NULL;
1742         }
1743         c->ev->base = base;
1744         c->fd = -1;
1745         c->buffer = sldns_buffer_new(bufsize);
1746         if(!c->buffer) {
1747                 free(c->ev);
1748                 free(c);
1749                 return NULL;
1750         }
1751         c->timeout = NULL;
1752         c->tcp_is_reading = 0;
1753         c->tcp_byte_count = 0;
1754         c->tcp_parent = NULL;
1755         c->max_tcp_count = 0;
1756         c->tcp_handlers = NULL;
1757         c->tcp_free = NULL;
1758         c->type = comm_tcp;
1759         c->tcp_do_close = 0;
1760         c->do_not_close = 0;
1761         c->tcp_do_toggle_rw = 1;
1762         c->tcp_check_nb_connect = 1;
1763         c->repinfo.c = c;
1764         c->callback = callback;
1765         c->cb_arg = callback_arg;
1766         evbits = EV_PERSIST | EV_WRITE;
1767         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1768         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1769         {
1770                 log_err("could not basetset tcpout event");
1771                 sldns_buffer_free(c->buffer);
1772                 free(c->ev);
1773                 free(c);
1774                 return NULL;
1775         }
1776
1777         return c;
1778 }
1779
1780 struct comm_point* 
1781 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1782         comm_point_callback_t* callback, void* callback_arg)
1783 {
1784         struct comm_point* c = (struct comm_point*)calloc(1,
1785                 sizeof(struct comm_point));
1786         short evbits;
1787         if(!c)
1788                 return NULL;
1789         c->ev = (struct internal_event*)calloc(1,
1790                 sizeof(struct internal_event));
1791         if(!c->ev) {
1792                 free(c);
1793                 return NULL;
1794         }
1795         c->ev->base = base;
1796         c->fd = fd;
1797         c->buffer = sldns_buffer_new(bufsize);
1798         if(!c->buffer) {
1799                 free(c->ev);
1800                 free(c);
1801                 return NULL;
1802         }
1803         c->timeout = NULL;
1804         c->tcp_is_reading = 1;
1805         c->tcp_byte_count = 0;
1806         c->tcp_parent = NULL;
1807         c->max_tcp_count = 0;
1808         c->tcp_handlers = NULL;
1809         c->tcp_free = NULL;
1810         c->type = comm_local;
1811         c->tcp_do_close = 0;
1812         c->do_not_close = 1;
1813         c->tcp_do_toggle_rw = 0;
1814         c->tcp_check_nb_connect = 0;
1815         c->callback = callback;
1816         c->cb_arg = callback_arg;
1817         /* libevent stuff */
1818         evbits = EV_PERSIST | EV_READ;
1819         event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback, 
1820                 c);
1821         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1822                 event_add(&c->ev->ev, c->timeout) != 0 )
1823         {
1824                 log_err("could not add localhdl event");
1825                 free(c->ev);
1826                 free(c);
1827                 return NULL;
1828         }
1829         return c;
1830 }
1831
1832 struct comm_point* 
1833 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
1834         comm_point_callback_t* callback, void* callback_arg)
1835 {
1836         struct comm_point* c = (struct comm_point*)calloc(1,
1837                 sizeof(struct comm_point));
1838         short evbits;
1839         if(!c)
1840                 return NULL;
1841         c->ev = (struct internal_event*)calloc(1,
1842                 sizeof(struct internal_event));
1843         if(!c->ev) {
1844                 free(c);
1845                 return NULL;
1846         }
1847         c->ev->base = base;
1848         c->fd = fd;
1849         c->buffer = NULL;
1850         c->timeout = NULL;
1851         c->tcp_is_reading = 0;
1852         c->tcp_byte_count = 0;
1853         c->tcp_parent = NULL;
1854         c->max_tcp_count = 0;
1855         c->tcp_handlers = NULL;
1856         c->tcp_free = NULL;
1857         c->type = comm_raw;
1858         c->tcp_do_close = 0;
1859         c->do_not_close = 1;
1860         c->tcp_do_toggle_rw = 0;
1861         c->tcp_check_nb_connect = 0;
1862         c->callback = callback;
1863         c->cb_arg = callback_arg;
1864         /* libevent stuff */
1865         if(writing)
1866                 evbits = EV_PERSIST | EV_WRITE;
1867         else    evbits = EV_PERSIST | EV_READ;
1868         event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback, 
1869                 c);
1870         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1871                 event_add(&c->ev->ev, c->timeout) != 0 )
1872         {
1873                 log_err("could not add rawhdl event");
1874                 free(c->ev);
1875                 free(c);
1876                 return NULL;
1877         }
1878         return c;
1879 }
1880
1881 void 
1882 comm_point_close(struct comm_point* c)
1883 {
1884         if(!c)
1885                 return;
1886         if(c->fd != -1)
1887                 if(event_del(&c->ev->ev) != 0) {
1888                         log_err("could not event_del on close");
1889                 }
1890         /* close fd after removing from event lists, or epoll.. is messed up */
1891         if(c->fd != -1 && !c->do_not_close) {
1892                 verbose(VERB_ALGO, "close fd %d", c->fd);
1893 #ifndef USE_WINSOCK
1894                 close(c->fd);
1895 #else
1896                 closesocket(c->fd);
1897 #endif
1898         }
1899         c->fd = -1;
1900 }
1901
1902 void 
1903 comm_point_delete(struct comm_point* c)
1904 {
1905         if(!c) 
1906                 return;
1907         if(c->type == comm_tcp && c->ssl) {
1908 #ifdef HAVE_SSL
1909                 SSL_shutdown(c->ssl);
1910                 SSL_free(c->ssl);
1911 #endif
1912         }
1913         comm_point_close(c);
1914         if(c->tcp_handlers) {
1915                 int i;
1916                 for(i=0; i<c->max_tcp_count; i++)
1917                         comm_point_delete(c->tcp_handlers[i]);
1918                 free(c->tcp_handlers);
1919         }
1920         free(c->timeout);
1921         if(c->type == comm_tcp || c->type == comm_local)
1922                 sldns_buffer_free(c->buffer);
1923         free(c->ev);
1924         free(c);
1925 }
1926
1927 void 
1928 comm_point_send_reply(struct comm_reply *repinfo)
1929 {
1930         log_assert(repinfo && repinfo->c);
1931         if(repinfo->c->type == comm_udp) {
1932                 if(repinfo->srctype)
1933                         comm_point_send_udp_msg_if(repinfo->c, 
1934                         repinfo->c->buffer, (struct sockaddr*)&repinfo->addr, 
1935                         repinfo->addrlen, repinfo);
1936                 else
1937                         comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
1938                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
1939         } else {
1940                 comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
1941         }
1942 }
1943
1944 void 
1945 comm_point_drop_reply(struct comm_reply* repinfo)
1946 {
1947         if(!repinfo)
1948                 return;
1949         log_assert(repinfo && repinfo->c);
1950         log_assert(repinfo->c->type != comm_tcp_accept);
1951         if(repinfo->c->type == comm_udp)
1952                 return;
1953         reclaim_tcp_handler(repinfo->c);
1954 }
1955
1956 void 
1957 comm_point_stop_listening(struct comm_point* c)
1958 {
1959         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
1960         if(event_del(&c->ev->ev) != 0) {
1961                 log_err("event_del error to stoplisten");
1962         }
1963 }
1964
1965 void 
1966 comm_point_start_listening(struct comm_point* c, int newfd, int sec)
1967 {
1968         verbose(VERB_ALGO, "comm point start listening %d", 
1969                 c->fd==-1?newfd:c->fd);
1970         if(c->type == comm_tcp_accept && !c->tcp_free) {
1971                 /* no use to start listening no free slots. */
1972                 return;
1973         }
1974         if(sec != -1 && sec != 0) {
1975                 if(!c->timeout) {
1976                         c->timeout = (struct timeval*)malloc(sizeof(
1977                                 struct timeval));
1978                         if(!c->timeout) {
1979                                 log_err("cpsl: malloc failed. No net read.");
1980                                 return;
1981                         }
1982                 }
1983                 c->ev->ev.ev_events |= EV_TIMEOUT;
1984 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
1985                 c->timeout->tv_sec = sec;
1986                 c->timeout->tv_usec = 0;
1987 #endif /* S_SPLINT_S */
1988         }
1989         if(c->type == comm_tcp) {
1990                 c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1991                 if(c->tcp_is_reading)
1992                         c->ev->ev.ev_events |= EV_READ;
1993                 else    c->ev->ev.ev_events |= EV_WRITE;
1994         }
1995         if(newfd != -1) {
1996                 if(c->fd != -1) {
1997 #ifndef USE_WINSOCK
1998                         close(c->fd);
1999 #else
2000                         closesocket(c->fd);
2001 #endif
2002                 }
2003                 c->fd = newfd;
2004                 c->ev->ev.ev_fd = c->fd;
2005         }
2006         if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
2007                 log_err("event_add failed. in cpsl.");
2008         }
2009 }
2010
2011 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
2012 {
2013         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
2014         if(event_del(&c->ev->ev) != 0) {
2015                 log_err("event_del error to cplf");
2016         }
2017         c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
2018         if(rd) c->ev->ev.ev_events |= EV_READ;
2019         if(wr) c->ev->ev.ev_events |= EV_WRITE;
2020         if(event_add(&c->ev->ev, c->timeout) != 0) {
2021                 log_err("event_add failed. in cplf.");
2022         }
2023 }
2024
2025 size_t comm_point_get_mem(struct comm_point* c)
2026 {
2027         size_t s;
2028         if(!c) 
2029                 return 0;
2030         s = sizeof(*c) + sizeof(*c->ev);
2031         if(c->timeout) 
2032                 s += sizeof(*c->timeout);
2033         if(c->type == comm_tcp || c->type == comm_local)
2034                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
2035         if(c->type == comm_tcp_accept) {
2036                 int i;
2037                 for(i=0; i<c->max_tcp_count; i++)
2038                         s += comm_point_get_mem(c->tcp_handlers[i]);
2039         }
2040         return s;
2041 }
2042
2043 struct comm_timer* 
2044 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2045 {
2046         struct comm_timer *tm = (struct comm_timer*)calloc(1,
2047                 sizeof(struct comm_timer));
2048         if(!tm)
2049                 return NULL;
2050         tm->ev_timer = (struct internal_timer*)calloc(1,
2051                 sizeof(struct internal_timer));
2052         if(!tm->ev_timer) {
2053                 log_err("malloc failed");
2054                 free(tm);
2055                 return NULL;
2056         }
2057         tm->ev_timer->base = base;
2058         tm->callback = cb;
2059         tm->cb_arg = cb_arg;
2060         event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT, 
2061                 comm_timer_callback, tm);
2062         if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2063                 log_err("timer_create: event_base_set failed.");
2064                 free(tm->ev_timer);
2065                 free(tm);
2066                 return NULL;
2067         }
2068         return tm;
2069 }
2070
2071 void 
2072 comm_timer_disable(struct comm_timer* timer)
2073 {
2074         if(!timer)
2075                 return;
2076         evtimer_del(&timer->ev_timer->ev);
2077         timer->ev_timer->enabled = 0;
2078 }
2079
2080 void 
2081 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2082 {
2083         log_assert(tv);
2084         if(timer->ev_timer->enabled)
2085                 comm_timer_disable(timer);
2086         event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2087                 comm_timer_callback, timer);
2088         if(event_base_set(timer->ev_timer->base->eb->base, 
2089                 &timer->ev_timer->ev) != 0)
2090                 log_err("comm_timer_set: set_base failed.");
2091         if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2092                 log_err("comm_timer_set: evtimer_add failed.");
2093         timer->ev_timer->enabled = 1;
2094 }
2095
2096 void 
2097 comm_timer_delete(struct comm_timer* timer)
2098 {
2099         if(!timer)
2100                 return;
2101         comm_timer_disable(timer);
2102         free(timer->ev_timer);
2103         free(timer);
2104 }
2105
2106 void 
2107 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2108 {
2109         struct comm_timer* tm = (struct comm_timer*)arg;
2110         if(!(event&EV_TIMEOUT))
2111                 return;
2112         comm_base_now(tm->ev_timer->base);
2113         tm->ev_timer->enabled = 0;
2114         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2115         (*tm->callback)(tm->cb_arg);
2116 }
2117
2118 int 
2119 comm_timer_is_set(struct comm_timer* timer)
2120 {
2121         return (int)timer->ev_timer->enabled;
2122 }
2123
2124 size_t 
2125 comm_timer_get_mem(struct comm_timer* timer)
2126 {
2127         return sizeof(*timer) + sizeof(struct internal_timer);
2128 }
2129
2130 struct comm_signal* 
2131 comm_signal_create(struct comm_base* base,
2132         void (*callback)(int, void*), void* cb_arg)
2133 {
2134         struct comm_signal* com = (struct comm_signal*)malloc(
2135                 sizeof(struct comm_signal));
2136         if(!com) {
2137                 log_err("malloc failed");
2138                 return NULL;
2139         }
2140         com->base = base;
2141         com->callback = callback;
2142         com->cb_arg = cb_arg;
2143         com->ev_signal = NULL;
2144         return com;
2145 }
2146
2147 void 
2148 comm_signal_callback(int sig, short event, void* arg)
2149 {
2150         struct comm_signal* comsig = (struct comm_signal*)arg;
2151         if(!(event & EV_SIGNAL))
2152                 return;
2153         comm_base_now(comsig->base);
2154         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2155         (*comsig->callback)(sig, comsig->cb_arg);
2156 }
2157
2158 int 
2159 comm_signal_bind(struct comm_signal* comsig, int sig)
2160 {
2161         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
2162                 sizeof(struct internal_signal));
2163         if(!entry) {
2164                 log_err("malloc failed");
2165                 return 0;
2166         }
2167         log_assert(comsig);
2168         /* add signal event */
2169         signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2170         if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2171                 log_err("Could not set signal base");
2172                 free(entry);
2173                 return 0;
2174         }
2175         if(signal_add(&entry->ev, NULL) != 0) {
2176                 log_err("Could not add signal handler");
2177                 free(entry);
2178                 return 0;
2179         }
2180         /* link into list */
2181         entry->next = comsig->ev_signal;
2182         comsig->ev_signal = entry;
2183         return 1;
2184 }
2185
2186 void 
2187 comm_signal_delete(struct comm_signal* comsig)
2188 {
2189         struct internal_signal* p, *np;
2190         if(!comsig)
2191                 return;
2192         p=comsig->ev_signal;
2193         while(p) {
2194                 np = p->next;
2195                 signal_del(&p->ev);
2196                 free(p);
2197                 p = np;
2198         }
2199         free(comsig);
2200 }