]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - util/netevent.c
import unbound 1.4.17
[FreeBSD/FreeBSD.git] / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include <ldns/wire2host.h>
43 #include "util/netevent.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/fptr_wlist.h"
47 #include <openssl/ssl.h>
48 #include <openssl/err.h>
49
50 /* -------- Start of local definitions -------- */
/*
 * Fallbacks for the control-message size macros. Each one is supplied
 * only when the platform headers have not already defined it.
 */

/** CMSG_ALIGN fallback: use _CMSG_DATA_ALIGN when present, else round the
 * length up to a multiple of sizeof(long) */
#ifndef CMSG_ALIGN
#  if defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN _CMSG_DATA_ALIGN
#  else
#    define CMSG_ALIGN(n) (((n)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif

/** CMSG_LEN fallback: aligned header size plus the payload length */
#ifndef CMSG_LEN
#  define CMSG_LEN(n) (CMSG_ALIGN(sizeof(struct cmsghdr))+(n))
#endif

/** CMSG_SPACE fallback: aligned payload plus the aligned header, where the
 * header is aligned with _CMSG_HDR_ALIGN when that is available */
#ifndef CMSG_SPACE
#  if defined(_CMSG_HDR_ALIGN)
#    define CMSG_SPACE(n) (CMSG_ALIGN(n)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(n) (CMSG_ALIGN(n)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
73
/** Timeout, in seconds, for reading or writing a TCP query */
#define TCP_QUERY_TIMEOUT 120

/** Number of UDP reads performed for each read indication from select;
 * a single read when NONBLOCKING_IS_BROKEN is defined */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
83
84 /* We define libevent structures here to hide the libevent stuff. */
85
86 #ifdef USE_MINI_EVENT
87 #  ifdef USE_WINSOCK
88 #    include "util/winsock_event.h"
89 #  else
90 #    include "util/mini_event.h"
91 #  endif /* USE_WINSOCK */
92 #else /* USE_MINI_EVENT */
93    /* we use libevent */
94 #  include <event.h>
95 #endif /* USE_MINI_EVENT */
96
97 /**
98  * The internal event structure for keeping libevent info for the event.
99  * Possibly other structures (list, tree) this is part of.
100  */
101 struct internal_event {
102         /** the comm base */
103         struct comm_base* base;
104         /** libevent event type, alloced here */
105         struct event ev;
106 };
107
108 /**
109  * Internal base structure, so that every thread has its own events.
110  */
111 struct internal_base {
112         /** libevent event_base type. */
113         struct event_base* base;
114         /** seconds time pointer points here */
115         uint32_t secs;
116         /** timeval with current time */
117         struct timeval now;
118         /** the event used for slow_accept timeouts */
119         struct event slow_accept;
120         /** true if slow_accept is enabled */
121         int slow_accept_enabled;
122 };
123
124 /**
125  * Internal timer structure, to store timer event in.
126  */
127 struct internal_timer {
128         /** the comm base */
129         struct comm_base* base;
130         /** libevent event type, alloced here */
131         struct event ev;
132         /** is timer enabled */
133         uint8_t enabled;
134 };
135
136 /**
137  * Internal signal structure, to store signal event in.
138  */
139 struct internal_signal {
140         /** libevent event type, alloced here */
141         struct event ev;
142         /** next in signal list */
143         struct internal_signal* next;
144 };
145
146 /** create a tcp handler with a parent */
147 static struct comm_point* comm_point_create_tcp_handler(
148         struct comm_base *base, struct comm_point* parent, size_t bufsize,
149         comm_point_callback_t* callback, void* callback_arg);
150
151 /* -------- End of local definitions -------- */
152
153 #ifdef USE_MINI_EVENT
154 /** minievent updates the time when it blocks. */
155 #define comm_base_now(x) /* nothing to do */
156 #else /* !USE_MINI_EVENT */
157 /** fillup the time values in the event base */
158 static void
159 comm_base_now(struct comm_base* b)
160 {
161         if(gettimeofday(&b->eb->now, NULL) < 0) {
162                 log_err("gettimeofday: %s", strerror(errno));
163         }
164         b->eb->secs = (uint32_t)b->eb->now.tv_sec;
165 }
166 #endif /* USE_MINI_EVENT */
167
168 struct comm_base* 
169 comm_base_create(int sigs)
170 {
171         struct comm_base* b = (struct comm_base*)calloc(1,
172                 sizeof(struct comm_base));
173         if(!b)
174                 return NULL;
175         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
176         if(!b->eb) {
177                 free(b);
178                 return NULL;
179         }
180 #ifdef USE_MINI_EVENT
181         (void)sigs;
182         /* use mini event time-sharing feature */
183         b->eb->base = event_init(&b->eb->secs, &b->eb->now);
184 #else
185 #  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
186         /* libev */
187         if(sigs)
188                 b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
189         else
190                 b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
191 #  else
192         (void)sigs;
193 #    ifdef HAVE_EVENT_BASE_NEW
194         b->eb->base = event_base_new();
195 #    else
196         b->eb->base = event_init();
197 #    endif
198 #  endif
199 #endif
200         if(!b->eb->base) {
201                 free(b->eb);
202                 free(b);
203                 return NULL;
204         }
205         comm_base_now(b);
206         /* avoid event_get_method call which causes crashes even when
207          * not printing, because its result is passed */
208         verbose(VERB_ALGO, 
209 #if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
210                 "libev"
211 #elif defined(USE_MINI_EVENT)
212                 "event "
213 #else
214                 "libevent "
215 #endif
216                 "%s uses %s method.", 
217                 event_get_version(), 
218 #ifdef HAVE_EVENT_BASE_GET_METHOD
219                 event_base_get_method(b->eb->base)
220 #else
221                 "not_obtainable"
222 #endif
223         );
224         return b;
225 }
226
227 void 
228 comm_base_delete(struct comm_base* b)
229 {
230         if(!b)
231                 return;
232         if(b->eb->slow_accept_enabled) {
233                 if(event_del(&b->eb->slow_accept) != 0) {
234                         log_err("could not event_del slow_accept");
235                 }
236         }
237 #ifdef USE_MINI_EVENT
238         event_base_free(b->eb->base);
239 #elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
240         /* only libevent 1.2+ has it, but in 1.2 it is broken - 
241            assertion fails on signal handling ev that is not deleted
242            in libevent 1.3c (event_base_once appears) this is fixed. */
243         event_base_free(b->eb->base);
244 #endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
245         b->eb->base = NULL;
246         free(b->eb);
247         free(b);
248 }
249
250 void 
251 comm_base_timept(struct comm_base* b, uint32_t** tt, struct timeval** tv)
252 {
253         *tt = &b->eb->secs;
254         *tv = &b->eb->now;
255 }
256
257 void 
258 comm_base_dispatch(struct comm_base* b)
259 {
260         int retval;
261         retval = event_base_dispatch(b->eb->base);
262         if(retval != 0) {
263                 fatal_exit("event_dispatch returned error %d, "
264                         "errno is %s", retval, strerror(errno));
265         }
266 }
267
268 void comm_base_exit(struct comm_base* b)
269 {
270         if(event_base_loopexit(b->eb->base, NULL) != 0) {
271                 log_err("Could not loopexit");
272         }
273 }
274
275 void comm_base_set_slow_accept_handlers(struct comm_base* b,
276         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
277 {
278         b->stop_accept = stop_acc;
279         b->start_accept = start_acc;
280         b->cb_arg = arg;
281 }
282
283 struct event_base* comm_base_internal(struct comm_base* b)
284 {
285         return b->eb->base;
286 }
287
288 /** see if errno for udp has to be logged or not uses globals */
289 static int
290 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
291 {
292         /* do not log transient errors (unless high verbosity) */
293 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
294         switch(errno) {
295 #  ifdef ENETUNREACH
296                 case ENETUNREACH:
297 #  endif
298 #  ifdef EHOSTDOWN
299                 case EHOSTDOWN:
300 #  endif
301 #  ifdef EHOSTUNREACH
302                 case EHOSTUNREACH:
303 #  endif
304 #  ifdef ENETDOWN
305                 case ENETDOWN:
306 #  endif
307                         if(verbosity < VERB_ALGO)
308                                 return 0;
309                 default:
310                         break;
311         }
312 #endif
313         /* squelch errors where people deploy AAAA ::ffff:bla for
314          * authority servers, which we try for intranets. */
315         if(errno == EINVAL && addr_is_ip4mapped(
316                 (struct sockaddr_storage*)addr, addrlen) &&
317                 verbosity < VERB_DETAIL)
318                 return 0;
319         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
320          * but a dns cache does not need it. */
321         if(errno == EACCES && addr_is_broadcast(
322                 (struct sockaddr_storage*)addr, addrlen) &&
323                 verbosity < VERB_DETAIL)
324                 return 0;
325         return 1;
326 }
327
/** TCP connect failures are filtered with the same rules as UDP sends */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);
	return needs_log;
}
332
333 /* send a UDP reply */
334 int
335 comm_point_send_udp_msg(struct comm_point *c, ldns_buffer* packet,
336         struct sockaddr* addr, socklen_t addrlen) 
337 {
338         ssize_t sent;
339         log_assert(c->fd != -1);
340 #ifdef UNBOUND_DEBUG
341         if(ldns_buffer_remaining(packet) == 0)
342                 log_err("error: send empty UDP packet");
343 #endif
344         log_assert(addr && addrlen > 0);
345         sent = sendto(c->fd, (void*)ldns_buffer_begin(packet), 
346                 ldns_buffer_remaining(packet), 0,
347                 addr, addrlen);
348         if(sent == -1) {
349                 if(!udp_send_errno_needs_log(addr, addrlen))
350                         return 0;
351 #ifndef USE_WINSOCK
352                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
353 #else
354                 verbose(VERB_OPS, "sendto failed: %s", 
355                         wsa_strerror(WSAGetLastError()));
356 #endif
357                 log_addr(VERB_OPS, "remote address is", 
358                         (struct sockaddr_storage*)addr, addrlen);
359                 return 0;
360         } else if((size_t)sent != ldns_buffer_remaining(packet)) {
361                 log_err("sent %d in place of %d bytes", 
362                         (int)sent, (int)ldns_buffer_remaining(packet));
363                 return 0;
364         }
365         return 1;
366 }
367
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/** log the ancillary (packet info) data of a reply, for debugging */
static void p_ancil(const char* str, struct comm_reply* r)
{
	switch(r->srctype) {
	case 6: {
		char addrtxt[1024];
		if(inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 
			addrtxt, (socklen_t)sizeof(addrtxt)) == 0) {
			strncpy(addrtxt, "(inet_ntop error)", sizeof(addrtxt));
		}
		addrtxt[sizeof(addrtxt)-1]=0;
		log_info("%s: %s %d", str, addrtxt,
			r->pktinfo.v6info.ipi6_ifindex);
		break;
	}
	case 4: {
#ifdef IP_PKTINFO
		char addrtxt[1024], spectxt[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 
			addrtxt, (socklen_t)sizeof(addrtxt)) == 0) {
			strncpy(addrtxt, "(inet_ntop error)", sizeof(addrtxt));
		}
		addrtxt[sizeof(addrtxt)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 
			spectxt, (socklen_t)sizeof(spectxt)) == 0) {
			strncpy(spectxt, "(inet_ntop error)", sizeof(spectxt));
		}
		spectxt[sizeof(spectxt)-1]=0;
#else
		/* no ipi_spec_dst member: print an empty string for it */
		spectxt[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			addrtxt, spectxt);
#elif defined(IP_RECVDSTADDR)
		char addrtxt[1024];
		if(inet_ntop(AF_INET, &r->pktinfo.v4addr, 
			addrtxt, (socklen_t)sizeof(addrtxt)) == 0) {
			strncpy(addrtxt, "(inet_ntop error)", sizeof(addrtxt));
		}
		addrtxt[sizeof(addrtxt)-1]=0;
		log_info("%s: %s", str, addrtxt);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		break;
	}
	default:
		log_info("%s: unknown srctype %d", str, r->srctype);
		break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
415
416 /** send a UDP reply over specified interface*/
417 static int
418 comm_point_send_udp_msg_if(struct comm_point *c, ldns_buffer* packet,
419         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
420 {
421 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
422         ssize_t sent;
423         struct msghdr msg;
424         struct iovec iov[1];
425         char control[256];
426 #ifndef S_SPLINT_S
427         struct cmsghdr *cmsg;
428 #endif /* S_SPLINT_S */
429
430         log_assert(c->fd != -1);
431 #ifdef UNBOUND_DEBUG
432         if(ldns_buffer_remaining(packet) == 0)
433                 log_err("error: send empty UDP packet");
434 #endif
435         log_assert(addr && addrlen > 0);
436
437         msg.msg_name = addr;
438         msg.msg_namelen = addrlen;
439         iov[0].iov_base = ldns_buffer_begin(packet);
440         iov[0].iov_len = ldns_buffer_remaining(packet);
441         msg.msg_iov = iov;
442         msg.msg_iovlen = 1;
443         msg.msg_control = control;
444 #ifndef S_SPLINT_S
445         msg.msg_controllen = sizeof(control);
446 #endif /* S_SPLINT_S */
447         msg.msg_flags = 0;
448
449 #ifndef S_SPLINT_S
450         cmsg = CMSG_FIRSTHDR(&msg);
451         if(r->srctype == 4) {
452 #ifdef IP_PKTINFO
453                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
454                 log_assert(msg.msg_controllen <= sizeof(control));
455                 cmsg->cmsg_level = IPPROTO_IP;
456                 cmsg->cmsg_type = IP_PKTINFO;
457                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
458                         sizeof(struct in_pktinfo));
459                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
460 #elif defined(IP_SENDSRCADDR)
461                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
462                 log_assert(msg.msg_controllen <= sizeof(control));
463                 cmsg->cmsg_level = IPPROTO_IP;
464                 cmsg->cmsg_type = IP_SENDSRCADDR;
465                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
466                         sizeof(struct in_addr));
467                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
468 #else
469                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
470                 msg.msg_control = NULL;
471 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
472         } else if(r->srctype == 6) {
473                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
474                 log_assert(msg.msg_controllen <= sizeof(control));
475                 cmsg->cmsg_level = IPPROTO_IPV6;
476                 cmsg->cmsg_type = IPV6_PKTINFO;
477                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
478                         sizeof(struct in6_pktinfo));
479                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
480         } else {
481                 /* try to pass all 0 to use default route */
482                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
483                 log_assert(msg.msg_controllen <= sizeof(control));
484                 cmsg->cmsg_level = IPPROTO_IPV6;
485                 cmsg->cmsg_type = IPV6_PKTINFO;
486                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
487                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
488         }
489 #endif /* S_SPLINT_S */
490         if(verbosity >= VERB_ALGO)
491                 p_ancil("send_udp over interface", r);
492         sent = sendmsg(c->fd, &msg, 0);
493         if(sent == -1) {
494                 if(!udp_send_errno_needs_log(addr, addrlen))
495                         return 0;
496                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
497                 log_addr(VERB_OPS, "remote address is", 
498                         (struct sockaddr_storage*)addr, addrlen);
499                 return 0;
500         } else if((size_t)sent != ldns_buffer_remaining(packet)) {
501                 log_err("sent %d in place of %d bytes", 
502                         (int)sent, (int)ldns_buffer_remaining(packet));
503                 return 0;
504         }
505         return 1;
506 #else
507         (void)c;
508         (void)packet;
509         (void)addr;
510         (void)addrlen;
511         (void)r;
512         log_err("sendmsg: IPV6_PKTINFO not supported");
513         return 0;
514 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
515 }
516
/**
 * Event callback for a UDP comm point that receives with recvmsg so that
 * the packet info ancillary data of every datagram is captured in the
 * reply info. Reads up to NUM_UDP_PER_SELECT datagrams per invocation,
 * calls the comm point callback for each and, when that returns nonzero,
 * sends the buffer back with the captured packet info.
 * @param fd: the socket that became readable.
 * @param event: event flags; nothing is done unless EV_READ is set.
 * @param arg: the comm point (struct comm_point*).
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* the union member of type struct cmsghdr makes sure the buffer is
	 * suitably aligned for the cmsghdr structures that are read from it */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		ldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(ldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = ldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = ldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			/* EAGAIN and EINTR are not reported */
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		ldns_buffer_skip(rep.c->buffer, rcv);
		ldns_buffer_flip(rep.c->buffer);
		/* srctype stays 0 if no packet info is found below */
		rep.srctype = 0;
#ifndef S_SPLINT_S
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		if(rep.c->fd == -1) /* commpoint closed */
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
611
612 void 
613 comm_point_udp_callback(int fd, short event, void* arg)
614 {
615         struct comm_reply rep;
616         ssize_t rcv;
617         int i;
618
619         rep.c = (struct comm_point*)arg;
620         log_assert(rep.c->type == comm_udp);
621
622         if(!(event&EV_READ))
623                 return;
624         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
625         comm_base_now(rep.c->ev->base);
626         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
627                 ldns_buffer_clear(rep.c->buffer);
628                 rep.addrlen = (socklen_t)sizeof(rep.addr);
629                 log_assert(fd != -1);
630                 log_assert(ldns_buffer_remaining(rep.c->buffer) > 0);
631                 rcv = recvfrom(fd, (void*)ldns_buffer_begin(rep.c->buffer), 
632                         ldns_buffer_remaining(rep.c->buffer), 0, 
633                         (struct sockaddr*)&rep.addr, &rep.addrlen);
634                 if(rcv == -1) {
635 #ifndef USE_WINSOCK
636                         if(errno != EAGAIN && errno != EINTR)
637                                 log_err("recvfrom %d failed: %s", 
638                                         fd, strerror(errno));
639 #else
640                         if(WSAGetLastError() != WSAEINPROGRESS &&
641                                 WSAGetLastError() != WSAECONNRESET &&
642                                 WSAGetLastError()!= WSAEWOULDBLOCK)
643                                 log_err("recvfrom failed: %s",
644                                         wsa_strerror(WSAGetLastError()));
645 #endif
646                         return;
647                 }
648                 ldns_buffer_skip(rep.c->buffer, rcv);
649                 ldns_buffer_flip(rep.c->buffer);
650                 rep.srctype = 0;
651                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
652                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
653                         /* send back immediate reply */
654                         (void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
655                                 (struct sockaddr*)&rep.addr, rep.addrlen);
656                 }
657                 if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
658                 another UDP port. Note rep.c cannot be reused with TCP fd. */
659                         break;
660         }
661 }
662
663 /** Use a new tcp handler for new query fd, set to read query */
664 static void
665 setup_tcp_handler(struct comm_point* c, int fd) 
666 {
667         log_assert(c->type == comm_tcp);
668         log_assert(c->fd == -1);
669         ldns_buffer_clear(c->buffer);
670         c->tcp_is_reading = 1;
671         c->tcp_byte_count = 0;
672         comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
673 }
674
675 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
676         short ATTR_UNUSED(event), void* arg)
677 {
678         struct comm_base* b = (struct comm_base*)arg;
679         /* timeout for the slow accept, re-enable accepts again */
680         if(b->start_accept) {
681                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
682                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
683                 (*b->start_accept)(b->cb_arg);
684                 b->eb->slow_accept_enabled = 0;
685         }
686 }
687
688 int comm_point_perform_accept(struct comm_point* c,
689         struct sockaddr_storage* addr, socklen_t* addrlen)
690 {
691         int new_fd;
692         *addrlen = (socklen_t)sizeof(*addr);
693         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
694         if(new_fd == -1) {
695 #ifndef USE_WINSOCK
696                 /* EINTR is signal interrupt. others are closed connection. */
697                 if(     errno == EINTR || errno == EAGAIN
698 #ifdef EWOULDBLOCK
699                         || errno == EWOULDBLOCK 
700 #endif
701 #ifdef ECONNABORTED
702                         || errno == ECONNABORTED 
703 #endif
704 #ifdef EPROTO
705                         || errno == EPROTO
706 #endif /* EPROTO */
707                         )
708                         return -1;
709 #if defined(ENFILE) && defined(EMFILE)
710                 if(errno == ENFILE || errno == EMFILE) {
711                         /* out of file descriptors, likely outside of our
712                          * control. stop accept() calls for some time */
713                         if(c->ev->base->stop_accept) {
714                                 struct comm_base* b = c->ev->base;
715                                 struct timeval tv;
716                                 verbose(VERB_ALGO, "out of file descriptors: "
717                                         "slow accept");
718                                 b->eb->slow_accept_enabled = 1;
719                                 fptr_ok(fptr_whitelist_stop_accept(
720                                         b->stop_accept));
721                                 (*b->stop_accept)(b->cb_arg);
722                                 /* set timeout, no mallocs */
723                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
724                                 tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
725                                 event_set(&b->eb->slow_accept, -1, EV_TIMEOUT, 
726                                         comm_base_handle_slow_accept, b);
727                                 if(event_base_set(b->eb->base,
728                                         &b->eb->slow_accept) != 0) {
729                                         /* we do not want to log here, because
730                                          * that would spam the logfiles.
731                                          * error: "event_base_set failed." */
732                                 }
733                                 if(event_add(&b->eb->slow_accept, &tv) != 0) {
734                                         /* we do not want to log here,
735                                          * error: "event_add failed." */
736                                 }
737                         }
738                         return -1;
739                 }
740 #endif
741                 log_err("accept failed: %s", strerror(errno));
742 #else /* USE_WINSOCK */
743                 if(WSAGetLastError() == WSAEINPROGRESS ||
744                         WSAGetLastError() == WSAECONNRESET)
745                         return -1;
746                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
747                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
748                         return -1;
749                 }
750                 log_err("accept failed: %s", wsa_strerror(WSAGetLastError()));
751 #endif
752                 log_addr(0, "remote address is", addr, *addrlen);
753                 return -1;
754         }
755         fd_set_nonblock(new_fd);
756         return new_fd;
757 }
758
759 #ifdef USE_WINSOCK
760 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
761         int ATTR_UNUSED(argi), long argl, long retvalue)
762 {
763         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
764                 (oper&BIO_CB_RETURN)?"return":"before",
765                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
766                 WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
767         /* on windows, check if previous operation caused EWOULDBLOCK */
768         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
769                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
770                 if(WSAGetLastError() == WSAEWOULDBLOCK)
771                         winsock_tcp_wouldblock((struct event*)
772                                 BIO_get_callback_arg(b), EV_READ);
773         }
774         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
775                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
776                 if(WSAGetLastError() == WSAEWOULDBLOCK)
777                         winsock_tcp_wouldblock((struct event*)
778                                 BIO_get_callback_arg(b), EV_WRITE);
779         }
780         /* return original return value */
781         return retvalue;
782 }
783
784 /** set win bio callbacks for nonblocking operations */
785 void
786 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
787 {
788         SSL* ssl = (SSL*)thessl;
789         /* set them both just in case, but usually they are the same BIO */
790         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
791         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)&c->ev->ev);
792         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
793         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)&c->ev->ev);
794 }
795 #endif
796
797 void 
798 comm_point_tcp_accept_callback(int fd, short event, void* arg)
799 {
800         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
801         int new_fd;
802         log_assert(c->type == comm_tcp_accept);
803         if(!(event & EV_READ)) {
804                 log_info("ignoring tcp accept event %d", (int)event);
805                 return;
806         }
807         comm_base_now(c->ev->base);
808         /* find free tcp handler. */
809         if(!c->tcp_free) {
810                 log_warn("accepted too many tcp, connections full");
811                 return;
812         }
813         /* accept incoming connection. */
814         c_hdl = c->tcp_free;
815         log_assert(fd != -1);
816         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
817                 &c_hdl->repinfo.addrlen);
818         if(new_fd == -1)
819                 return;
820         if(c->ssl) {
821                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
822                 if(!c_hdl->ssl) {
823                         c_hdl->fd = new_fd;
824                         comm_point_close(c_hdl);
825                         return;
826                 }
827                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
828 #ifdef USE_WINSOCK
829                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
830 #endif
831         }
832
833         /* grab the tcp handler buffers */
834         c->tcp_free = c_hdl->tcp_free;
835         if(!c->tcp_free) {
836                 /* stop accepting incoming queries for now. */
837                 comm_point_stop_listening(c);
838         }
839         /* addr is dropped. Not needed for tcp reply. */
840         setup_tcp_handler(c_hdl, new_fd);
841 }
842
843 /** Make tcp handler free for next assignment */
844 static void
845 reclaim_tcp_handler(struct comm_point* c)
846 {
847         log_assert(c->type == comm_tcp);
848         if(c->ssl) {
849                 SSL_shutdown(c->ssl);
850                 SSL_free(c->ssl);
851                 c->ssl = NULL;
852         }
853         comm_point_close(c);
854         if(c->tcp_parent) {
855                 c->tcp_free = c->tcp_parent->tcp_free;
856                 c->tcp_parent->tcp_free = c;
857                 if(!c->tcp_free) {
858                         /* re-enable listening on accept socket */
859                         comm_point_start_listening(c->tcp_parent, -1, -1);
860                 }
861         }
862 }
863
864 /** do the callback when writing is done */
865 static void
866 tcp_callback_writer(struct comm_point* c)
867 {
868         log_assert(c->type == comm_tcp);
869         ldns_buffer_clear(c->buffer);
870         if(c->tcp_do_toggle_rw)
871                 c->tcp_is_reading = 1;
872         c->tcp_byte_count = 0;
873         /* switch from listening(write) to listening(read) */
874         comm_point_stop_listening(c);
875         comm_point_start_listening(c, -1, -1);
876 }
877
878 /** do the callback when reading is done */
879 static void
880 tcp_callback_reader(struct comm_point* c)
881 {
882         log_assert(c->type == comm_tcp || c->type == comm_local);
883         ldns_buffer_flip(c->buffer);
884         if(c->tcp_do_toggle_rw)
885                 c->tcp_is_reading = 0;
886         c->tcp_byte_count = 0;
887         if(c->type == comm_tcp)
888                 comm_point_stop_listening(c);
889         fptr_ok(fptr_whitelist_comm_point(c->callback));
890         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
891                 comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
892         }
893 }
894
895 /** continue ssl handshake */
896 static int
897 ssl_handshake(struct comm_point* c)
898 {
899         int r;
900         if(c->ssl_shake_state == comm_ssl_shake_hs_read) {
901                 /* read condition satisfied back to writing */
902                 comm_point_listen_for_rw(c, 1, 1);
903                 c->ssl_shake_state = comm_ssl_shake_none;
904                 return 1;
905         }
906         if(c->ssl_shake_state == comm_ssl_shake_hs_write) {
907                 /* write condition satisfied, back to reading */
908                 comm_point_listen_for_rw(c, 1, 0);
909                 c->ssl_shake_state = comm_ssl_shake_none;
910                 return 1;
911         }
912
913         ERR_clear_error();
914         r = SSL_do_handshake(c->ssl);
915         if(r != 1) {
916                 int want = SSL_get_error(c->ssl, r);
917                 if(want == SSL_ERROR_WANT_READ) {
918                         if(c->ssl_shake_state == comm_ssl_shake_read)
919                                 return 1;
920                         c->ssl_shake_state = comm_ssl_shake_read;
921                         comm_point_listen_for_rw(c, 1, 0);
922                         return 1;
923                 } else if(want == SSL_ERROR_WANT_WRITE) {
924                         if(c->ssl_shake_state == comm_ssl_shake_write)
925                                 return 1;
926                         c->ssl_shake_state = comm_ssl_shake_write;
927                         comm_point_listen_for_rw(c, 0, 1);
928                         return 1;
929                 } else if(r == 0) {
930                         return 0; /* closed */
931                 } else if(want == SSL_ERROR_SYSCALL) {
932                         /* SYSCALL and errno==0 means closed uncleanly */
933                         if(errno != 0)
934                                 log_err("SSL_handshake syscall: %s",
935                                         strerror(errno));
936                         return 0;
937                 } else {
938                         log_crypto_err("ssl handshake failed");
939                         log_addr(1, "ssl handshake failed", &c->repinfo.addr,
940                                 c->repinfo.addrlen);
941                         return 0;
942                 }
943         }
944         /* this is where peer verification could take place */
945         log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
946                 c->repinfo.addrlen);
947
948         /* setup listen rw correctly */
949         if(c->tcp_is_reading) {
950                 if(c->ssl_shake_state != comm_ssl_shake_read)
951                         comm_point_listen_for_rw(c, 1, 0);
952         } else {
953                 comm_point_listen_for_rw(c, 1, 1);
954         }
955         c->ssl_shake_state = comm_ssl_shake_none;
956         return 1;
957 }
958
959 /** ssl read callback on TCP */
960 static int
961 ssl_handle_read(struct comm_point* c)
962 {
963         int r;
964         if(c->ssl_shake_state != comm_ssl_shake_none) {
965                 if(!ssl_handshake(c))
966                         return 0;
967                 if(c->ssl_shake_state != comm_ssl_shake_none)
968                         return 1;
969         }
970         if(c->tcp_byte_count < sizeof(uint16_t)) {
971                 /* read length bytes */
972                 ERR_clear_error();
973                 if((r=SSL_read(c->ssl, (void*)ldns_buffer_at(c->buffer,
974                         c->tcp_byte_count), (int)(sizeof(uint16_t) -
975                         c->tcp_byte_count))) <= 0) {
976                         int want = SSL_get_error(c->ssl, r);
977                         if(want == SSL_ERROR_ZERO_RETURN) {
978                                 return 0; /* shutdown, closed */
979                         } else if(want == SSL_ERROR_WANT_READ) {
980                                 return 1; /* read more later */
981                         } else if(want == SSL_ERROR_WANT_WRITE) {
982                                 c->ssl_shake_state = comm_ssl_shake_hs_write;
983                                 comm_point_listen_for_rw(c, 0, 1);
984                                 return 1;
985                         } else if(want == SSL_ERROR_SYSCALL) {
986                                 if(errno != 0)
987                                         log_err("SSL_read syscall: %s",
988                                                 strerror(errno));
989                                 return 0;
990                         }
991                         log_crypto_err("could not SSL_read");
992                         return 0;
993                 }
994                 c->tcp_byte_count += r;
995                 if(c->tcp_byte_count != sizeof(uint16_t))
996                         return 1;
997                 if(ldns_buffer_read_u16_at(c->buffer, 0) >
998                         ldns_buffer_capacity(c->buffer)) {
999                         verbose(VERB_QUERY, "ssl: dropped larger than buffer");
1000                         return 0;
1001                 }
1002                 ldns_buffer_set_limit(c->buffer,
1003                         ldns_buffer_read_u16_at(c->buffer, 0));
1004                 if(ldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1005                         verbose(VERB_QUERY, "ssl: dropped bogus too short.");
1006                         return 0;
1007                 }
1008                 verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
1009                         (int)ldns_buffer_limit(c->buffer));
1010         }
1011         log_assert(ldns_buffer_remaining(c->buffer) > 0);
1012         ERR_clear_error();
1013         r = SSL_read(c->ssl, (void*)ldns_buffer_current(c->buffer),
1014                 (int)ldns_buffer_remaining(c->buffer));
1015         if(r <= 0) {
1016                 int want = SSL_get_error(c->ssl, r);
1017                 if(want == SSL_ERROR_ZERO_RETURN) {
1018                         return 0; /* shutdown, closed */
1019                 } else if(want == SSL_ERROR_WANT_READ) {
1020                         return 1; /* read more later */
1021                 } else if(want == SSL_ERROR_WANT_WRITE) {
1022                         c->ssl_shake_state = comm_ssl_shake_hs_write;
1023                         comm_point_listen_for_rw(c, 0, 1);
1024                         return 1;
1025                 } else if(want == SSL_ERROR_SYSCALL) {
1026                         if(errno != 0)
1027                                 log_err("SSL_read syscall: %s",
1028                                         strerror(errno));
1029                         return 0;
1030                 }
1031                 log_crypto_err("could not SSL_read");
1032                 return 0;
1033         }
1034         ldns_buffer_skip(c->buffer, (ssize_t)r);
1035         if(ldns_buffer_remaining(c->buffer) <= 0) {
1036                 tcp_callback_reader(c);
1037         }
1038         return 1;
1039 }
1040
1041 /** ssl write callback on TCP */
1042 static int
1043 ssl_handle_write(struct comm_point* c)
1044 {
1045         int r;
1046         if(c->ssl_shake_state != comm_ssl_shake_none) {
1047                 if(!ssl_handshake(c))
1048                         return 0;
1049                 if(c->ssl_shake_state != comm_ssl_shake_none)
1050                         return 1;
1051         }
1052         /* ignore return, if fails we may simply block */
1053         (void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
1054         if(c->tcp_byte_count < sizeof(uint16_t)) {
1055                 uint16_t len = htons(ldns_buffer_limit(c->buffer));
1056                 ERR_clear_error();
1057                 r = SSL_write(c->ssl,
1058                         (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1059                         (int)(sizeof(uint16_t)-c->tcp_byte_count));
1060                 if(r <= 0) {
1061                         int want = SSL_get_error(c->ssl, r);
1062                         if(want == SSL_ERROR_ZERO_RETURN) {
1063                                 return 0; /* closed */
1064                         } else if(want == SSL_ERROR_WANT_READ) {
1065                                 c->ssl_shake_state = comm_ssl_shake_read;
1066                                 comm_point_listen_for_rw(c, 1, 0);
1067                                 return 1; /* wait for read condition */
1068                         } else if(want == SSL_ERROR_WANT_WRITE) {
1069                                 return 1; /* write more later */
1070                         } else if(want == SSL_ERROR_SYSCALL) {
1071                                 if(errno != 0)
1072                                         log_err("SSL_write syscall: %s",
1073                                                 strerror(errno));
1074                                 return 0;
1075                         }
1076                         log_crypto_err("could not SSL_write");
1077                         return 0;
1078                 }
1079                 c->tcp_byte_count += r;
1080                 if(c->tcp_byte_count < sizeof(uint16_t))
1081                         return 1;
1082                 ldns_buffer_set_position(c->buffer, c->tcp_byte_count -
1083                         sizeof(uint16_t));
1084                 if(ldns_buffer_remaining(c->buffer) == 0) {
1085                         tcp_callback_writer(c);
1086                         return 1;
1087                 }
1088         }
1089         log_assert(ldns_buffer_remaining(c->buffer) > 0);
1090         ERR_clear_error();
1091         r = SSL_write(c->ssl, (void*)ldns_buffer_current(c->buffer),
1092                 (int)ldns_buffer_remaining(c->buffer));
1093         if(r <= 0) {
1094                 int want = SSL_get_error(c->ssl, r);
1095                 if(want == SSL_ERROR_ZERO_RETURN) {
1096                         return 0; /* closed */
1097                 } else if(want == SSL_ERROR_WANT_READ) {
1098                         c->ssl_shake_state = comm_ssl_shake_read;
1099                         comm_point_listen_for_rw(c, 1, 0);
1100                         return 1; /* wait for read condition */
1101                 } else if(want == SSL_ERROR_WANT_WRITE) {
1102                         return 1; /* write more later */
1103                 } else if(want == SSL_ERROR_SYSCALL) {
1104                         if(errno != 0)
1105                                 log_err("SSL_write syscall: %s",
1106                                         strerror(errno));
1107                         return 0;
1108                 }
1109                 log_crypto_err("could not SSL_write");
1110                 return 0;
1111         }
1112         ldns_buffer_skip(c->buffer, (ssize_t)r);
1113
1114         if(ldns_buffer_remaining(c->buffer) == 0) {
1115                 tcp_callback_writer(c);
1116         }
1117         return 1;
1118 }
1119
1120 /** handle ssl tcp connection with dns contents */
1121 static int
1122 ssl_handle_it(struct comm_point* c)
1123 {
1124         if(c->tcp_is_reading)
1125                 return ssl_handle_read(c);
1126         return ssl_handle_write(c);
1127 }
1128
1129 /** Handle tcp reading callback. 
1130  * @param fd: file descriptor of socket.
1131  * @param c: comm point to read from into buffer.
1132  * @param short_ok: if true, very short packets are OK (for comm_local).
1133  * @return: 0 on error 
1134  */
1135 static int
1136 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1137 {
1138         ssize_t r;
1139         log_assert(c->type == comm_tcp || c->type == comm_local);
1140         if(c->ssl)
1141                 return ssl_handle_it(c);
1142         if(!c->tcp_is_reading)
1143                 return 0;
1144
1145         log_assert(fd != -1);
1146         if(c->tcp_byte_count < sizeof(uint16_t)) {
1147                 /* read length bytes */
1148                 r = recv(fd,(void*)ldns_buffer_at(c->buffer,c->tcp_byte_count),
1149                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1150                 if(r == 0)
1151                         return 0;
1152                 else if(r == -1) {
1153 #ifndef USE_WINSOCK
1154                         if(errno == EINTR || errno == EAGAIN)
1155                                 return 1;
1156 #ifdef ECONNRESET
1157                         if(errno == ECONNRESET && verbosity < 2)
1158                                 return 0; /* silence reset by peer */
1159 #endif
1160                         log_err("read (in tcp s): %s", strerror(errno));
1161 #else /* USE_WINSOCK */
1162                         if(WSAGetLastError() == WSAECONNRESET)
1163                                 return 0;
1164                         if(WSAGetLastError() == WSAEINPROGRESS)
1165                                 return 1;
1166                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1167                                 winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1168                                 return 1;
1169                         }
1170                         log_err("read (in tcp s): %s", 
1171                                 wsa_strerror(WSAGetLastError()));
1172 #endif
1173                         log_addr(0, "remote address is", &c->repinfo.addr,
1174                                 c->repinfo.addrlen);
1175                         return 0;
1176                 } 
1177                 c->tcp_byte_count += r;
1178                 if(c->tcp_byte_count != sizeof(uint16_t))
1179                         return 1;
1180                 if(ldns_buffer_read_u16_at(c->buffer, 0) >
1181                         ldns_buffer_capacity(c->buffer)) {
1182                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1183                         return 0;
1184                 }
1185                 ldns_buffer_set_limit(c->buffer, 
1186                         ldns_buffer_read_u16_at(c->buffer, 0));
1187                 if(!short_ok && 
1188                         ldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1189                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1190                         return 0;
1191                 }
1192                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1193                         (int)ldns_buffer_limit(c->buffer));
1194         }
1195
1196         log_assert(ldns_buffer_remaining(c->buffer) > 0);
1197         r = recv(fd, (void*)ldns_buffer_current(c->buffer), 
1198                 ldns_buffer_remaining(c->buffer), 0);
1199         if(r == 0) {
1200                 return 0;
1201         } else if(r == -1) {
1202 #ifndef USE_WINSOCK
1203                 if(errno == EINTR || errno == EAGAIN)
1204                         return 1;
1205                 log_err("read (in tcp r): %s", strerror(errno));
1206 #else /* USE_WINSOCK */
1207                 if(WSAGetLastError() == WSAECONNRESET)
1208                         return 0;
1209                 if(WSAGetLastError() == WSAEINPROGRESS)
1210                         return 1;
1211                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1212                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1213                         return 1;
1214                 }
1215                 log_err("read (in tcp r): %s", 
1216                         wsa_strerror(WSAGetLastError()));
1217 #endif
1218                 log_addr(0, "remote address is", &c->repinfo.addr,
1219                         c->repinfo.addrlen);
1220                 return 0;
1221         }
1222         ldns_buffer_skip(c->buffer, r);
1223         if(ldns_buffer_remaining(c->buffer) <= 0) {
1224                 tcp_callback_reader(c);
1225         }
1226         return 1;
1227 }
1228
1229 /** 
1230  * Handle tcp writing callback. 
1231  * @param fd: file descriptor of socket.
1232  * @param c: comm point to write buffer out of.
1233  * @return: 0 on error
1234  */
1235 static int
1236 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1237 {
1238         ssize_t r;
1239         log_assert(c->type == comm_tcp);
1240         if(c->tcp_is_reading && !c->ssl)
1241                 return 0;
1242         log_assert(fd != -1);
1243         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1244                 /* check for pending error from nonblocking connect */
1245                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1246                 int error = 0;
1247                 socklen_t len = (socklen_t)sizeof(error);
1248                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1249                         &len) < 0){
1250 #ifndef USE_WINSOCK
1251                         error = errno; /* on solaris errno is error */
1252 #else /* USE_WINSOCK */
1253                         error = WSAGetLastError();
1254 #endif
1255                 }
1256 #ifndef USE_WINSOCK
1257 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1258                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1259                         return 1; /* try again later */
1260                 else
1261 #endif
1262                 if(error != 0 && verbosity < 2)
1263                         return 0; /* silence lots of chatter in the logs */
1264                 else if(error != 0) {
1265                         log_err("tcp connect: %s", strerror(error));
1266 #else /* USE_WINSOCK */
1267                 /* examine error */
1268                 if(error == WSAEINPROGRESS)
1269                         return 1;
1270                 else if(error == WSAEWOULDBLOCK) {
1271                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1272                         return 1;
1273                 } else if(error != 0 && verbosity < 2)
1274                         return 0;
1275                 else if(error != 0) {
1276                         log_err("tcp connect: %s", wsa_strerror(error));
1277 #endif /* USE_WINSOCK */
1278                         log_addr(0, "remote address is", &c->repinfo.addr, 
1279                                 c->repinfo.addrlen);
1280                         return 0;
1281                 }
1282         }
1283         if(c->ssl)
1284                 return ssl_handle_it(c);
1285
1286         if(c->tcp_byte_count < sizeof(uint16_t)) {
1287                 uint16_t len = htons(ldns_buffer_limit(c->buffer));
1288 #ifdef HAVE_WRITEV
1289                 struct iovec iov[2];
1290                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1291                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1292                 iov[1].iov_base = ldns_buffer_begin(c->buffer);
1293                 iov[1].iov_len = ldns_buffer_limit(c->buffer);
1294                 log_assert(iov[0].iov_len > 0);
1295                 log_assert(iov[1].iov_len > 0);
1296                 r = writev(fd, iov, 2);
1297 #else /* HAVE_WRITEV */
1298                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1299                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1300 #endif /* HAVE_WRITEV */
1301                 if(r == -1) {
1302 #ifndef USE_WINSOCK
1303 #ifdef EPIPE
1304                         if(errno == EPIPE && verbosity < 2)
1305                                 return 0; /* silence 'broken pipe' */
1306 #endif
1307                         if(errno == EINTR || errno == EAGAIN)
1308                                 return 1;
1309                         log_err("tcp writev: %s", strerror(errno));
1310 #else
1311                         if(WSAGetLastError() == WSAENOTCONN)
1312                                 return 1;
1313                         if(WSAGetLastError() == WSAEINPROGRESS)
1314                                 return 1;
1315                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1316                                 winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1317                                 return 1; 
1318                         }
1319                         log_err("tcp send s: %s", 
1320                                 wsa_strerror(WSAGetLastError()));
1321 #endif
1322                         log_addr(0, "remote address is", &c->repinfo.addr,
1323                                 c->repinfo.addrlen);
1324                         return 0;
1325                 }
1326                 c->tcp_byte_count += r;
1327                 if(c->tcp_byte_count < sizeof(uint16_t))
1328                         return 1;
1329                 ldns_buffer_set_position(c->buffer, c->tcp_byte_count - 
1330                         sizeof(uint16_t));
1331                 if(ldns_buffer_remaining(c->buffer) == 0) {
1332                         tcp_callback_writer(c);
1333                         return 1;
1334                 }
1335         }
1336         log_assert(ldns_buffer_remaining(c->buffer) > 0);
1337         r = send(fd, (void*)ldns_buffer_current(c->buffer), 
1338                 ldns_buffer_remaining(c->buffer), 0);
1339         if(r == -1) {
1340 #ifndef USE_WINSOCK
1341                 if(errno == EINTR || errno == EAGAIN)
1342                         return 1;
1343                 log_err("tcp send r: %s", strerror(errno));
1344 #else
1345                 if(WSAGetLastError() == WSAEINPROGRESS)
1346                         return 1;
1347                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1348                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1349                         return 1; 
1350                 }
1351                 log_err("tcp send r: %s", 
1352                         wsa_strerror(WSAGetLastError()));
1353 #endif
1354                 log_addr(0, "remote address is", &c->repinfo.addr,
1355                         c->repinfo.addrlen);
1356                 return 0;
1357         }
1358         ldns_buffer_skip(c->buffer, r);
1359
1360         if(ldns_buffer_remaining(c->buffer) == 0) {
1361                 tcp_callback_writer(c);
1362         }
1363         
1364         return 1;
1365 }
1366
1367 void 
1368 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1369 {
1370         struct comm_point* c = (struct comm_point*)arg;
1371         log_assert(c->type == comm_tcp);
1372         comm_base_now(c->ev->base);
1373
1374         if(event&EV_READ) {
1375                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1376                         reclaim_tcp_handler(c);
1377                         if(!c->tcp_do_close) {
1378                                 fptr_ok(fptr_whitelist_comm_point(
1379                                         c->callback));
1380                                 (void)(*c->callback)(c, c->cb_arg, 
1381                                         NETEVENT_CLOSED, NULL);
1382                         }
1383                 }
1384                 return;
1385         }
1386         if(event&EV_WRITE) {
1387                 if(!comm_point_tcp_handle_write(fd, c)) {
1388                         reclaim_tcp_handler(c);
1389                         if(!c->tcp_do_close) {
1390                                 fptr_ok(fptr_whitelist_comm_point(
1391                                         c->callback));
1392                                 (void)(*c->callback)(c, c->cb_arg, 
1393                                         NETEVENT_CLOSED, NULL);
1394                         }
1395                 }
1396                 return;
1397         }
1398         if(event&EV_TIMEOUT) {
1399                 verbose(VERB_QUERY, "tcp took too long, dropped");
1400                 reclaim_tcp_handler(c);
1401                 if(!c->tcp_do_close) {
1402                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1403                         (void)(*c->callback)(c, c->cb_arg,
1404                                 NETEVENT_TIMEOUT, NULL);
1405                 }
1406                 return;
1407         }
1408         log_err("Ignored event %d for tcphdl.", event);
1409 }
1410
1411 void comm_point_local_handle_callback(int fd, short event, void* arg)
1412 {
1413         struct comm_point* c = (struct comm_point*)arg;
1414         log_assert(c->type == comm_local);
1415         comm_base_now(c->ev->base);
1416
1417         if(event&EV_READ) {
1418                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
1419                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1420                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
1421                                 NULL);
1422                 }
1423                 return;
1424         }
1425         log_err("Ignored event %d for localhdl.", event);
1426 }
1427
1428 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
1429         short event, void* arg)
1430 {
1431         struct comm_point* c = (struct comm_point*)arg;
1432         int err = NETEVENT_NOERROR;
1433         log_assert(c->type == comm_raw);
1434         comm_base_now(c->ev->base);
1435         
1436         if(event&EV_TIMEOUT)
1437                 err = NETEVENT_TIMEOUT;
1438         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1439         (void)(*c->callback)(c, c->cb_arg, err, NULL);
1440 }
1441
1442 struct comm_point* 
1443 comm_point_create_udp(struct comm_base *base, int fd, ldns_buffer* buffer,
1444         comm_point_callback_t* callback, void* callback_arg)
1445 {
1446         struct comm_point* c = (struct comm_point*)calloc(1,
1447                 sizeof(struct comm_point));
1448         short evbits;
1449         if(!c)
1450                 return NULL;
1451         c->ev = (struct internal_event*)calloc(1,
1452                 sizeof(struct internal_event));
1453         if(!c->ev) {
1454                 free(c);
1455                 return NULL;
1456         }
1457         c->ev->base = base;
1458         c->fd = fd;
1459         c->buffer = buffer;
1460         c->timeout = NULL;
1461         c->tcp_is_reading = 0;
1462         c->tcp_byte_count = 0;
1463         c->tcp_parent = NULL;
1464         c->max_tcp_count = 0;
1465         c->tcp_handlers = NULL;
1466         c->tcp_free = NULL;
1467         c->type = comm_udp;
1468         c->tcp_do_close = 0;
1469         c->do_not_close = 0;
1470         c->tcp_do_toggle_rw = 0;
1471         c->tcp_check_nb_connect = 0;
1472         c->inuse = 0;
1473         c->callback = callback;
1474         c->cb_arg = callback_arg;
1475         evbits = EV_READ | EV_PERSIST;
1476         /* libevent stuff */
1477         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1478         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1479                 log_err("could not baseset udp event");
1480                 comm_point_delete(c);
1481                 return NULL;
1482         }
1483         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1484                 log_err("could not add udp event");
1485                 comm_point_delete(c);
1486                 return NULL;
1487         }
1488         return c;
1489 }
1490
1491 struct comm_point* 
1492 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
1493         ldns_buffer* buffer, 
1494         comm_point_callback_t* callback, void* callback_arg)
1495 {
1496         struct comm_point* c = (struct comm_point*)calloc(1,
1497                 sizeof(struct comm_point));
1498         short evbits;
1499         if(!c)
1500                 return NULL;
1501         c->ev = (struct internal_event*)calloc(1,
1502                 sizeof(struct internal_event));
1503         if(!c->ev) {
1504                 free(c);
1505                 return NULL;
1506         }
1507         c->ev->base = base;
1508         c->fd = fd;
1509         c->buffer = buffer;
1510         c->timeout = NULL;
1511         c->tcp_is_reading = 0;
1512         c->tcp_byte_count = 0;
1513         c->tcp_parent = NULL;
1514         c->max_tcp_count = 0;
1515         c->tcp_handlers = NULL;
1516         c->tcp_free = NULL;
1517         c->type = comm_udp;
1518         c->tcp_do_close = 0;
1519         c->do_not_close = 0;
1520         c->inuse = 0;
1521         c->tcp_do_toggle_rw = 0;
1522         c->tcp_check_nb_connect = 0;
1523         c->callback = callback;
1524         c->cb_arg = callback_arg;
1525         evbits = EV_READ | EV_PERSIST;
1526         /* libevent stuff */
1527         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1528         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1529                 log_err("could not baseset udp event");
1530                 comm_point_delete(c);
1531                 return NULL;
1532         }
1533         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1534                 log_err("could not add udp event");
1535                 comm_point_delete(c);
1536                 return NULL;
1537         }
1538         return c;
1539 }
1540
1541 static struct comm_point* 
1542 comm_point_create_tcp_handler(struct comm_base *base, 
1543         struct comm_point* parent, size_t bufsize,
1544         comm_point_callback_t* callback, void* callback_arg)
1545 {
1546         struct comm_point* c = (struct comm_point*)calloc(1,
1547                 sizeof(struct comm_point));
1548         short evbits;
1549         if(!c)
1550                 return NULL;
1551         c->ev = (struct internal_event*)calloc(1,
1552                 sizeof(struct internal_event));
1553         if(!c->ev) {
1554                 free(c);
1555                 return NULL;
1556         }
1557         c->ev->base = base;
1558         c->fd = -1;
1559         c->buffer = ldns_buffer_new(bufsize);
1560         if(!c->buffer) {
1561                 free(c->ev);
1562                 free(c);
1563                 return NULL;
1564         }
1565         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1566         if(!c->timeout) {
1567                 ldns_buffer_free(c->buffer);
1568                 free(c->ev);
1569                 free(c);
1570                 return NULL;
1571         }
1572         c->tcp_is_reading = 0;
1573         c->tcp_byte_count = 0;
1574         c->tcp_parent = parent;
1575         c->max_tcp_count = 0;
1576         c->tcp_handlers = NULL;
1577         c->tcp_free = NULL;
1578         c->type = comm_tcp;
1579         c->tcp_do_close = 0;
1580         c->do_not_close = 0;
1581         c->tcp_do_toggle_rw = 1;
1582         c->tcp_check_nb_connect = 0;
1583         c->repinfo.c = c;
1584         c->callback = callback;
1585         c->cb_arg = callback_arg;
1586         /* add to parent free list */
1587         c->tcp_free = parent->tcp_free;
1588         parent->tcp_free = c;
1589         /* libevent stuff */
1590         evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1591         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1592         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1593         {
1594                 log_err("could not basetset tcphdl event");
1595                 parent->tcp_free = c->tcp_free;
1596                 free(c->ev);
1597                 free(c);
1598                 return NULL;
1599         }
1600         return c;
1601 }
1602
1603 struct comm_point* 
1604 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1605         comm_point_callback_t* callback, void* callback_arg)
1606 {
1607         struct comm_point* c = (struct comm_point*)calloc(1,
1608                 sizeof(struct comm_point));
1609         short evbits;
1610         int i;
1611         /* first allocate the TCP accept listener */
1612         if(!c)
1613                 return NULL;
1614         c->ev = (struct internal_event*)calloc(1,
1615                 sizeof(struct internal_event));
1616         if(!c->ev) {
1617                 free(c);
1618                 return NULL;
1619         }
1620         c->ev->base = base;
1621         c->fd = fd;
1622         c->buffer = NULL;
1623         c->timeout = NULL;
1624         c->tcp_is_reading = 0;
1625         c->tcp_byte_count = 0;
1626         c->tcp_parent = NULL;
1627         c->max_tcp_count = num;
1628         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1629                 sizeof(struct comm_point*));
1630         if(!c->tcp_handlers) {
1631                 free(c->ev);
1632                 free(c);
1633                 return NULL;
1634         }
1635         c->tcp_free = NULL;
1636         c->type = comm_tcp_accept;
1637         c->tcp_do_close = 0;
1638         c->do_not_close = 0;
1639         c->tcp_do_toggle_rw = 0;
1640         c->tcp_check_nb_connect = 0;
1641         c->callback = NULL;
1642         c->cb_arg = NULL;
1643         evbits = EV_READ | EV_PERSIST;
1644         /* libevent stuff */
1645         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1646         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1647                 event_add(&c->ev->ev, c->timeout) != 0 )
1648         {
1649                 log_err("could not add tcpacc event");
1650                 comm_point_delete(c);
1651                 return NULL;
1652         }
1653
1654         /* now prealloc the tcp handlers */
1655         for(i=0; i<num; i++) {
1656                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1657                         c, bufsize, callback, callback_arg);
1658                 if(!c->tcp_handlers[i]) {
1659                         comm_point_delete(c);
1660                         return NULL;
1661                 }
1662         }
1663         
1664         return c;
1665 }
1666
1667 struct comm_point* 
1668 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1669         comm_point_callback_t* callback, void* callback_arg)
1670 {
1671         struct comm_point* c = (struct comm_point*)calloc(1,
1672                 sizeof(struct comm_point));
1673         short evbits;
1674         if(!c)
1675                 return NULL;
1676         c->ev = (struct internal_event*)calloc(1,
1677                 sizeof(struct internal_event));
1678         if(!c->ev) {
1679                 free(c);
1680                 return NULL;
1681         }
1682         c->ev->base = base;
1683         c->fd = -1;
1684         c->buffer = ldns_buffer_new(bufsize);
1685         if(!c->buffer) {
1686                 free(c->ev);
1687                 free(c);
1688                 return NULL;
1689         }
1690         c->timeout = NULL;
1691         c->tcp_is_reading = 0;
1692         c->tcp_byte_count = 0;
1693         c->tcp_parent = NULL;
1694         c->max_tcp_count = 0;
1695         c->tcp_handlers = NULL;
1696         c->tcp_free = NULL;
1697         c->type = comm_tcp;
1698         c->tcp_do_close = 0;
1699         c->do_not_close = 0;
1700         c->tcp_do_toggle_rw = 1;
1701         c->tcp_check_nb_connect = 1;
1702         c->repinfo.c = c;
1703         c->callback = callback;
1704         c->cb_arg = callback_arg;
1705         evbits = EV_PERSIST | EV_WRITE;
1706         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1707         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1708         {
1709                 log_err("could not basetset tcpout event");
1710                 ldns_buffer_free(c->buffer);
1711                 free(c->ev);
1712                 free(c);
1713                 return NULL;
1714         }
1715
1716         return c;
1717 }
1718
1719 struct comm_point* 
1720 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1721         comm_point_callback_t* callback, void* callback_arg)
1722 {
1723         struct comm_point* c = (struct comm_point*)calloc(1,
1724                 sizeof(struct comm_point));
1725         short evbits;
1726         if(!c)
1727                 return NULL;
1728         c->ev = (struct internal_event*)calloc(1,
1729                 sizeof(struct internal_event));
1730         if(!c->ev) {
1731                 free(c);
1732                 return NULL;
1733         }
1734         c->ev->base = base;
1735         c->fd = fd;
1736         c->buffer = ldns_buffer_new(bufsize);
1737         if(!c->buffer) {
1738                 free(c->ev);
1739                 free(c);
1740                 return NULL;
1741         }
1742         c->timeout = NULL;
1743         c->tcp_is_reading = 1;
1744         c->tcp_byte_count = 0;
1745         c->tcp_parent = NULL;
1746         c->max_tcp_count = 0;
1747         c->tcp_handlers = NULL;
1748         c->tcp_free = NULL;
1749         c->type = comm_local;
1750         c->tcp_do_close = 0;
1751         c->do_not_close = 1;
1752         c->tcp_do_toggle_rw = 0;
1753         c->tcp_check_nb_connect = 0;
1754         c->callback = callback;
1755         c->cb_arg = callback_arg;
1756         /* libevent stuff */
1757         evbits = EV_PERSIST | EV_READ;
1758         event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback, 
1759                 c);
1760         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1761                 event_add(&c->ev->ev, c->timeout) != 0 )
1762         {
1763                 log_err("could not add localhdl event");
1764                 free(c->ev);
1765                 free(c);
1766                 return NULL;
1767         }
1768         return c;
1769 }
1770
1771 struct comm_point* 
1772 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
1773         comm_point_callback_t* callback, void* callback_arg)
1774 {
1775         struct comm_point* c = (struct comm_point*)calloc(1,
1776                 sizeof(struct comm_point));
1777         short evbits;
1778         if(!c)
1779                 return NULL;
1780         c->ev = (struct internal_event*)calloc(1,
1781                 sizeof(struct internal_event));
1782         if(!c->ev) {
1783                 free(c);
1784                 return NULL;
1785         }
1786         c->ev->base = base;
1787         c->fd = fd;
1788         c->buffer = NULL;
1789         c->timeout = NULL;
1790         c->tcp_is_reading = 0;
1791         c->tcp_byte_count = 0;
1792         c->tcp_parent = NULL;
1793         c->max_tcp_count = 0;
1794         c->tcp_handlers = NULL;
1795         c->tcp_free = NULL;
1796         c->type = comm_raw;
1797         c->tcp_do_close = 0;
1798         c->do_not_close = 1;
1799         c->tcp_do_toggle_rw = 0;
1800         c->tcp_check_nb_connect = 0;
1801         c->callback = callback;
1802         c->cb_arg = callback_arg;
1803         /* libevent stuff */
1804         if(writing)
1805                 evbits = EV_PERSIST | EV_WRITE;
1806         else    evbits = EV_PERSIST | EV_READ;
1807         event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback, 
1808                 c);
1809         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1810                 event_add(&c->ev->ev, c->timeout) != 0 )
1811         {
1812                 log_err("could not add rawhdl event");
1813                 free(c->ev);
1814                 free(c);
1815                 return NULL;
1816         }
1817         return c;
1818 }
1819
1820 void 
1821 comm_point_close(struct comm_point* c)
1822 {
1823         if(!c)
1824                 return;
1825         if(c->fd != -1)
1826                 if(event_del(&c->ev->ev) != 0) {
1827                         log_err("could not event_del on close");
1828                 }
1829         /* close fd after removing from event lists, or epoll.. is messed up */
1830         if(c->fd != -1 && !c->do_not_close) {
1831                 verbose(VERB_ALGO, "close fd %d", c->fd);
1832 #ifndef USE_WINSOCK
1833                 close(c->fd);
1834 #else
1835                 closesocket(c->fd);
1836 #endif
1837         }
1838         c->fd = -1;
1839 }
1840
1841 void 
1842 comm_point_delete(struct comm_point* c)
1843 {
1844         if(!c) 
1845                 return;
1846         if(c->type == comm_tcp && c->ssl) {
1847                 SSL_shutdown(c->ssl);
1848                 SSL_free(c->ssl);
1849         }
1850         comm_point_close(c);
1851         if(c->tcp_handlers) {
1852                 int i;
1853                 for(i=0; i<c->max_tcp_count; i++)
1854                         comm_point_delete(c->tcp_handlers[i]);
1855                 free(c->tcp_handlers);
1856         }
1857         free(c->timeout);
1858         if(c->type == comm_tcp || c->type == comm_local)
1859                 ldns_buffer_free(c->buffer);
1860         free(c->ev);
1861         free(c);
1862 }
1863
1864 void 
1865 comm_point_send_reply(struct comm_reply *repinfo)
1866 {
1867         log_assert(repinfo && repinfo->c);
1868         if(repinfo->c->type == comm_udp) {
1869                 if(repinfo->srctype)
1870                         comm_point_send_udp_msg_if(repinfo->c, 
1871                         repinfo->c->buffer, (struct sockaddr*)&repinfo->addr, 
1872                         repinfo->addrlen, repinfo);
1873                 else
1874                         comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
1875                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
1876         } else {
1877                 comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
1878         }
1879 }
1880
1881 void 
1882 comm_point_drop_reply(struct comm_reply* repinfo)
1883 {
1884         if(!repinfo)
1885                 return;
1886         log_assert(repinfo && repinfo->c);
1887         log_assert(repinfo->c->type != comm_tcp_accept);
1888         if(repinfo->c->type == comm_udp)
1889                 return;
1890         reclaim_tcp_handler(repinfo->c);
1891 }
1892
1893 void 
1894 comm_point_stop_listening(struct comm_point* c)
1895 {
1896         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
1897         if(event_del(&c->ev->ev) != 0) {
1898                 log_err("event_del error to stoplisten");
1899         }
1900 }
1901
1902 void 
1903 comm_point_start_listening(struct comm_point* c, int newfd, int sec)
1904 {
1905         verbose(VERB_ALGO, "comm point start listening %d", 
1906                 c->fd==-1?newfd:c->fd);
1907         if(c->type == comm_tcp_accept && !c->tcp_free) {
1908                 /* no use to start listening no free slots. */
1909                 return;
1910         }
1911         if(sec != -1 && sec != 0) {
1912                 if(!c->timeout) {
1913                         c->timeout = (struct timeval*)malloc(sizeof(
1914                                 struct timeval));
1915                         if(!c->timeout) {
1916                                 log_err("cpsl: malloc failed. No net read.");
1917                                 return;
1918                         }
1919                 }
1920                 c->ev->ev.ev_events |= EV_TIMEOUT;
1921 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
1922                 c->timeout->tv_sec = sec;
1923                 c->timeout->tv_usec = 0;
1924 #endif /* S_SPLINT_S */
1925         }
1926         if(c->type == comm_tcp) {
1927                 c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1928                 if(c->tcp_is_reading)
1929                         c->ev->ev.ev_events |= EV_READ;
1930                 else    c->ev->ev.ev_events |= EV_WRITE;
1931         }
1932         if(newfd != -1) {
1933                 if(c->fd != -1) {
1934 #ifndef USE_WINSOCK
1935                         close(c->fd);
1936 #else
1937                         closesocket(c->fd);
1938 #endif
1939                 }
1940                 c->fd = newfd;
1941                 c->ev->ev.ev_fd = c->fd;
1942         }
1943         if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
1944                 log_err("event_add failed. in cpsl.");
1945         }
1946 }
1947
1948 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
1949 {
1950         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
1951         if(event_del(&c->ev->ev) != 0) {
1952                 log_err("event_del error to cplf");
1953         }
1954         c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1955         if(rd) c->ev->ev.ev_events |= EV_READ;
1956         if(wr) c->ev->ev.ev_events |= EV_WRITE;
1957         if(event_add(&c->ev->ev, c->timeout) != 0) {
1958                 log_err("event_add failed. in cplf.");
1959         }
1960 }
1961
1962 size_t comm_point_get_mem(struct comm_point* c)
1963 {
1964         size_t s;
1965         if(!c) 
1966                 return 0;
1967         s = sizeof(*c) + sizeof(*c->ev);
1968         if(c->timeout) 
1969                 s += sizeof(*c->timeout);
1970         if(c->type == comm_tcp || c->type == comm_local)
1971                 s += sizeof(*c->buffer) + ldns_buffer_capacity(c->buffer);
1972         if(c->type == comm_tcp_accept) {
1973                 int i;
1974                 for(i=0; i<c->max_tcp_count; i++)
1975                         s += comm_point_get_mem(c->tcp_handlers[i]);
1976         }
1977         return s;
1978 }
1979
1980 struct comm_timer* 
1981 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
1982 {
1983         struct comm_timer *tm = (struct comm_timer*)calloc(1,
1984                 sizeof(struct comm_timer));
1985         if(!tm)
1986                 return NULL;
1987         tm->ev_timer = (struct internal_timer*)calloc(1,
1988                 sizeof(struct internal_timer));
1989         if(!tm->ev_timer) {
1990                 log_err("malloc failed");
1991                 free(tm);
1992                 return NULL;
1993         }
1994         tm->ev_timer->base = base;
1995         tm->callback = cb;
1996         tm->cb_arg = cb_arg;
1997         event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT, 
1998                 comm_timer_callback, tm);
1999         if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2000                 log_err("timer_create: event_base_set failed.");
2001                 free(tm->ev_timer);
2002                 free(tm);
2003                 return NULL;
2004         }
2005         return tm;
2006 }
2007
2008 void 
2009 comm_timer_disable(struct comm_timer* timer)
2010 {
2011         if(!timer)
2012                 return;
2013         evtimer_del(&timer->ev_timer->ev);
2014         timer->ev_timer->enabled = 0;
2015 }
2016
2017 void 
2018 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2019 {
2020         log_assert(tv);
2021         if(timer->ev_timer->enabled)
2022                 comm_timer_disable(timer);
2023         event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2024                 comm_timer_callback, timer);
2025         if(event_base_set(timer->ev_timer->base->eb->base, 
2026                 &timer->ev_timer->ev) != 0)
2027                 log_err("comm_timer_set: set_base failed.");
2028         if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2029                 log_err("comm_timer_set: evtimer_add failed.");
2030         timer->ev_timer->enabled = 1;
2031 }
2032
2033 void 
2034 comm_timer_delete(struct comm_timer* timer)
2035 {
2036         if(!timer)
2037                 return;
2038         comm_timer_disable(timer);
2039         free(timer->ev_timer);
2040         free(timer);
2041 }
2042
2043 void 
2044 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2045 {
2046         struct comm_timer* tm = (struct comm_timer*)arg;
2047         if(!(event&EV_TIMEOUT))
2048                 return;
2049         comm_base_now(tm->ev_timer->base);
2050         tm->ev_timer->enabled = 0;
2051         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2052         (*tm->callback)(tm->cb_arg);
2053 }
2054
2055 int 
2056 comm_timer_is_set(struct comm_timer* timer)
2057 {
2058         return (int)timer->ev_timer->enabled;
2059 }
2060
2061 size_t 
2062 comm_timer_get_mem(struct comm_timer* timer)
2063 {
2064         return sizeof(*timer) + sizeof(struct internal_timer);
2065 }
2066
2067 struct comm_signal* 
2068 comm_signal_create(struct comm_base* base,
2069         void (*callback)(int, void*), void* cb_arg)
2070 {
2071         struct comm_signal* com = (struct comm_signal*)malloc(
2072                 sizeof(struct comm_signal));
2073         if(!com) {
2074                 log_err("malloc failed");
2075                 return NULL;
2076         }
2077         com->base = base;
2078         com->callback = callback;
2079         com->cb_arg = cb_arg;
2080         com->ev_signal = NULL;
2081         return com;
2082 }
2083
2084 void 
2085 comm_signal_callback(int sig, short event, void* arg)
2086 {
2087         struct comm_signal* comsig = (struct comm_signal*)arg;
2088         if(!(event & EV_SIGNAL))
2089                 return;
2090         comm_base_now(comsig->base);
2091         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2092         (*comsig->callback)(sig, comsig->cb_arg);
2093 }
2094
2095 int 
2096 comm_signal_bind(struct comm_signal* comsig, int sig)
2097 {
2098         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
2099                 sizeof(struct internal_signal));
2100         if(!entry) {
2101                 log_err("malloc failed");
2102                 return 0;
2103         }
2104         log_assert(comsig);
2105         /* add signal event */
2106         signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2107         if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2108                 log_err("Could not set signal base");
2109                 free(entry);
2110                 return 0;
2111         }
2112         if(signal_add(&entry->ev, NULL) != 0) {
2113                 log_err("Could not add signal handler");
2114                 free(entry);
2115                 return 0;
2116         }
2117         /* link into list */
2118         entry->next = comsig->ev_signal;
2119         comsig->ev_signal = entry;
2120         return 1;
2121 }
2122
2123 void 
2124 comm_signal_delete(struct comm_signal* comsig)
2125 {
2126         struct internal_signal* p, *np;
2127         if(!comsig)
2128                 return;
2129         p=comsig->ev_signal;
2130         while(p) {
2131                 np = p->next;
2132                 signal_del(&p->ev);
2133                 free(p);
2134                 p = np;
2135         }
2136         free(comsig);
2137 }