]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - contrib/unbound/util/netevent.c
Copy head (r256279) to stable/10 as part of the 10.0-RELEASE cycle.
[FreeBSD/stable/10.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include <ldns/wire2host.h>
43 #include "util/netevent.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/fptr_wlist.h"
47 #ifdef HAVE_OPENSSL_SSL_H
48 #include <openssl/ssl.h>
49 #endif
50 #ifdef HAVE_OPENSSL_ERR_H
51 #include <openssl/err.h>
52 #endif
53
54 /* -------- Start of local definitions -------- */
55 /** if CMSG_ALIGN is not defined on this platform, a workaround */
56 #ifndef CMSG_ALIGN
57 #  ifdef _CMSG_DATA_ALIGN
58 #    define CMSG_ALIGN _CMSG_DATA_ALIGN
59 #  else
60 #    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
61 #  endif
62 #endif
63
64 /** if CMSG_LEN is not defined on this platform, a workaround */
65 #ifndef CMSG_LEN
66 #  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
67 #endif
68
69 /** if CMSG_SPACE is not defined on this platform, a workaround */
70 #ifndef CMSG_SPACE
71 #  ifdef _CMSG_HDR_ALIGN
72 #    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
73 #  else
74 #    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
75 #  endif
76 #endif
77
78 /** The TCP reading or writing query timeout in seconds */
79 #define TCP_QUERY_TIMEOUT 120 
80
81 #ifndef NONBLOCKING_IS_BROKEN
82 /** number of UDP reads to perform per read indication from select */
83 #define NUM_UDP_PER_SELECT 100
84 #else
85 #define NUM_UDP_PER_SELECT 1
86 #endif
87
88 /* We define libevent structures here to hide the libevent stuff. */
89
90 #ifdef USE_MINI_EVENT
91 #  ifdef USE_WINSOCK
92 #    include "util/winsock_event.h"
93 #  else
94 #    include "util/mini_event.h"
95 #  endif /* USE_WINSOCK */
96 #else /* USE_MINI_EVENT */
97    /* we use libevent */
98 #  ifdef HAVE_EVENT_H
99 #    include <event.h>
100 #  else
101 #    include "event2/event.h"
102 #    include "event2/event_struct.h"
103 #    include "event2/event_compat.h"
104 #  endif
105 #endif /* USE_MINI_EVENT */
106
107 /**
108  * The internal event structure for keeping libevent info for the event.
109  * Possibly other structures (list, tree) this is part of.
110  */
111 struct internal_event {
112         /** the comm base */
113         struct comm_base* base;
114         /** libevent event type, alloced here */
115         struct event ev;
116 };
117
118 /**
119  * Internal base structure, so that every thread has its own events.
120  */
121 struct internal_base {
122         /** libevent event_base type. */
123         struct event_base* base;
124         /** seconds time pointer points here */
125         uint32_t secs;
126         /** timeval with current time */
127         struct timeval now;
128         /** the event used for slow_accept timeouts */
129         struct event slow_accept;
130         /** true if slow_accept is enabled */
131         int slow_accept_enabled;
132 };
133
134 /**
135  * Internal timer structure, to store timer event in.
136  */
137 struct internal_timer {
138         /** the comm base */
139         struct comm_base* base;
140         /** libevent event type, alloced here */
141         struct event ev;
142         /** is timer enabled */
143         uint8_t enabled;
144 };
145
146 /**
147  * Internal signal structure, to store signal event in.
148  */
149 struct internal_signal {
150         /** libevent event type, alloced here */
151         struct event ev;
152         /** next in signal list */
153         struct internal_signal* next;
154 };
155
156 /** create a tcp handler with a parent */
157 static struct comm_point* comm_point_create_tcp_handler(
158         struct comm_base *base, struct comm_point* parent, size_t bufsize,
159         comm_point_callback_t* callback, void* callback_arg);
160
161 /* -------- End of local definitions -------- */
162
163 #ifdef USE_MINI_EVENT
164 /** minievent updates the time when it blocks. */
165 #define comm_base_now(x) /* nothing to do */
166 #else /* !USE_MINI_EVENT */
167 /** fillup the time values in the event base */
168 static void
169 comm_base_now(struct comm_base* b)
170 {
171         if(gettimeofday(&b->eb->now, NULL) < 0) {
172                 log_err("gettimeofday: %s", strerror(errno));
173         }
174         b->eb->secs = (uint32_t)b->eb->now.tv_sec;
175 }
176 #endif /* USE_MINI_EVENT */
177
178 struct comm_base* 
179 comm_base_create(int sigs)
180 {
181         struct comm_base* b = (struct comm_base*)calloc(1,
182                 sizeof(struct comm_base));
183         if(!b)
184                 return NULL;
185         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
186         if(!b->eb) {
187                 free(b);
188                 return NULL;
189         }
190 #ifdef USE_MINI_EVENT
191         (void)sigs;
192         /* use mini event time-sharing feature */
193         b->eb->base = event_init(&b->eb->secs, &b->eb->now);
194 #else
195 #  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
196         /* libev */
197         if(sigs)
198                 b->eb->base=(struct event_base *)ev_default_loop(EVFLAG_AUTO);
199         else
200                 b->eb->base=(struct event_base *)ev_loop_new(EVFLAG_AUTO);
201 #  else
202         (void)sigs;
203 #    ifdef HAVE_EVENT_BASE_NEW
204         b->eb->base = event_base_new();
205 #    else
206         b->eb->base = event_init();
207 #    endif
208 #  endif
209 #endif
210         if(!b->eb->base) {
211                 free(b->eb);
212                 free(b);
213                 return NULL;
214         }
215         comm_base_now(b);
216         /* avoid event_get_method call which causes crashes even when
217          * not printing, because its result is passed */
218         verbose(VERB_ALGO, 
219 #if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
220                 "libev"
221 #elif defined(USE_MINI_EVENT)
222                 "event "
223 #else
224                 "libevent "
225 #endif
226                 "%s uses %s method.", 
227                 event_get_version(), 
228 #ifdef HAVE_EVENT_BASE_GET_METHOD
229                 event_base_get_method(b->eb->base)
230 #else
231                 "not_obtainable"
232 #endif
233         );
234         return b;
235 }
236
237 void 
238 comm_base_delete(struct comm_base* b)
239 {
240         if(!b)
241                 return;
242         if(b->eb->slow_accept_enabled) {
243                 if(event_del(&b->eb->slow_accept) != 0) {
244                         log_err("could not event_del slow_accept");
245                 }
246         }
247 #ifdef USE_MINI_EVENT
248         event_base_free(b->eb->base);
249 #elif defined(HAVE_EVENT_BASE_FREE) && defined(HAVE_EVENT_BASE_ONCE)
250         /* only libevent 1.2+ has it, but in 1.2 it is broken - 
251            assertion fails on signal handling ev that is not deleted
252            in libevent 1.3c (event_base_once appears) this is fixed. */
253         event_base_free(b->eb->base);
254 #endif /* HAVE_EVENT_BASE_FREE and HAVE_EVENT_BASE_ONCE */
255         b->eb->base = NULL;
256         free(b->eb);
257         free(b);
258 }
259
260 void 
261 comm_base_timept(struct comm_base* b, uint32_t** tt, struct timeval** tv)
262 {
263         *tt = &b->eb->secs;
264         *tv = &b->eb->now;
265 }
266
267 void 
268 comm_base_dispatch(struct comm_base* b)
269 {
270         int retval;
271         retval = event_base_dispatch(b->eb->base);
272         if(retval != 0) {
273                 fatal_exit("event_dispatch returned error %d, "
274                         "errno is %s", retval, strerror(errno));
275         }
276 }
277
278 void comm_base_exit(struct comm_base* b)
279 {
280         if(event_base_loopexit(b->eb->base, NULL) != 0) {
281                 log_err("Could not loopexit");
282         }
283 }
284
285 void comm_base_set_slow_accept_handlers(struct comm_base* b,
286         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
287 {
288         b->stop_accept = stop_acc;
289         b->start_accept = start_acc;
290         b->cb_arg = arg;
291 }
292
293 struct event_base* comm_base_internal(struct comm_base* b)
294 {
295         return b->eb->base;
296 }
297
298 /** see if errno for udp has to be logged or not uses globals */
299 static int
300 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
301 {
302         /* do not log transient errors (unless high verbosity) */
303 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
304         switch(errno) {
305 #  ifdef ENETUNREACH
306                 case ENETUNREACH:
307 #  endif
308 #  ifdef EHOSTDOWN
309                 case EHOSTDOWN:
310 #  endif
311 #  ifdef EHOSTUNREACH
312                 case EHOSTUNREACH:
313 #  endif
314 #  ifdef ENETDOWN
315                 case ENETDOWN:
316 #  endif
317                         if(verbosity < VERB_ALGO)
318                                 return 0;
319                 default:
320                         break;
321         }
322 #endif
323         /* squelch errors where people deploy AAAA ::ffff:bla for
324          * authority servers, which we try for intranets. */
325         if(errno == EINVAL && addr_is_ip4mapped(
326                 (struct sockaddr_storage*)addr, addrlen) &&
327                 verbosity < VERB_DETAIL)
328                 return 0;
329         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
330          * but a dns cache does not need it. */
331         if(errno == EACCES && addr_is_broadcast(
332                 (struct sockaddr_storage*)addr, addrlen) &&
333                 verbosity < VERB_DETAIL)
334                 return 0;
335         return 1;
336 }
337
/**
 * Decide whether the errno of a failed tcp connect needs logging.
 * Applies the same rules as for udp send errors.
 * @param addr: remote address.
 * @param addrlen: length of addr.
 * @return 0 to stay quiet, 1 to log.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);

	return needs_log;
}
342
343 /* send a UDP reply */
344 int
345 comm_point_send_udp_msg(struct comm_point *c, ldns_buffer* packet,
346         struct sockaddr* addr, socklen_t addrlen) 
347 {
348         ssize_t sent;
349         log_assert(c->fd != -1);
350 #ifdef UNBOUND_DEBUG
351         if(ldns_buffer_remaining(packet) == 0)
352                 log_err("error: send empty UDP packet");
353 #endif
354         log_assert(addr && addrlen > 0);
355         sent = sendto(c->fd, (void*)ldns_buffer_begin(packet), 
356                 ldns_buffer_remaining(packet), 0,
357                 addr, addrlen);
358         if(sent == -1) {
359                 if(!udp_send_errno_needs_log(addr, addrlen))
360                         return 0;
361 #ifndef USE_WINSOCK
362                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
363 #else
364                 verbose(VERB_OPS, "sendto failed: %s", 
365                         wsa_strerror(WSAGetLastError()));
366 #endif
367                 log_addr(VERB_OPS, "remote address is", 
368                         (struct sockaddr_storage*)addr, addrlen);
369                 return 0;
370         } else if((size_t)sent != ldns_buffer_remaining(packet)) {
371                 log_err("sent %d in place of %d bytes", 
372                         (int)sent, (int)ldns_buffer_remaining(packet));
373                 return 0;
374         }
375         return 1;
376 }
377
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Render an address into dst for p_ancil; on inet_ntop failure a
 * placeholder text is stored instead. dst is always zero terminated.
 */
static void
p_ancil_addr(int af, const void* src, char* dst, size_t dstlen)
{
	if(inet_ntop(af, src, dst, (socklen_t)dstlen) == 0) {
		strncpy(dst, "(inet_ntop error)", dstlen);
	}
	dst[dstlen-1]=0;
}

/**
 * Log the ancillary (packet info) data of a reply for debugging.
 * @param str: text printed in front of the information.
 * @param r: reply whose srctype and pktinfo are printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	switch(r->srctype) {
	case 6: {
		char addr6[1024];
		p_ancil_addr(AF_INET6, &r->pktinfo.v6info.ipi6_addr,
			addr6, sizeof(addr6));
		log_info("%s: %s %d", str, addr6,
			r->pktinfo.v6info.ipi6_ifindex);
		break;
	}
	case 4: {
#ifdef IP_PKTINFO
		char addr4[1024], spec4[1024];
		p_ancil_addr(AF_INET, &r->pktinfo.v4info.ipi_addr,
			addr4, sizeof(addr4));
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		p_ancil_addr(AF_INET, &r->pktinfo.v4info.ipi_spec_dst,
			spec4, sizeof(spec4));
#else
		spec4[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			addr4, spec4);
#elif defined(IP_RECVDSTADDR)
		char addr4[1024];
		p_ancil_addr(AF_INET, &r->pktinfo.v4addr,
			addr4, sizeof(addr4));
		log_info("%s: %s", str, addr4);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
		break;
	}
	default:
		log_info("%s: unknown srctype %d", str, r->srctype);
		break;
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
425
426 /** send a UDP reply over specified interface*/
427 static int
428 comm_point_send_udp_msg_if(struct comm_point *c, ldns_buffer* packet,
429         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
430 {
431 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
432         ssize_t sent;
433         struct msghdr msg;
434         struct iovec iov[1];
435         char control[256];
436 #ifndef S_SPLINT_S
437         struct cmsghdr *cmsg;
438 #endif /* S_SPLINT_S */
439
440         log_assert(c->fd != -1);
441 #ifdef UNBOUND_DEBUG
442         if(ldns_buffer_remaining(packet) == 0)
443                 log_err("error: send empty UDP packet");
444 #endif
445         log_assert(addr && addrlen > 0);
446
447         msg.msg_name = addr;
448         msg.msg_namelen = addrlen;
449         iov[0].iov_base = ldns_buffer_begin(packet);
450         iov[0].iov_len = ldns_buffer_remaining(packet);
451         msg.msg_iov = iov;
452         msg.msg_iovlen = 1;
453         msg.msg_control = control;
454 #ifndef S_SPLINT_S
455         msg.msg_controllen = sizeof(control);
456 #endif /* S_SPLINT_S */
457         msg.msg_flags = 0;
458
459 #ifndef S_SPLINT_S
460         cmsg = CMSG_FIRSTHDR(&msg);
461         if(r->srctype == 4) {
462 #ifdef IP_PKTINFO
463                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
464                 log_assert(msg.msg_controllen <= sizeof(control));
465                 cmsg->cmsg_level = IPPROTO_IP;
466                 cmsg->cmsg_type = IP_PKTINFO;
467                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
468                         sizeof(struct in_pktinfo));
469                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
470 #elif defined(IP_SENDSRCADDR)
471                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
472                 log_assert(msg.msg_controllen <= sizeof(control));
473                 cmsg->cmsg_level = IPPROTO_IP;
474                 cmsg->cmsg_type = IP_SENDSRCADDR;
475                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
476                         sizeof(struct in_addr));
477                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
478 #else
479                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
480                 msg.msg_control = NULL;
481 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
482         } else if(r->srctype == 6) {
483                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
484                 log_assert(msg.msg_controllen <= sizeof(control));
485                 cmsg->cmsg_level = IPPROTO_IPV6;
486                 cmsg->cmsg_type = IPV6_PKTINFO;
487                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
488                         sizeof(struct in6_pktinfo));
489                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
490         } else {
491                 /* try to pass all 0 to use default route */
492                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
493                 log_assert(msg.msg_controllen <= sizeof(control));
494                 cmsg->cmsg_level = IPPROTO_IPV6;
495                 cmsg->cmsg_type = IPV6_PKTINFO;
496                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
497                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
498         }
499 #endif /* S_SPLINT_S */
500         if(verbosity >= VERB_ALGO)
501                 p_ancil("send_udp over interface", r);
502         sent = sendmsg(c->fd, &msg, 0);
503         if(sent == -1) {
504                 if(!udp_send_errno_needs_log(addr, addrlen))
505                         return 0;
506                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
507                 log_addr(VERB_OPS, "remote address is", 
508                         (struct sockaddr_storage*)addr, addrlen);
509                 return 0;
510         } else if((size_t)sent != ldns_buffer_remaining(packet)) {
511                 log_err("sent %d in place of %d bytes", 
512                         (int)sent, (int)ldns_buffer_remaining(packet));
513                 return 0;
514         }
515         return 1;
516 #else
517         (void)c;
518         (void)packet;
519         (void)addr;
520         (void)addrlen;
521         (void)r;
522         log_err("sendmsg: IPV6_PKTINFO not supported");
523         return 0;
524 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
525 }
526
/**
 * Event callback for a UDP comm point that reads with recvmsg so the
 * packet info of each datagram is captured in the reply.
 * Up to NUM_UDP_PER_SELECT datagrams are read per call; for each one
 * the comm point callback is invoked and, when it returns nonzero, the
 * buffer is sent back with the recorded packet info.
 * Without AF_INET6, IPV6_PKTINFO and recvmsg support this calls
 * fatal_exit.
 * @param fd: the socket that became readable.
 * @param event: event flags; only EV_READ is acted upon.
 * @param arg: the comm point (struct comm_point*).
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr hdr;
	struct iovec vec[1];
	ssize_t nread;
	char ctrl[256];
	int round;
#ifndef S_SPLINT_S
	struct cmsghdr* cm;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if((event & EV_READ) == 0)
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	comm_base_now(rep.c->ev->base);
	for(round = 0; round < NUM_UDP_PER_SELECT; round++) {
		ldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(ldns_buffer_remaining(rep.c->buffer) > 0);
		/* describe where the datagram, the sender address and the
		 * ancillary data have to be stored */
		vec[0].iov_base = ldns_buffer_begin(rep.c->buffer);
		vec[0].iov_len = ldns_buffer_remaining(rep.c->buffer);
		hdr.msg_name = &rep.addr;
		hdr.msg_namelen = (socklen_t)sizeof(rep.addr);
		hdr.msg_iov = vec;
		hdr.msg_iovlen = 1;
		hdr.msg_control = ctrl;
#ifndef S_SPLINT_S
		hdr.msg_controllen = sizeof(ctrl);
#endif /* S_SPLINT_S */
		hdr.msg_flags = 0;
		nread = recvmsg(fd, &hdr, 0);
		if(nread == -1) {
			/* EAGAIN and EINTR are not reported */
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = hdr.msg_namelen;
		ldns_buffer_skip(rep.c->buffer, nread);
		ldns_buffer_flip(rep.c->buffer);
		rep.srctype = 0;
#ifndef S_SPLINT_S
		/* take the first packet info item that is recognized */
		for(cm = CMSG_FIRSTHDR(&hdr); cm != NULL;
			cm = CMSG_NXTHDR(&hdr, cm)) {
			if( cm->cmsg_level == IPPROTO_IPV6 &&
				cm->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cm),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cm->cmsg_level == IPPROTO_IP &&
				cm->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cm),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cm->cmsg_level == IPPROTO_IP &&
				cm->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cm),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* the callback asked for an immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		if(rep.c->fd == -1) /* commpoint closed */
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
621
622 void 
623 comm_point_udp_callback(int fd, short event, void* arg)
624 {
625         struct comm_reply rep;
626         ssize_t rcv;
627         int i;
628
629         rep.c = (struct comm_point*)arg;
630         log_assert(rep.c->type == comm_udp);
631
632         if(!(event&EV_READ))
633                 return;
634         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
635         comm_base_now(rep.c->ev->base);
636         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
637                 ldns_buffer_clear(rep.c->buffer);
638                 rep.addrlen = (socklen_t)sizeof(rep.addr);
639                 log_assert(fd != -1);
640                 log_assert(ldns_buffer_remaining(rep.c->buffer) > 0);
641                 rcv = recvfrom(fd, (void*)ldns_buffer_begin(rep.c->buffer), 
642                         ldns_buffer_remaining(rep.c->buffer), 0, 
643                         (struct sockaddr*)&rep.addr, &rep.addrlen);
644                 if(rcv == -1) {
645 #ifndef USE_WINSOCK
646                         if(errno != EAGAIN && errno != EINTR)
647                                 log_err("recvfrom %d failed: %s", 
648                                         fd, strerror(errno));
649 #else
650                         if(WSAGetLastError() != WSAEINPROGRESS &&
651                                 WSAGetLastError() != WSAECONNRESET &&
652                                 WSAGetLastError()!= WSAEWOULDBLOCK)
653                                 log_err("recvfrom failed: %s",
654                                         wsa_strerror(WSAGetLastError()));
655 #endif
656                         return;
657                 }
658                 ldns_buffer_skip(rep.c->buffer, rcv);
659                 ldns_buffer_flip(rep.c->buffer);
660                 rep.srctype = 0;
661                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
662                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
663                         /* send back immediate reply */
664                         (void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
665                                 (struct sockaddr*)&rep.addr, rep.addrlen);
666                 }
667                 if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
668                 another UDP port. Note rep.c cannot be reused with TCP fd. */
669                         break;
670         }
671 }
672
673 /** Use a new tcp handler for new query fd, set to read query */
674 static void
675 setup_tcp_handler(struct comm_point* c, int fd) 
676 {
677         log_assert(c->type == comm_tcp);
678         log_assert(c->fd == -1);
679         ldns_buffer_clear(c->buffer);
680         c->tcp_is_reading = 1;
681         c->tcp_byte_count = 0;
682         comm_point_start_listening(c, fd, TCP_QUERY_TIMEOUT);
683 }
684
685 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
686         short ATTR_UNUSED(event), void* arg)
687 {
688         struct comm_base* b = (struct comm_base*)arg;
689         /* timeout for the slow accept, re-enable accepts again */
690         if(b->start_accept) {
691                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
692                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
693                 (*b->start_accept)(b->cb_arg);
694                 b->eb->slow_accept_enabled = 0;
695         }
696 }
697
698 int comm_point_perform_accept(struct comm_point* c,
699         struct sockaddr_storage* addr, socklen_t* addrlen)
700 {
701         int new_fd;
702         *addrlen = (socklen_t)sizeof(*addr);
703         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
704         if(new_fd == -1) {
705 #ifndef USE_WINSOCK
706                 /* EINTR is signal interrupt. others are closed connection. */
707                 if(     errno == EINTR || errno == EAGAIN
708 #ifdef EWOULDBLOCK
709                         || errno == EWOULDBLOCK 
710 #endif
711 #ifdef ECONNABORTED
712                         || errno == ECONNABORTED 
713 #endif
714 #ifdef EPROTO
715                         || errno == EPROTO
716 #endif /* EPROTO */
717                         )
718                         return -1;
719 #if defined(ENFILE) && defined(EMFILE)
720                 if(errno == ENFILE || errno == EMFILE) {
721                         /* out of file descriptors, likely outside of our
722                          * control. stop accept() calls for some time */
723                         if(c->ev->base->stop_accept) {
724                                 struct comm_base* b = c->ev->base;
725                                 struct timeval tv;
726                                 verbose(VERB_ALGO, "out of file descriptors: "
727                                         "slow accept");
728                                 b->eb->slow_accept_enabled = 1;
729                                 fptr_ok(fptr_whitelist_stop_accept(
730                                         b->stop_accept));
731                                 (*b->stop_accept)(b->cb_arg);
732                                 /* set timeout, no mallocs */
733                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
734                                 tv.tv_usec = NETEVENT_SLOW_ACCEPT_TIME%1000;
735                                 event_set(&b->eb->slow_accept, -1, EV_TIMEOUT, 
736                                         comm_base_handle_slow_accept, b);
737                                 if(event_base_set(b->eb->base,
738                                         &b->eb->slow_accept) != 0) {
739                                         /* we do not want to log here, because
740                                          * that would spam the logfiles.
741                                          * error: "event_base_set failed." */
742                                 }
743                                 if(event_add(&b->eb->slow_accept, &tv) != 0) {
744                                         /* we do not want to log here,
745                                          * error: "event_add failed." */
746                                 }
747                         }
748                         return -1;
749                 }
750 #endif
751                 log_err("accept failed: %s", strerror(errno));
752 #else /* USE_WINSOCK */
753                 if(WSAGetLastError() == WSAEINPROGRESS ||
754                         WSAGetLastError() == WSAECONNRESET)
755                         return -1;
756                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
757                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
758                         return -1;
759                 }
760                 log_err("accept failed: %s", wsa_strerror(WSAGetLastError()));
761 #endif
762                 log_addr(0, "remote address is", addr, *addrlen);
763                 return -1;
764         }
765         fd_set_nonblock(new_fd);
766         return new_fd;
767 }
768
769 #ifdef USE_WINSOCK
770 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
771         int ATTR_UNUSED(argi), long argl, long retvalue)
772 {
773         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
774                 (oper&BIO_CB_RETURN)?"return":"before",
775                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
776                 WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
777         /* on windows, check if previous operation caused EWOULDBLOCK */
778         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
779                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
780                 if(WSAGetLastError() == WSAEWOULDBLOCK)
781                         winsock_tcp_wouldblock((struct event*)
782                                 BIO_get_callback_arg(b), EV_READ);
783         }
784         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
785                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
786                 if(WSAGetLastError() == WSAEWOULDBLOCK)
787                         winsock_tcp_wouldblock((struct event*)
788                                 BIO_get_callback_arg(b), EV_WRITE);
789         }
790         /* return original return value */
791         return retvalue;
792 }
793
794 /** set win bio callbacks for nonblocking operations */
795 void
796 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
797 {
798         SSL* ssl = (SSL*)thessl;
799         /* set them both just in case, but usually they are the same BIO */
800         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
801         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)&c->ev->ev);
802         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
803         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)&c->ev->ev);
804 }
805 #endif
806
807 void 
808 comm_point_tcp_accept_callback(int fd, short event, void* arg)
809 {
810         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
811         int new_fd;
812         log_assert(c->type == comm_tcp_accept);
813         if(!(event & EV_READ)) {
814                 log_info("ignoring tcp accept event %d", (int)event);
815                 return;
816         }
817         comm_base_now(c->ev->base);
818         /* find free tcp handler. */
819         if(!c->tcp_free) {
820                 log_warn("accepted too many tcp, connections full");
821                 return;
822         }
823         /* accept incoming connection. */
824         c_hdl = c->tcp_free;
825         log_assert(fd != -1);
826         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
827                 &c_hdl->repinfo.addrlen);
828         if(new_fd == -1)
829                 return;
830         if(c->ssl) {
831                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
832                 if(!c_hdl->ssl) {
833                         c_hdl->fd = new_fd;
834                         comm_point_close(c_hdl);
835                         return;
836                 }
837                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
838 #ifdef USE_WINSOCK
839                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
840 #endif
841         }
842
843         /* grab the tcp handler buffers */
844         c->tcp_free = c_hdl->tcp_free;
845         if(!c->tcp_free) {
846                 /* stop accepting incoming queries for now. */
847                 comm_point_stop_listening(c);
848         }
849         /* addr is dropped. Not needed for tcp reply. */
850         setup_tcp_handler(c_hdl, new_fd);
851 }
852
853 /** Make tcp handler free for next assignment */
854 static void
855 reclaim_tcp_handler(struct comm_point* c)
856 {
857         log_assert(c->type == comm_tcp);
858         if(c->ssl) {
859 #ifdef HAVE_SSL
860                 SSL_shutdown(c->ssl);
861                 SSL_free(c->ssl);
862                 c->ssl = NULL;
863 #endif
864         }
865         comm_point_close(c);
866         if(c->tcp_parent) {
867                 c->tcp_free = c->tcp_parent->tcp_free;
868                 c->tcp_parent->tcp_free = c;
869                 if(!c->tcp_free) {
870                         /* re-enable listening on accept socket */
871                         comm_point_start_listening(c->tcp_parent, -1, -1);
872                 }
873         }
874 }
875
876 /** do the callback when writing is done */
877 static void
878 tcp_callback_writer(struct comm_point* c)
879 {
880         log_assert(c->type == comm_tcp);
881         ldns_buffer_clear(c->buffer);
882         if(c->tcp_do_toggle_rw)
883                 c->tcp_is_reading = 1;
884         c->tcp_byte_count = 0;
885         /* switch from listening(write) to listening(read) */
886         comm_point_stop_listening(c);
887         comm_point_start_listening(c, -1, -1);
888 }
889
890 /** do the callback when reading is done */
891 static void
892 tcp_callback_reader(struct comm_point* c)
893 {
894         log_assert(c->type == comm_tcp || c->type == comm_local);
895         ldns_buffer_flip(c->buffer);
896         if(c->tcp_do_toggle_rw)
897                 c->tcp_is_reading = 0;
898         c->tcp_byte_count = 0;
899         if(c->type == comm_tcp)
900                 comm_point_stop_listening(c);
901         fptr_ok(fptr_whitelist_comm_point(c->callback));
902         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
903                 comm_point_start_listening(c, -1, TCP_QUERY_TIMEOUT);
904         }
905 }
906
/**
 * Continue the ssl handshake.
 * Also handles the hs_read/hs_write states, in which an ssl read or write
 * was waiting for the opposite socket condition; those restore the
 * listening direction and clear the state.
 * @param c: comm point with the ssl connection.
 * @return 0 if the connection must be closed, 1 otherwise. When 1 is
 *	returned and ssl_shake_state is comm_ssl_shake_none the handshake
 *	is complete.
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int r, want;

	switch(c->ssl_shake_state) {
	case comm_ssl_shake_hs_read:
		/* read condition satisfied back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	case comm_ssl_shake_hs_write:
		/* write condition satisfied, back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	default:
		break;
	}

	ERR_clear_error();
	r = SSL_do_handshake(c->ssl);
	if(r != 1) {
		want = SSL_get_error(c->ssl, r);
		if(want == SSL_ERROR_WANT_READ) {
			/* only change the event if not waiting for read yet */
			if(c->ssl_shake_state != comm_ssl_shake_read) {
				c->ssl_shake_state = comm_ssl_shake_read;
				comm_point_listen_for_rw(c, 1, 0);
			}
			return 1;
		}
		if(want == SSL_ERROR_WANT_WRITE) {
			/* only change the event if not waiting for write yet */
			if(c->ssl_shake_state != comm_ssl_shake_write) {
				c->ssl_shake_state = comm_ssl_shake_write;
				comm_point_listen_for_rw(c, 0, 1);
			}
			return 1;
		}
		if(r == 0)
			return 0; /* closed */
		if(want == SSL_ERROR_SYSCALL) {
			/* SYSCALL and errno==0 means closed uncleanly */
			if(errno != 0)
				log_err("SSL_handshake syscall: %s",
					strerror(errno));
			return 0;
		}
		log_crypto_err("ssl handshake failed");
		log_addr(1, "ssl handshake failed", &c->repinfo.addr,
			c->repinfo.addrlen);
		return 0;
	}
	/* this is where peer verification could take place */
	log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
		c->repinfo.addrlen);

	/* handshake done: make the event listen in the right direction */
	if(!c->tcp_is_reading) {
		comm_point_listen_for_rw(c, 1, 1);
	} else if(c->ssl_shake_state != comm_ssl_shake_read) {
		comm_point_listen_for_rw(c, 1, 0);
	}
	c->ssl_shake_state = comm_ssl_shake_none;
	return 1;
}
#endif /* HAVE_SSL */
972
#ifdef HAVE_SSL
/**
 * Decide what to do after SSL_read returned r <= 0.
 * @param c: comm point with the ssl connection.
 * @param r: the return value of SSL_read.
 * @return 0 to close the connection, 1 to wait for a later event.
 */
static int
ssl_read_retry_or_close(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		/* the read needs the socket to become writable first */
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Finishes a pending handshake first, then reads the two byte length
 * prefix and the message; a complete message goes to
 * tcp_callback_reader().
 * @param c: comm point with the ssl connection.
 * @return 0 if the connection must be closed, 1 otherwise. Without
 *	HAVE_SSL this always returns 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;

	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}

	if(c->tcp_byte_count < sizeof(uint16_t)) {
		size_t msglen;

		/* read (the rest of) the length bytes */
		ERR_clear_error();
		r = SSL_read(c->ssl,
			(void*)ldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(r <= 0)
			return ssl_read_retry_or_close(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;

		msglen = ldns_buffer_read_u16_at(c->buffer, 0);
		if(msglen > ldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		ldns_buffer_set_limit(c->buffer, msglen);
		if(ldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)ldns_buffer_limit(c->buffer));
	}

	/* read (more of) the message itself */
	log_assert(ldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)ldns_buffer_current(c->buffer),
		(int)ldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_read_retry_or_close(c, r);
	ldns_buffer_skip(c->buffer, (ssize_t)r);
	if(ldns_buffer_remaining(c->buffer) <= 0) {
		/* message complete */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1059
#ifdef HAVE_SSL
/**
 * Decide what to do after SSL_write returned r <= 0.
 * @param c: comm point with the ssl connection.
 * @param r: the return value of SSL_write.
 * @return 0 to close the connection, 1 to wait for a later event.
 */
static int
ssl_write_retry_or_close(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		/* the write needs the socket to become readable first */
		c->ssl_shake_state = comm_ssl_shake_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	case SSL_ERROR_WANT_WRITE:
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Finishes a pending handshake first, then writes the two byte length
 * prefix followed by the buffer contents; when everything is written
 * tcp_callback_writer() is called.
 * @param c: comm point with the ssl connection.
 * @return 0 if the connection must be closed, 1 otherwise. Without
 *	HAVE_SSL this always returns 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;

	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake still in progress, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);

	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* write (the rest of) the length prefix, network order */
		uint16_t len = htons(ldns_buffer_limit(c->buffer));
		uint8_t* lenbytes = (uint8_t*)&len;

		ERR_clear_error();
		r = SSL_write(c->ssl, (void*)(lenbytes + c->tcp_byte_count),
			(int)(sizeof(uint16_t)-c->tcp_byte_count));
		if(r <= 0)
			return ssl_write_retry_or_close(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		ldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(ldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}

	/* write (more of) the message itself */
	log_assert(ldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)ldns_buffer_current(c->buffer),
		(int)ldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_write_retry_or_close(c, r);
	ldns_buffer_skip(c->buffer, (ssize_t)r);

	if(ldns_buffer_remaining(c->buffer) == 0) {
		/* message complete */
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1143
1144 /** handle ssl tcp connection with dns contents */
1145 static int
1146 ssl_handle_it(struct comm_point* c)
1147 {
1148         if(c->tcp_is_reading)
1149                 return ssl_handle_read(c);
1150         return ssl_handle_write(c);
1151 }
1152
1153 /** Handle tcp reading callback. 
1154  * @param fd: file descriptor of socket.
1155  * @param c: comm point to read from into buffer.
1156  * @param short_ok: if true, very short packets are OK (for comm_local).
1157  * @return: 0 on error 
1158  */
1159 static int
1160 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1161 {
1162         ssize_t r;
1163         log_assert(c->type == comm_tcp || c->type == comm_local);
1164         if(c->ssl)
1165                 return ssl_handle_it(c);
1166         if(!c->tcp_is_reading)
1167                 return 0;
1168
1169         log_assert(fd != -1);
1170         if(c->tcp_byte_count < sizeof(uint16_t)) {
1171                 /* read length bytes */
1172                 r = recv(fd,(void*)ldns_buffer_at(c->buffer,c->tcp_byte_count),
1173                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1174                 if(r == 0)
1175                         return 0;
1176                 else if(r == -1) {
1177 #ifndef USE_WINSOCK
1178                         if(errno == EINTR || errno == EAGAIN)
1179                                 return 1;
1180 #ifdef ECONNRESET
1181                         if(errno == ECONNRESET && verbosity < 2)
1182                                 return 0; /* silence reset by peer */
1183 #endif
1184                         log_err("read (in tcp s): %s", strerror(errno));
1185 #else /* USE_WINSOCK */
1186                         if(WSAGetLastError() == WSAECONNRESET)
1187                                 return 0;
1188                         if(WSAGetLastError() == WSAEINPROGRESS)
1189                                 return 1;
1190                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1191                                 winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1192                                 return 1;
1193                         }
1194                         log_err("read (in tcp s): %s", 
1195                                 wsa_strerror(WSAGetLastError()));
1196 #endif
1197                         log_addr(0, "remote address is", &c->repinfo.addr,
1198                                 c->repinfo.addrlen);
1199                         return 0;
1200                 } 
1201                 c->tcp_byte_count += r;
1202                 if(c->tcp_byte_count != sizeof(uint16_t))
1203                         return 1;
1204                 if(ldns_buffer_read_u16_at(c->buffer, 0) >
1205                         ldns_buffer_capacity(c->buffer)) {
1206                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1207                         return 0;
1208                 }
1209                 ldns_buffer_set_limit(c->buffer, 
1210                         ldns_buffer_read_u16_at(c->buffer, 0));
1211                 if(!short_ok && 
1212                         ldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1213                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1214                         return 0;
1215                 }
1216                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1217                         (int)ldns_buffer_limit(c->buffer));
1218         }
1219
1220         log_assert(ldns_buffer_remaining(c->buffer) > 0);
1221         r = recv(fd, (void*)ldns_buffer_current(c->buffer), 
1222                 ldns_buffer_remaining(c->buffer), 0);
1223         if(r == 0) {
1224                 return 0;
1225         } else if(r == -1) {
1226 #ifndef USE_WINSOCK
1227                 if(errno == EINTR || errno == EAGAIN)
1228                         return 1;
1229                 log_err("read (in tcp r): %s", strerror(errno));
1230 #else /* USE_WINSOCK */
1231                 if(WSAGetLastError() == WSAECONNRESET)
1232                         return 0;
1233                 if(WSAGetLastError() == WSAEINPROGRESS)
1234                         return 1;
1235                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1236                         winsock_tcp_wouldblock(&c->ev->ev, EV_READ);
1237                         return 1;
1238                 }
1239                 log_err("read (in tcp r): %s", 
1240                         wsa_strerror(WSAGetLastError()));
1241 #endif
1242                 log_addr(0, "remote address is", &c->repinfo.addr,
1243                         c->repinfo.addrlen);
1244                 return 0;
1245         }
1246         ldns_buffer_skip(c->buffer, r);
1247         if(ldns_buffer_remaining(c->buffer) <= 0) {
1248                 tcp_callback_reader(c);
1249         }
1250         return 1;
1251 }
1252
1253 /** 
1254  * Handle tcp writing callback. 
1255  * @param fd: file descriptor of socket.
1256  * @param c: comm point to write buffer out of.
1257  * @return: 0 on error
1258  */
1259 static int
1260 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1261 {
1262         ssize_t r;
1263         log_assert(c->type == comm_tcp);
1264         if(c->tcp_is_reading && !c->ssl)
1265                 return 0;
1266         log_assert(fd != -1);
1267         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1268                 /* check for pending error from nonblocking connect */
1269                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1270                 int error = 0;
1271                 socklen_t len = (socklen_t)sizeof(error);
1272                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1273                         &len) < 0){
1274 #ifndef USE_WINSOCK
1275                         error = errno; /* on solaris errno is error */
1276 #else /* USE_WINSOCK */
1277                         error = WSAGetLastError();
1278 #endif
1279                 }
1280 #ifndef USE_WINSOCK
1281 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1282                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1283                         return 1; /* try again later */
1284                 else
1285 #endif
1286                 if(error != 0 && verbosity < 2)
1287                         return 0; /* silence lots of chatter in the logs */
1288                 else if(error != 0) {
1289                         log_err("tcp connect: %s", strerror(error));
1290 #else /* USE_WINSOCK */
1291                 /* examine error */
1292                 if(error == WSAEINPROGRESS)
1293                         return 1;
1294                 else if(error == WSAEWOULDBLOCK) {
1295                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1296                         return 1;
1297                 } else if(error != 0 && verbosity < 2)
1298                         return 0;
1299                 else if(error != 0) {
1300                         log_err("tcp connect: %s", wsa_strerror(error));
1301 #endif /* USE_WINSOCK */
1302                         log_addr(0, "remote address is", &c->repinfo.addr, 
1303                                 c->repinfo.addrlen);
1304                         return 0;
1305                 }
1306         }
1307         if(c->ssl)
1308                 return ssl_handle_it(c);
1309
1310         if(c->tcp_byte_count < sizeof(uint16_t)) {
1311                 uint16_t len = htons(ldns_buffer_limit(c->buffer));
1312 #ifdef HAVE_WRITEV
1313                 struct iovec iov[2];
1314                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1315                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1316                 iov[1].iov_base = ldns_buffer_begin(c->buffer);
1317                 iov[1].iov_len = ldns_buffer_limit(c->buffer);
1318                 log_assert(iov[0].iov_len > 0);
1319                 log_assert(iov[1].iov_len > 0);
1320                 r = writev(fd, iov, 2);
1321 #else /* HAVE_WRITEV */
1322                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1323                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1324 #endif /* HAVE_WRITEV */
1325                 if(r == -1) {
1326 #ifndef USE_WINSOCK
1327 #ifdef EPIPE
1328                         if(errno == EPIPE && verbosity < 2)
1329                                 return 0; /* silence 'broken pipe' */
1330 #endif
1331                         if(errno == EINTR || errno == EAGAIN)
1332                                 return 1;
1333                         log_err("tcp writev: %s", strerror(errno));
1334 #else
1335                         if(WSAGetLastError() == WSAENOTCONN)
1336                                 return 1;
1337                         if(WSAGetLastError() == WSAEINPROGRESS)
1338                                 return 1;
1339                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1340                                 winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1341                                 return 1; 
1342                         }
1343                         log_err("tcp send s: %s", 
1344                                 wsa_strerror(WSAGetLastError()));
1345 #endif
1346                         log_addr(0, "remote address is", &c->repinfo.addr,
1347                                 c->repinfo.addrlen);
1348                         return 0;
1349                 }
1350                 c->tcp_byte_count += r;
1351                 if(c->tcp_byte_count < sizeof(uint16_t))
1352                         return 1;
1353                 ldns_buffer_set_position(c->buffer, c->tcp_byte_count - 
1354                         sizeof(uint16_t));
1355                 if(ldns_buffer_remaining(c->buffer) == 0) {
1356                         tcp_callback_writer(c);
1357                         return 1;
1358                 }
1359         }
1360         log_assert(ldns_buffer_remaining(c->buffer) > 0);
1361         r = send(fd, (void*)ldns_buffer_current(c->buffer), 
1362                 ldns_buffer_remaining(c->buffer), 0);
1363         if(r == -1) {
1364 #ifndef USE_WINSOCK
1365                 if(errno == EINTR || errno == EAGAIN)
1366                         return 1;
1367                 log_err("tcp send r: %s", strerror(errno));
1368 #else
1369                 if(WSAGetLastError() == WSAEINPROGRESS)
1370                         return 1;
1371                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1372                         winsock_tcp_wouldblock(&c->ev->ev, EV_WRITE);
1373                         return 1; 
1374                 }
1375                 log_err("tcp send r: %s", 
1376                         wsa_strerror(WSAGetLastError()));
1377 #endif
1378                 log_addr(0, "remote address is", &c->repinfo.addr,
1379                         c->repinfo.addrlen);
1380                 return 0;
1381         }
1382         ldns_buffer_skip(c->buffer, r);
1383
1384         if(ldns_buffer_remaining(c->buffer) == 0) {
1385                 tcp_callback_writer(c);
1386         }
1387         
1388         return 1;
1389 }
1390
1391 void 
1392 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1393 {
1394         struct comm_point* c = (struct comm_point*)arg;
1395         log_assert(c->type == comm_tcp);
1396         comm_base_now(c->ev->base);
1397
1398         if(event&EV_READ) {
1399                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1400                         reclaim_tcp_handler(c);
1401                         if(!c->tcp_do_close) {
1402                                 fptr_ok(fptr_whitelist_comm_point(
1403                                         c->callback));
1404                                 (void)(*c->callback)(c, c->cb_arg, 
1405                                         NETEVENT_CLOSED, NULL);
1406                         }
1407                 }
1408                 return;
1409         }
1410         if(event&EV_WRITE) {
1411                 if(!comm_point_tcp_handle_write(fd, c)) {
1412                         reclaim_tcp_handler(c);
1413                         if(!c->tcp_do_close) {
1414                                 fptr_ok(fptr_whitelist_comm_point(
1415                                         c->callback));
1416                                 (void)(*c->callback)(c, c->cb_arg, 
1417                                         NETEVENT_CLOSED, NULL);
1418                         }
1419                 }
1420                 return;
1421         }
1422         if(event&EV_TIMEOUT) {
1423                 verbose(VERB_QUERY, "tcp took too long, dropped");
1424                 reclaim_tcp_handler(c);
1425                 if(!c->tcp_do_close) {
1426                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1427                         (void)(*c->callback)(c, c->cb_arg,
1428                                 NETEVENT_TIMEOUT, NULL);
1429                 }
1430                 return;
1431         }
1432         log_err("Ignored event %d for tcphdl.", event);
1433 }
1434
1435 void comm_point_local_handle_callback(int fd, short event, void* arg)
1436 {
1437         struct comm_point* c = (struct comm_point*)arg;
1438         log_assert(c->type == comm_local);
1439         comm_base_now(c->ev->base);
1440
1441         if(event&EV_READ) {
1442                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
1443                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1444                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
1445                                 NULL);
1446                 }
1447                 return;
1448         }
1449         log_err("Ignored event %d for localhdl.", event);
1450 }
1451
1452 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
1453         short event, void* arg)
1454 {
1455         struct comm_point* c = (struct comm_point*)arg;
1456         int err = NETEVENT_NOERROR;
1457         log_assert(c->type == comm_raw);
1458         comm_base_now(c->ev->base);
1459         
1460         if(event&EV_TIMEOUT)
1461                 err = NETEVENT_TIMEOUT;
1462         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1463         (void)(*c->callback)(c, c->cb_arg, err, NULL);
1464 }
1465
1466 struct comm_point* 
1467 comm_point_create_udp(struct comm_base *base, int fd, ldns_buffer* buffer,
1468         comm_point_callback_t* callback, void* callback_arg)
1469 {
1470         struct comm_point* c = (struct comm_point*)calloc(1,
1471                 sizeof(struct comm_point));
1472         short evbits;
1473         if(!c)
1474                 return NULL;
1475         c->ev = (struct internal_event*)calloc(1,
1476                 sizeof(struct internal_event));
1477         if(!c->ev) {
1478                 free(c);
1479                 return NULL;
1480         }
1481         c->ev->base = base;
1482         c->fd = fd;
1483         c->buffer = buffer;
1484         c->timeout = NULL;
1485         c->tcp_is_reading = 0;
1486         c->tcp_byte_count = 0;
1487         c->tcp_parent = NULL;
1488         c->max_tcp_count = 0;
1489         c->tcp_handlers = NULL;
1490         c->tcp_free = NULL;
1491         c->type = comm_udp;
1492         c->tcp_do_close = 0;
1493         c->do_not_close = 0;
1494         c->tcp_do_toggle_rw = 0;
1495         c->tcp_check_nb_connect = 0;
1496         c->inuse = 0;
1497         c->callback = callback;
1498         c->cb_arg = callback_arg;
1499         evbits = EV_READ | EV_PERSIST;
1500         /* libevent stuff */
1501         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_callback, c);
1502         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1503                 log_err("could not baseset udp event");
1504                 comm_point_delete(c);
1505                 return NULL;
1506         }
1507         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1508                 log_err("could not add udp event");
1509                 comm_point_delete(c);
1510                 return NULL;
1511         }
1512         return c;
1513 }
1514
1515 struct comm_point* 
1516 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
1517         ldns_buffer* buffer, 
1518         comm_point_callback_t* callback, void* callback_arg)
1519 {
1520         struct comm_point* c = (struct comm_point*)calloc(1,
1521                 sizeof(struct comm_point));
1522         short evbits;
1523         if(!c)
1524                 return NULL;
1525         c->ev = (struct internal_event*)calloc(1,
1526                 sizeof(struct internal_event));
1527         if(!c->ev) {
1528                 free(c);
1529                 return NULL;
1530         }
1531         c->ev->base = base;
1532         c->fd = fd;
1533         c->buffer = buffer;
1534         c->timeout = NULL;
1535         c->tcp_is_reading = 0;
1536         c->tcp_byte_count = 0;
1537         c->tcp_parent = NULL;
1538         c->max_tcp_count = 0;
1539         c->tcp_handlers = NULL;
1540         c->tcp_free = NULL;
1541         c->type = comm_udp;
1542         c->tcp_do_close = 0;
1543         c->do_not_close = 0;
1544         c->inuse = 0;
1545         c->tcp_do_toggle_rw = 0;
1546         c->tcp_check_nb_connect = 0;
1547         c->callback = callback;
1548         c->cb_arg = callback_arg;
1549         evbits = EV_READ | EV_PERSIST;
1550         /* libevent stuff */
1551         event_set(&c->ev->ev, c->fd, evbits, comm_point_udp_ancil_callback, c);
1552         if(event_base_set(base->eb->base, &c->ev->ev) != 0) {
1553                 log_err("could not baseset udp event");
1554                 comm_point_delete(c);
1555                 return NULL;
1556         }
1557         if(fd!=-1 && event_add(&c->ev->ev, c->timeout) != 0 ) {
1558                 log_err("could not add udp event");
1559                 comm_point_delete(c);
1560                 return NULL;
1561         }
1562         return c;
1563 }
1564
1565 static struct comm_point* 
1566 comm_point_create_tcp_handler(struct comm_base *base, 
1567         struct comm_point* parent, size_t bufsize,
1568         comm_point_callback_t* callback, void* callback_arg)
1569 {
1570         struct comm_point* c = (struct comm_point*)calloc(1,
1571                 sizeof(struct comm_point));
1572         short evbits;
1573         if(!c)
1574                 return NULL;
1575         c->ev = (struct internal_event*)calloc(1,
1576                 sizeof(struct internal_event));
1577         if(!c->ev) {
1578                 free(c);
1579                 return NULL;
1580         }
1581         c->ev->base = base;
1582         c->fd = -1;
1583         c->buffer = ldns_buffer_new(bufsize);
1584         if(!c->buffer) {
1585                 free(c->ev);
1586                 free(c);
1587                 return NULL;
1588         }
1589         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1590         if(!c->timeout) {
1591                 ldns_buffer_free(c->buffer);
1592                 free(c->ev);
1593                 free(c);
1594                 return NULL;
1595         }
1596         c->tcp_is_reading = 0;
1597         c->tcp_byte_count = 0;
1598         c->tcp_parent = parent;
1599         c->max_tcp_count = 0;
1600         c->tcp_handlers = NULL;
1601         c->tcp_free = NULL;
1602         c->type = comm_tcp;
1603         c->tcp_do_close = 0;
1604         c->do_not_close = 0;
1605         c->tcp_do_toggle_rw = 1;
1606         c->tcp_check_nb_connect = 0;
1607         c->repinfo.c = c;
1608         c->callback = callback;
1609         c->cb_arg = callback_arg;
1610         /* add to parent free list */
1611         c->tcp_free = parent->tcp_free;
1612         parent->tcp_free = c;
1613         /* libevent stuff */
1614         evbits = EV_PERSIST | EV_READ | EV_TIMEOUT;
1615         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1616         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1617         {
1618                 log_err("could not basetset tcphdl event");
1619                 parent->tcp_free = c->tcp_free;
1620                 free(c->ev);
1621                 free(c);
1622                 return NULL;
1623         }
1624         return c;
1625 }
1626
1627 struct comm_point* 
1628 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1629         comm_point_callback_t* callback, void* callback_arg)
1630 {
1631         struct comm_point* c = (struct comm_point*)calloc(1,
1632                 sizeof(struct comm_point));
1633         short evbits;
1634         int i;
1635         /* first allocate the TCP accept listener */
1636         if(!c)
1637                 return NULL;
1638         c->ev = (struct internal_event*)calloc(1,
1639                 sizeof(struct internal_event));
1640         if(!c->ev) {
1641                 free(c);
1642                 return NULL;
1643         }
1644         c->ev->base = base;
1645         c->fd = fd;
1646         c->buffer = NULL;
1647         c->timeout = NULL;
1648         c->tcp_is_reading = 0;
1649         c->tcp_byte_count = 0;
1650         c->tcp_parent = NULL;
1651         c->max_tcp_count = num;
1652         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1653                 sizeof(struct comm_point*));
1654         if(!c->tcp_handlers) {
1655                 free(c->ev);
1656                 free(c);
1657                 return NULL;
1658         }
1659         c->tcp_free = NULL;
1660         c->type = comm_tcp_accept;
1661         c->tcp_do_close = 0;
1662         c->do_not_close = 0;
1663         c->tcp_do_toggle_rw = 0;
1664         c->tcp_check_nb_connect = 0;
1665         c->callback = NULL;
1666         c->cb_arg = NULL;
1667         evbits = EV_READ | EV_PERSIST;
1668         /* libevent stuff */
1669         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_accept_callback, c);
1670         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1671                 event_add(&c->ev->ev, c->timeout) != 0 )
1672         {
1673                 log_err("could not add tcpacc event");
1674                 comm_point_delete(c);
1675                 return NULL;
1676         }
1677
1678         /* now prealloc the tcp handlers */
1679         for(i=0; i<num; i++) {
1680                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1681                         c, bufsize, callback, callback_arg);
1682                 if(!c->tcp_handlers[i]) {
1683                         comm_point_delete(c);
1684                         return NULL;
1685                 }
1686         }
1687         
1688         return c;
1689 }
1690
1691 struct comm_point* 
1692 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1693         comm_point_callback_t* callback, void* callback_arg)
1694 {
1695         struct comm_point* c = (struct comm_point*)calloc(1,
1696                 sizeof(struct comm_point));
1697         short evbits;
1698         if(!c)
1699                 return NULL;
1700         c->ev = (struct internal_event*)calloc(1,
1701                 sizeof(struct internal_event));
1702         if(!c->ev) {
1703                 free(c);
1704                 return NULL;
1705         }
1706         c->ev->base = base;
1707         c->fd = -1;
1708         c->buffer = ldns_buffer_new(bufsize);
1709         if(!c->buffer) {
1710                 free(c->ev);
1711                 free(c);
1712                 return NULL;
1713         }
1714         c->timeout = NULL;
1715         c->tcp_is_reading = 0;
1716         c->tcp_byte_count = 0;
1717         c->tcp_parent = NULL;
1718         c->max_tcp_count = 0;
1719         c->tcp_handlers = NULL;
1720         c->tcp_free = NULL;
1721         c->type = comm_tcp;
1722         c->tcp_do_close = 0;
1723         c->do_not_close = 0;
1724         c->tcp_do_toggle_rw = 1;
1725         c->tcp_check_nb_connect = 1;
1726         c->repinfo.c = c;
1727         c->callback = callback;
1728         c->cb_arg = callback_arg;
1729         evbits = EV_PERSIST | EV_WRITE;
1730         event_set(&c->ev->ev, c->fd, evbits, comm_point_tcp_handle_callback, c);
1731         if(event_base_set(base->eb->base, &c->ev->ev) != 0)
1732         {
1733                 log_err("could not basetset tcpout event");
1734                 ldns_buffer_free(c->buffer);
1735                 free(c->ev);
1736                 free(c);
1737                 return NULL;
1738         }
1739
1740         return c;
1741 }
1742
1743 struct comm_point* 
1744 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1745         comm_point_callback_t* callback, void* callback_arg)
1746 {
1747         struct comm_point* c = (struct comm_point*)calloc(1,
1748                 sizeof(struct comm_point));
1749         short evbits;
1750         if(!c)
1751                 return NULL;
1752         c->ev = (struct internal_event*)calloc(1,
1753                 sizeof(struct internal_event));
1754         if(!c->ev) {
1755                 free(c);
1756                 return NULL;
1757         }
1758         c->ev->base = base;
1759         c->fd = fd;
1760         c->buffer = ldns_buffer_new(bufsize);
1761         if(!c->buffer) {
1762                 free(c->ev);
1763                 free(c);
1764                 return NULL;
1765         }
1766         c->timeout = NULL;
1767         c->tcp_is_reading = 1;
1768         c->tcp_byte_count = 0;
1769         c->tcp_parent = NULL;
1770         c->max_tcp_count = 0;
1771         c->tcp_handlers = NULL;
1772         c->tcp_free = NULL;
1773         c->type = comm_local;
1774         c->tcp_do_close = 0;
1775         c->do_not_close = 1;
1776         c->tcp_do_toggle_rw = 0;
1777         c->tcp_check_nb_connect = 0;
1778         c->callback = callback;
1779         c->cb_arg = callback_arg;
1780         /* libevent stuff */
1781         evbits = EV_PERSIST | EV_READ;
1782         event_set(&c->ev->ev, c->fd, evbits, comm_point_local_handle_callback, 
1783                 c);
1784         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1785                 event_add(&c->ev->ev, c->timeout) != 0 )
1786         {
1787                 log_err("could not add localhdl event");
1788                 free(c->ev);
1789                 free(c);
1790                 return NULL;
1791         }
1792         return c;
1793 }
1794
1795 struct comm_point* 
1796 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
1797         comm_point_callback_t* callback, void* callback_arg)
1798 {
1799         struct comm_point* c = (struct comm_point*)calloc(1,
1800                 sizeof(struct comm_point));
1801         short evbits;
1802         if(!c)
1803                 return NULL;
1804         c->ev = (struct internal_event*)calloc(1,
1805                 sizeof(struct internal_event));
1806         if(!c->ev) {
1807                 free(c);
1808                 return NULL;
1809         }
1810         c->ev->base = base;
1811         c->fd = fd;
1812         c->buffer = NULL;
1813         c->timeout = NULL;
1814         c->tcp_is_reading = 0;
1815         c->tcp_byte_count = 0;
1816         c->tcp_parent = NULL;
1817         c->max_tcp_count = 0;
1818         c->tcp_handlers = NULL;
1819         c->tcp_free = NULL;
1820         c->type = comm_raw;
1821         c->tcp_do_close = 0;
1822         c->do_not_close = 1;
1823         c->tcp_do_toggle_rw = 0;
1824         c->tcp_check_nb_connect = 0;
1825         c->callback = callback;
1826         c->cb_arg = callback_arg;
1827         /* libevent stuff */
1828         if(writing)
1829                 evbits = EV_PERSIST | EV_WRITE;
1830         else    evbits = EV_PERSIST | EV_READ;
1831         event_set(&c->ev->ev, c->fd, evbits, comm_point_raw_handle_callback, 
1832                 c);
1833         if(event_base_set(base->eb->base, &c->ev->ev) != 0 ||
1834                 event_add(&c->ev->ev, c->timeout) != 0 )
1835         {
1836                 log_err("could not add rawhdl event");
1837                 free(c->ev);
1838                 free(c);
1839                 return NULL;
1840         }
1841         return c;
1842 }
1843
1844 void 
1845 comm_point_close(struct comm_point* c)
1846 {
1847         if(!c)
1848                 return;
1849         if(c->fd != -1)
1850                 if(event_del(&c->ev->ev) != 0) {
1851                         log_err("could not event_del on close");
1852                 }
1853         /* close fd after removing from event lists, or epoll.. is messed up */
1854         if(c->fd != -1 && !c->do_not_close) {
1855                 verbose(VERB_ALGO, "close fd %d", c->fd);
1856 #ifndef USE_WINSOCK
1857                 close(c->fd);
1858 #else
1859                 closesocket(c->fd);
1860 #endif
1861         }
1862         c->fd = -1;
1863 }
1864
1865 void 
1866 comm_point_delete(struct comm_point* c)
1867 {
1868         if(!c) 
1869                 return;
1870         if(c->type == comm_tcp && c->ssl) {
1871 #ifdef HAVE_SSL
1872                 SSL_shutdown(c->ssl);
1873                 SSL_free(c->ssl);
1874 #endif
1875         }
1876         comm_point_close(c);
1877         if(c->tcp_handlers) {
1878                 int i;
1879                 for(i=0; i<c->max_tcp_count; i++)
1880                         comm_point_delete(c->tcp_handlers[i]);
1881                 free(c->tcp_handlers);
1882         }
1883         free(c->timeout);
1884         if(c->type == comm_tcp || c->type == comm_local)
1885                 ldns_buffer_free(c->buffer);
1886         free(c->ev);
1887         free(c);
1888 }
1889
1890 void 
1891 comm_point_send_reply(struct comm_reply *repinfo)
1892 {
1893         log_assert(repinfo && repinfo->c);
1894         if(repinfo->c->type == comm_udp) {
1895                 if(repinfo->srctype)
1896                         comm_point_send_udp_msg_if(repinfo->c, 
1897                         repinfo->c->buffer, (struct sockaddr*)&repinfo->addr, 
1898                         repinfo->addrlen, repinfo);
1899                 else
1900                         comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
1901                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
1902         } else {
1903                 comm_point_start_listening(repinfo->c, -1, TCP_QUERY_TIMEOUT);
1904         }
1905 }
1906
1907 void 
1908 comm_point_drop_reply(struct comm_reply* repinfo)
1909 {
1910         if(!repinfo)
1911                 return;
1912         log_assert(repinfo && repinfo->c);
1913         log_assert(repinfo->c->type != comm_tcp_accept);
1914         if(repinfo->c->type == comm_udp)
1915                 return;
1916         reclaim_tcp_handler(repinfo->c);
1917 }
1918
1919 void 
1920 comm_point_stop_listening(struct comm_point* c)
1921 {
1922         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
1923         if(event_del(&c->ev->ev) != 0) {
1924                 log_err("event_del error to stoplisten");
1925         }
1926 }
1927
1928 void 
1929 comm_point_start_listening(struct comm_point* c, int newfd, int sec)
1930 {
1931         verbose(VERB_ALGO, "comm point start listening %d", 
1932                 c->fd==-1?newfd:c->fd);
1933         if(c->type == comm_tcp_accept && !c->tcp_free) {
1934                 /* no use to start listening no free slots. */
1935                 return;
1936         }
1937         if(sec != -1 && sec != 0) {
1938                 if(!c->timeout) {
1939                         c->timeout = (struct timeval*)malloc(sizeof(
1940                                 struct timeval));
1941                         if(!c->timeout) {
1942                                 log_err("cpsl: malloc failed. No net read.");
1943                                 return;
1944                         }
1945                 }
1946                 c->ev->ev.ev_events |= EV_TIMEOUT;
1947 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
1948                 c->timeout->tv_sec = sec;
1949                 c->timeout->tv_usec = 0;
1950 #endif /* S_SPLINT_S */
1951         }
1952         if(c->type == comm_tcp) {
1953                 c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1954                 if(c->tcp_is_reading)
1955                         c->ev->ev.ev_events |= EV_READ;
1956                 else    c->ev->ev.ev_events |= EV_WRITE;
1957         }
1958         if(newfd != -1) {
1959                 if(c->fd != -1) {
1960 #ifndef USE_WINSOCK
1961                         close(c->fd);
1962 #else
1963                         closesocket(c->fd);
1964 #endif
1965                 }
1966                 c->fd = newfd;
1967                 c->ev->ev.ev_fd = c->fd;
1968         }
1969         if(event_add(&c->ev->ev, sec==0?NULL:c->timeout) != 0) {
1970                 log_err("event_add failed. in cpsl.");
1971         }
1972 }
1973
1974 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
1975 {
1976         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
1977         if(event_del(&c->ev->ev) != 0) {
1978                 log_err("event_del error to cplf");
1979         }
1980         c->ev->ev.ev_events &= ~(EV_READ|EV_WRITE);
1981         if(rd) c->ev->ev.ev_events |= EV_READ;
1982         if(wr) c->ev->ev.ev_events |= EV_WRITE;
1983         if(event_add(&c->ev->ev, c->timeout) != 0) {
1984                 log_err("event_add failed. in cplf.");
1985         }
1986 }
1987
1988 size_t comm_point_get_mem(struct comm_point* c)
1989 {
1990         size_t s;
1991         if(!c) 
1992                 return 0;
1993         s = sizeof(*c) + sizeof(*c->ev);
1994         if(c->timeout) 
1995                 s += sizeof(*c->timeout);
1996         if(c->type == comm_tcp || c->type == comm_local)
1997                 s += sizeof(*c->buffer) + ldns_buffer_capacity(c->buffer);
1998         if(c->type == comm_tcp_accept) {
1999                 int i;
2000                 for(i=0; i<c->max_tcp_count; i++)
2001                         s += comm_point_get_mem(c->tcp_handlers[i]);
2002         }
2003         return s;
2004 }
2005
2006 struct comm_timer* 
2007 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2008 {
2009         struct comm_timer *tm = (struct comm_timer*)calloc(1,
2010                 sizeof(struct comm_timer));
2011         if(!tm)
2012                 return NULL;
2013         tm->ev_timer = (struct internal_timer*)calloc(1,
2014                 sizeof(struct internal_timer));
2015         if(!tm->ev_timer) {
2016                 log_err("malloc failed");
2017                 free(tm);
2018                 return NULL;
2019         }
2020         tm->ev_timer->base = base;
2021         tm->callback = cb;
2022         tm->cb_arg = cb_arg;
2023         event_set(&tm->ev_timer->ev, -1, EV_TIMEOUT, 
2024                 comm_timer_callback, tm);
2025         if(event_base_set(base->eb->base, &tm->ev_timer->ev) != 0) {
2026                 log_err("timer_create: event_base_set failed.");
2027                 free(tm->ev_timer);
2028                 free(tm);
2029                 return NULL;
2030         }
2031         return tm;
2032 }
2033
2034 void 
2035 comm_timer_disable(struct comm_timer* timer)
2036 {
2037         if(!timer)
2038                 return;
2039         evtimer_del(&timer->ev_timer->ev);
2040         timer->ev_timer->enabled = 0;
2041 }
2042
2043 void 
2044 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2045 {
2046         log_assert(tv);
2047         if(timer->ev_timer->enabled)
2048                 comm_timer_disable(timer);
2049         event_set(&timer->ev_timer->ev, -1, EV_TIMEOUT,
2050                 comm_timer_callback, timer);
2051         if(event_base_set(timer->ev_timer->base->eb->base, 
2052                 &timer->ev_timer->ev) != 0)
2053                 log_err("comm_timer_set: set_base failed.");
2054         if(evtimer_add(&timer->ev_timer->ev, tv) != 0)
2055                 log_err("comm_timer_set: evtimer_add failed.");
2056         timer->ev_timer->enabled = 1;
2057 }
2058
2059 void 
2060 comm_timer_delete(struct comm_timer* timer)
2061 {
2062         if(!timer)
2063                 return;
2064         comm_timer_disable(timer);
2065         free(timer->ev_timer);
2066         free(timer);
2067 }
2068
2069 void 
2070 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2071 {
2072         struct comm_timer* tm = (struct comm_timer*)arg;
2073         if(!(event&EV_TIMEOUT))
2074                 return;
2075         comm_base_now(tm->ev_timer->base);
2076         tm->ev_timer->enabled = 0;
2077         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2078         (*tm->callback)(tm->cb_arg);
2079 }
2080
2081 int 
2082 comm_timer_is_set(struct comm_timer* timer)
2083 {
2084         return (int)timer->ev_timer->enabled;
2085 }
2086
2087 size_t 
2088 comm_timer_get_mem(struct comm_timer* timer)
2089 {
2090         return sizeof(*timer) + sizeof(struct internal_timer);
2091 }
2092
2093 struct comm_signal* 
2094 comm_signal_create(struct comm_base* base,
2095         void (*callback)(int, void*), void* cb_arg)
2096 {
2097         struct comm_signal* com = (struct comm_signal*)malloc(
2098                 sizeof(struct comm_signal));
2099         if(!com) {
2100                 log_err("malloc failed");
2101                 return NULL;
2102         }
2103         com->base = base;
2104         com->callback = callback;
2105         com->cb_arg = cb_arg;
2106         com->ev_signal = NULL;
2107         return com;
2108 }
2109
2110 void 
2111 comm_signal_callback(int sig, short event, void* arg)
2112 {
2113         struct comm_signal* comsig = (struct comm_signal*)arg;
2114         if(!(event & EV_SIGNAL))
2115                 return;
2116         comm_base_now(comsig->base);
2117         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2118         (*comsig->callback)(sig, comsig->cb_arg);
2119 }
2120
2121 int 
2122 comm_signal_bind(struct comm_signal* comsig, int sig)
2123 {
2124         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
2125                 sizeof(struct internal_signal));
2126         if(!entry) {
2127                 log_err("malloc failed");
2128                 return 0;
2129         }
2130         log_assert(comsig);
2131         /* add signal event */
2132         signal_set(&entry->ev, sig, comm_signal_callback, comsig);
2133         if(event_base_set(comsig->base->eb->base, &entry->ev) != 0) {
2134                 log_err("Could not set signal base");
2135                 free(entry);
2136                 return 0;
2137         }
2138         if(signal_add(&entry->ev, NULL) != 0) {
2139                 log_err("Could not add signal handler");
2140                 free(entry);
2141                 return 0;
2142         }
2143         /* link into list */
2144         entry->next = comsig->ev_signal;
2145         comsig->ev_signal = entry;
2146         return 1;
2147 }
2148
2149 void 
2150 comm_signal_delete(struct comm_signal* comsig)
2151 {
2152         struct internal_signal* p, *np;
2153         if(!comsig)
2154                 return;
2155         p=comsig->ev_signal;
2156         while(p) {
2157                 np = p->next;
2158                 signal_del(&p->ev);
2159                 free(p);
2160                 p = np;
2161         }
2162         free(comsig);
2163 }