]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/util/netevent.c
Upgrade Unbound to 1.6.1. More to follow.
[FreeBSD/FreeBSD.git] / contrib / unbound / util / netevent.c
1 /*
2  * util/netevent.c - event notification
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains event notification functions.
40  */
41 #include "config.h"
42 #include "util/netevent.h"
43 #include "util/ub_event.h"
44 #include "util/log.h"
45 #include "util/net_help.h"
46 #include "util/fptr_wlist.h"
47 #include "sldns/pkthdr.h"
48 #include "sldns/sbuffer.h"
49 #include "dnstap/dnstap.h"
50 #ifdef HAVE_OPENSSL_SSL_H
51 #include <openssl/ssl.h>
52 #endif
53 #ifdef HAVE_OPENSSL_ERR_H
54 #include <openssl/err.h>
55 #endif
56
57 /* -------- Start of local definitions -------- */
/**
 * If CMSG_ALIGN is not defined on this platform, a workaround.
 * Prefer the platform's own private alignment macro when one exists
 * (__CMSG_ALIGN or _CMSG_DATA_ALIGN); otherwise round up to sizeof(long).
 * The test is on _CMSG_DATA_ALIGN, the same macro that is used in the
 * definition, so that the branch can actually be selected.
 */
#ifndef CMSG_ALIGN
#  ifdef __CMSG_ALIGN
#    define CMSG_ALIGN(n) __CMSG_ALIGN(n)
#  elif defined(_CMSG_DATA_ALIGN)
#    define CMSG_ALIGN(n) _CMSG_DATA_ALIGN(n)
#  else
#    define CMSG_ALIGN(len) (((len)+sizeof(long)-1) & ~(sizeof(long)-1))
#  endif
#endif
68
/** Fallback for platforms without CMSG_LEN: aligned header plus payload. */
#if !defined(CMSG_LEN)
#  define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr))+(len))
#endif

/** Fallback for platforms without CMSG_SPACE: aligned payload plus the
 * aligned header, using _CMSG_HDR_ALIGN for the header when available. */
#if !defined(CMSG_SPACE)
#  if defined(_CMSG_HDR_ALIGN)
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+_CMSG_HDR_ALIGN(sizeof(struct cmsghdr)))
#  else
#    define CMSG_SPACE(l) (CMSG_ALIGN(l)+CMSG_ALIGN(sizeof(struct cmsghdr)))
#  endif
#endif
82
/** Timeout for reading or writing a query over TCP, in milliseconds */
#define TCP_QUERY_TIMEOUT 120000
/** Shorter TCP timeout in milliseconds, used for a new connection when
 * more than half of the TCP handlers are already in use */
#define TCP_QUERY_TIMEOUT_FAST 200

/** number of UDP reads to perform per read indication from select;
 * a single read when nonblocking sockets are broken */
#ifdef NONBLOCKING_IS_BROKEN
#define NUM_UDP_PER_SELECT 1
#else
#define NUM_UDP_PER_SELECT 100
#endif
94
/**
 * Internal event bookkeeping: keeps the ub_event for an event together
 * with the comm base. May be embedded in other structures (list, tree).
 */
struct internal_event {
	/** the comm base */
	struct comm_base *base;
	/** the ub_event for this event */
	struct ub_event *ev;
};
105
/**
 * Internal base structure. Each thread gets its own, so that every thread
 * has its own events.
 */
struct internal_base {
	/** the ub_event event_base */
	struct ub_event_base *base;
	/** storage that the seconds time pointer points to */
	time_t secs;
	/** storage for the current time as a timeval */
	struct timeval now;
	/** timeout event used for slow_accept */
	struct ub_event *slow_accept;
	/** nonzero while slow_accept is enabled */
	int slow_accept_enabled;
};
121
122 /**
123  * Internal timer structure, to store timer event in.
124  */
125 struct internal_timer {
126         /** the super struct from which derived */
127         struct comm_timer super;
128         /** the comm base */
129         struct comm_base* base;
130         /** ub_event event type */
131         struct ub_event* ev;
132         /** is timer enabled */
133         uint8_t enabled;
134 };
135
/**
 * Internal signal structure, holds one signal event; entries are chained
 * into a singly linked list.
 */
struct internal_signal {
	/** the ub_event for the signal */
	struct ub_event *ev;
	/** next entry in the signal list */
	struct internal_signal *next;
};
145
146 /** create a tcp handler with a parent */
147 static struct comm_point* comm_point_create_tcp_handler(
148         struct comm_base *base, struct comm_point* parent, size_t bufsize,
149         comm_point_callback_type* callback, void* callback_arg);
150
151 /* -------- End of local definitions -------- */
152
153 struct comm_base* 
154 comm_base_create(int sigs)
155 {
156         struct comm_base* b = (struct comm_base*)calloc(1,
157                 sizeof(struct comm_base));
158         const char *evnm="event", *evsys="", *evmethod="";
159
160         if(!b)
161                 return NULL;
162         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
163         if(!b->eb) {
164                 free(b);
165                 return NULL;
166         }
167         b->eb->base = ub_default_event_base(sigs, &b->eb->secs, &b->eb->now);
168         if(!b->eb->base) {
169                 free(b->eb);
170                 free(b);
171                 return NULL;
172         }
173         ub_comm_base_now(b);
174         ub_get_event_sys(b->eb->base, &evnm, &evsys, &evmethod);
175         verbose(VERB_ALGO, "%s %s user %s method.", evnm, evsys, evmethod);
176         return b;
177 }
178
179 struct comm_base*
180 comm_base_create_event(struct ub_event_base* base)
181 {
182         struct comm_base* b = (struct comm_base*)calloc(1,
183                 sizeof(struct comm_base));
184         if(!b)
185                 return NULL;
186         b->eb = (struct internal_base*)calloc(1, sizeof(struct internal_base));
187         if(!b->eb) {
188                 free(b);
189                 return NULL;
190         }
191         b->eb->base = base;
192         ub_comm_base_now(b);
193         return b;
194 }
195
196 void 
197 comm_base_delete(struct comm_base* b)
198 {
199         if(!b)
200                 return;
201         if(b->eb->slow_accept_enabled) {
202                 if(ub_event_del(b->eb->slow_accept) != 0) {
203                         log_err("could not event_del slow_accept");
204                 }
205                 ub_event_free(b->eb->slow_accept);
206         }
207         ub_event_base_free(b->eb->base);
208         b->eb->base = NULL;
209         free(b->eb);
210         free(b);
211 }
212
213 void 
214 comm_base_delete_no_base(struct comm_base* b)
215 {
216         if(!b)
217                 return;
218         if(b->eb->slow_accept_enabled) {
219                 if(ub_event_del(b->eb->slow_accept) != 0) {
220                         log_err("could not event_del slow_accept");
221                 }
222                 ub_event_free(b->eb->slow_accept);
223         }
224         b->eb->base = NULL;
225         free(b->eb);
226         free(b);
227 }
228
229 void 
230 comm_base_timept(struct comm_base* b, time_t** tt, struct timeval** tv)
231 {
232         *tt = &b->eb->secs;
233         *tv = &b->eb->now;
234 }
235
236 void 
237 comm_base_dispatch(struct comm_base* b)
238 {
239         int retval;
240         retval = ub_event_base_dispatch(b->eb->base);
241         if(retval < 0) {
242                 fatal_exit("event_dispatch returned error %d, "
243                         "errno is %s", retval, strerror(errno));
244         }
245 }
246
247 void comm_base_exit(struct comm_base* b)
248 {
249         if(ub_event_base_loopexit(b->eb->base) != 0) {
250                 log_err("Could not loopexit");
251         }
252 }
253
254 void comm_base_set_slow_accept_handlers(struct comm_base* b,
255         void (*stop_acc)(void*), void (*start_acc)(void*), void* arg)
256 {
257         b->stop_accept = stop_acc;
258         b->start_accept = start_acc;
259         b->cb_arg = arg;
260 }
261
262 struct ub_event_base* comm_base_internal(struct comm_base* b)
263 {
264         return b->eb->base;
265 }
266
267 /** see if errno for udp has to be logged or not uses globals */
268 static int
269 udp_send_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
270 {
271         /* do not log transient errors (unless high verbosity) */
272 #if defined(ENETUNREACH) || defined(EHOSTDOWN) || defined(EHOSTUNREACH) || defined(ENETDOWN)
273         switch(errno) {
274 #  ifdef ENETUNREACH
275                 case ENETUNREACH:
276 #  endif
277 #  ifdef EHOSTDOWN
278                 case EHOSTDOWN:
279 #  endif
280 #  ifdef EHOSTUNREACH
281                 case EHOSTUNREACH:
282 #  endif
283 #  ifdef ENETDOWN
284                 case ENETDOWN:
285 #  endif
286                         if(verbosity < VERB_ALGO)
287                                 return 0;
288                 default:
289                         break;
290         }
291 #endif
292         /* permission denied is gotten for every send if the
293          * network is disconnected (on some OS), squelch it */
294         if( ((errno == EPERM)
295 #  ifdef EADDRNOTAVAIL
296                 /* 'Cannot assign requested address' also when disconnected */
297                 || (errno == EADDRNOTAVAIL)
298 #  endif
299                 ) && verbosity < VERB_DETAIL)
300                 return 0;
301         /* squelch errors where people deploy AAAA ::ffff:bla for
302          * authority servers, which we try for intranets. */
303         if(errno == EINVAL && addr_is_ip4mapped(
304                 (struct sockaddr_storage*)addr, addrlen) &&
305                 verbosity < VERB_DETAIL)
306                 return 0;
307         /* SO_BROADCAST sockopt can give access to 255.255.255.255,
308          * but a dns cache does not need it. */
309         if(errno == EACCES && addr_is_broadcast(
310                 (struct sockaddr_storage*)addr, addrlen) &&
311                 verbosity < VERB_DETAIL)
312                 return 0;
313         return 1;
314 }
315
/**
 * See if the errno of a failed tcp connect has to be logged; applies the
 * same rules as for a failed udp send.
 * @param addr: destination address.
 * @param addrlen: length of addr.
 * @return 0 if the error is squelched, 1 if it has to be logged.
 */
int tcp_connect_errno_needs_log(struct sockaddr* addr, socklen_t addrlen)
{
	int needs_log = udp_send_errno_needs_log(addr, addrlen);

	return needs_log;
}
320
321 /* send a UDP reply */
322 int
323 comm_point_send_udp_msg(struct comm_point *c, sldns_buffer* packet,
324         struct sockaddr* addr, socklen_t addrlen) 
325 {
326         ssize_t sent;
327         log_assert(c->fd != -1);
328 #ifdef UNBOUND_DEBUG
329         if(sldns_buffer_remaining(packet) == 0)
330                 log_err("error: send empty UDP packet");
331 #endif
332         log_assert(addr && addrlen > 0);
333         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
334                 sldns_buffer_remaining(packet), 0,
335                 addr, addrlen);
336         if(sent == -1) {
337                 /* try again and block, waiting for IO to complete,
338                  * we want to send the answer, and we will wait for
339                  * the ethernet interface buffer to have space. */
340 #ifndef USE_WINSOCK
341                 if(errno == EAGAIN || 
342 #  ifdef EWOULDBLOCK
343                         errno == EWOULDBLOCK ||
344 #  endif
345                         errno == ENOBUFS) {
346 #else
347                 if(WSAGetLastError() == WSAEINPROGRESS ||
348                         WSAGetLastError() == WSAENOBUFS ||
349                         WSAGetLastError() == WSAEWOULDBLOCK) {
350 #endif
351                         int e;
352                         fd_set_block(c->fd);
353                         sent = sendto(c->fd, (void*)sldns_buffer_begin(packet), 
354                                 sldns_buffer_remaining(packet), 0,
355                                 addr, addrlen);
356                         e = errno;
357                         fd_set_nonblock(c->fd);
358                         errno = e;
359                 }
360         }
361         if(sent == -1) {
362                 if(!udp_send_errno_needs_log(addr, addrlen))
363                         return 0;
364 #ifndef USE_WINSOCK
365                 verbose(VERB_OPS, "sendto failed: %s", strerror(errno));
366 #else
367                 verbose(VERB_OPS, "sendto failed: %s", 
368                         wsa_strerror(WSAGetLastError()));
369 #endif
370                 log_addr(VERB_OPS, "remote address is", 
371                         (struct sockaddr_storage*)addr, addrlen);
372                 return 0;
373         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
374                 log_err("sent %d in place of %d bytes", 
375                         (int)sent, (int)sldns_buffer_remaining(packet));
376                 return 0;
377         }
378         return 1;
379 }
380
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && (defined(HAVE_RECVMSG) || defined(HAVE_SENDMSG))
/**
 * Print the ancillary info of a reply to the log, for debugging.
 * @param str: prefix for the log line.
 * @param r: reply whose srctype and pktinfo are printed.
 */
static void p_ancil(const char* str, struct comm_reply* r)
{
	if(r->srctype == 6) {
		char text[1024];
		if(!inet_ntop(AF_INET6, &r->pktinfo.v6info.ipi6_addr, 
			text, (socklen_t)sizeof(text)))
			(void)strlcpy(text, "(inet_ntop error)", sizeof(text));
		text[sizeof(text)-1]=0;
		log_info("%s: %s %d", str, text, r->pktinfo.v6info.ipi6_ifindex);
	} else if(r->srctype == 4) {
#ifdef IP_PKTINFO
		char dst[1024], spec[1024];
		if(!inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_addr, 
			dst, (socklen_t)sizeof(dst)))
			(void)strlcpy(dst, "(inet_ntop error)", sizeof(dst));
		dst[sizeof(dst)-1]=0;
#ifdef HAVE_STRUCT_IN_PKTINFO_IPI_SPEC_DST
		if(!inet_ntop(AF_INET, &r->pktinfo.v4info.ipi_spec_dst, 
			spec, (socklen_t)sizeof(spec)))
			(void)strlcpy(spec, "(inet_ntop error)", sizeof(spec));
		spec[sizeof(spec)-1]=0;
#else
		/* no ipi_spec_dst member, print an empty string for it */
		spec[0]=0;
#endif
		log_info("%s: %d %s %s", str, r->pktinfo.v4info.ipi_ifindex,
			dst, spec);
#elif defined(IP_RECVDSTADDR)
		char dst[1024];
		if(!inet_ntop(AF_INET, &r->pktinfo.v4addr, 
			dst, (socklen_t)sizeof(dst)))
			(void)strlcpy(dst, "(inet_ntop error)", sizeof(dst));
		dst[sizeof(dst)-1]=0;
		log_info("%s: %s", str, dst);
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
	} else {
		log_info("%s: unknown srctype %d", str, r->srctype);
	}
}
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG||HAVE_SENDMSG */
428
429 /** send a UDP reply over specified interface*/
430 static int
431 comm_point_send_udp_msg_if(struct comm_point *c, sldns_buffer* packet,
432         struct sockaddr* addr, socklen_t addrlen, struct comm_reply* r) 
433 {
434 #if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_SENDMSG)
435         ssize_t sent;
436         struct msghdr msg;
437         struct iovec iov[1];
438         char control[256];
439 #ifndef S_SPLINT_S
440         struct cmsghdr *cmsg;
441 #endif /* S_SPLINT_S */
442
443         log_assert(c->fd != -1);
444 #ifdef UNBOUND_DEBUG
445         if(sldns_buffer_remaining(packet) == 0)
446                 log_err("error: send empty UDP packet");
447 #endif
448         log_assert(addr && addrlen > 0);
449
450         msg.msg_name = addr;
451         msg.msg_namelen = addrlen;
452         iov[0].iov_base = sldns_buffer_begin(packet);
453         iov[0].iov_len = sldns_buffer_remaining(packet);
454         msg.msg_iov = iov;
455         msg.msg_iovlen = 1;
456         msg.msg_control = control;
457 #ifndef S_SPLINT_S
458         msg.msg_controllen = sizeof(control);
459 #endif /* S_SPLINT_S */
460         msg.msg_flags = 0;
461
462 #ifndef S_SPLINT_S
463         cmsg = CMSG_FIRSTHDR(&msg);
464         if(r->srctype == 4) {
465 #ifdef IP_PKTINFO
466                 void* cmsg_data;
467                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
468                 log_assert(msg.msg_controllen <= sizeof(control));
469                 cmsg->cmsg_level = IPPROTO_IP;
470                 cmsg->cmsg_type = IP_PKTINFO;
471                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4info,
472                         sizeof(struct in_pktinfo));
473                 /* unset the ifindex to not bypass the routing tables */
474                 cmsg_data = CMSG_DATA(cmsg);
475                 ((struct in_pktinfo *) cmsg_data)->ipi_ifindex = 0;
476                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
477 #elif defined(IP_SENDSRCADDR)
478                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
479                 log_assert(msg.msg_controllen <= sizeof(control));
480                 cmsg->cmsg_level = IPPROTO_IP;
481                 cmsg->cmsg_type = IP_SENDSRCADDR;
482                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v4addr,
483                         sizeof(struct in_addr));
484                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
485 #else
486                 verbose(VERB_ALGO, "no IP_PKTINFO or IP_SENDSRCADDR");
487                 msg.msg_control = NULL;
488 #endif /* IP_PKTINFO or IP_SENDSRCADDR */
489         } else if(r->srctype == 6) {
490                 void* cmsg_data;
491                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
492                 log_assert(msg.msg_controllen <= sizeof(control));
493                 cmsg->cmsg_level = IPPROTO_IPV6;
494                 cmsg->cmsg_type = IPV6_PKTINFO;
495                 memmove(CMSG_DATA(cmsg), &r->pktinfo.v6info,
496                         sizeof(struct in6_pktinfo));
497                 /* unset the ifindex to not bypass the routing tables */
498                 cmsg_data = CMSG_DATA(cmsg);
499                 ((struct in6_pktinfo *) cmsg_data)->ipi6_ifindex = 0;
500                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
501         } else {
502                 /* try to pass all 0 to use default route */
503                 msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
504                 log_assert(msg.msg_controllen <= sizeof(control));
505                 cmsg->cmsg_level = IPPROTO_IPV6;
506                 cmsg->cmsg_type = IPV6_PKTINFO;
507                 memset(CMSG_DATA(cmsg), 0, sizeof(struct in6_pktinfo));
508                 cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
509         }
510 #endif /* S_SPLINT_S */
511         if(verbosity >= VERB_ALGO)
512                 p_ancil("send_udp over interface", r);
513         sent = sendmsg(c->fd, &msg, 0);
514         if(sent == -1) {
515                 /* try again and block, waiting for IO to complete,
516                  * we want to send the answer, and we will wait for
517                  * the ethernet interface buffer to have space. */
518 #ifndef USE_WINSOCK
519                 if(errno == EAGAIN || 
520 #  ifdef EWOULDBLOCK
521                         errno == EWOULDBLOCK ||
522 #  endif
523                         errno == ENOBUFS) {
524 #else
525                 if(WSAGetLastError() == WSAEINPROGRESS ||
526                         WSAGetLastError() == WSAENOBUFS ||
527                         WSAGetLastError() == WSAEWOULDBLOCK) {
528 #endif
529                         int e;
530                         fd_set_block(c->fd);
531                         sent = sendmsg(c->fd, &msg, 0);
532                         e = errno;
533                         fd_set_nonblock(c->fd);
534                         errno = e;
535                 }
536         }
537         if(sent == -1) {
538                 if(!udp_send_errno_needs_log(addr, addrlen))
539                         return 0;
540                 verbose(VERB_OPS, "sendmsg failed: %s", strerror(errno));
541                 log_addr(VERB_OPS, "remote address is", 
542                         (struct sockaddr_storage*)addr, addrlen);
543 #ifdef __NetBSD__
544                 /* netbsd 7 has IP_PKTINFO for recv but not send */
545                 if(errno == EINVAL && r->srctype == 4)
546                         log_err("sendmsg: No support for sendmsg(IP_PKTINFO). "
547                                 "Please disable interface-automatic");
548 #endif
549                 return 0;
550         } else if((size_t)sent != sldns_buffer_remaining(packet)) {
551                 log_err("sent %d in place of %d bytes", 
552                         (int)sent, (int)sldns_buffer_remaining(packet));
553                 return 0;
554         }
555         return 1;
556 #else
557         (void)c;
558         (void)packet;
559         (void)addr;
560         (void)addrlen;
561         (void)r;
562         log_err("sendmsg: IPV6_PKTINFO not supported");
563         return 0;
564 #endif /* AF_INET6 && IPV6_PKTINFO && HAVE_SENDMSG */
565 }
566
/**
 * Event callback for a UDP comm point that uses ancillary data.
 * Reads up to NUM_UDP_PER_SELECT packets with recvmsg(), stores the
 * packet info from the ancillary data in the reply info, and calls the
 * comm point callback for every packet. When that callback returns
 * nonzero, the buffer is sent back at once over the same interface.
 * Without AF_INET6, IPV6_PKTINFO and recvmsg support this exits the
 * program with a fatal error.
 * @param fd: the socket that has the event.
 * @param event: event bits; only UB_EV_READ is acted upon.
 * @param arg: the comm point (struct comm_point*), of type comm_udp.
 */
void 
comm_point_udp_ancil_callback(int fd, short event, void* arg)
{
#if defined(AF_INET6) && defined(IPV6_PKTINFO) && defined(HAVE_RECVMSG)
	struct comm_reply rep;
	struct msghdr msg;
	struct iovec iov[1];
	ssize_t rcv;
	/* a union, so that the buffer is suitably aligned for the
	 * struct cmsghdr entries that are read from it */
	union {
		struct cmsghdr hdr;
		char buf[256];
	} ancil;
	int i;
#ifndef S_SPLINT_S
	struct cmsghdr* cmsg;
#endif /* S_SPLINT_S */

	rep.c = (struct comm_point*)arg;
	log_assert(rep.c->type == comm_udp);

	if(!(event&UB_EV_READ))
		return;
	log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
	ub_comm_base_now(rep.c->ev->base);
	for(i=0; i<NUM_UDP_PER_SELECT; i++) {
		sldns_buffer_clear(rep.c->buffer);
		rep.addrlen = (socklen_t)sizeof(rep.addr);
		log_assert(fd != -1);
		log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
		msg.msg_name = &rep.addr;
		msg.msg_namelen = (socklen_t)sizeof(rep.addr);
		iov[0].iov_base = sldns_buffer_begin(rep.c->buffer);
		iov[0].iov_len = sldns_buffer_remaining(rep.c->buffer);
		msg.msg_iov = iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ancil.buf;
#ifndef S_SPLINT_S
		msg.msg_controllen = sizeof(ancil.buf);
#endif /* S_SPLINT_S */
		msg.msg_flags = 0;
		rcv = recvmsg(fd, &msg, 0);
		if(rcv == -1) {
			/* EAGAIN and EINTR are not logged */
			if(errno != EAGAIN && errno != EINTR) {
				log_err("recvmsg failed: %s", strerror(errno));
			}
			return;
		}
		rep.addrlen = msg.msg_namelen;
		sldns_buffer_skip(rep.c->buffer, rcv);
		sldns_buffer_flip(rep.c->buffer);
		/* srctype stays 0 if no packet info is found below */
		rep.srctype = 0;
#ifndef S_SPLINT_S
		for(cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
			cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if( cmsg->cmsg_level == IPPROTO_IPV6 &&
				cmsg->cmsg_type == IPV6_PKTINFO) {
				rep.srctype = 6;
				memmove(&rep.pktinfo.v6info, CMSG_DATA(cmsg),
					sizeof(struct in6_pktinfo));
				break;
#ifdef IP_PKTINFO
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_PKTINFO) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4info, CMSG_DATA(cmsg),
					sizeof(struct in_pktinfo));
				break;
#elif defined(IP_RECVDSTADDR)
			} else if( cmsg->cmsg_level == IPPROTO_IP &&
				cmsg->cmsg_type == IP_RECVDSTADDR) {
				rep.srctype = 4;
				memmove(&rep.pktinfo.v4addr, CMSG_DATA(cmsg),
					sizeof(struct in_addr));
				break;
#endif /* IP_PKTINFO or IP_RECVDSTADDR */
			}
		}
		if(verbosity >= VERB_ALGO)
			p_ancil("receive_udp on interface", &rep);
#endif /* S_SPLINT_S */
		fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
		if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
			/* send back immediate reply */
			(void)comm_point_send_udp_msg_if(rep.c, rep.c->buffer,
				(struct sockaddr*)&rep.addr, rep.addrlen, &rep);
		}
		/* stop when the commpoint was closed (fd is -1) or its fd
		 * was changed during the callback; the fd that is read in
		 * this loop is then no longer the one of the commpoint */
		if(rep.c->fd != fd)
			break;
	}
#else
	(void)fd;
	(void)event;
	(void)arg;
	fatal_exit("recvmsg: No support for IPV6_PKTINFO; IP_PKTINFO or IP_RECVDSTADDR. "
		"Please disable interface-automatic");
#endif /* AF_INET6 && IPV6_PKTINFO && HAVE_RECVMSG */
}
661
662 void 
663 comm_point_udp_callback(int fd, short event, void* arg)
664 {
665         struct comm_reply rep;
666         ssize_t rcv;
667         int i;
668
669         rep.c = (struct comm_point*)arg;
670         log_assert(rep.c->type == comm_udp);
671
672         if(!(event&UB_EV_READ))
673                 return;
674         log_assert(rep.c && rep.c->buffer && rep.c->fd == fd);
675         ub_comm_base_now(rep.c->ev->base);
676         for(i=0; i<NUM_UDP_PER_SELECT; i++) {
677                 sldns_buffer_clear(rep.c->buffer);
678                 rep.addrlen = (socklen_t)sizeof(rep.addr);
679                 log_assert(fd != -1);
680                 log_assert(sldns_buffer_remaining(rep.c->buffer) > 0);
681                 rcv = recvfrom(fd, (void*)sldns_buffer_begin(rep.c->buffer), 
682                         sldns_buffer_remaining(rep.c->buffer), 0, 
683                         (struct sockaddr*)&rep.addr, &rep.addrlen);
684                 if(rcv == -1) {
685 #ifndef USE_WINSOCK
686                         if(errno != EAGAIN && errno != EINTR)
687                                 log_err("recvfrom %d failed: %s", 
688                                         fd, strerror(errno));
689 #else
690                         if(WSAGetLastError() != WSAEINPROGRESS &&
691                                 WSAGetLastError() != WSAECONNRESET &&
692                                 WSAGetLastError()!= WSAEWOULDBLOCK)
693                                 log_err("recvfrom failed: %s",
694                                         wsa_strerror(WSAGetLastError()));
695 #endif
696                         return;
697                 }
698                 sldns_buffer_skip(rep.c->buffer, rcv);
699                 sldns_buffer_flip(rep.c->buffer);
700                 rep.srctype = 0;
701                 fptr_ok(fptr_whitelist_comm_point(rep.c->callback));
702                 if((*rep.c->callback)(rep.c, rep.c->cb_arg, NETEVENT_NOERROR, &rep)) {
703                         /* send back immediate reply */
704                         (void)comm_point_send_udp_msg(rep.c, rep.c->buffer,
705                                 (struct sockaddr*)&rep.addr, rep.addrlen);
706                 }
707                 if(rep.c->fd != fd) /* commpoint closed to -1 or reused for
708                 another UDP port. Note rep.c cannot be reused with TCP fd. */
709                         break;
710         }
711 }
712
713 /** Use a new tcp handler for new query fd, set to read query */
714 static void
715 setup_tcp_handler(struct comm_point* c, int fd, int cur, int max) 
716 {
717         log_assert(c->type == comm_tcp);
718         log_assert(c->fd == -1);
719         sldns_buffer_clear(c->buffer);
720         c->tcp_is_reading = 1;
721         c->tcp_byte_count = 0;
722         c->tcp_timeout_msec = TCP_QUERY_TIMEOUT;
723         /* if more than half the tcp handlers are in use, use a shorter
724          * timeout for this TCP connection, we need to make space for
725          * other connections to be able to get attention */
726         if(cur > max/2)
727                 c->tcp_timeout_msec = TCP_QUERY_TIMEOUT_FAST;
728         comm_point_start_listening(c, fd, c->tcp_timeout_msec);
729 }
730
731 void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
732         short ATTR_UNUSED(event), void* arg)
733 {
734         struct comm_base* b = (struct comm_base*)arg;
735         /* timeout for the slow accept, re-enable accepts again */
736         if(b->start_accept) {
737                 verbose(VERB_ALGO, "wait is over, slow accept disabled");
738                 fptr_ok(fptr_whitelist_start_accept(b->start_accept));
739                 (*b->start_accept)(b->cb_arg);
740                 b->eb->slow_accept_enabled = 0;
741         }
742 }
743
744 int comm_point_perform_accept(struct comm_point* c,
745         struct sockaddr_storage* addr, socklen_t* addrlen)
746 {
747         int new_fd;
748         *addrlen = (socklen_t)sizeof(*addr);
749         new_fd = accept(c->fd, (struct sockaddr*)addr, addrlen);
750         if(new_fd == -1) {
751 #ifndef USE_WINSOCK
752                 /* EINTR is signal interrupt. others are closed connection. */
753                 if(     errno == EINTR || errno == EAGAIN
754 #ifdef EWOULDBLOCK
755                         || errno == EWOULDBLOCK 
756 #endif
757 #ifdef ECONNABORTED
758                         || errno == ECONNABORTED 
759 #endif
760 #ifdef EPROTO
761                         || errno == EPROTO
762 #endif /* EPROTO */
763                         )
764                         return -1;
765 #if defined(ENFILE) && defined(EMFILE)
766                 if(errno == ENFILE || errno == EMFILE) {
767                         /* out of file descriptors, likely outside of our
768                          * control. stop accept() calls for some time */
769                         if(c->ev->base->stop_accept) {
770                                 struct comm_base* b = c->ev->base;
771                                 struct timeval tv;
772                                 verbose(VERB_ALGO, "out of file descriptors: "
773                                         "slow accept");
774                                 b->eb->slow_accept_enabled = 1;
775                                 fptr_ok(fptr_whitelist_stop_accept(
776                                         b->stop_accept));
777                                 (*b->stop_accept)(b->cb_arg);
778                                 /* set timeout, no mallocs */
779                                 tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
780                                 tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
781                                 b->eb->slow_accept = ub_event_new(b->eb->base,
782                                         -1, UB_EV_TIMEOUT,
783                                         comm_base_handle_slow_accept, b);
784                                 if(b->eb->slow_accept == NULL) {
785                                         /* we do not want to log here, because
786                                          * that would spam the logfiles.
787                                          * error: "event_base_set failed." */
788                                 }
789                                 else if(ub_event_add(b->eb->slow_accept, &tv)
790                                         != 0) {
791                                         /* we do not want to log here,
792                                          * error: "event_add failed." */
793                                 }
794                         }
795                         return -1;
796                 }
797 #endif
798                 log_err_addr("accept failed", strerror(errno), addr, *addrlen);
799 #else /* USE_WINSOCK */
800                 if(WSAGetLastError() == WSAEINPROGRESS ||
801                         WSAGetLastError() == WSAECONNRESET)
802                         return -1;
803                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
804                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
805                         return -1;
806                 }
807                 log_err_addr("accept failed", wsa_strerror(WSAGetLastError()),
808                         addr, *addrlen);
809 #endif
810                 return -1;
811         }
812         fd_set_nonblock(new_fd);
813         return new_fd;
814 }
815
816 #ifdef USE_WINSOCK
817 static long win_bio_cb(BIO *b, int oper, const char* ATTR_UNUSED(argp),
818         int ATTR_UNUSED(argi), long argl, long retvalue)
819 {
820         verbose(VERB_ALGO, "bio_cb %d, %s %s %s", oper,
821                 (oper&BIO_CB_RETURN)?"return":"before",
822                 (oper&BIO_CB_READ)?"read":((oper&BIO_CB_WRITE)?"write":"other"),
823                 WSAGetLastError()==WSAEWOULDBLOCK?"wsawb":"");
824         /* on windows, check if previous operation caused EWOULDBLOCK */
825         if( (oper == (BIO_CB_READ|BIO_CB_RETURN) && argl == 0) ||
826                 (oper == (BIO_CB_GETS|BIO_CB_RETURN) && argl == 0)) {
827                 if(WSAGetLastError() == WSAEWOULDBLOCK)
828                         ub_winsock_tcp_wouldblock((struct ub_event*)
829                                 BIO_get_callback_arg(b), UB_EV_READ);
830         }
831         if( (oper == (BIO_CB_WRITE|BIO_CB_RETURN) && argl == 0) ||
832                 (oper == (BIO_CB_PUTS|BIO_CB_RETURN) && argl == 0)) {
833                 if(WSAGetLastError() == WSAEWOULDBLOCK)
834                         ub_winsock_tcp_wouldblock((struct ub_event*)
835                                 BIO_get_callback_arg(b), UB_EV_WRITE);
836         }
837         /* return original return value */
838         return retvalue;
839 }
840
841 /** set win bio callbacks for nonblocking operations */
842 void
843 comm_point_tcp_win_bio_cb(struct comm_point* c, void* thessl)
844 {
845         SSL* ssl = (SSL*)thessl;
846         /* set them both just in case, but usually they are the same BIO */
847         BIO_set_callback(SSL_get_rbio(ssl), &win_bio_cb);
848         BIO_set_callback_arg(SSL_get_rbio(ssl), (char*)c->ev->ev);
849         BIO_set_callback(SSL_get_wbio(ssl), &win_bio_cb);
850         BIO_set_callback_arg(SSL_get_wbio(ssl), (char*)c->ev->ev);
851 }
852 #endif
853
854 void 
855 comm_point_tcp_accept_callback(int fd, short event, void* arg)
856 {
857         struct comm_point* c = (struct comm_point*)arg, *c_hdl;
858         int new_fd;
859         log_assert(c->type == comm_tcp_accept);
860         if(!(event & UB_EV_READ)) {
861                 log_info("ignoring tcp accept event %d", (int)event);
862                 return;
863         }
864         ub_comm_base_now(c->ev->base);
865         /* find free tcp handler. */
866         if(!c->tcp_free) {
867                 log_warn("accepted too many tcp, connections full");
868                 return;
869         }
870         /* accept incoming connection. */
871         c_hdl = c->tcp_free;
872         log_assert(fd != -1);
873         (void)fd;
874         new_fd = comm_point_perform_accept(c, &c_hdl->repinfo.addr,
875                 &c_hdl->repinfo.addrlen);
876         if(new_fd == -1)
877                 return;
878         if(c->ssl) {
879                 c_hdl->ssl = incoming_ssl_fd(c->ssl, new_fd);
880                 if(!c_hdl->ssl) {
881                         c_hdl->fd = new_fd;
882                         comm_point_close(c_hdl);
883                         return;
884                 }
885                 c_hdl->ssl_shake_state = comm_ssl_shake_read;
886 #ifdef USE_WINSOCK
887                 comm_point_tcp_win_bio_cb(c_hdl, c_hdl->ssl);
888 #endif
889         }
890
891         /* grab the tcp handler buffers */
892         c->cur_tcp_count++;
893         c->tcp_free = c_hdl->tcp_free;
894         if(!c->tcp_free) {
895                 /* stop accepting incoming queries for now. */
896                 comm_point_stop_listening(c);
897         }
898         setup_tcp_handler(c_hdl, new_fd, c->cur_tcp_count, c->max_tcp_count);
899 }
900
901 /** Make tcp handler free for next assignment */
902 static void
903 reclaim_tcp_handler(struct comm_point* c)
904 {
905         log_assert(c->type == comm_tcp);
906         if(c->ssl) {
907 #ifdef HAVE_SSL
908                 SSL_shutdown(c->ssl);
909                 SSL_free(c->ssl);
910                 c->ssl = NULL;
911 #endif
912         }
913         comm_point_close(c);
914         if(c->tcp_parent) {
915                 c->tcp_parent->cur_tcp_count--;
916                 c->tcp_free = c->tcp_parent->tcp_free;
917                 c->tcp_parent->tcp_free = c;
918                 if(!c->tcp_free) {
919                         /* re-enable listening on accept socket */
920                         comm_point_start_listening(c->tcp_parent, -1, -1);
921                 }
922         }
923 }
924
925 /** do the callback when writing is done */
926 static void
927 tcp_callback_writer(struct comm_point* c)
928 {
929         log_assert(c->type == comm_tcp);
930         sldns_buffer_clear(c->buffer);
931         if(c->tcp_do_toggle_rw)
932                 c->tcp_is_reading = 1;
933         c->tcp_byte_count = 0;
934         /* switch from listening(write) to listening(read) */
935         comm_point_stop_listening(c);
936         comm_point_start_listening(c, -1, -1);
937 }
938
939 /** do the callback when reading is done */
940 static void
941 tcp_callback_reader(struct comm_point* c)
942 {
943         log_assert(c->type == comm_tcp || c->type == comm_local);
944         sldns_buffer_flip(c->buffer);
945         if(c->tcp_do_toggle_rw)
946                 c->tcp_is_reading = 0;
947         c->tcp_byte_count = 0;
948         if(c->type == comm_tcp)
949                 comm_point_stop_listening(c);
950         fptr_ok(fptr_whitelist_comm_point(c->callback));
951         if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo) ) {
952                 comm_point_start_listening(c, -1, c->tcp_timeout_msec);
953         }
954 }
955
/**
 * Continue the ssl handshake of a comm point.
 * In the states comm_ssl_shake_hs_read and comm_ssl_shake_hs_write only the
 * listening direction is restored and the state is reset to none.
 * Otherwise SSL_do_handshake() is called and the listening direction and
 * state are updated from its result.
 * @param c: comm point with ssl connection.
 * @return 0 if the connection is closed or failed, 1 to continue (check
 *	c->ssl_shake_state to see if the handshake is still in progress).
 */
#ifdef HAVE_SSL
static int
ssl_handshake(struct comm_point* c)
{
	int r, want;
	switch(c->ssl_shake_state) {
	case comm_ssl_shake_hs_read:
		/* the read condition was met, go back to writing */
		comm_point_listen_for_rw(c, 1, 1);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	case comm_ssl_shake_hs_write:
		/* the write condition was met, go back to reading */
		comm_point_listen_for_rw(c, 1, 0);
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	default:
		break;
	}

	ERR_clear_error();
	r = SSL_do_handshake(c->ssl);
	if(r == 1) {
		/* handshake complete;
		 * this is where peer verification could take place */
		log_addr(VERB_ALGO, "SSL DNS connection", &c->repinfo.addr,
			c->repinfo.addrlen);

		/* set up the listening direction for the data phase */
		if(!c->tcp_is_reading) {
			comm_point_listen_for_rw(c, 1, 1);
		} else if(c->ssl_shake_state != comm_ssl_shake_read) {
			comm_point_listen_for_rw(c, 1, 0);
		}
		c->ssl_shake_state = comm_ssl_shake_none;
		return 1;
	}

	want = SSL_get_error(c->ssl, r);
	switch(want) {
	case SSL_ERROR_WANT_READ:
		/* change the event only if not waiting for read already */
		if(c->ssl_shake_state != comm_ssl_shake_read) {
			c->ssl_shake_state = comm_ssl_shake_read;
			comm_point_listen_for_rw(c, 1, 0);
		}
		return 1;
	case SSL_ERROR_WANT_WRITE:
		/* change the event only if not waiting for write already */
		if(c->ssl_shake_state != comm_ssl_shake_write) {
			c->ssl_shake_state = comm_ssl_shake_write;
			comm_point_listen_for_rw(c, 0, 1);
		}
		return 1;
	default:
		break;
	}
	if(r == 0) {
		return 0; /* closed */
	}
	if(want == SSL_ERROR_SYSCALL) {
		/* SYSCALL with errno==0 means closed uncleanly,
		 * that is not logged */
		if(errno != 0)
			log_err("SSL_handshake syscall: %s",
				strerror(errno));
		return 0;
	}
	log_crypto_err("ssl handshake failed");
	log_addr(1, "ssl handshake failed", &c->repinfo.addr,
		c->repinfo.addrlen);
	return 0;
}
#endif /* HAVE_SSL */
1021
#ifdef HAVE_SSL
/**
 * Decide how to continue after SSL_read() returned a value <= 0.
 * For SSL_ERROR_WANT_WRITE the comm point is switched to listen for write
 * and the state is set to comm_ssl_shake_hs_write.
 * @param c: comm point with ssl connection.
 * @param r: return value of SSL_read().
 * @return 1 to keep the connection and wait for the next event,
 *	0 if the connection is closed or failed.
 */
static int
ssl_read_failure(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* shutdown, closed */
	case SSL_ERROR_WANT_READ:
		return 1; /* read more later */
	case SSL_ERROR_WANT_WRITE:
		c->ssl_shake_state = comm_ssl_shake_hs_write;
		comm_point_listen_for_rw(c, 0, 1);
		return 1;
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_read syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_read");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl read callback on TCP.
 * Continues the handshake if that is in progress, then reads the two
 * length bytes and after that the message itself into c->buffer. When the
 * message is complete tcp_callback_reader() is called.
 * @param c: comm point with ssl connection.
 * @return 0 on error or close, 1 otherwise. Without HAVE_SSL always 0.
 */
static int
ssl_handle_read(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake not finished yet, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* (the rest of) the length prefix is read first */
		uint16_t msglen;
		ERR_clear_error();
		r = SSL_read(c->ssl,
			(void*)sldns_buffer_at(c->buffer, c->tcp_byte_count),
			(int)(sizeof(uint16_t) - c->tcp_byte_count));
		if(r <= 0)
			return ssl_read_failure(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count != sizeof(uint16_t))
			return 1;
		msglen = sldns_buffer_read_u16_at(c->buffer, 0);
		if(msglen > sldns_buffer_capacity(c->buffer)) {
			verbose(VERB_QUERY, "ssl: dropped larger than buffer");
			return 0;
		}
		sldns_buffer_set_limit(c->buffer, msglen);
		if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
			verbose(VERB_QUERY, "ssl: dropped bogus too short.");
			return 0;
		}
		verbose(VERB_ALGO, "Reading ssl tcp query of length %d",
			(int)sldns_buffer_limit(c->buffer));
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_read(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_read_failure(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);
	if(sldns_buffer_remaining(c->buffer) == 0) {
		/* the buffer is filled up to its limit */
		tcp_callback_reader(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1108
#ifdef HAVE_SSL
/**
 * Decide how to continue after SSL_write() returned a value <= 0.
 * For SSL_ERROR_WANT_READ the comm point is switched to listen for read
 * and the state is set to comm_ssl_shake_hs_read. In that state
 * ssl_handshake() only restores the listening direction for writing once
 * the read condition is met, and the write is then retried. This is the
 * mirror image of what ssl_handle_read does with comm_ssl_shake_hs_write.
 * (The state comm_ssl_shake_read would make ssl_handshake() run
 * SSL_do_handshake() again and log the connection a second time.)
 * @param c: comm point with ssl connection.
 * @param r: return value of SSL_write().
 * @return 1 to keep the connection and wait for the next event,
 *	0 if the connection is closed or failed.
 */
static int
ssl_write_failure(struct comm_point* c, int r)
{
	switch(SSL_get_error(c->ssl, r)) {
	case SSL_ERROR_ZERO_RETURN:
		return 0; /* closed */
	case SSL_ERROR_WANT_READ:
		c->ssl_shake_state = comm_ssl_shake_hs_read;
		comm_point_listen_for_rw(c, 1, 0);
		return 1; /* wait for read condition */
	case SSL_ERROR_WANT_WRITE:
		return 1; /* write more later */
	case SSL_ERROR_SYSCALL:
		if(errno != 0)
			log_err("SSL_write syscall: %s",
				strerror(errno));
		return 0;
	default:
		break;
	}
	log_crypto_err("could not SSL_write");
	return 0;
}
#endif /* HAVE_SSL */

/**
 * ssl write callback on TCP.
 * Continues the handshake if that is in progress, then writes the two
 * length bytes (in network byte order) and after that the contents of
 * c->buffer. When the buffer has been written tcp_callback_writer() is
 * called.
 * @param c: comm point with ssl connection.
 * @return 0 on error or close, 1 otherwise. Without HAVE_SSL always 0.
 */
static int
ssl_handle_write(struct comm_point* c)
{
#ifdef HAVE_SSL
	int r;
	if(c->ssl_shake_state != comm_ssl_shake_none) {
		if(!ssl_handshake(c))
			return 0;
		/* handshake not finished yet, wait for the next event */
		if(c->ssl_shake_state != comm_ssl_shake_none)
			return 1;
	}
	/* ignore return, if fails we may simply block */
	(void)SSL_set_mode(c->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
	if(c->tcp_byte_count < sizeof(uint16_t)) {
		/* (the rest of) the length prefix is written first */
		uint16_t len = htons(sldns_buffer_limit(c->buffer));
		ERR_clear_error();
		r = SSL_write(c->ssl,
			(void*)(((uint8_t*)&len)+c->tcp_byte_count),
			(int)(sizeof(uint16_t)-c->tcp_byte_count));
		if(r <= 0)
			return ssl_write_failure(c, r);
		c->tcp_byte_count += r;
		if(c->tcp_byte_count < sizeof(uint16_t))
			return 1;
		sldns_buffer_set_position(c->buffer, c->tcp_byte_count -
			sizeof(uint16_t));
		if(sldns_buffer_remaining(c->buffer) == 0) {
			tcp_callback_writer(c);
			return 1;
		}
	}
	log_assert(sldns_buffer_remaining(c->buffer) > 0);
	ERR_clear_error();
	r = SSL_write(c->ssl, (void*)sldns_buffer_current(c->buffer),
		(int)sldns_buffer_remaining(c->buffer));
	if(r <= 0)
		return ssl_write_failure(c, r);
	sldns_buffer_skip(c->buffer, (ssize_t)r);

	if(sldns_buffer_remaining(c->buffer) == 0) {
		tcp_callback_writer(c);
	}
	return 1;
#else
	(void)c;
	return 0;
#endif /* HAVE_SSL */
}
1192
1193 /** handle ssl tcp connection with dns contents */
1194 static int
1195 ssl_handle_it(struct comm_point* c)
1196 {
1197         if(c->tcp_is_reading)
1198                 return ssl_handle_read(c);
1199         return ssl_handle_write(c);
1200 }
1201
1202 /** Handle tcp reading callback. 
1203  * @param fd: file descriptor of socket.
1204  * @param c: comm point to read from into buffer.
1205  * @param short_ok: if true, very short packets are OK (for comm_local).
1206  * @return: 0 on error 
1207  */
1208 static int
1209 comm_point_tcp_handle_read(int fd, struct comm_point* c, int short_ok)
1210 {
1211         ssize_t r;
1212         log_assert(c->type == comm_tcp || c->type == comm_local);
1213         if(c->ssl)
1214                 return ssl_handle_it(c);
1215         if(!c->tcp_is_reading)
1216                 return 0;
1217
1218         log_assert(fd != -1);
1219         if(c->tcp_byte_count < sizeof(uint16_t)) {
1220                 /* read length bytes */
1221                 r = recv(fd,(void*)sldns_buffer_at(c->buffer,c->tcp_byte_count),
1222                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1223                 if(r == 0)
1224                         return 0;
1225                 else if(r == -1) {
1226 #ifndef USE_WINSOCK
1227                         if(errno == EINTR || errno == EAGAIN)
1228                                 return 1;
1229 #ifdef ECONNRESET
1230                         if(errno == ECONNRESET && verbosity < 2)
1231                                 return 0; /* silence reset by peer */
1232 #endif
1233                         log_err_addr("read (in tcp s)", strerror(errno),
1234                                 &c->repinfo.addr, c->repinfo.addrlen);
1235 #else /* USE_WINSOCK */
1236                         if(WSAGetLastError() == WSAECONNRESET)
1237                                 return 0;
1238                         if(WSAGetLastError() == WSAEINPROGRESS)
1239                                 return 1;
1240                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1241                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1242                                         UB_EV_READ);
1243                                 return 1;
1244                         }
1245                         log_err_addr("read (in tcp s)", 
1246                                 wsa_strerror(WSAGetLastError()),
1247                                 &c->repinfo.addr, c->repinfo.addrlen);
1248 #endif
1249                         return 0;
1250                 } 
1251                 c->tcp_byte_count += r;
1252                 if(c->tcp_byte_count != sizeof(uint16_t))
1253                         return 1;
1254                 if(sldns_buffer_read_u16_at(c->buffer, 0) >
1255                         sldns_buffer_capacity(c->buffer)) {
1256                         verbose(VERB_QUERY, "tcp: dropped larger than buffer");
1257                         return 0;
1258                 }
1259                 sldns_buffer_set_limit(c->buffer, 
1260                         sldns_buffer_read_u16_at(c->buffer, 0));
1261                 if(!short_ok && 
1262                         sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
1263                         verbose(VERB_QUERY, "tcp: dropped bogus too short.");
1264                         return 0;
1265                 }
1266                 verbose(VERB_ALGO, "Reading tcp query of length %d", 
1267                         (int)sldns_buffer_limit(c->buffer));
1268         }
1269
1270         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1271         r = recv(fd, (void*)sldns_buffer_current(c->buffer), 
1272                 sldns_buffer_remaining(c->buffer), 0);
1273         if(r == 0) {
1274                 return 0;
1275         } else if(r == -1) {
1276 #ifndef USE_WINSOCK
1277                 if(errno == EINTR || errno == EAGAIN)
1278                         return 1;
1279                 log_err_addr("read (in tcp r)", strerror(errno),
1280                         &c->repinfo.addr, c->repinfo.addrlen);
1281 #else /* USE_WINSOCK */
1282                 if(WSAGetLastError() == WSAECONNRESET)
1283                         return 0;
1284                 if(WSAGetLastError() == WSAEINPROGRESS)
1285                         return 1;
1286                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1287                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_READ);
1288                         return 1;
1289                 }
1290                 log_err_addr("read (in tcp r)",
1291                         wsa_strerror(WSAGetLastError()),
1292                         &c->repinfo.addr, c->repinfo.addrlen);
1293 #endif
1294                 return 0;
1295         }
1296         sldns_buffer_skip(c->buffer, r);
1297         if(sldns_buffer_remaining(c->buffer) <= 0) {
1298                 tcp_callback_reader(c);
1299         }
1300         return 1;
1301 }
1302
1303 /** 
1304  * Handle tcp writing callback. 
1305  * @param fd: file descriptor of socket.
1306  * @param c: comm point to write buffer out of.
1307  * @return: 0 on error
1308  */
1309 static int
1310 comm_point_tcp_handle_write(int fd, struct comm_point* c)
1311 {
1312         ssize_t r;
1313         log_assert(c->type == comm_tcp);
1314         if(c->tcp_is_reading && !c->ssl)
1315                 return 0;
1316         log_assert(fd != -1);
1317         if(c->tcp_byte_count == 0 && c->tcp_check_nb_connect) {
1318                 /* check for pending error from nonblocking connect */
1319                 /* from Stevens, unix network programming, vol1, 3rd ed, p450*/
1320                 int error = 0;
1321                 socklen_t len = (socklen_t)sizeof(error);
1322                 if(getsockopt(fd, SOL_SOCKET, SO_ERROR, (void*)&error, 
1323                         &len) < 0){
1324 #ifndef USE_WINSOCK
1325                         error = errno; /* on solaris errno is error */
1326 #else /* USE_WINSOCK */
1327                         error = WSAGetLastError();
1328 #endif
1329                 }
1330 #ifndef USE_WINSOCK
1331 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1332                 if(error == EINPROGRESS || error == EWOULDBLOCK)
1333                         return 1; /* try again later */
1334                 else
1335 #endif
1336                 if(error != 0 && verbosity < 2)
1337                         return 0; /* silence lots of chatter in the logs */
1338                 else if(error != 0) {
1339                         log_err_addr("tcp connect", strerror(error),
1340                                 &c->repinfo.addr, c->repinfo.addrlen);
1341 #else /* USE_WINSOCK */
1342                 /* examine error */
1343                 if(error == WSAEINPROGRESS)
1344                         return 1;
1345                 else if(error == WSAEWOULDBLOCK) {
1346                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1347                         return 1;
1348                 } else if(error != 0 && verbosity < 2)
1349                         return 0;
1350                 else if(error != 0) {
1351                         log_err_addr("tcp connect", wsa_strerror(error),
1352                                 &c->repinfo.addr, c->repinfo.addrlen);
1353 #endif /* USE_WINSOCK */
1354                         return 0;
1355                 }
1356         }
1357         if(c->ssl)
1358                 return ssl_handle_it(c);
1359
1360 #ifdef USE_MSG_FASTOPEN
1361         /* Only try this on first use of a connection that uses tfo, 
1362            otherwise fall through to normal write */
1363         /* Also, TFO support on WINDOWS not implemented at the moment */
1364         if(c->tcp_do_fastopen == 1) {
1365                 /* this form of sendmsg() does both a connect() and send() so need to
1366                    look for various flavours of error*/
1367                 uint16_t len = htons(sldns_buffer_limit(c->buffer));
1368                 struct msghdr msg;
1369                 struct iovec iov[2];
1370                 c->tcp_do_fastopen = 0;
1371                 memset(&msg, 0, sizeof(msg));
1372                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1373                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1374                 iov[1].iov_base = sldns_buffer_begin(c->buffer);
1375                 iov[1].iov_len = sldns_buffer_limit(c->buffer);
1376                 log_assert(iov[0].iov_len > 0);
1377                 log_assert(iov[1].iov_len > 0);
1378                 msg.msg_name = &c->repinfo.addr;
1379                 msg.msg_namelen = c->repinfo.addrlen;
1380                 msg.msg_iov = iov;
1381                 msg.msg_iovlen = 2;
1382                 r = sendmsg(fd, &msg, MSG_FASTOPEN);
1383                 if (r == -1) {
1384 #if defined(EINPROGRESS) && defined(EWOULDBLOCK)
1385                         /* Handshake is underway, maybe because no TFO cookie available.
1386                            Come back to write the messsage*/
1387                         if(errno == EINPROGRESS || errno == EWOULDBLOCK)
1388                                 return 1;
1389 #endif
1390                         if(errno == EINTR || errno == EAGAIN)
1391                                 return 1;
1392                         /* Not handling EISCONN here as shouldn't ever hit that case.*/
1393                         if(errno != 0 && verbosity < 2)
1394                                 return 0; /* silence lots of chatter in the logs */
1395                         else if(errno != 0) 
1396                                 log_err_addr("tcp sendmsg", strerror(errno),
1397                                         &c->repinfo.addr, c->repinfo.addrlen);
1398                         return 0;
1399                 } else {
1400                         c->tcp_byte_count += r;
1401                         if(c->tcp_byte_count < sizeof(uint16_t))
1402                                 return 1;
1403                         sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 
1404                                 sizeof(uint16_t));
1405                         if(sldns_buffer_remaining(c->buffer) == 0) {
1406                                 tcp_callback_writer(c);
1407                                 return 1;
1408                         }
1409                 }
1410         }
1411 #endif /* USE_MSG_FASTOPEN */
1412
1413         if(c->tcp_byte_count < sizeof(uint16_t)) {
1414                 uint16_t len = htons(sldns_buffer_limit(c->buffer));
1415 #ifdef HAVE_WRITEV
1416                 struct iovec iov[2];
1417                 iov[0].iov_base = (uint8_t*)&len + c->tcp_byte_count;
1418                 iov[0].iov_len = sizeof(uint16_t) - c->tcp_byte_count;
1419                 iov[1].iov_base = sldns_buffer_begin(c->buffer);
1420                 iov[1].iov_len = sldns_buffer_limit(c->buffer);
1421                 log_assert(iov[0].iov_len > 0);
1422                 log_assert(iov[1].iov_len > 0);
1423                 r = writev(fd, iov, 2);
1424 #else /* HAVE_WRITEV */
1425                 r = send(fd, (void*)(((uint8_t*)&len)+c->tcp_byte_count),
1426                         sizeof(uint16_t)-c->tcp_byte_count, 0);
1427 #endif /* HAVE_WRITEV */
1428                 if(r == -1) {
1429 #ifndef USE_WINSOCK
1430 #  ifdef EPIPE
1431                         if(errno == EPIPE && verbosity < 2)
1432                                 return 0; /* silence 'broken pipe' */
1433   #endif
1434                         if(errno == EINTR || errno == EAGAIN)
1435                                 return 1;
1436 #  ifdef HAVE_WRITEV
1437                         log_err_addr("tcp writev", strerror(errno),
1438                                 &c->repinfo.addr, c->repinfo.addrlen);
1439 #  else /* HAVE_WRITEV */
1440                         log_err_addr("tcp send s", strerror(errno),
1441                                 &c->repinfo.addr, c->repinfo.addrlen);
1442 #  endif /* HAVE_WRITEV */
1443 #else
1444                         if(WSAGetLastError() == WSAENOTCONN)
1445                                 return 1;
1446                         if(WSAGetLastError() == WSAEINPROGRESS)
1447                                 return 1;
1448                         if(WSAGetLastError() == WSAEWOULDBLOCK) {
1449                                 ub_winsock_tcp_wouldblock(c->ev->ev,
1450                                         UB_EV_WRITE);
1451                                 return 1; 
1452                         }
1453                         log_err_addr("tcp send s",
1454                                 wsa_strerror(WSAGetLastError()),
1455                                 &c->repinfo.addr, c->repinfo.addrlen);
1456 #endif
1457                         return 0;
1458                 }
1459                 c->tcp_byte_count += r;
1460                 if(c->tcp_byte_count < sizeof(uint16_t))
1461                         return 1;
1462                 sldns_buffer_set_position(c->buffer, c->tcp_byte_count - 
1463                         sizeof(uint16_t));
1464                 if(sldns_buffer_remaining(c->buffer) == 0) {
1465                         tcp_callback_writer(c);
1466                         return 1;
1467                 }
1468         }
1469         log_assert(sldns_buffer_remaining(c->buffer) > 0);
1470         r = send(fd, (void*)sldns_buffer_current(c->buffer), 
1471                 sldns_buffer_remaining(c->buffer), 0);
1472         if(r == -1) {
1473 #ifndef USE_WINSOCK
1474                 if(errno == EINTR || errno == EAGAIN)
1475                         return 1;
1476                 log_err_addr("tcp send r", strerror(errno),
1477                         &c->repinfo.addr, c->repinfo.addrlen);
1478 #else
1479                 if(WSAGetLastError() == WSAEINPROGRESS)
1480                         return 1;
1481                 if(WSAGetLastError() == WSAEWOULDBLOCK) {
1482                         ub_winsock_tcp_wouldblock(c->ev->ev, UB_EV_WRITE);
1483                         return 1; 
1484                 }
1485                 log_err_addr("tcp send r", wsa_strerror(WSAGetLastError()),
1486                         &c->repinfo.addr, c->repinfo.addrlen);
1487 #endif
1488                 return 0;
1489         }
1490         sldns_buffer_skip(c->buffer, r);
1491
1492         if(sldns_buffer_remaining(c->buffer) == 0) {
1493                 tcp_callback_writer(c);
1494         }
1495         
1496         return 1;
1497 }
1498
1499 void 
1500 comm_point_tcp_handle_callback(int fd, short event, void* arg)
1501 {
1502         struct comm_point* c = (struct comm_point*)arg;
1503         log_assert(c->type == comm_tcp);
1504         ub_comm_base_now(c->ev->base);
1505
1506         if(event&UB_EV_READ) {
1507                 if(!comm_point_tcp_handle_read(fd, c, 0)) {
1508                         reclaim_tcp_handler(c);
1509                         if(!c->tcp_do_close) {
1510                                 fptr_ok(fptr_whitelist_comm_point(
1511                                         c->callback));
1512                                 (void)(*c->callback)(c, c->cb_arg, 
1513                                         NETEVENT_CLOSED, NULL);
1514                         }
1515                 }
1516                 return;
1517         }
1518         if(event&UB_EV_WRITE) {
1519                 if(!comm_point_tcp_handle_write(fd, c)) {
1520                         reclaim_tcp_handler(c);
1521                         if(!c->tcp_do_close) {
1522                                 fptr_ok(fptr_whitelist_comm_point(
1523                                         c->callback));
1524                                 (void)(*c->callback)(c, c->cb_arg, 
1525                                         NETEVENT_CLOSED, NULL);
1526                         }
1527                 }
1528                 return;
1529         }
1530         if(event&UB_EV_TIMEOUT) {
1531                 verbose(VERB_QUERY, "tcp took too long, dropped");
1532                 reclaim_tcp_handler(c);
1533                 if(!c->tcp_do_close) {
1534                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1535                         (void)(*c->callback)(c, c->cb_arg,
1536                                 NETEVENT_TIMEOUT, NULL);
1537                 }
1538                 return;
1539         }
1540         log_err("Ignored event %d for tcphdl.", event);
1541 }
1542
1543 void comm_point_local_handle_callback(int fd, short event, void* arg)
1544 {
1545         struct comm_point* c = (struct comm_point*)arg;
1546         log_assert(c->type == comm_local);
1547         ub_comm_base_now(c->ev->base);
1548
1549         if(event&UB_EV_READ) {
1550                 if(!comm_point_tcp_handle_read(fd, c, 1)) {
1551                         fptr_ok(fptr_whitelist_comm_point(c->callback));
1552                         (void)(*c->callback)(c, c->cb_arg, NETEVENT_CLOSED, 
1553                                 NULL);
1554                 }
1555                 return;
1556         }
1557         log_err("Ignored event %d for localhdl.", event);
1558 }
1559
1560 void comm_point_raw_handle_callback(int ATTR_UNUSED(fd), 
1561         short event, void* arg)
1562 {
1563         struct comm_point* c = (struct comm_point*)arg;
1564         int err = NETEVENT_NOERROR;
1565         log_assert(c->type == comm_raw);
1566         ub_comm_base_now(c->ev->base);
1567         
1568         if(event&UB_EV_TIMEOUT)
1569                 err = NETEVENT_TIMEOUT;
1570         fptr_ok(fptr_whitelist_comm_point_raw(c->callback));
1571         (void)(*c->callback)(c, c->cb_arg, err, NULL);
1572 }
1573
1574 struct comm_point* 
1575 comm_point_create_udp(struct comm_base *base, int fd, sldns_buffer* buffer,
1576         comm_point_callback_type* callback, void* callback_arg)
1577 {
1578         struct comm_point* c = (struct comm_point*)calloc(1,
1579                 sizeof(struct comm_point));
1580         short evbits;
1581         if(!c)
1582                 return NULL;
1583         c->ev = (struct internal_event*)calloc(1,
1584                 sizeof(struct internal_event));
1585         if(!c->ev) {
1586                 free(c);
1587                 return NULL;
1588         }
1589         c->ev->base = base;
1590         c->fd = fd;
1591         c->buffer = buffer;
1592         c->timeout = NULL;
1593         c->tcp_is_reading = 0;
1594         c->tcp_byte_count = 0;
1595         c->tcp_parent = NULL;
1596         c->max_tcp_count = 0;
1597         c->cur_tcp_count = 0;
1598         c->tcp_handlers = NULL;
1599         c->tcp_free = NULL;
1600         c->type = comm_udp;
1601         c->tcp_do_close = 0;
1602         c->do_not_close = 0;
1603         c->tcp_do_toggle_rw = 0;
1604         c->tcp_check_nb_connect = 0;
1605 #ifdef USE_MSG_FASTOPEN
1606         c->tcp_do_fastopen = 0;
1607 #endif
1608         c->inuse = 0;
1609         c->callback = callback;
1610         c->cb_arg = callback_arg;
1611         evbits = UB_EV_READ | UB_EV_PERSIST;
1612         /* ub_event stuff */
1613         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1614                 comm_point_udp_callback, c);
1615         if(c->ev->ev == NULL) {
1616                 log_err("could not baseset udp event");
1617                 comm_point_delete(c);
1618                 return NULL;
1619         }
1620         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
1621                 log_err("could not add udp event");
1622                 comm_point_delete(c);
1623                 return NULL;
1624         }
1625         return c;
1626 }
1627
1628 struct comm_point* 
1629 comm_point_create_udp_ancil(struct comm_base *base, int fd, 
1630         sldns_buffer* buffer, 
1631         comm_point_callback_type* callback, void* callback_arg)
1632 {
1633         struct comm_point* c = (struct comm_point*)calloc(1,
1634                 sizeof(struct comm_point));
1635         short evbits;
1636         if(!c)
1637                 return NULL;
1638         c->ev = (struct internal_event*)calloc(1,
1639                 sizeof(struct internal_event));
1640         if(!c->ev) {
1641                 free(c);
1642                 return NULL;
1643         }
1644         c->ev->base = base;
1645         c->fd = fd;
1646         c->buffer = buffer;
1647         c->timeout = NULL;
1648         c->tcp_is_reading = 0;
1649         c->tcp_byte_count = 0;
1650         c->tcp_parent = NULL;
1651         c->max_tcp_count = 0;
1652         c->cur_tcp_count = 0;
1653         c->tcp_handlers = NULL;
1654         c->tcp_free = NULL;
1655         c->type = comm_udp;
1656         c->tcp_do_close = 0;
1657         c->do_not_close = 0;
1658         c->inuse = 0;
1659         c->tcp_do_toggle_rw = 0;
1660         c->tcp_check_nb_connect = 0;
1661 #ifdef USE_MSG_FASTOPEN
1662         c->tcp_do_fastopen = 0;
1663 #endif
1664         c->callback = callback;
1665         c->cb_arg = callback_arg;
1666         evbits = UB_EV_READ | UB_EV_PERSIST;
1667         /* ub_event stuff */
1668         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1669                 comm_point_udp_ancil_callback, c);
1670         if(c->ev->ev == NULL) {
1671                 log_err("could not baseset udp event");
1672                 comm_point_delete(c);
1673                 return NULL;
1674         }
1675         if(fd!=-1 && ub_event_add(c->ev->ev, c->timeout) != 0 ) {
1676                 log_err("could not add udp event");
1677                 comm_point_delete(c);
1678                 return NULL;
1679         }
1680         return c;
1681 }
1682
1683 static struct comm_point* 
1684 comm_point_create_tcp_handler(struct comm_base *base, 
1685         struct comm_point* parent, size_t bufsize,
1686         comm_point_callback_type* callback, void* callback_arg)
1687 {
1688         struct comm_point* c = (struct comm_point*)calloc(1,
1689                 sizeof(struct comm_point));
1690         short evbits;
1691         if(!c)
1692                 return NULL;
1693         c->ev = (struct internal_event*)calloc(1,
1694                 sizeof(struct internal_event));
1695         if(!c->ev) {
1696                 free(c);
1697                 return NULL;
1698         }
1699         c->ev->base = base;
1700         c->fd = -1;
1701         c->buffer = sldns_buffer_new(bufsize);
1702         if(!c->buffer) {
1703                 free(c->ev);
1704                 free(c);
1705                 return NULL;
1706         }
1707         c->timeout = (struct timeval*)malloc(sizeof(struct timeval));
1708         if(!c->timeout) {
1709                 sldns_buffer_free(c->buffer);
1710                 free(c->ev);
1711                 free(c);
1712                 return NULL;
1713         }
1714         c->tcp_is_reading = 0;
1715         c->tcp_byte_count = 0;
1716         c->tcp_parent = parent;
1717         c->max_tcp_count = 0;
1718         c->cur_tcp_count = 0;
1719         c->tcp_handlers = NULL;
1720         c->tcp_free = NULL;
1721         c->type = comm_tcp;
1722         c->tcp_do_close = 0;
1723         c->do_not_close = 0;
1724         c->tcp_do_toggle_rw = 1;
1725         c->tcp_check_nb_connect = 0;
1726 #ifdef USE_MSG_FASTOPEN
1727         c->tcp_do_fastopen = 0;
1728 #endif
1729         c->repinfo.c = c;
1730         c->callback = callback;
1731         c->cb_arg = callback_arg;
1732         /* add to parent free list */
1733         c->tcp_free = parent->tcp_free;
1734         parent->tcp_free = c;
1735         /* ub_event stuff */
1736         evbits = UB_EV_PERSIST | UB_EV_READ | UB_EV_TIMEOUT;
1737         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1738                 comm_point_tcp_handle_callback, c);
1739         if(c->ev->ev == NULL)
1740         {
1741                 log_err("could not basetset tcphdl event");
1742                 parent->tcp_free = c->tcp_free;
1743                 free(c->ev);
1744                 free(c);
1745                 return NULL;
1746         }
1747         return c;
1748 }
1749
1750 struct comm_point* 
1751 comm_point_create_tcp(struct comm_base *base, int fd, int num, size_t bufsize,
1752         comm_point_callback_type* callback, void* callback_arg)
1753 {
1754         struct comm_point* c = (struct comm_point*)calloc(1,
1755                 sizeof(struct comm_point));
1756         short evbits;
1757         int i;
1758         /* first allocate the TCP accept listener */
1759         if(!c)
1760                 return NULL;
1761         c->ev = (struct internal_event*)calloc(1,
1762                 sizeof(struct internal_event));
1763         if(!c->ev) {
1764                 free(c);
1765                 return NULL;
1766         }
1767         c->ev->base = base;
1768         c->fd = fd;
1769         c->buffer = NULL;
1770         c->timeout = NULL;
1771         c->tcp_is_reading = 0;
1772         c->tcp_byte_count = 0;
1773         c->tcp_parent = NULL;
1774         c->max_tcp_count = num;
1775         c->cur_tcp_count = 0;
1776         c->tcp_handlers = (struct comm_point**)calloc((size_t)num,
1777                 sizeof(struct comm_point*));
1778         if(!c->tcp_handlers) {
1779                 free(c->ev);
1780                 free(c);
1781                 return NULL;
1782         }
1783         c->tcp_free = NULL;
1784         c->type = comm_tcp_accept;
1785         c->tcp_do_close = 0;
1786         c->do_not_close = 0;
1787         c->tcp_do_toggle_rw = 0;
1788         c->tcp_check_nb_connect = 0;
1789 #ifdef USE_MSG_FASTOPEN
1790         c->tcp_do_fastopen = 0;
1791 #endif
1792         c->callback = NULL;
1793         c->cb_arg = NULL;
1794         evbits = UB_EV_READ | UB_EV_PERSIST;
1795         /* ub_event stuff */
1796         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1797                 comm_point_tcp_accept_callback, c);
1798         if(c->ev->ev == NULL) {
1799                 log_err("could not baseset tcpacc event");
1800                 comm_point_delete(c);
1801                 return NULL;
1802         }
1803         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
1804                 log_err("could not add tcpacc event");
1805                 comm_point_delete(c);
1806                 return NULL;
1807         }
1808         /* now prealloc the tcp handlers */
1809         for(i=0; i<num; i++) {
1810                 c->tcp_handlers[i] = comm_point_create_tcp_handler(base,
1811                         c, bufsize, callback, callback_arg);
1812                 if(!c->tcp_handlers[i]) {
1813                         comm_point_delete(c);
1814                         return NULL;
1815                 }
1816         }
1817         
1818         return c;
1819 }
1820
1821 struct comm_point* 
1822 comm_point_create_tcp_out(struct comm_base *base, size_t bufsize,
1823         comm_point_callback_type* callback, void* callback_arg)
1824 {
1825         struct comm_point* c = (struct comm_point*)calloc(1,
1826                 sizeof(struct comm_point));
1827         short evbits;
1828         if(!c)
1829                 return NULL;
1830         c->ev = (struct internal_event*)calloc(1,
1831                 sizeof(struct internal_event));
1832         if(!c->ev) {
1833                 free(c);
1834                 return NULL;
1835         }
1836         c->ev->base = base;
1837         c->fd = -1;
1838         c->buffer = sldns_buffer_new(bufsize);
1839         if(!c->buffer) {
1840                 free(c->ev);
1841                 free(c);
1842                 return NULL;
1843         }
1844         c->timeout = NULL;
1845         c->tcp_is_reading = 0;
1846         c->tcp_byte_count = 0;
1847         c->tcp_parent = NULL;
1848         c->max_tcp_count = 0;
1849         c->cur_tcp_count = 0;
1850         c->tcp_handlers = NULL;
1851         c->tcp_free = NULL;
1852         c->type = comm_tcp;
1853         c->tcp_do_close = 0;
1854         c->do_not_close = 0;
1855         c->tcp_do_toggle_rw = 1;
1856         c->tcp_check_nb_connect = 1;
1857 #ifdef USE_MSG_FASTOPEN
1858         c->tcp_do_fastopen = 1;
1859 #endif
1860         c->repinfo.c = c;
1861         c->callback = callback;
1862         c->cb_arg = callback_arg;
1863         evbits = UB_EV_PERSIST | UB_EV_WRITE;
1864         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1865                 comm_point_tcp_handle_callback, c);
1866         if(c->ev->ev == NULL)
1867         {
1868                 log_err("could not baseset tcpout event");
1869                 sldns_buffer_free(c->buffer);
1870                 free(c->ev);
1871                 free(c);
1872                 return NULL;
1873         }
1874
1875         return c;
1876 }
1877
1878 struct comm_point* 
1879 comm_point_create_local(struct comm_base *base, int fd, size_t bufsize,
1880         comm_point_callback_type* callback, void* callback_arg)
1881 {
1882         struct comm_point* c = (struct comm_point*)calloc(1,
1883                 sizeof(struct comm_point));
1884         short evbits;
1885         if(!c)
1886                 return NULL;
1887         c->ev = (struct internal_event*)calloc(1,
1888                 sizeof(struct internal_event));
1889         if(!c->ev) {
1890                 free(c);
1891                 return NULL;
1892         }
1893         c->ev->base = base;
1894         c->fd = fd;
1895         c->buffer = sldns_buffer_new(bufsize);
1896         if(!c->buffer) {
1897                 free(c->ev);
1898                 free(c);
1899                 return NULL;
1900         }
1901         c->timeout = NULL;
1902         c->tcp_is_reading = 1;
1903         c->tcp_byte_count = 0;
1904         c->tcp_parent = NULL;
1905         c->max_tcp_count = 0;
1906         c->cur_tcp_count = 0;
1907         c->tcp_handlers = NULL;
1908         c->tcp_free = NULL;
1909         c->type = comm_local;
1910         c->tcp_do_close = 0;
1911         c->do_not_close = 1;
1912         c->tcp_do_toggle_rw = 0;
1913         c->tcp_check_nb_connect = 0;
1914 #ifdef USE_MSG_FASTOPEN
1915         c->tcp_do_fastopen = 0;
1916 #endif
1917         c->callback = callback;
1918         c->cb_arg = callback_arg;
1919         /* ub_event stuff */
1920         evbits = UB_EV_PERSIST | UB_EV_READ;
1921         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1922                 comm_point_local_handle_callback, c);
1923         if(c->ev->ev == NULL) {
1924                 log_err("could not baseset localhdl event");
1925                 free(c->ev);
1926                 free(c);
1927                 return NULL;
1928         }
1929         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
1930                 log_err("could not add localhdl event");
1931                 ub_event_free(c->ev->ev);
1932                 free(c->ev);
1933                 free(c);
1934                 return NULL;
1935         }
1936         return c;
1937 }
1938
1939 struct comm_point* 
1940 comm_point_create_raw(struct comm_base* base, int fd, int writing, 
1941         comm_point_callback_type* callback, void* callback_arg)
1942 {
1943         struct comm_point* c = (struct comm_point*)calloc(1,
1944                 sizeof(struct comm_point));
1945         short evbits;
1946         if(!c)
1947                 return NULL;
1948         c->ev = (struct internal_event*)calloc(1,
1949                 sizeof(struct internal_event));
1950         if(!c->ev) {
1951                 free(c);
1952                 return NULL;
1953         }
1954         c->ev->base = base;
1955         c->fd = fd;
1956         c->buffer = NULL;
1957         c->timeout = NULL;
1958         c->tcp_is_reading = 0;
1959         c->tcp_byte_count = 0;
1960         c->tcp_parent = NULL;
1961         c->max_tcp_count = 0;
1962         c->cur_tcp_count = 0;
1963         c->tcp_handlers = NULL;
1964         c->tcp_free = NULL;
1965         c->type = comm_raw;
1966         c->tcp_do_close = 0;
1967         c->do_not_close = 1;
1968         c->tcp_do_toggle_rw = 0;
1969         c->tcp_check_nb_connect = 0;
1970 #ifdef USE_MSG_FASTOPEN
1971         c->tcp_do_fastopen = 0;
1972 #endif
1973         c->callback = callback;
1974         c->cb_arg = callback_arg;
1975         /* ub_event stuff */
1976         if(writing)
1977                 evbits = UB_EV_PERSIST | UB_EV_WRITE;
1978         else    evbits = UB_EV_PERSIST | UB_EV_READ;
1979         c->ev->ev = ub_event_new(base->eb->base, c->fd, evbits,
1980                 comm_point_raw_handle_callback, c);
1981         if(c->ev->ev == NULL) {
1982                 log_err("could not baseset rawhdl event");
1983                 free(c->ev);
1984                 free(c);
1985                 return NULL;
1986         }
1987         if (ub_event_add(c->ev->ev, c->timeout) != 0) {
1988                 log_err("could not add rawhdl event");
1989                 ub_event_free(c->ev->ev);
1990                 free(c->ev);
1991                 free(c);
1992                 return NULL;
1993         }
1994         return c;
1995 }
1996
1997 void 
1998 comm_point_close(struct comm_point* c)
1999 {
2000         if(!c)
2001                 return;
2002         if(c->fd != -1)
2003                 if(ub_event_del(c->ev->ev) != 0) {
2004                         log_err("could not event_del on close");
2005                 }
2006         /* close fd after removing from event lists, or epoll.. is messed up */
2007         if(c->fd != -1 && !c->do_not_close) {
2008                 verbose(VERB_ALGO, "close fd %d", c->fd);
2009 #ifndef USE_WINSOCK
2010                 close(c->fd);
2011 #else
2012                 closesocket(c->fd);
2013 #endif
2014         }
2015         c->fd = -1;
2016 }
2017
2018 void 
2019 comm_point_delete(struct comm_point* c)
2020 {
2021         if(!c) 
2022                 return;
2023         if(c->type == comm_tcp && c->ssl) {
2024 #ifdef HAVE_SSL
2025                 SSL_shutdown(c->ssl);
2026                 SSL_free(c->ssl);
2027 #endif
2028         }
2029         comm_point_close(c);
2030         if(c->tcp_handlers) {
2031                 int i;
2032                 for(i=0; i<c->max_tcp_count; i++)
2033                         comm_point_delete(c->tcp_handlers[i]);
2034                 free(c->tcp_handlers);
2035         }
2036         free(c->timeout);
2037         if(c->type == comm_tcp || c->type == comm_local)
2038                 sldns_buffer_free(c->buffer);
2039         ub_event_free(c->ev->ev);
2040         free(c->ev);
2041         free(c);
2042 }
2043
2044 void 
2045 comm_point_send_reply(struct comm_reply *repinfo)
2046 {
2047         log_assert(repinfo && repinfo->c);
2048         if(repinfo->c->type == comm_udp) {
2049                 if(repinfo->srctype)
2050                         comm_point_send_udp_msg_if(repinfo->c, 
2051                         repinfo->c->buffer, (struct sockaddr*)&repinfo->addr, 
2052                         repinfo->addrlen, repinfo);
2053                 else
2054                         comm_point_send_udp_msg(repinfo->c, repinfo->c->buffer,
2055                         (struct sockaddr*)&repinfo->addr, repinfo->addrlen);
2056 #ifdef USE_DNSTAP
2057                 if(repinfo->c->dtenv != NULL &&
2058                    repinfo->c->dtenv->log_client_response_messages)
2059                         dt_msg_send_client_response(repinfo->c->dtenv,
2060                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
2061 #endif
2062         } else {
2063 #ifdef USE_DNSTAP
2064                 if(repinfo->c->tcp_parent->dtenv != NULL &&
2065                    repinfo->c->tcp_parent->dtenv->log_client_response_messages)
2066                         dt_msg_send_client_response(repinfo->c->tcp_parent->dtenv,
2067                         &repinfo->addr, repinfo->c->type, repinfo->c->buffer);
2068 #endif
2069                 comm_point_start_listening(repinfo->c, -1,
2070                         repinfo->c->tcp_timeout_msec);
2071         }
2072 }
2073
2074 void 
2075 comm_point_drop_reply(struct comm_reply* repinfo)
2076 {
2077         if(!repinfo)
2078                 return;
2079         log_assert(repinfo && repinfo->c);
2080         log_assert(repinfo->c->type != comm_tcp_accept);
2081         if(repinfo->c->type == comm_udp)
2082                 return;
2083         reclaim_tcp_handler(repinfo->c);
2084 }
2085
2086 void 
2087 comm_point_stop_listening(struct comm_point* c)
2088 {
2089         verbose(VERB_ALGO, "comm point stop listening %d", c->fd);
2090         if(ub_event_del(c->ev->ev) != 0) {
2091                 log_err("event_del error to stoplisten");
2092         }
2093 }
2094
2095 void 
2096 comm_point_start_listening(struct comm_point* c, int newfd, int msec)
2097 {
2098         verbose(VERB_ALGO, "comm point start listening %d", 
2099                 c->fd==-1?newfd:c->fd);
2100         if(c->type == comm_tcp_accept && !c->tcp_free) {
2101                 /* no use to start listening no free slots. */
2102                 return;
2103         }
2104         if(msec != -1 && msec != 0) {
2105                 if(!c->timeout) {
2106                         c->timeout = (struct timeval*)malloc(sizeof(
2107                                 struct timeval));
2108                         if(!c->timeout) {
2109                                 log_err("cpsl: malloc failed. No net read.");
2110                                 return;
2111                         }
2112                 }
2113                 ub_event_add_bits(c->ev->ev, UB_EV_TIMEOUT);
2114 #ifndef S_SPLINT_S /* splint fails on struct timeval. */
2115                 c->timeout->tv_sec = msec/1000;
2116                 c->timeout->tv_usec = (msec%1000)*1000;
2117 #endif /* S_SPLINT_S */
2118         }
2119         if(c->type == comm_tcp) {
2120                 ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
2121                 if(c->tcp_is_reading)
2122                         ub_event_add_bits(c->ev->ev, UB_EV_READ);
2123                 else    ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
2124         }
2125         if(newfd != -1) {
2126                 if(c->fd != -1) {
2127 #ifndef USE_WINSOCK
2128                         close(c->fd);
2129 #else
2130                         closesocket(c->fd);
2131 #endif
2132                 }
2133                 c->fd = newfd;
2134                 ub_event_set_fd(c->ev->ev, c->fd);
2135         }
2136         if(ub_event_add(c->ev->ev, msec==0?NULL:c->timeout) != 0) {
2137                 log_err("event_add failed. in cpsl.");
2138         }
2139 }
2140
2141 void comm_point_listen_for_rw(struct comm_point* c, int rd, int wr)
2142 {
2143         verbose(VERB_ALGO, "comm point listen_for_rw %d %d", c->fd, wr);
2144         if(ub_event_del(c->ev->ev) != 0) {
2145                 log_err("event_del error to cplf");
2146         }
2147         ub_event_del_bits(c->ev->ev, UB_EV_READ|UB_EV_WRITE);
2148         if(rd) ub_event_add_bits(c->ev->ev, UB_EV_READ);
2149         if(wr) ub_event_add_bits(c->ev->ev, UB_EV_WRITE);
2150         if(ub_event_add(c->ev->ev, c->timeout) != 0) {
2151                 log_err("event_add failed. in cplf.");
2152         }
2153 }
2154
2155 size_t comm_point_get_mem(struct comm_point* c)
2156 {
2157         size_t s;
2158         if(!c) 
2159                 return 0;
2160         s = sizeof(*c) + sizeof(*c->ev);
2161         if(c->timeout) 
2162                 s += sizeof(*c->timeout);
2163         if(c->type == comm_tcp || c->type == comm_local)
2164                 s += sizeof(*c->buffer) + sldns_buffer_capacity(c->buffer);
2165         if(c->type == comm_tcp_accept) {
2166                 int i;
2167                 for(i=0; i<c->max_tcp_count; i++)
2168                         s += comm_point_get_mem(c->tcp_handlers[i]);
2169         }
2170         return s;
2171 }
2172
2173 struct comm_timer* 
2174 comm_timer_create(struct comm_base* base, void (*cb)(void*), void* cb_arg)
2175 {
2176         struct internal_timer *tm = (struct internal_timer*)calloc(1,
2177                 sizeof(struct internal_timer));
2178         if(!tm) {
2179                 log_err("malloc failed");
2180                 return NULL;
2181         }
2182         tm->super.ev_timer = tm;
2183         tm->base = base;
2184         tm->super.callback = cb;
2185         tm->super.cb_arg = cb_arg;
2186         tm->ev = ub_event_new(base->eb->base, -1, UB_EV_TIMEOUT, 
2187                 comm_timer_callback, &tm->super);
2188         if(tm->ev == NULL) {
2189                 log_err("timer_create: event_base_set failed.");
2190                 free(tm);
2191                 return NULL;
2192         }
2193         return &tm->super;
2194 }
2195
2196 void 
2197 comm_timer_disable(struct comm_timer* timer)
2198 {
2199         if(!timer)
2200                 return;
2201         ub_timer_del(timer->ev_timer->ev);
2202         timer->ev_timer->enabled = 0;
2203 }
2204
2205 void 
2206 comm_timer_set(struct comm_timer* timer, struct timeval* tv)
2207 {
2208         log_assert(tv);
2209         if(timer->ev_timer->enabled)
2210                 comm_timer_disable(timer);
2211         if(ub_timer_add(timer->ev_timer->ev, timer->ev_timer->base->eb->base,
2212                 comm_timer_callback, timer, tv) != 0)
2213                 log_err("comm_timer_set: evtimer_add failed.");
2214         timer->ev_timer->enabled = 1;
2215 }
2216
2217 void 
2218 comm_timer_delete(struct comm_timer* timer)
2219 {
2220         if(!timer)
2221                 return;
2222         comm_timer_disable(timer);
2223         /* Free the sub struct timer->ev_timer derived from the super struct timer.
2224          * i.e. assert(timer == timer->ev_timer)
2225          */
2226         ub_event_free(timer->ev_timer->ev);
2227         free(timer->ev_timer);
2228 }
2229
2230 void 
2231 comm_timer_callback(int ATTR_UNUSED(fd), short event, void* arg)
2232 {
2233         struct comm_timer* tm = (struct comm_timer*)arg;
2234         if(!(event&UB_EV_TIMEOUT))
2235                 return;
2236         ub_comm_base_now(tm->ev_timer->base);
2237         tm->ev_timer->enabled = 0;
2238         fptr_ok(fptr_whitelist_comm_timer(tm->callback));
2239         (*tm->callback)(tm->cb_arg);
2240 }
2241
2242 int 
2243 comm_timer_is_set(struct comm_timer* timer)
2244 {
2245         return (int)timer->ev_timer->enabled;
2246 }
2247
2248 size_t 
2249 comm_timer_get_mem(struct comm_timer* ATTR_UNUSED(timer))
2250 {
2251         return sizeof(struct internal_timer);
2252 }
2253
2254 struct comm_signal* 
2255 comm_signal_create(struct comm_base* base,
2256         void (*callback)(int, void*), void* cb_arg)
2257 {
2258         struct comm_signal* com = (struct comm_signal*)malloc(
2259                 sizeof(struct comm_signal));
2260         if(!com) {
2261                 log_err("malloc failed");
2262                 return NULL;
2263         }
2264         com->base = base;
2265         com->callback = callback;
2266         com->cb_arg = cb_arg;
2267         com->ev_signal = NULL;
2268         return com;
2269 }
2270
2271 void 
2272 comm_signal_callback(int sig, short event, void* arg)
2273 {
2274         struct comm_signal* comsig = (struct comm_signal*)arg;
2275         if(!(event & UB_EV_SIGNAL))
2276                 return;
2277         ub_comm_base_now(comsig->base);
2278         fptr_ok(fptr_whitelist_comm_signal(comsig->callback));
2279         (*comsig->callback)(sig, comsig->cb_arg);
2280 }
2281
2282 int 
2283 comm_signal_bind(struct comm_signal* comsig, int sig)
2284 {
2285         struct internal_signal* entry = (struct internal_signal*)calloc(1, 
2286                 sizeof(struct internal_signal));
2287         if(!entry) {
2288                 log_err("malloc failed");
2289                 return 0;
2290         }
2291         log_assert(comsig);
2292         /* add signal event */
2293         entry->ev = ub_signal_new(comsig->base->eb->base, sig,
2294                 comm_signal_callback, comsig);
2295         if(entry->ev == NULL) {
2296                 log_err("Could not create signal event");
2297                 free(entry);
2298                 return 0;
2299         }
2300         if(ub_signal_add(entry->ev, NULL) != 0) {
2301                 log_err("Could not add signal handler");
2302                 ub_event_free(entry->ev);
2303                 free(entry);
2304                 return 0;
2305         }
2306         /* link into list */
2307         entry->next = comsig->ev_signal;
2308         comsig->ev_signal = entry;
2309         return 1;
2310 }
2311
2312 void 
2313 comm_signal_delete(struct comm_signal* comsig)
2314 {
2315         struct internal_signal* p, *np;
2316         if(!comsig)
2317                 return;
2318         p=comsig->ev_signal;
2319         while(p) {
2320                 np = p->next;
2321                 ub_signal_del(p->ev);
2322                 ub_event_free(p->ev);
2323                 free(p);
2324                 p = np;
2325         }
2326         free(comsig);
2327 }