]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/services/outside_network.c
Upgrade Unbound to 1.7.0. More to follow.
[FreeBSD/FreeBSD.git] / contrib / unbound / services / outside_network.c
1 /*
2  * services/outside_network.c - implement sending of queries and wait answer.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file has functions to send queries to authoritative servers and
40  * wait for the pending answer events.
41  */
42 #include "config.h"
43 #include <ctype.h>
44 #ifdef HAVE_SYS_TYPES_H
45 #  include <sys/types.h>
46 #endif
47 #include <sys/time.h>
48 #include "services/outside_network.h"
49 #include "services/listen_dnsport.h"
50 #include "services/cache/infra.h"
51 #include "util/data/msgparse.h"
52 #include "util/data/msgreply.h"
53 #include "util/data/msgencode.h"
54 #include "util/data/dname.h"
55 #include "util/netevent.h"
56 #include "util/log.h"
57 #include "util/net_help.h"
58 #include "util/random.h"
59 #include "util/fptr_wlist.h"
60 #include "sldns/sbuffer.h"
61 #include "dnstap/dnstap.h"
62 #ifdef HAVE_OPENSSL_SSL_H
63 #include <openssl/ssl.h>
64 #endif
65
66 #ifdef HAVE_NETDB_H
67 #include <netdb.h>
68 #endif
69 #include <fcntl.h>
70
/** number of times to retry making a random ID that is unique. */
#define MAX_ID_RETRY 1000
/** number of times to retry finding interface, port that can be opened. */
#define MAX_PORT_RETRY 10000
/** number of retries on outgoing UDP queries */
#define OUTBOUND_UDP_RETRY 1

/* Forward declarations of file-local helpers that are used before their
 * definitions appear. */

/** initiate TCP transaction for serviced query */
static void serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff);
/** with a fd available, randomize and send UDP */
static int randomize_and_send_udp(struct pending* pend, sldns_buffer* packet,
	int timeout);

/** remove waiting tcp from the outnet waiting list */
static void waiting_list_remove(struct outside_network* outnet,
	struct waiting_tcp* w);
87
88 int 
89 pending_cmp(const void* key1, const void* key2)
90 {
91         struct pending *p1 = (struct pending*)key1;
92         struct pending *p2 = (struct pending*)key2;
93         if(p1->id < p2->id)
94                 return -1;
95         if(p1->id > p2->id)
96                 return 1;
97         log_assert(p1->id == p2->id);
98         return sockaddr_cmp(&p1->addr, p1->addrlen, &p2->addr, p2->addrlen);
99 }
100
101 int 
102 serviced_cmp(const void* key1, const void* key2)
103 {
104         struct serviced_query* q1 = (struct serviced_query*)key1;
105         struct serviced_query* q2 = (struct serviced_query*)key2;
106         int r;
107         if(q1->qbuflen < q2->qbuflen)
108                 return -1;
109         if(q1->qbuflen > q2->qbuflen)
110                 return 1;
111         log_assert(q1->qbuflen == q2->qbuflen);
112         log_assert(q1->qbuflen >= 15 /* 10 header, root, type, class */);
113         /* alternate casing of qname is still the same query */
114         if((r = memcmp(q1->qbuf, q2->qbuf, 10)) != 0)
115                 return r;
116         if((r = memcmp(q1->qbuf+q1->qbuflen-4, q2->qbuf+q2->qbuflen-4, 4)) != 0)
117                 return r;
118         if(q1->dnssec != q2->dnssec) {
119                 if(q1->dnssec < q2->dnssec)
120                         return -1;
121                 return 1;
122         }
123         if((r = query_dname_compare(q1->qbuf+10, q2->qbuf+10)) != 0)
124                 return r;
125         if((r = edns_opt_list_compare(q1->opt_list, q2->opt_list)) != 0)
126                 return r;
127         return sockaddr_cmp(&q1->addr, q1->addrlen, &q2->addr, q2->addrlen);
128 }
129
130 /** delete waiting_tcp entry. Does not unlink from waiting list. 
131  * @param w: to delete.
132  */
133 static void
134 waiting_tcp_delete(struct waiting_tcp* w)
135 {
136         if(!w) return;
137         if(w->timer)
138                 comm_timer_delete(w->timer);
139         free(w);
140 }
141
142 /** 
143  * Pick random outgoing-interface of that family, and bind it.
144  * port set to 0 so OS picks a port number for us.
145  * if it is the ANY address, do not bind.
146  * @param w: tcp structure with destination address.
147  * @param s: socket fd.
148  * @return false on error, socket closed.
149  */
150 static int
151 pick_outgoing_tcp(struct waiting_tcp* w, int s)
152 {
153         struct port_if* pi = NULL;
154         int num;
155 #ifdef INET6
156         if(addr_is_ip6(&w->addr, w->addrlen))
157                 num = w->outnet->num_ip6;
158         else
159 #endif
160                 num = w->outnet->num_ip4;
161         if(num == 0) {
162                 log_err("no TCP outgoing interfaces of family");
163                 log_addr(VERB_OPS, "for addr", &w->addr, w->addrlen);
164 #ifndef USE_WINSOCK
165                 close(s);
166 #else
167                 closesocket(s);
168 #endif
169                 return 0;
170         }
171 #ifdef INET6
172         if(addr_is_ip6(&w->addr, w->addrlen))
173                 pi = &w->outnet->ip6_ifs[ub_random_max(w->outnet->rnd, num)];
174         else
175 #endif
176                 pi = &w->outnet->ip4_ifs[ub_random_max(w->outnet->rnd, num)];
177         log_assert(pi);
178         if(addr_is_any(&pi->addr, pi->addrlen)) {
179                 /* binding to the ANY interface is for listening sockets */
180                 return 1;
181         }
182         /* set port to 0 */
183         if(addr_is_ip6(&pi->addr, pi->addrlen))
184                 ((struct sockaddr_in6*)&pi->addr)->sin6_port = 0;
185         else    ((struct sockaddr_in*)&pi->addr)->sin_port = 0;
186         if(bind(s, (struct sockaddr*)&pi->addr, pi->addrlen) != 0) {
187 #ifndef USE_WINSOCK
188                 log_err("outgoing tcp: bind: %s", strerror(errno));
189                 close(s);
190 #else
191                 log_err("outgoing tcp: bind: %s", 
192                         wsa_strerror(WSAGetLastError()));
193                 closesocket(s);
194 #endif
195                 return 0;
196         }
197         log_addr(VERB_ALGO, "tcp bound to src", &pi->addr, pi->addrlen);
198         return 1;
199 }
200
201 /** get TCP file descriptor for address, returns -1 on failure,
202  * tcp_mss is 0 or maxseg size to set for TCP packets. */
203 int
204 outnet_get_tcp_fd(struct sockaddr_storage* addr, socklen_t addrlen, int tcp_mss)
205 {
206         int s;
207 #ifdef SO_REUSEADDR
208         int on = 1;
209 #endif
210 #ifdef INET6
211         if(addr_is_ip6(addr, addrlen))
212                 s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
213         else
214 #endif
215                 s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
216         if(s == -1) {
217 #ifndef USE_WINSOCK
218                 log_err_addr("outgoing tcp: socket", strerror(errno),
219                         addr, addrlen);
220 #else
221                 log_err_addr("outgoing tcp: socket", 
222                         wsa_strerror(WSAGetLastError()), addr, addrlen);
223 #endif
224                 return -1;
225         }
226
227 #ifdef SO_REUSEADDR
228         if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (void*)&on,
229                 (socklen_t)sizeof(on)) < 0) {
230                 verbose(VERB_ALGO, "outgoing tcp:"
231                         " setsockopt(.. SO_REUSEADDR ..) failed");
232         }
233 #endif
234
235         if(tcp_mss > 0) {
236 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
237                 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG,
238                         (void*)&tcp_mss, (socklen_t)sizeof(tcp_mss)) < 0) {
239                         verbose(VERB_ALGO, "outgoing tcp:"
240                                 " setsockopt(.. TCP_MAXSEG ..) failed");
241                 }
242 #else
243                 verbose(VERB_ALGO, "outgoing tcp:"
244                         " setsockopt(TCP_MAXSEG) unsupported");
245 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
246         }
247
248         return s;
249 }
250
/**
 * Connect the tcp socket s to addr.
 * A connect that is reported as in progress (EINPROGRESS, or on winsock
 * WSAEINPROGRESS / WSAEWOULDBLOCK) counts as success.
 * @param s: socket fd.
 * @param addr: destination address.
 * @param addrlen: length of addr.
 * @return 1 on success, 0 on failure; on failure the socket is closed.
 */
int
outnet_tcp_connect(int s, struct sockaddr_storage* addr, socklen_t addrlen)
{
	if(connect(s, (struct sockaddr*)addr, addrlen) != -1)
		return 1;
#ifndef USE_WINSOCK
#ifdef EINPROGRESS
	if(errno == EINPROGRESS)
		return 1; /* connect is still in progress */
#endif
	if(tcp_connect_errno_needs_log((struct sockaddr*)addr, addrlen))
		log_err_addr("outgoing tcp: connect",
			strerror(errno), addr, addrlen);
	close(s);
	return 0;
#else /* USE_WINSOCK */
	if(WSAGetLastError() == WSAEINPROGRESS ||
		WSAGetLastError() == WSAEWOULDBLOCK)
		return 1; /* connect is still in progress */
	closesocket(s);
	return 0;
#endif
}
279
280 /** use next free buffer to service a tcp query */
281 static int
282 outnet_tcp_take_into_use(struct waiting_tcp* w, uint8_t* pkt, size_t pkt_len)
283 {
284         struct pending_tcp* pend = w->outnet->tcp_free;
285         int s;
286         log_assert(pend);
287         log_assert(pkt);
288         log_assert(w->addrlen > 0);
289         /* open socket */
290         s = outnet_get_tcp_fd(&w->addr, w->addrlen, w->outnet->tcp_mss);
291
292         if(!pick_outgoing_tcp(w, s))
293                 return 0;
294
295         fd_set_nonblock(s);
296 #ifdef USE_OSX_MSG_FASTOPEN
297         /* API for fast open is different here. We use a connectx() function and 
298            then writes can happen as normal even using SSL.*/
299         /* connectx requires that the len be set in the sockaddr struct*/
300         struct sockaddr_in *addr_in = (struct sockaddr_in *)&w->addr;
301         addr_in->sin_len = w->addrlen;
302         sa_endpoints_t endpoints;
303         endpoints.sae_srcif = 0;
304         endpoints.sae_srcaddr = NULL;
305         endpoints.sae_srcaddrlen = 0;
306         endpoints.sae_dstaddr = (struct sockaddr *)&w->addr;
307         endpoints.sae_dstaddrlen = w->addrlen;
308         if (connectx(s, &endpoints, SAE_ASSOCID_ANY,  
309                      CONNECT_DATA_IDEMPOTENT | CONNECT_RESUME_ON_READ_WRITE,
310                      NULL, 0, NULL, NULL) == -1) {
311                 /* if fails, failover to connect for OSX 10.10 */
312 #ifdef EINPROGRESS
313                 if(errno != EINPROGRESS) {
314 #else
315                 if(1) {
316 #endif
317                         if(connect(s, (struct sockaddr*)&w->addr, w->addrlen) == -1) {
318 #else /* USE_OSX_MSG_FASTOPEN*/
319 #ifdef USE_MSG_FASTOPEN
320         pend->c->tcp_do_fastopen = 1;
321         /* Only do TFO for TCP in which case no connect() is required here.
322            Don't combine client TFO with SSL, since OpenSSL can't 
323            currently support doing a handshake on fd that already isn't connected*/
324         if (w->outnet->sslctx && w->ssl_upstream) {
325                 if(connect(s, (struct sockaddr*)&w->addr, w->addrlen) == -1) {
326 #else /* USE_MSG_FASTOPEN*/
327         if(connect(s, (struct sockaddr*)&w->addr, w->addrlen) == -1) {
328 #endif /* USE_MSG_FASTOPEN*/
329 #endif /* USE_OSX_MSG_FASTOPEN*/
330 #ifndef USE_WINSOCK
331 #ifdef EINPROGRESS
332                 if(errno != EINPROGRESS) {
333 #else
334                 if(1) {
335 #endif
336                         if(tcp_connect_errno_needs_log(
337                                 (struct sockaddr*)&w->addr, w->addrlen))
338                                 log_err_addr("outgoing tcp: connect",
339                                         strerror(errno), &w->addr, w->addrlen);
340                         close(s);
341 #else /* USE_WINSOCK */
342                 if(WSAGetLastError() != WSAEINPROGRESS &&
343                         WSAGetLastError() != WSAEWOULDBLOCK) {
344                         closesocket(s);
345 #endif
346                         return 0;
347                 }
348         }
349 #ifdef USE_MSG_FASTOPEN
350         }
351 #endif /* USE_MSG_FASTOPEN */
352 #ifdef USE_OSX_MSG_FASTOPEN
353                 }
354         }
355 #endif /* USE_OSX_MSG_FASTOPEN */
356         if(w->outnet->sslctx && w->ssl_upstream) {
357                 pend->c->ssl = outgoing_ssl_fd(w->outnet->sslctx, s);
358                 if(!pend->c->ssl) {
359                         pend->c->fd = s;
360                         comm_point_close(pend->c);
361                         return 0;
362                 }
363 #ifdef USE_WINSOCK
364                 comm_point_tcp_win_bio_cb(pend->c, pend->c->ssl);
365 #endif
366                 pend->c->ssl_shake_state = comm_ssl_shake_write;
367         }
368         w->pkt = NULL;
369         w->next_waiting = (void*)pend;
370         pend->id = LDNS_ID_WIRE(pkt);
371         w->outnet->num_tcp_outgoing++;
372         w->outnet->tcp_free = pend->next_free;
373         pend->next_free = NULL;
374         pend->query = w;
375         pend->c->repinfo.addrlen = w->addrlen;
376         memcpy(&pend->c->repinfo.addr, &w->addr, w->addrlen);
377         sldns_buffer_clear(pend->c->buffer);
378         sldns_buffer_write(pend->c->buffer, pkt, pkt_len);
379         sldns_buffer_flip(pend->c->buffer);
380         pend->c->tcp_is_reading = 0;
381         pend->c->tcp_byte_count = 0;
382         comm_point_start_listening(pend->c, s, -1);
383         return 1;
384 }
385
386 /** see if buffers can be used to service TCP queries */
387 static void
388 use_free_buffer(struct outside_network* outnet)
389 {
390         struct waiting_tcp* w;
391         while(outnet->tcp_free && outnet->tcp_wait_first 
392                 && !outnet->want_to_quit) {
393                 w = outnet->tcp_wait_first;
394                 outnet->tcp_wait_first = w->next_waiting;
395                 if(outnet->tcp_wait_last == w)
396                         outnet->tcp_wait_last = NULL;
397                 if(!outnet_tcp_take_into_use(w, w->pkt, w->pkt_len)) {
398                         comm_point_callback_type* cb = w->cb;
399                         void* cb_arg = w->cb_arg;
400                         waiting_tcp_delete(w);
401                         fptr_ok(fptr_whitelist_pending_tcp(cb));
402                         (void)(*cb)(NULL, cb_arg, NETEVENT_CLOSED, NULL);
403                 }
404         }
405 }
406
407 /** decommission a tcp buffer, closes commpoint and frees waiting_tcp entry */
408 static void
409 decommission_pending_tcp(struct outside_network* outnet, 
410         struct pending_tcp* pend)
411 {
412         if(pend->c->ssl) {
413 #ifdef HAVE_SSL
414                 SSL_shutdown(pend->c->ssl);
415                 SSL_free(pend->c->ssl);
416                 pend->c->ssl = NULL;
417 #endif
418         }
419         comm_point_close(pend->c);
420         pend->next_free = outnet->tcp_free;
421         outnet->tcp_free = pend;
422         waiting_tcp_delete(pend->query);
423         pend->query = NULL;
424         use_free_buffer(outnet);
425 }
426
427 int 
428 outnet_tcp_cb(struct comm_point* c, void* arg, int error,
429         struct comm_reply *reply_info)
430 {
431         struct pending_tcp* pend = (struct pending_tcp*)arg;
432         struct outside_network* outnet = pend->query->outnet;
433         verbose(VERB_ALGO, "outnettcp cb");
434         if(error != NETEVENT_NOERROR) {
435                 verbose(VERB_QUERY, "outnettcp got tcp error %d", error);
436                 /* pass error below and exit */
437         } else {
438                 /* check ID */
439                 if(sldns_buffer_limit(c->buffer) < sizeof(uint16_t) ||
440                         LDNS_ID_WIRE(sldns_buffer_begin(c->buffer))!=pend->id) {
441                         log_addr(VERB_QUERY, 
442                                 "outnettcp: bad ID in reply, from:",
443                                 &pend->query->addr, pend->query->addrlen);
444                         error = NETEVENT_CLOSED;
445                 }
446         }
447         fptr_ok(fptr_whitelist_pending_tcp(pend->query->cb));
448         (void)(*pend->query->cb)(c, pend->query->cb_arg, error, reply_info);
449         decommission_pending_tcp(outnet, pend);
450         return 0;
451 }
452
453 /** lower use count on pc, see if it can be closed */
454 static void
455 portcomm_loweruse(struct outside_network* outnet, struct port_comm* pc)
456 {
457         struct port_if* pif;
458         pc->num_outstanding--;
459         if(pc->num_outstanding > 0) {
460                 return;
461         }
462         /* close it and replace in unused list */
463         verbose(VERB_ALGO, "close of port %d", pc->number);
464         comm_point_close(pc->cp);
465         pif = pc->pif;
466         log_assert(pif->inuse > 0);
467         pif->avail_ports[pif->avail_total - pif->inuse] = pc->number;
468         pif->inuse--;
469         pif->out[pc->index] = pif->out[pif->inuse];
470         pif->out[pc->index]->index = pc->index;
471         pc->next = outnet->unused_fds;
472         outnet->unused_fds = pc;
473 }
474
475 /** try to send waiting UDP queries */
476 static void
477 outnet_send_wait_udp(struct outside_network* outnet)
478 {
479         struct pending* pend;
480         /* process waiting queries */
481         while(outnet->udp_wait_first && outnet->unused_fds 
482                 && !outnet->want_to_quit) {
483                 pend = outnet->udp_wait_first;
484                 outnet->udp_wait_first = pend->next_waiting;
485                 if(!pend->next_waiting) outnet->udp_wait_last = NULL;
486                 sldns_buffer_clear(outnet->udp_buff);
487                 sldns_buffer_write(outnet->udp_buff, pend->pkt, pend->pkt_len);
488                 sldns_buffer_flip(outnet->udp_buff);
489                 free(pend->pkt); /* freeing now makes get_mem correct */
490                 pend->pkt = NULL; 
491                 pend->pkt_len = 0;
492                 if(!randomize_and_send_udp(pend, outnet->udp_buff,
493                         pend->timeout)) {
494                         /* callback error on pending */
495                         if(pend->cb) {
496                                 fptr_ok(fptr_whitelist_pending_udp(pend->cb));
497                                 (void)(*pend->cb)(outnet->unused_fds->cp, pend->cb_arg, 
498                                         NETEVENT_CLOSED, NULL);
499                         }
500                         pending_delete(outnet, pend);
501                 }
502         }
503 }
504
505 int 
506 outnet_udp_cb(struct comm_point* c, void* arg, int error,
507         struct comm_reply *reply_info)
508 {
509         struct outside_network* outnet = (struct outside_network*)arg;
510         struct pending key;
511         struct pending* p;
512         verbose(VERB_ALGO, "answer cb");
513
514         if(error != NETEVENT_NOERROR) {
515                 verbose(VERB_QUERY, "outnetudp got udp error %d", error);
516                 return 0;
517         }
518         if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
519                 verbose(VERB_QUERY, "outnetudp udp too short");
520                 return 0;
521         }
522         log_assert(reply_info);
523
524         /* setup lookup key */
525         key.id = (unsigned)LDNS_ID_WIRE(sldns_buffer_begin(c->buffer));
526         memcpy(&key.addr, &reply_info->addr, reply_info->addrlen);
527         key.addrlen = reply_info->addrlen;
528         verbose(VERB_ALGO, "Incoming reply id = %4.4x", key.id);
529         log_addr(VERB_ALGO, "Incoming reply addr =", 
530                 &reply_info->addr, reply_info->addrlen);
531
532         /* find it, see if this thing is a valid query response */
533         verbose(VERB_ALGO, "lookup size is %d entries", (int)outnet->pending->count);
534         p = (struct pending*)rbtree_search(outnet->pending, &key);
535         if(!p) {
536                 verbose(VERB_QUERY, "received unwanted or unsolicited udp reply dropped.");
537                 log_buf(VERB_ALGO, "dropped message", c->buffer);
538                 outnet->unwanted_replies++;
539                 if(outnet->unwanted_threshold && ++outnet->unwanted_total 
540                         >= outnet->unwanted_threshold) {
541                         log_warn("unwanted reply total reached threshold (%u)"
542                                 " you may be under attack."
543                                 " defensive action: clearing the cache",
544                                 (unsigned)outnet->unwanted_threshold);
545                         fptr_ok(fptr_whitelist_alloc_cleanup(
546                                 outnet->unwanted_action));
547                         (*outnet->unwanted_action)(outnet->unwanted_param);
548                         outnet->unwanted_total = 0;
549                 }
550                 return 0;
551         }
552
553         verbose(VERB_ALGO, "received udp reply.");
554         log_buf(VERB_ALGO, "udp message", c->buffer);
555         if(p->pc->cp != c) {
556                 verbose(VERB_QUERY, "received reply id,addr on wrong port. "
557                         "dropped.");
558                 outnet->unwanted_replies++;
559                 if(outnet->unwanted_threshold && ++outnet->unwanted_total 
560                         >= outnet->unwanted_threshold) {
561                         log_warn("unwanted reply total reached threshold (%u)"
562                                 " you may be under attack."
563                                 " defensive action: clearing the cache",
564                                 (unsigned)outnet->unwanted_threshold);
565                         fptr_ok(fptr_whitelist_alloc_cleanup(
566                                 outnet->unwanted_action));
567                         (*outnet->unwanted_action)(outnet->unwanted_param);
568                         outnet->unwanted_total = 0;
569                 }
570                 return 0;
571         }
572         comm_timer_disable(p->timer);
573         verbose(VERB_ALGO, "outnet handle udp reply");
574         /* delete from tree first in case callback creates a retry */
575         (void)rbtree_delete(outnet->pending, p->node.key);
576         if(p->cb) {
577                 fptr_ok(fptr_whitelist_pending_udp(p->cb));
578                 (void)(*p->cb)(p->pc->cp, p->cb_arg, NETEVENT_NOERROR, reply_info);
579         }
580         portcomm_loweruse(outnet, p->pc);
581         pending_delete(NULL, p);
582         outnet_send_wait_udp(outnet);
583         return 0;
584 }
585
/**
 * Calculate the number of ip4 and ip6 interfaces.
 * Without interface strings (num_ifs <= 0) every enabled family counts
 * for one interface. Otherwise every string counts for its own family,
 * but only if that family is enabled.
 * @param ifs: array of interface strings.
 * @param num_ifs: number of entries in ifs.
 * @param do_ip4: if nonzero, ip4 is enabled.
 * @param do_ip6: if nonzero, ip6 is enabled.
 * @param num_ip4: returns the number of ip4 interfaces.
 * @param num_ip6: returns the number of ip6 interfaces.
 */
static void 
calc_num46(char** ifs, int num_ifs, int do_ip4, int do_ip6, 
	int* num_ip4, int* num_ip6)
{
	int idx;
	*num_ip4 = 0;
	*num_ip6 = 0;
	if(num_ifs <= 0) {
		/* no interfaces given: one default per enabled family */
		if(do_ip4)
			*num_ip4 = 1;
		if(do_ip6)
			*num_ip6 = 1;
		return;
	}
	for(idx = 0; idx < num_ifs; idx++) {
		if(str_is_ip6(ifs[idx])) {
			if(do_ip6)
				*num_ip6 += 1;
		} else if(do_ip4) {
			*num_ip4 += 1;
		}
	}
}
613
614 void
615 pending_udp_timer_delay_cb(void* arg)
616 {
617         struct pending* p = (struct pending*)arg;
618         struct outside_network* outnet = p->outnet;
619         verbose(VERB_ALGO, "timeout udp with delay");
620         portcomm_loweruse(outnet, p->pc);
621         pending_delete(outnet, p);
622         outnet_send_wait_udp(outnet);
623 }
624
625 void 
626 pending_udp_timer_cb(void *arg)
627 {
628         struct pending* p = (struct pending*)arg;
629         struct outside_network* outnet = p->outnet;
630         /* it timed out */
631         verbose(VERB_ALGO, "timeout udp");
632         if(p->cb) {
633                 fptr_ok(fptr_whitelist_pending_udp(p->cb));
634                 (void)(*p->cb)(p->pc->cp, p->cb_arg, NETEVENT_TIMEOUT, NULL);
635         }
636         /* if delayclose, keep port open for a longer time.
637          * But if the udpwaitlist exists, then we are struggling to
638          * keep up with demand for sockets, so do not wait, but service
639          * the customer (customer service more important than portICMPs) */
640         if(outnet->delayclose && !outnet->udp_wait_first) {
641                 p->cb = NULL;
642                 p->timer->callback = &pending_udp_timer_delay_cb;
643                 comm_timer_set(p->timer, &outnet->delay_tv);
644                 return;
645         }
646         portcomm_loweruse(outnet, p->pc);
647         pending_delete(outnet, p);
648         outnet_send_wait_udp(outnet);
649 }
650
651 /** create pending_tcp buffers */
652 static int
653 create_pending_tcp(struct outside_network* outnet, size_t bufsize)
654 {
655         size_t i;
656         if(outnet->num_tcp == 0)
657                 return 1; /* no tcp needed, nothing to do */
658         if(!(outnet->tcp_conns = (struct pending_tcp **)calloc(
659                         outnet->num_tcp, sizeof(struct pending_tcp*))))
660                 return 0;
661         for(i=0; i<outnet->num_tcp; i++) {
662                 if(!(outnet->tcp_conns[i] = (struct pending_tcp*)calloc(1, 
663                         sizeof(struct pending_tcp))))
664                         return 0;
665                 outnet->tcp_conns[i]->next_free = outnet->tcp_free;
666                 outnet->tcp_free = outnet->tcp_conns[i];
667                 outnet->tcp_conns[i]->c = comm_point_create_tcp_out(
668                         outnet->base, bufsize, outnet_tcp_cb, 
669                         outnet->tcp_conns[i]);
670                 if(!outnet->tcp_conns[i]->c)
671                         return 0;
672         }
673         return 1;
674 }
675
676 /** setup an outgoing interface, ready address */
677 static int setup_if(struct port_if* pif, const char* addrstr, 
678         int* avail, int numavail, size_t numfd)
679 {
680         pif->avail_total = numavail;
681         pif->avail_ports = (int*)memdup(avail, (size_t)numavail*sizeof(int));
682         if(!pif->avail_ports)
683                 return 0;
684         if(!ipstrtoaddr(addrstr, UNBOUND_DNS_PORT, &pif->addr, &pif->addrlen) &&
685            !netblockstrtoaddr(addrstr, UNBOUND_DNS_PORT,
686                               &pif->addr, &pif->addrlen, &pif->pfxlen))
687                 return 0;
688         pif->maxout = (int)numfd;
689         pif->inuse = 0;
690         pif->out = (struct port_comm**)calloc(numfd, 
691                 sizeof(struct port_comm*));
692         if(!pif->out)
693                 return 0;
694         return 1;
695 }
696
697 struct outside_network* 
698 outside_network_create(struct comm_base *base, size_t bufsize, 
699         size_t num_ports, char** ifs, int num_ifs, int do_ip4, 
700         int do_ip6, size_t num_tcp, struct infra_cache* infra,
701         struct ub_randstate* rnd, int use_caps_for_id, int* availports, 
702         int numavailports, size_t unwanted_threshold, int tcp_mss,
703         void (*unwanted_action)(void*), void* unwanted_param, int do_udp,
704         void* sslctx, int delayclose, struct dt_env* dtenv)
705 {
706         struct outside_network* outnet = (struct outside_network*)
707                 calloc(1, sizeof(struct outside_network));
708         size_t k;
709         if(!outnet) {
710                 log_err("malloc failed");
711                 return NULL;
712         }
713         comm_base_timept(base, &outnet->now_secs, &outnet->now_tv);
714         outnet->base = base;
715         outnet->num_tcp = num_tcp;
716         outnet->num_tcp_outgoing = 0;
717         outnet->infra = infra;
718         outnet->rnd = rnd;
719         outnet->sslctx = sslctx;
720 #ifdef USE_DNSTAP
721         outnet->dtenv = dtenv;
722 #else
723         (void)dtenv;
724 #endif
725         outnet->svcd_overhead = 0;
726         outnet->want_to_quit = 0;
727         outnet->unwanted_threshold = unwanted_threshold;
728         outnet->unwanted_action = unwanted_action;
729         outnet->unwanted_param = unwanted_param;
730         outnet->use_caps_for_id = use_caps_for_id;
731         outnet->do_udp = do_udp;
732         outnet->tcp_mss = tcp_mss;
733 #ifndef S_SPLINT_S
734         if(delayclose) {
735                 outnet->delayclose = 1;
736                 outnet->delay_tv.tv_sec = delayclose/1000;
737                 outnet->delay_tv.tv_usec = (delayclose%1000)*1000;
738         }
739 #endif
740         if(numavailports == 0) {
741                 log_err("no outgoing ports available");
742                 outside_network_delete(outnet);
743                 return NULL;
744         }
745 #ifndef INET6
746         do_ip6 = 0;
747 #endif
748         calc_num46(ifs, num_ifs, do_ip4, do_ip6, 
749                 &outnet->num_ip4, &outnet->num_ip6);
750         if(outnet->num_ip4 != 0) {
751                 if(!(outnet->ip4_ifs = (struct port_if*)calloc(
752                         (size_t)outnet->num_ip4, sizeof(struct port_if)))) {
753                         log_err("malloc failed");
754                         outside_network_delete(outnet);
755                         return NULL;
756                 }
757         }
758         if(outnet->num_ip6 != 0) {
759                 if(!(outnet->ip6_ifs = (struct port_if*)calloc(
760                         (size_t)outnet->num_ip6, sizeof(struct port_if)))) {
761                         log_err("malloc failed");
762                         outside_network_delete(outnet);
763                         return NULL;
764                 }
765         }
766         if(     !(outnet->udp_buff = sldns_buffer_new(bufsize)) ||
767                 !(outnet->pending = rbtree_create(pending_cmp)) ||
768                 !(outnet->serviced = rbtree_create(serviced_cmp)) ||
769                 !create_pending_tcp(outnet, bufsize)) {
770                 log_err("malloc failed");
771                 outside_network_delete(outnet);
772                 return NULL;
773         }
774
775         /* allocate commpoints */
776         for(k=0; k<num_ports; k++) {
777                 struct port_comm* pc;
778                 pc = (struct port_comm*)calloc(1, sizeof(*pc));
779                 if(!pc) {
780                         log_err("malloc failed");
781                         outside_network_delete(outnet);
782                         return NULL;
783                 }
784                 pc->cp = comm_point_create_udp(outnet->base, -1, 
785                         outnet->udp_buff, outnet_udp_cb, outnet);
786                 if(!pc->cp) {
787                         log_err("malloc failed");
788                         free(pc);
789                         outside_network_delete(outnet);
790                         return NULL;
791                 }
792                 pc->next = outnet->unused_fds;
793                 outnet->unused_fds = pc;
794         }
795
796         /* allocate interfaces */
797         if(num_ifs == 0) {
798                 if(do_ip4 && !setup_if(&outnet->ip4_ifs[0], "0.0.0.0", 
799                         availports, numavailports, num_ports)) {
800                         log_err("malloc failed");
801                         outside_network_delete(outnet);
802                         return NULL;
803                 }
804                 if(do_ip6 && !setup_if(&outnet->ip6_ifs[0], "::", 
805                         availports, numavailports, num_ports)) {
806                         log_err("malloc failed");
807                         outside_network_delete(outnet);
808                         return NULL;
809                 }
810         } else {
811                 size_t done_4 = 0, done_6 = 0;
812                 int i;
813                 for(i=0; i<num_ifs; i++) {
814                         if(str_is_ip6(ifs[i]) && do_ip6) {
815                                 if(!setup_if(&outnet->ip6_ifs[done_6], ifs[i],
816                                         availports, numavailports, num_ports)){
817                                         log_err("malloc failed");
818                                         outside_network_delete(outnet);
819                                         return NULL;
820                                 }
821                                 done_6++;
822                         }
823                         if(!str_is_ip6(ifs[i]) && do_ip4) {
824                                 if(!setup_if(&outnet->ip4_ifs[done_4], ifs[i],
825                                         availports, numavailports, num_ports)){
826                                         log_err("malloc failed");
827                                         outside_network_delete(outnet);
828                                         return NULL;
829                                 }
830                                 done_4++;
831                         }
832                 }
833         }
834         return outnet;
835 }
836
837 /** helper pending delete */
838 static void
839 pending_node_del(rbnode_type* node, void* arg)
840 {
841         struct pending* pend = (struct pending*)node;
842         struct outside_network* outnet = (struct outside_network*)arg;
843         pending_delete(outnet, pend);
844 }
845
846 /** helper serviced delete */
847 static void
848 serviced_node_del(rbnode_type* node, void* ATTR_UNUSED(arg))
849 {
850         struct serviced_query* sq = (struct serviced_query*)node;
851         struct service_callback* p = sq->cblist, *np;
852         free(sq->qbuf);
853         free(sq->zone);
854         edns_opt_list_free(sq->opt_list);
855         while(p) {
856                 np = p->next;
857                 free(p);
858                 p = np;
859         }
860         free(sq);
861 }
862
863 void 
864 outside_network_quit_prepare(struct outside_network* outnet)
865 {
866         if(!outnet)
867                 return;
868         /* prevent queued items from being sent */
869         outnet->want_to_quit = 1; 
870 }
871
872 void 
873 outside_network_delete(struct outside_network* outnet)
874 {
875         if(!outnet)
876                 return;
877         outnet->want_to_quit = 1;
878         /* check every element, since we can be called on malloc error */
879         if(outnet->pending) {
880                 /* free pending elements, but do no unlink from tree. */
881                 traverse_postorder(outnet->pending, pending_node_del, NULL);
882                 free(outnet->pending);
883         }
884         if(outnet->serviced) {
885                 traverse_postorder(outnet->serviced, serviced_node_del, NULL);
886                 free(outnet->serviced);
887         }
888         if(outnet->udp_buff)
889                 sldns_buffer_free(outnet->udp_buff);
890         if(outnet->unused_fds) {
891                 struct port_comm* p = outnet->unused_fds, *np;
892                 while(p) {
893                         np = p->next;
894                         comm_point_delete(p->cp);
895                         free(p);
896                         p = np;
897                 }
898                 outnet->unused_fds = NULL;
899         }
900         if(outnet->ip4_ifs) {
901                 int i, k;
902                 for(i=0; i<outnet->num_ip4; i++) {
903                         for(k=0; k<outnet->ip4_ifs[i].inuse; k++) {
904                                 struct port_comm* pc = outnet->ip4_ifs[i].
905                                         out[k];
906                                 comm_point_delete(pc->cp);
907                                 free(pc);
908                         }
909                         free(outnet->ip4_ifs[i].avail_ports);
910                         free(outnet->ip4_ifs[i].out);
911                 }
912                 free(outnet->ip4_ifs);
913         }
914         if(outnet->ip6_ifs) {
915                 int i, k;
916                 for(i=0; i<outnet->num_ip6; i++) {
917                         for(k=0; k<outnet->ip6_ifs[i].inuse; k++) {
918                                 struct port_comm* pc = outnet->ip6_ifs[i].
919                                         out[k];
920                                 comm_point_delete(pc->cp);
921                                 free(pc);
922                         }
923                         free(outnet->ip6_ifs[i].avail_ports);
924                         free(outnet->ip6_ifs[i].out);
925                 }
926                 free(outnet->ip6_ifs);
927         }
928         if(outnet->tcp_conns) {
929                 size_t i;
930                 for(i=0; i<outnet->num_tcp; i++)
931                         if(outnet->tcp_conns[i]) {
932                                 comm_point_delete(outnet->tcp_conns[i]->c);
933                                 waiting_tcp_delete(outnet->tcp_conns[i]->query);
934                                 free(outnet->tcp_conns[i]);
935                         }
936                 free(outnet->tcp_conns);
937         }
938         if(outnet->tcp_wait_first) {
939                 struct waiting_tcp* p = outnet->tcp_wait_first, *np;
940                 while(p) {
941                         np = p->next_waiting;
942                         waiting_tcp_delete(p);
943                         p = np;
944                 }
945         }
946         if(outnet->udp_wait_first) {
947                 struct pending* p = outnet->udp_wait_first, *np;
948                 while(p) {
949                         np = p->next_waiting;
950                         pending_delete(NULL, p);
951                         p = np;
952                 }
953         }
954         free(outnet);
955 }
956
957 void 
958 pending_delete(struct outside_network* outnet, struct pending* p)
959 {
960         if(!p)
961                 return;
962         if(outnet && outnet->udp_wait_first &&
963                 (p->next_waiting || p == outnet->udp_wait_last) ) {
964                 /* delete from waiting list, if it is in the waiting list */
965                 struct pending* prev = NULL, *x = outnet->udp_wait_first;
966                 while(x && x != p) {
967                         prev = x;
968                         x = x->next_waiting;
969                 }
970                 if(x) {
971                         log_assert(x == p);
972                         if(prev)
973                                 prev->next_waiting = p->next_waiting;
974                         else    outnet->udp_wait_first = p->next_waiting;
975                         if(outnet->udp_wait_last == p)
976                                 outnet->udp_wait_last = prev;
977                 }
978         }
979         if(outnet) {
980                 (void)rbtree_delete(outnet->pending, p->node.key);
981         }
982         if(p->timer)
983                 comm_timer_delete(p->timer);
984         free(p->pkt);
985         free(p);
986 }
987
/**
 * Put random bits into the part of an IPv6 address that lies after the
 * network prefix.
 * Whole trailing bytes are overwritten with random values. When the prefix
 * does not end on a byte boundary, random bits are OR-ed into the low bits
 * of the byte that straddles the boundary.
 * @param sa: address to modify in place.
 * @param pfxlen: prefix length in bits; values outside 1..127 leave the
 *	address untouched.
 * @param rnd: random state.
 */
static void
sai6_putrandom(struct sockaddr_in6 *sa, int pfxlen, struct ub_randstate *rnd)
{
	int i, partial_bits;
	int whole_bytes;
	if(pfxlen <= 0 || pfxlen >= 128)
		return;
	whole_bytes = (128 - pfxlen) / 8;
	for(i = 0; i < whole_bytes; i++)
		sa->sin6_addr.s6_addr[15-i] = (uint8_t)ub_random_max(rnd, 256);
	/* i now indexes (from the end) the byte shared with the prefix */
	partial_bits = pfxlen & 7;
	if(partial_bits != 0) {
		sa->sin6_addr.s6_addr[15-i] |=
			((0xFF >> partial_bits) & ub_random_max(rnd, 256));
	}
}
1003
/**
 * Open a UDP socket for outgoing traffic on the given address and port.
 * For IPv4 the port is written into the caller's address structure; for
 * IPv6 a local copy of the address is used, which additionally gets random
 * host bits (and the freebind flag) when a prefix length is configured.
 * @param addr: socket address.
 * @param addrlen: length of address.
 * @param pfxlen: length of network prefix (for address randomisation),
 *	0 disables randomisation.
 * @param port: port override for addr.
 * @param inuse: passed to create_udp_sock; if -1 is returned, this bool
 *	means the port was in use.
 * @param rnd: random state (for address randomisation).
 * @return fd or -1
 */
static int
udp_sockport(struct sockaddr_storage* addr, socklen_t addrlen, int pfxlen,
	int port, int* inuse, struct ub_randstate* rnd)
{
	int noproto;
	in_port_t netport = (in_port_t)htons((uint16_t)port);

	if(!addr_is_ip6(addr, addrlen)) {
		struct sockaddr_in* sa4 = (struct sockaddr_in*)addr;
		sa4->sin_port = netport;
		return create_udp_sock(AF_INET, SOCK_DGRAM,
			(struct sockaddr*)addr, addrlen, 1, inuse, &noproto,
			0, 0, 0, NULL, 0, 0, 0);
	} else {
		struct sockaddr_in6 sa6 = *(struct sockaddr_in6*)addr;
		int freebind = (pfxlen != 0);
		sa6.sin6_port = netport;
		if(freebind)
			sai6_putrandom(&sa6, pfxlen, rnd);
		return create_udp_sock(AF_INET6, SOCK_DGRAM,
			(struct sockaddr*)&sa6, addrlen, 1, inuse, &noproto,
			0, 0, 0, NULL, 0, freebind, 0);
	}
}
1040
1041 /** Select random ID */
1042 static int
1043 select_id(struct outside_network* outnet, struct pending* pend,
1044         sldns_buffer* packet)
1045 {
1046         int id_tries = 0;
1047         pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff;
1048         LDNS_ID_SET(sldns_buffer_begin(packet), pend->id);
1049
1050         /* insert in tree */
1051         pend->node.key = pend;
1052         while(!rbtree_insert(outnet->pending, &pend->node)) {
1053                 /* change ID to avoid collision */
1054                 pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff;
1055                 LDNS_ID_SET(sldns_buffer_begin(packet), pend->id);
1056                 id_tries++;
1057                 if(id_tries == MAX_ID_RETRY) {
1058                         pend->id=99999; /* non existant ID */
1059                         log_err("failed to generate unique ID, drop msg");
1060                         return 0;
1061                 }
1062         }
1063         verbose(VERB_ALGO, "inserted new pending reply id=%4.4x", pend->id);
1064         return 1;
1065 }
1066
1067 /** Select random interface and port */
1068 static int
1069 select_ifport(struct outside_network* outnet, struct pending* pend,
1070         int num_if, struct port_if* ifs)
1071 {
1072         int my_if, my_port, fd, portno, inuse, tries=0;
1073         struct port_if* pif;
1074         /* randomly select interface and port */
1075         if(num_if == 0) {
1076                 verbose(VERB_QUERY, "Need to send query but have no "
1077                         "outgoing interfaces of that family");
1078                 return 0;
1079         }
1080         log_assert(outnet->unused_fds);
1081         tries = 0;
1082         while(1) {
1083                 my_if = ub_random_max(outnet->rnd, num_if);
1084                 pif = &ifs[my_if];
1085                 my_port = ub_random_max(outnet->rnd, pif->avail_total);
1086                 if(my_port < pif->inuse) {
1087                         /* port already open */
1088                         pend->pc = pif->out[my_port];
1089                         verbose(VERB_ALGO, "using UDP if=%d port=%d", 
1090                                 my_if, pend->pc->number);
1091                         break;
1092                 }
1093                 /* try to open new port, if fails, loop to try again */
1094                 log_assert(pif->inuse < pif->maxout);
1095                 portno = pif->avail_ports[my_port - pif->inuse];
1096                 fd = udp_sockport(&pif->addr, pif->addrlen, pif->pfxlen,
1097                         portno, &inuse, outnet->rnd);
1098                 if(fd == -1 && !inuse) {
1099                         /* nonrecoverable error making socket */
1100                         return 0;
1101                 }
1102                 if(fd != -1) {
1103                         verbose(VERB_ALGO, "opened UDP if=%d port=%d", 
1104                                 my_if, portno);
1105                         /* grab fd */
1106                         pend->pc = outnet->unused_fds;
1107                         outnet->unused_fds = pend->pc->next;
1108
1109                         /* setup portcomm */
1110                         pend->pc->next = NULL;
1111                         pend->pc->number = portno;
1112                         pend->pc->pif = pif;
1113                         pend->pc->index = pif->inuse;
1114                         pend->pc->num_outstanding = 0;
1115                         comm_point_start_listening(pend->pc->cp, fd, -1);
1116
1117                         /* grab port in interface */
1118                         pif->out[pif->inuse] = pend->pc;
1119                         pif->avail_ports[my_port - pif->inuse] =
1120                                 pif->avail_ports[pif->avail_total-pif->inuse-1];
1121                         pif->inuse++;
1122                         break;
1123                 }
1124                 /* failed, already in use */
1125                 verbose(VERB_QUERY, "port %d in use, trying another", portno);
1126                 tries++;
1127                 if(tries == MAX_PORT_RETRY) {
1128                         log_err("failed to find an open port, drop msg");
1129                         return 0;
1130                 }
1131         }
1132         log_assert(pend->pc);
1133         pend->pc->num_outstanding++;
1134
1135         return 1;
1136 }
1137
1138 static int
1139 randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, int timeout)
1140 {
1141         struct timeval tv;
1142         struct outside_network* outnet = pend->sq->outnet;
1143
1144         /* select id */
1145         if(!select_id(outnet, pend, packet)) {
1146                 return 0;
1147         }
1148
1149         /* select src_if, port */
1150         if(addr_is_ip6(&pend->addr, pend->addrlen)) {
1151                 if(!select_ifport(outnet, pend, 
1152                         outnet->num_ip6, outnet->ip6_ifs))
1153                         return 0;
1154         } else {
1155                 if(!select_ifport(outnet, pend, 
1156                         outnet->num_ip4, outnet->ip4_ifs))
1157                         return 0;
1158         }
1159         log_assert(pend->pc && pend->pc->cp);
1160
1161         /* send it over the commlink */
1162         if(!comm_point_send_udp_msg(pend->pc->cp, packet, 
1163                 (struct sockaddr*)&pend->addr, pend->addrlen)) {
1164                 portcomm_loweruse(outnet, pend->pc);
1165                 return 0;
1166         }
1167
1168         /* system calls to set timeout after sending UDP to make roundtrip
1169            smaller. */
1170 #ifndef S_SPLINT_S
1171         tv.tv_sec = timeout/1000;
1172         tv.tv_usec = (timeout%1000)*1000;
1173 #endif
1174         comm_timer_set(pend->timer, &tv);
1175
1176 #ifdef USE_DNSTAP
1177         if(outnet->dtenv &&
1178            (outnet->dtenv->log_resolver_query_messages ||
1179             outnet->dtenv->log_forwarder_query_messages))
1180                 dt_msg_send_outside_query(outnet->dtenv, &pend->addr, comm_udp,
1181                 pend->sq->zone, pend->sq->zonelen, packet);
1182 #endif
1183         return 1;
1184 }
1185
1186 struct pending* 
1187 pending_udp_query(struct serviced_query* sq, struct sldns_buffer* packet,
1188         int timeout, comm_point_callback_type* cb, void* cb_arg)
1189 {
1190         struct pending* pend = (struct pending*)calloc(1, sizeof(*pend));
1191         if(!pend) return NULL;
1192         pend->outnet = sq->outnet;
1193         pend->sq = sq;
1194         pend->addrlen = sq->addrlen;
1195         memmove(&pend->addr, &sq->addr, sq->addrlen);
1196         pend->cb = cb;
1197         pend->cb_arg = cb_arg;
1198         pend->node.key = pend;
1199         pend->timer = comm_timer_create(sq->outnet->base, pending_udp_timer_cb,
1200                 pend);
1201         if(!pend->timer) {
1202                 free(pend);
1203                 return NULL;
1204         }
1205
1206         if(sq->outnet->unused_fds == NULL) {
1207                 /* no unused fd, cannot create a new port (randomly) */
1208                 verbose(VERB_ALGO, "no fds available, udp query waiting");
1209                 pend->timeout = timeout;
1210                 pend->pkt_len = sldns_buffer_limit(packet);
1211                 pend->pkt = (uint8_t*)memdup(sldns_buffer_begin(packet),
1212                         pend->pkt_len);
1213                 if(!pend->pkt) {
1214                         comm_timer_delete(pend->timer);
1215                         free(pend);
1216                         return NULL;
1217                 }
1218                 /* put at end of waiting list */
1219                 if(sq->outnet->udp_wait_last)
1220                         sq->outnet->udp_wait_last->next_waiting = pend;
1221                 else 
1222                         sq->outnet->udp_wait_first = pend;
1223                 sq->outnet->udp_wait_last = pend;
1224                 return pend;
1225         }
1226         if(!randomize_and_send_udp(pend, packet, timeout)) {
1227                 pending_delete(sq->outnet, pend);
1228                 return NULL;
1229         }
1230         return pend;
1231 }
1232
1233 void
1234 outnet_tcptimer(void* arg)
1235 {
1236         struct waiting_tcp* w = (struct waiting_tcp*)arg;
1237         struct outside_network* outnet = w->outnet;
1238         comm_point_callback_type* cb;
1239         void* cb_arg;
1240         if(w->pkt) {
1241                 /* it is on the waiting list */
1242                 waiting_list_remove(outnet, w);
1243         } else {
1244                 /* it was in use */
1245                 struct pending_tcp* pend=(struct pending_tcp*)w->next_waiting;
1246                 comm_point_close(pend->c);
1247                 pend->query = NULL;
1248                 pend->next_free = outnet->tcp_free;
1249                 outnet->tcp_free = pend;
1250         }
1251         cb = w->cb;
1252         cb_arg = w->cb_arg;
1253         waiting_tcp_delete(w);
1254         fptr_ok(fptr_whitelist_pending_tcp(cb));
1255         (void)(*cb)(NULL, cb_arg, NETEVENT_TIMEOUT, NULL);
1256         use_free_buffer(outnet);
1257 }
1258
1259 struct waiting_tcp*
1260 pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet,
1261         int timeout, comm_point_callback_type* callback, void* callback_arg)
1262 {
1263         struct pending_tcp* pend = sq->outnet->tcp_free;
1264         struct waiting_tcp* w;
1265         struct timeval tv;
1266         uint16_t id;
1267         /* if no buffer is free allocate space to store query */
1268         w = (struct waiting_tcp*)malloc(sizeof(struct waiting_tcp) 
1269                 + (pend?0:sldns_buffer_limit(packet)));
1270         if(!w) {
1271                 return NULL;
1272         }
1273         if(!(w->timer = comm_timer_create(sq->outnet->base, outnet_tcptimer, w))) {
1274                 free(w);
1275                 return NULL;
1276         }
1277         w->pkt = NULL;
1278         w->pkt_len = 0;
1279         id = ((unsigned)ub_random(sq->outnet->rnd)>>8) & 0xffff;
1280         LDNS_ID_SET(sldns_buffer_begin(packet), id);
1281         memcpy(&w->addr, &sq->addr, sq->addrlen);
1282         w->addrlen = sq->addrlen;
1283         w->outnet = sq->outnet;
1284         w->cb = callback;
1285         w->cb_arg = callback_arg;
1286         w->ssl_upstream = sq->ssl_upstream;
1287 #ifndef S_SPLINT_S
1288         tv.tv_sec = timeout;
1289         tv.tv_usec = 0;
1290 #endif
1291         comm_timer_set(w->timer, &tv);
1292         if(pend) {
1293                 /* we have a buffer available right now */
1294                 if(!outnet_tcp_take_into_use(w, sldns_buffer_begin(packet),
1295                         sldns_buffer_limit(packet))) {
1296                         waiting_tcp_delete(w);
1297                         return NULL;
1298                 }
1299 #ifdef USE_DNSTAP
1300                 if(sq->outnet->dtenv &&
1301                    (sq->outnet->dtenv->log_resolver_query_messages ||
1302                     sq->outnet->dtenv->log_forwarder_query_messages))
1303                 dt_msg_send_outside_query(sq->outnet->dtenv, &sq->addr,
1304                 comm_tcp, sq->zone, sq->zonelen, packet);
1305 #endif
1306         } else {
1307                 /* queue up */
1308                 w->pkt = (uint8_t*)w + sizeof(struct waiting_tcp);
1309                 w->pkt_len = sldns_buffer_limit(packet);
1310                 memmove(w->pkt, sldns_buffer_begin(packet), w->pkt_len);
1311                 w->next_waiting = NULL;
1312                 if(sq->outnet->tcp_wait_last)
1313                         sq->outnet->tcp_wait_last->next_waiting = w;
1314                 else    sq->outnet->tcp_wait_first = w;
1315                 sq->outnet->tcp_wait_last = w;
1316         }
1317         return w;
1318 }
1319
1320 /** create query for serviced queries */
1321 static void
1322 serviced_gen_query(sldns_buffer* buff, uint8_t* qname, size_t qnamelen, 
1323         uint16_t qtype, uint16_t qclass, uint16_t flags)
1324 {
1325         sldns_buffer_clear(buff);
1326         /* skip id */
1327         sldns_buffer_write_u16(buff, flags);
1328         sldns_buffer_write_u16(buff, 1); /* qdcount */
1329         sldns_buffer_write_u16(buff, 0); /* ancount */
1330         sldns_buffer_write_u16(buff, 0); /* nscount */
1331         sldns_buffer_write_u16(buff, 0); /* arcount */
1332         sldns_buffer_write(buff, qname, qnamelen);
1333         sldns_buffer_write_u16(buff, qtype);
1334         sldns_buffer_write_u16(buff, qclass);
1335         sldns_buffer_flip(buff);
1336 }
1337
1338 /** lookup serviced query in serviced query rbtree */
1339 static struct serviced_query*
1340 lookup_serviced(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
1341         struct sockaddr_storage* addr, socklen_t addrlen,
1342         struct edns_option* opt_list)
1343 {
1344         struct serviced_query key;
1345         key.node.key = &key;
1346         key.qbuf = sldns_buffer_begin(buff);
1347         key.qbuflen = sldns_buffer_limit(buff);
1348         key.dnssec = dnssec;
1349         memcpy(&key.addr, addr, addrlen);
1350         key.addrlen = addrlen;
1351         key.outnet = outnet;
1352         key.opt_list = opt_list;
1353         return (struct serviced_query*)rbtree_search(outnet->serviced, &key);
1354 }
1355
1356 /** Create new serviced entry */
1357 static struct serviced_query*
1358 serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
1359         int want_dnssec, int nocaps, int tcp_upstream, int ssl_upstream,
1360         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone,
1361         size_t zonelen, int qtype, struct edns_option* opt_list)
1362 {
1363         struct serviced_query* sq = (struct serviced_query*)malloc(sizeof(*sq));
1364 #ifdef UNBOUND_DEBUG
1365         rbnode_type* ins;
1366 #endif
1367         if(!sq) 
1368                 return NULL;
1369         sq->node.key = sq;
1370         sq->qbuf = memdup(sldns_buffer_begin(buff), sldns_buffer_limit(buff));
1371         if(!sq->qbuf) {
1372                 free(sq);
1373                 return NULL;
1374         }
1375         sq->qbuflen = sldns_buffer_limit(buff);
1376         sq->zone = memdup(zone, zonelen);
1377         if(!sq->zone) {
1378                 free(sq->qbuf);
1379                 free(sq);
1380                 return NULL;
1381         }
1382         sq->zonelen = zonelen;
1383         sq->qtype = qtype;
1384         sq->dnssec = dnssec;
1385         sq->want_dnssec = want_dnssec;
1386         sq->nocaps = nocaps;
1387         sq->tcp_upstream = tcp_upstream;
1388         sq->ssl_upstream = ssl_upstream;
1389         memcpy(&sq->addr, addr, addrlen);
1390         sq->addrlen = addrlen;
1391         sq->opt_list = NULL;
1392         if(opt_list) {
1393                 sq->opt_list = edns_opt_copy_alloc(opt_list);
1394                 if(!sq->opt_list) {
1395                         free(sq->zone);
1396                         free(sq->qbuf);
1397                         free(sq);
1398                         return NULL;
1399                 }
1400         }
1401         sq->outnet = outnet;
1402         sq->cblist = NULL;
1403         sq->pending = NULL;
1404         sq->status = serviced_initial;
1405         sq->retry = 0;
1406         sq->to_be_deleted = 0;
1407 #ifdef UNBOUND_DEBUG
1408         ins = 
1409 #else
1410         (void)
1411 #endif
1412         rbtree_insert(outnet->serviced, &sq->node);
1413         log_assert(ins != NULL); /* must not be already present */
1414         return sq;
1415 }
1416
1417 /** remove waiting tcp from the outnet waiting list */
1418 static void
1419 waiting_list_remove(struct outside_network* outnet, struct waiting_tcp* w)
1420 {
1421         struct waiting_tcp* p = outnet->tcp_wait_first, *prev = NULL;
1422         while(p) {
1423                 if(p == w) {
1424                         /* remove w */
1425                         if(prev)
1426                                 prev->next_waiting = w->next_waiting;
1427                         else    outnet->tcp_wait_first = w->next_waiting;
1428                         if(outnet->tcp_wait_last == w)
1429                                 outnet->tcp_wait_last = prev;
1430                         return;
1431                 }
1432                 prev = p;
1433                 p = p->next_waiting;
1434         }
1435 }
1436
1437 /** cleanup serviced query entry */
1438 static void
1439 serviced_delete(struct serviced_query* sq)
1440 {
1441         if(sq->pending) {
1442                 /* clear up the pending query */
1443                 if(sq->status == serviced_query_UDP_EDNS ||
1444                         sq->status == serviced_query_UDP ||
1445                         sq->status == serviced_query_PROBE_EDNS ||
1446                         sq->status == serviced_query_UDP_EDNS_FRAG ||
1447                         sq->status == serviced_query_UDP_EDNS_fallback) {
1448                         struct pending* p = (struct pending*)sq->pending;
1449                         if(p->pc)
1450                                 portcomm_loweruse(sq->outnet, p->pc);
1451                         pending_delete(sq->outnet, p);
1452                         /* this call can cause reentrant calls back into the
1453                          * mesh */
1454                         outnet_send_wait_udp(sq->outnet);
1455                 } else {
1456                         struct waiting_tcp* p = (struct waiting_tcp*)
1457                                 sq->pending;
1458                         if(p->pkt == NULL) {
1459                                 decommission_pending_tcp(sq->outnet, 
1460                                         (struct pending_tcp*)p->next_waiting);
1461                         } else {
1462                                 waiting_list_remove(sq->outnet, p);
1463                                 waiting_tcp_delete(p);
1464                         }
1465                 }
1466         }
1467         /* does not delete from tree, caller has to do that */
1468         serviced_node_del(&sq->node, NULL);
1469 }
1470
1471 /** perturb a dname capitalization randomly */
1472 static void
1473 serviced_perturb_qname(struct ub_randstate* rnd, uint8_t* qbuf, size_t len)
1474 {
1475         uint8_t lablen;
1476         uint8_t* d = qbuf + 10;
1477         long int random = 0;
1478         int bits = 0;
1479         log_assert(len >= 10 + 5 /* offset qname, root, qtype, qclass */);
1480         (void)len;
1481         lablen = *d++;
1482         while(lablen) {
1483                 while(lablen--) {
1484                         /* only perturb A-Z, a-z */
1485                         if(isalpha((unsigned char)*d)) {
1486                                 /* get a random bit */  
1487                                 if(bits == 0) {
1488                                         random = ub_random(rnd);
1489                                         bits = 30;
1490                                 }
1491                                 if(random & 0x1) {
1492                                         *d = (uint8_t)toupper((unsigned char)*d);
1493                                 } else {
1494                                         *d = (uint8_t)tolower((unsigned char)*d);
1495                                 }
1496                                 random >>= 1;
1497                                 bits--;
1498                         }
1499                         d++;
1500                 }
1501                 lablen = *d++;
1502         }
1503         if(verbosity >= VERB_ALGO) {
1504                 char buf[LDNS_MAX_DOMAINLEN+1];
1505                 dname_str(qbuf+10, buf);
1506                 verbose(VERB_ALGO, "qname perturbed to %s", buf);
1507         }
1508 }
1509
1510 /** put serviced query into a buffer */
1511 static void
1512 serviced_encode(struct serviced_query* sq, sldns_buffer* buff, int with_edns)
1513 {
1514         /* if we are using 0x20 bits for ID randomness, perturb them */
1515         if(sq->outnet->use_caps_for_id && !sq->nocaps) {
1516                 serviced_perturb_qname(sq->outnet->rnd, sq->qbuf, sq->qbuflen);
1517         }
1518         /* generate query */
1519         sldns_buffer_clear(buff);
1520         sldns_buffer_write_u16(buff, 0); /* id placeholder */
1521         sldns_buffer_write(buff, sq->qbuf, sq->qbuflen);
1522         sldns_buffer_flip(buff);
1523         if(with_edns) {
1524                 /* add edns section */
1525                 struct edns_data edns;
1526                 edns.edns_present = 1;
1527                 edns.ext_rcode = 0;
1528                 edns.edns_version = EDNS_ADVERTISED_VERSION;
1529                 edns.opt_list = sq->opt_list;
1530                 if(sq->status == serviced_query_UDP_EDNS_FRAG) {
1531                         if(addr_is_ip6(&sq->addr, sq->addrlen)) {
1532                                 if(EDNS_FRAG_SIZE_IP6 < EDNS_ADVERTISED_SIZE)
1533                                         edns.udp_size = EDNS_FRAG_SIZE_IP6;
1534                                 else    edns.udp_size = EDNS_ADVERTISED_SIZE;
1535                         } else {
1536                                 if(EDNS_FRAG_SIZE_IP4 < EDNS_ADVERTISED_SIZE)
1537                                         edns.udp_size = EDNS_FRAG_SIZE_IP4;
1538                                 else    edns.udp_size = EDNS_ADVERTISED_SIZE;
1539                         }
1540                 } else {
1541                         edns.udp_size = EDNS_ADVERTISED_SIZE;
1542                 }
1543                 edns.bits = 0;
1544                 if(sq->dnssec & EDNS_DO)
1545                         edns.bits = EDNS_DO;
1546                 if(sq->dnssec & BIT_CD)
1547                         LDNS_CD_SET(sldns_buffer_begin(buff));
1548                 attach_edns_record(buff, &edns);
1549         }
1550 }
1551
1552 /**
1553  * Perform serviced query UDP sending operation.
1554  * Sends UDP with EDNS, unless infra host marked non EDNS.
1555  * @param sq: query to send.
1556  * @param buff: buffer scratch space.
1557  * @return 0 on error.
1558  */
1559 static int
1560 serviced_udp_send(struct serviced_query* sq, sldns_buffer* buff)
1561 {
1562         int rtt, vs;
1563         uint8_t edns_lame_known;
1564         time_t now = *sq->outnet->now_secs;
1565
1566         if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone,
1567                 sq->zonelen, now, &vs, &edns_lame_known, &rtt))
1568                 return 0;
1569         sq->last_rtt = rtt;
1570         verbose(VERB_ALGO, "EDNS lookup known=%d vs=%d", edns_lame_known, vs);
1571         if(sq->status == serviced_initial) {
1572                 if(edns_lame_known == 0 && rtt > 5000 && rtt < 10001) {
1573                         /* perform EDNS lame probe - check if server is
1574                          * EDNS lame (EDNS queries to it are dropped) */
1575                         verbose(VERB_ALGO, "serviced query: send probe to see "
1576                                 " if use of EDNS causes timeouts");
1577                         /* even 700 msec may be too small */
1578                         rtt = 1000;
1579                         sq->status = serviced_query_PROBE_EDNS;
1580                 } else if(vs != -1) {
1581                         sq->status = serviced_query_UDP_EDNS;
1582                 } else {        
1583                         sq->status = serviced_query_UDP; 
1584                 }
1585         }
1586         serviced_encode(sq, buff, (sq->status == serviced_query_UDP_EDNS) ||
1587                 (sq->status == serviced_query_UDP_EDNS_FRAG));
1588         sq->last_sent_time = *sq->outnet->now_tv;
1589         sq->edns_lame_known = (int)edns_lame_known;
1590         verbose(VERB_ALGO, "serviced query UDP timeout=%d msec", rtt);
1591         sq->pending = pending_udp_query(sq, buff, rtt,
1592                 serviced_udp_callback, sq);
1593         if(!sq->pending)
1594                 return 0;
1595         return 1;
1596 }
1597
1598 /** check that perturbed qname is identical */
1599 static int
1600 serviced_check_qname(sldns_buffer* pkt, uint8_t* qbuf, size_t qbuflen)
1601 {
1602         uint8_t* d1 = sldns_buffer_begin(pkt)+12;
1603         uint8_t* d2 = qbuf+10;
1604         uint8_t len1, len2;
1605         int count = 0;
1606         if(sldns_buffer_limit(pkt) < 12+1+4) /* packet too small for qname */
1607                 return 0;
1608         log_assert(qbuflen >= 15 /* 10 header, root, type, class */);
1609         len1 = *d1++;
1610         len2 = *d2++;
1611         while(len1 != 0 || len2 != 0) {
1612                 if(LABEL_IS_PTR(len1)) {
1613                         /* check if we can read *d1 with compression ptr rest */
1614                         if(d1 >= sldns_buffer_at(pkt, sldns_buffer_limit(pkt)))
1615                                 return 0;
1616                         d1 = sldns_buffer_begin(pkt)+PTR_OFFSET(len1, *d1);
1617                         /* check if we can read the destination *d1 */
1618                         if(d1 >= sldns_buffer_at(pkt, sldns_buffer_limit(pkt)))
1619                                 return 0;
1620                         len1 = *d1++;
1621                         if(count++ > MAX_COMPRESS_PTRS)
1622                                 return 0;
1623                         continue;
1624                 }
1625                 if(d2 > qbuf+qbuflen)
1626                         return 0;
1627                 if(len1 != len2)
1628                         return 0;
1629                 if(len1 > LDNS_MAX_LABELLEN)
1630                         return 0;
1631                 /* check len1 + 1(next length) are okay to read */
1632                 if(d1+len1 >= sldns_buffer_at(pkt, sldns_buffer_limit(pkt)))
1633                         return 0;
1634                 log_assert(len1 <= LDNS_MAX_LABELLEN);
1635                 log_assert(len2 <= LDNS_MAX_LABELLEN);
1636                 log_assert(len1 == len2 && len1 != 0);
1637                 /* compare the labels - bitwise identical */
1638                 if(memcmp(d1, d2, len1) != 0)
1639                         return 0;
1640                 d1 += len1;
1641                 d2 += len2;
1642                 len1 = *d1++;
1643                 len2 = *d2++;
1644         }
1645         return 1;
1646 }
1647
1648 /** call the callbacks for a serviced query */
1649 static void
1650 serviced_callbacks(struct serviced_query* sq, int error, struct comm_point* c,
1651         struct comm_reply* rep)
1652 {
1653         struct service_callback* p;
1654         int dobackup = (sq->cblist && sq->cblist->next); /* >1 cb*/
1655         uint8_t *backup_p = NULL;
1656         size_t backlen = 0;
1657 #ifdef UNBOUND_DEBUG
1658         rbnode_type* rem =
1659 #else
1660         (void)
1661 #endif
1662         /* remove from tree, and schedule for deletion, so that callbacks
1663          * can safely deregister themselves and even create new serviced
1664          * queries that are identical to this one. */
1665         rbtree_delete(sq->outnet->serviced, sq);
1666         log_assert(rem); /* should have been present */
1667         sq->to_be_deleted = 1; 
1668         verbose(VERB_ALGO, "svcd callbacks start");
1669         if(sq->outnet->use_caps_for_id && error == NETEVENT_NOERROR && c &&
1670                 !sq->nocaps && sq->qtype != LDNS_RR_TYPE_PTR) {
1671                 /* for type PTR do not check perturbed name in answer,
1672                  * compatibility with cisco dns guard boxes that mess up
1673                  * reverse queries 0x20 contents */
1674                 /* noerror and nxdomain must have a qname in reply */
1675                 if(sldns_buffer_read_u16_at(c->buffer, 4) == 0 &&
1676                         (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
1677                                 == LDNS_RCODE_NOERROR || 
1678                          LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
1679                                 == LDNS_RCODE_NXDOMAIN)) {
1680                         verbose(VERB_DETAIL, "no qname in reply to check 0x20ID");
1681                         log_addr(VERB_DETAIL, "from server", 
1682                                 &sq->addr, sq->addrlen);
1683                         log_buf(VERB_DETAIL, "for packet", c->buffer);
1684                         error = NETEVENT_CLOSED;
1685                         c = NULL;
1686                 } else if(sldns_buffer_read_u16_at(c->buffer, 4) > 0 &&
1687                         !serviced_check_qname(c->buffer, sq->qbuf, 
1688                         sq->qbuflen)) {
1689                         verbose(VERB_DETAIL, "wrong 0x20-ID in reply qname");
1690                         log_addr(VERB_DETAIL, "from server", 
1691                                 &sq->addr, sq->addrlen);
1692                         log_buf(VERB_DETAIL, "for packet", c->buffer);
1693                         error = NETEVENT_CAPSFAIL;
1694                         /* and cleanup too */
1695                         pkt_dname_tolower(c->buffer, 
1696                                 sldns_buffer_at(c->buffer, 12));
1697                 } else {
1698                         verbose(VERB_ALGO, "good 0x20-ID in reply qname");
1699                         /* cleanup caps, prettier cache contents. */
1700                         pkt_dname_tolower(c->buffer, 
1701                                 sldns_buffer_at(c->buffer, 12));
1702                 }
1703         }
1704         if(dobackup && c) {
1705                 /* make a backup of the query, since the querystate processing
1706                  * may send outgoing queries that overwrite the buffer.
1707                  * use secondary buffer to store the query.
1708                  * This is a data copy, but faster than packet to server */
1709                 backlen = sldns_buffer_limit(c->buffer);
1710                 backup_p = memdup(sldns_buffer_begin(c->buffer), backlen);
1711                 if(!backup_p) {
1712                         log_err("malloc failure in serviced query callbacks");
1713                         error = NETEVENT_CLOSED;
1714                         c = NULL;
1715                 }
1716                 sq->outnet->svcd_overhead = backlen;
1717         }
1718         /* test the actual sq->cblist, because the next elem could be deleted*/
1719         while((p=sq->cblist) != NULL) {
1720                 sq->cblist = p->next; /* remove this element */
1721                 if(dobackup && c) {
1722                         sldns_buffer_clear(c->buffer);
1723                         sldns_buffer_write(c->buffer, backup_p, backlen);
1724                         sldns_buffer_flip(c->buffer);
1725                 }
1726                 fptr_ok(fptr_whitelist_serviced_query(p->cb));
1727                 (void)(*p->cb)(c, p->cb_arg, error, rep);
1728                 free(p);
1729         }
1730         if(backup_p) {
1731                 free(backup_p);
1732                 sq->outnet->svcd_overhead = 0;
1733         }
1734         verbose(VERB_ALGO, "svcd callbacks end");
1735         log_assert(sq->cblist == NULL);
1736         serviced_delete(sq);
1737 }
1738
1739 int 
1740 serviced_tcp_callback(struct comm_point* c, void* arg, int error,
1741         struct comm_reply* rep)
1742 {
1743         struct serviced_query* sq = (struct serviced_query*)arg;
1744         struct comm_reply r2;
1745         sq->pending = NULL; /* removed after this callback */
1746         if(error != NETEVENT_NOERROR)
1747                 log_addr(VERB_QUERY, "tcp error for address", 
1748                         &sq->addr, sq->addrlen);
1749         if(error==NETEVENT_NOERROR)
1750                 infra_update_tcp_works(sq->outnet->infra, &sq->addr,
1751                         sq->addrlen, sq->zone, sq->zonelen);
1752 #ifdef USE_DNSTAP
1753         if(error==NETEVENT_NOERROR && sq->outnet->dtenv &&
1754            (sq->outnet->dtenv->log_resolver_response_messages ||
1755             sq->outnet->dtenv->log_forwarder_response_messages))
1756                 dt_msg_send_outside_response(sq->outnet->dtenv, &sq->addr,
1757                 c->type, sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen,
1758                 &sq->last_sent_time, sq->outnet->now_tv, c->buffer);
1759 #endif
1760         if(error==NETEVENT_NOERROR && sq->status == serviced_query_TCP_EDNS &&
1761                 (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == 
1762                 LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(sldns_buffer_begin(
1763                 c->buffer)) == LDNS_RCODE_NOTIMPL) ) {
1764                 /* attempt to fallback to nonEDNS */
1765                 sq->status = serviced_query_TCP_EDNS_fallback;
1766                 serviced_tcp_initiate(sq, c->buffer);
1767                 return 0;
1768         } else if(error==NETEVENT_NOERROR && 
1769                 sq->status == serviced_query_TCP_EDNS_fallback &&
1770                         (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == 
1771                         LDNS_RCODE_NOERROR || LDNS_RCODE_WIRE(
1772                         sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NXDOMAIN 
1773                         || LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) 
1774                         == LDNS_RCODE_YXDOMAIN)) {
1775                 /* the fallback produced a result that looks promising, note
1776                  * that this server should be approached without EDNS */
1777                 /* only store noEDNS in cache if domain is noDNSSEC */
1778                 if(!sq->want_dnssec)
1779                   if(!infra_edns_update(sq->outnet->infra, &sq->addr, 
1780                         sq->addrlen, sq->zone, sq->zonelen, -1,
1781                         *sq->outnet->now_secs))
1782                         log_err("Out of memory caching no edns for host");
1783                 sq->status = serviced_query_TCP;
1784         }
1785         if(sq->tcp_upstream || sq->ssl_upstream) {
1786             struct timeval now = *sq->outnet->now_tv;
1787             if(now.tv_sec > sq->last_sent_time.tv_sec ||
1788                 (now.tv_sec == sq->last_sent_time.tv_sec &&
1789                 now.tv_usec > sq->last_sent_time.tv_usec)) {
1790                 /* convert from microseconds to milliseconds */
1791                 int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000
1792                   + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000;
1793                 verbose(VERB_ALGO, "measured TCP-time at %d msec", roundtime);
1794                 log_assert(roundtime >= 0);
1795                 /* only store if less then AUTH_TIMEOUT seconds, it could be
1796                  * huge due to system-hibernated and we woke up */
1797                 if(roundtime < TCP_AUTH_QUERY_TIMEOUT*1000) {
1798                     if(!infra_rtt_update(sq->outnet->infra, &sq->addr,
1799                         sq->addrlen, sq->zone, sq->zonelen, sq->qtype,
1800                         roundtime, sq->last_rtt, (time_t)now.tv_sec))
1801                         log_err("out of memory noting rtt.");
1802                 }
1803             }
1804         }
1805         /* insert address into reply info */
1806         if(!rep) {
1807                 /* create one if there isn't (on errors) */
1808                 rep = &r2;
1809                 r2.c = c;
1810         }
1811         memcpy(&rep->addr, &sq->addr, sq->addrlen);
1812         rep->addrlen = sq->addrlen;
1813         serviced_callbacks(sq, error, c, rep);
1814         return 0;
1815 }
1816
1817 static void
1818 serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff)
1819 {
1820         verbose(VERB_ALGO, "initiate TCP query %s", 
1821                 sq->status==serviced_query_TCP_EDNS?"EDNS":"");
1822         serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS);
1823         sq->last_sent_time = *sq->outnet->now_tv;
1824         sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT,
1825                 serviced_tcp_callback, sq);
1826         if(!sq->pending) {
1827                 /* delete from tree so that a retry by above layer does not
1828                  * clash with this entry */
1829                 log_err("serviced_tcp_initiate: failed to send tcp query");
1830                 serviced_callbacks(sq, NETEVENT_CLOSED, NULL, NULL);
1831         }
1832 }
1833
1834 /** Send serviced query over TCP return false on initial failure */
1835 static int
1836 serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff)
1837 {
1838         int vs, rtt;
1839         uint8_t edns_lame_known;
1840         if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone,
1841                 sq->zonelen, *sq->outnet->now_secs, &vs, &edns_lame_known,
1842                 &rtt))
1843                 return 0;
1844         if(vs != -1)
1845                 sq->status = serviced_query_TCP_EDNS;
1846         else    sq->status = serviced_query_TCP;
1847         serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS);
1848         sq->last_sent_time = *sq->outnet->now_tv;
1849         sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT,
1850                 serviced_tcp_callback, sq);
1851         return sq->pending != NULL;
1852 }
1853
1854 /* see if packet is edns malformed; got zeroes at start.
1855  * This is from servers that return malformed packets to EDNS0 queries,
1856  * but they return good packets for nonEDNS0 queries.
1857  * We try to detect their output; without resorting to a full parse or
1858  * check for too many bytes after the end of the packet. */
1859 static int
1860 packet_edns_malformed(struct sldns_buffer* buf, int qtype)
1861 {
1862         size_t len;
1863         if(sldns_buffer_limit(buf) < LDNS_HEADER_SIZE)
1864                 return 1; /* malformed */
1865         /* they have NOERROR rcode, 1 answer. */
1866         if(LDNS_RCODE_WIRE(sldns_buffer_begin(buf)) != LDNS_RCODE_NOERROR)
1867                 return 0;
1868         /* one query (to skip) and answer records */
1869         if(LDNS_QDCOUNT(sldns_buffer_begin(buf)) != 1 ||
1870                 LDNS_ANCOUNT(sldns_buffer_begin(buf)) == 0)
1871                 return 0;
1872         /* skip qname */
1873         len = dname_valid(sldns_buffer_at(buf, LDNS_HEADER_SIZE),
1874                 sldns_buffer_limit(buf)-LDNS_HEADER_SIZE);
1875         if(len == 0)
1876                 return 0;
1877         if(len == 1 && qtype == 0)
1878                 return 0; /* we asked for '.' and type 0 */
1879         /* and then 4 bytes (type and class of query) */
1880         if(sldns_buffer_limit(buf) < LDNS_HEADER_SIZE + len + 4 + 3)
1881                 return 0;
1882
1883         /* and start with 11 zeroes as the answer RR */
1884         /* so check the qtype of the answer record, qname=0, type=0 */
1885         if(sldns_buffer_at(buf, LDNS_HEADER_SIZE+len+4)[0] == 0 &&
1886            sldns_buffer_at(buf, LDNS_HEADER_SIZE+len+4)[1] == 0 &&
1887            sldns_buffer_at(buf, LDNS_HEADER_SIZE+len+4)[2] == 0)
1888                 return 1;
1889         return 0;
1890 }
1891
1892 int 
1893 serviced_udp_callback(struct comm_point* c, void* arg, int error,
1894         struct comm_reply* rep)
1895 {
1896         struct serviced_query* sq = (struct serviced_query*)arg;
1897         struct outside_network* outnet = sq->outnet;
1898         struct timeval now = *sq->outnet->now_tv;
1899         int fallback_tcp = 0;
1900
1901         sq->pending = NULL; /* removed after callback */
1902         if(error == NETEVENT_TIMEOUT) {
1903                 int rto = 0;
1904                 if(sq->status == serviced_query_PROBE_EDNS) {
1905                         /* non-EDNS probe failed; we do not know its status,
1906                          * keep trying with EDNS, timeout may not be caused
1907                          * by EDNS. */
1908                         sq->status = serviced_query_UDP_EDNS;
1909                 }
1910                 if(sq->status == serviced_query_UDP_EDNS && sq->last_rtt < 5000) {
1911                         /* fallback to 1480/1280 */
1912                         sq->status = serviced_query_UDP_EDNS_FRAG;
1913                         log_name_addr(VERB_ALGO, "try edns1xx0", sq->qbuf+10,
1914                                 &sq->addr, sq->addrlen);
1915                         if(!serviced_udp_send(sq, c->buffer)) {
1916                                 serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
1917                         }
1918                         return 0;
1919                 }
1920                 if(sq->status == serviced_query_UDP_EDNS_FRAG) {
1921                         /* fragmentation size did not fix it */
1922                         sq->status = serviced_query_UDP_EDNS;
1923                 }
1924                 sq->retry++;
1925                 if(!(rto=infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
1926                         sq->zone, sq->zonelen, sq->qtype, -1, sq->last_rtt,
1927                         (time_t)now.tv_sec)))
1928                         log_err("out of memory in UDP exponential backoff");
1929                 if(sq->retry < OUTBOUND_UDP_RETRY) {
1930                         log_name_addr(VERB_ALGO, "retry query", sq->qbuf+10,
1931                                 &sq->addr, sq->addrlen);
1932                         if(!serviced_udp_send(sq, c->buffer)) {
1933                                 serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
1934                         }
1935                         return 0;
1936                 }
1937                 if(rto >= RTT_MAX_TIMEOUT) {
1938                         fallback_tcp = 1;
1939                         /* UDP does not work, fallback to TCP below */
1940                 } else {
1941                         serviced_callbacks(sq, NETEVENT_TIMEOUT, c, rep);
1942                         return 0;
1943                 }
1944         } else if(error != NETEVENT_NOERROR) {
1945                 /* udp returns error (due to no ID or interface available) */
1946                 serviced_callbacks(sq, error, c, rep);
1947                 return 0;
1948         }
1949 #ifdef USE_DNSTAP
1950         if(error == NETEVENT_NOERROR && outnet->dtenv &&
1951            (outnet->dtenv->log_resolver_response_messages ||
1952             outnet->dtenv->log_forwarder_response_messages))
1953                 dt_msg_send_outside_response(outnet->dtenv, &sq->addr, c->type,
1954                 sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen,
1955                 &sq->last_sent_time, sq->outnet->now_tv, c->buffer);
1956 #endif
1957         if(!fallback_tcp) {
1958             if( (sq->status == serviced_query_UDP_EDNS 
1959                 ||sq->status == serviced_query_UDP_EDNS_FRAG)
1960                 && (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) 
1961                         == LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(
1962                         sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOTIMPL
1963                     || packet_edns_malformed(c->buffer, sq->qtype)
1964                         )) {
1965                 /* try to get an answer by falling back without EDNS */
1966                 verbose(VERB_ALGO, "serviced query: attempt without EDNS");
1967                 sq->status = serviced_query_UDP_EDNS_fallback;
1968                 sq->retry = 0;
1969                 if(!serviced_udp_send(sq, c->buffer)) {
1970                         serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
1971                 }
1972                 return 0;
1973             } else if(sq->status == serviced_query_PROBE_EDNS) {
1974                 /* probe without EDNS succeeds, so we conclude that this
1975                  * host likely has EDNS packets dropped */
1976                 log_addr(VERB_DETAIL, "timeouts, concluded that connection to "
1977                         "host drops EDNS packets", &sq->addr, sq->addrlen);
1978                 /* only store noEDNS in cache if domain is noDNSSEC */
1979                 if(!sq->want_dnssec)
1980                   if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
1981                         sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) {
1982                         log_err("Out of memory caching no edns for host");
1983                   }
1984                 sq->status = serviced_query_UDP;
1985             } else if(sq->status == serviced_query_UDP_EDNS && 
1986                 !sq->edns_lame_known) {
1987                 /* now we know that edns queries received answers store that */
1988                 log_addr(VERB_ALGO, "serviced query: EDNS works for",
1989                         &sq->addr, sq->addrlen);
1990                 if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen, 
1991                         sq->zone, sq->zonelen, 0, (time_t)now.tv_sec)) {
1992                         log_err("Out of memory caching edns works");
1993                 }
1994                 sq->edns_lame_known = 1;
1995             } else if(sq->status == serviced_query_UDP_EDNS_fallback &&
1996                 !sq->edns_lame_known && (LDNS_RCODE_WIRE(
1997                 sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOERROR || 
1998                 LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == 
1999                 LDNS_RCODE_NXDOMAIN || LDNS_RCODE_WIRE(sldns_buffer_begin(
2000                 c->buffer)) == LDNS_RCODE_YXDOMAIN)) {
2001                 /* the fallback produced a result that looks promising, note
2002                  * that this server should be approached without EDNS */
2003                 /* only store noEDNS in cache if domain is noDNSSEC */
2004                 if(!sq->want_dnssec) {
2005                   log_addr(VERB_ALGO, "serviced query: EDNS fails for",
2006                         &sq->addr, sq->addrlen);
2007                   if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
2008                         sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) {
2009                         log_err("Out of memory caching no edns for host");
2010                   }
2011                 } else {
2012                   log_addr(VERB_ALGO, "serviced query: EDNS fails, but "
2013                         "not stored because need DNSSEC for", &sq->addr,
2014                         sq->addrlen);
2015                 }
2016                 sq->status = serviced_query_UDP;
2017             }
2018             if(now.tv_sec > sq->last_sent_time.tv_sec ||
2019                 (now.tv_sec == sq->last_sent_time.tv_sec &&
2020                 now.tv_usec > sq->last_sent_time.tv_usec)) {
2021                 /* convert from microseconds to milliseconds */
2022                 int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000
2023                   + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000;
2024                 verbose(VERB_ALGO, "measured roundtrip at %d msec", roundtime);
2025                 log_assert(roundtime >= 0);
2026                 /* in case the system hibernated, do not enter a huge value,
2027                  * above this value gives trouble with server selection */
2028                 if(roundtime < 60000) {
2029                     if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, 
2030                         sq->zone, sq->zonelen, sq->qtype, roundtime,
2031                         sq->last_rtt, (time_t)now.tv_sec))
2032                         log_err("out of memory noting rtt.");
2033                 }
2034             }
2035         } /* end of if_!fallback_tcp */
2036         /* perform TC flag check and TCP fallback after updating our
2037          * cache entries for EDNS status and RTT times */
2038         if(LDNS_TC_WIRE(sldns_buffer_begin(c->buffer)) || fallback_tcp) {
2039                 /* fallback to TCP */
2040                 /* this discards partial UDP contents */
2041                 if(sq->status == serviced_query_UDP_EDNS ||
2042                         sq->status == serviced_query_UDP_EDNS_FRAG ||
2043                         sq->status == serviced_query_UDP_EDNS_fallback)
2044                         /* if we have unfinished EDNS_fallback, start again */
2045                         sq->status = serviced_query_TCP_EDNS;
2046                 else    sq->status = serviced_query_TCP;
2047                 serviced_tcp_initiate(sq, c->buffer);
2048                 return 0;
2049         }
2050         /* yay! an answer */
2051         serviced_callbacks(sq, error, c, rep);
2052         return 0;
2053 }
2054
2055 struct serviced_query* 
2056 outnet_serviced_query(struct outside_network* outnet,
2057         struct query_info* qinfo, uint16_t flags, int dnssec, int want_dnssec,
2058         int nocaps, int tcp_upstream, int ssl_upstream,
2059         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone,
2060         size_t zonelen, struct module_qstate* qstate,
2061         comm_point_callback_type* callback, void* callback_arg, sldns_buffer* buff,
2062         struct module_env* env)
2063 {
2064         struct serviced_query* sq;
2065         struct service_callback* cb;
2066         if(!inplace_cb_query_call(env, qinfo, flags, addr, addrlen, zone, zonelen,
2067                 qstate, qstate->region))
2068                         return NULL;
2069         serviced_gen_query(buff, qinfo->qname, qinfo->qname_len, qinfo->qtype,
2070                 qinfo->qclass, flags);
2071         sq = lookup_serviced(outnet, buff, dnssec, addr, addrlen,
2072                 qstate->edns_opts_back_out);
2073         /* duplicate entries are included in the callback list, because
2074          * there is a counterpart registration by our caller that needs to
2075          * be doubly-removed (with callbacks perhaps). */
2076         if(!(cb = (struct service_callback*)malloc(sizeof(*cb))))
2077                 return NULL;
2078         if(!sq) {
2079                 /* make new serviced query entry */
2080                 sq = serviced_create(outnet, buff, dnssec, want_dnssec, nocaps,
2081                         tcp_upstream, ssl_upstream, addr, addrlen, zone,
2082                         zonelen, (int)qinfo->qtype, qstate->edns_opts_back_out);
2083                 if(!sq) {
2084                         free(cb);
2085                         return NULL;
2086                 }
2087                 /* perform first network action */
2088                 if(outnet->do_udp && !(tcp_upstream || ssl_upstream)) {
2089                         if(!serviced_udp_send(sq, buff)) {
2090                                 (void)rbtree_delete(outnet->serviced, sq);
2091                                 free(sq->qbuf);
2092                                 free(sq->zone);
2093                                 free(sq);
2094                                 free(cb);
2095                                 return NULL;
2096                         }
2097                 } else {
2098                         if(!serviced_tcp_send(sq, buff)) {
2099                                 (void)rbtree_delete(outnet->serviced, sq);
2100                                 free(sq->qbuf);
2101                                 free(sq->zone);
2102                                 free(sq);
2103                                 free(cb);
2104                                 return NULL;
2105                         }
2106                 }
2107         }
2108         /* add callback to list of callbacks */
2109         cb->cb = callback;
2110         cb->cb_arg = callback_arg;
2111         cb->next = sq->cblist;
2112         sq->cblist = cb;
2113         return sq;
2114 }
2115
2116 /** remove callback from list */
2117 static void
2118 callback_list_remove(struct serviced_query* sq, void* cb_arg)
2119 {
2120         struct service_callback** pp = &sq->cblist;
2121         while(*pp) {
2122                 if((*pp)->cb_arg == cb_arg) {
2123                         struct service_callback* del = *pp;
2124                         *pp = del->next;
2125                         free(del);
2126                         return;
2127                 }
2128                 pp = &(*pp)->next;
2129         }
2130 }
2131
2132 void outnet_serviced_query_stop(struct serviced_query* sq, void* cb_arg)
2133 {
2134         if(!sq) 
2135                 return;
2136         callback_list_remove(sq, cb_arg);
2137         /* if callbacks() routine scheduled deletion, let it do that */
2138         if(!sq->cblist && !sq->to_be_deleted) {
2139                 (void)rbtree_delete(sq->outnet->serviced, sq);
2140                 serviced_delete(sq); 
2141         }
2142 }
2143
2144 /** create fd to send to this destination */
2145 static int
2146 fd_for_dest(struct outside_network* outnet, struct sockaddr_storage* to_addr,
2147         socklen_t to_addrlen)
2148 {
2149         struct sockaddr_storage* addr;
2150         socklen_t addrlen;
2151         int i;
2152         int try;
2153
2154         /* select interface */
2155         if(addr_is_ip6(to_addr, to_addrlen)) {
2156                 if(outnet->num_ip6 == 0) {
2157                         char to[64];
2158                         addr_to_str(to_addr, to_addrlen, to, sizeof(to));
2159                         verbose(VERB_QUERY, "need ipv6 to send, but no ipv6 outgoing interfaces, for %s", to);
2160                         return -1;
2161                 }
2162                 i = ub_random_max(outnet->rnd, outnet->num_ip6);
2163                 addr = &outnet->ip6_ifs[i].addr;
2164                 addrlen = outnet->ip6_ifs[i].addrlen;
2165         } else {
2166                 if(outnet->num_ip4 == 0) {
2167                         char to[64];
2168                         addr_to_str(to_addr, to_addrlen, to, sizeof(to));
2169                         verbose(VERB_QUERY, "need ipv4 to send, but no ipv4 outgoing interfaces, for %s", to);
2170                         return -1;
2171                 }
2172                 i = ub_random_max(outnet->rnd, outnet->num_ip4);
2173                 addr = &outnet->ip4_ifs[i].addr;
2174                 addrlen = outnet->ip4_ifs[i].addrlen;
2175         }
2176
2177         /* create fd */
2178         for(try = 0; try<1000; try++) {
2179                 int freebind = 0;
2180                 int noproto = 0;
2181                 int inuse = 0;
2182                 int port = ub_random(outnet->rnd)&0xffff;
2183                 int fd = -1;
2184                 if(addr_is_ip6(to_addr, to_addrlen)) {
2185                         struct sockaddr_in6 sa = *(struct sockaddr_in6*)addr;
2186                         sa.sin6_port = (in_port_t)htons((uint16_t)port);
2187                         fd = create_udp_sock(AF_INET6, SOCK_DGRAM,
2188                                 (struct sockaddr*)&sa, addrlen, 1, &inuse, &noproto,
2189                                 0, 0, 0, NULL, 0, freebind, 0);
2190                 } else {
2191                         struct sockaddr_in* sa = (struct sockaddr_in*)addr;
2192                         sa->sin_port = (in_port_t)htons((uint16_t)port);
2193                         fd = create_udp_sock(AF_INET, SOCK_DGRAM, 
2194                                 (struct sockaddr*)addr, addrlen, 1, &inuse, &noproto,
2195                                 0, 0, 0, NULL, 0, freebind, 0);
2196                 }
2197                 if(fd != -1) {
2198                         return fd;
2199                 }
2200                 if(!inuse) {
2201                         return -1;
2202                 }
2203         }
2204         /* too many tries */
2205         log_err("cannot send probe, ports are in use");
2206         return -1;
2207 }
2208
2209 struct comm_point*
2210 outnet_comm_point_for_udp(struct outside_network* outnet,
2211         comm_point_callback_type* cb, void* cb_arg,
2212         struct sockaddr_storage* to_addr, socklen_t to_addrlen)
2213 {
2214         struct comm_point* cp;
2215         int fd = fd_for_dest(outnet, to_addr, to_addrlen);
2216         if(fd == -1) {
2217                 return NULL;
2218         }
2219         cp = comm_point_create_udp(outnet->base, fd, outnet->udp_buff,
2220                 cb, cb_arg);
2221         if(!cp) {
2222                 log_err("malloc failure");
2223                 close(fd);
2224                 return NULL;
2225         }
2226         return cp;
2227 }
2228
2229 struct comm_point*
2230 outnet_comm_point_for_tcp(struct outside_network* outnet,
2231         comm_point_callback_type* cb, void* cb_arg,
2232         struct sockaddr_storage* to_addr, socklen_t to_addrlen,
2233         sldns_buffer* query, int timeout)
2234 {
2235         struct comm_point* cp;
2236         int fd = outnet_get_tcp_fd(to_addr, to_addrlen, outnet->tcp_mss);
2237         if(fd == -1) {
2238                 return 0;
2239         }
2240         fd_set_nonblock(fd);
2241         if(!outnet_tcp_connect(fd, to_addr, to_addrlen)) {
2242                 /* outnet_tcp_connect has closed fd on error for us */
2243                 return 0;
2244         }
2245         cp = comm_point_create_tcp_out(outnet->base, 65552, cb, cb_arg);
2246         if(!cp) {
2247                 log_err("malloc failure");
2248                 close(fd);
2249                 return 0;
2250         }
2251         cp->repinfo.addrlen = to_addrlen;
2252         memcpy(&cp->repinfo.addr, to_addr, to_addrlen);
2253         /* set timeout on TCP connection */
2254         comm_point_start_listening(cp, fd, timeout);
2255         /* copy scratch buffer to cp->buffer */
2256         sldns_buffer_copy(cp->buffer, query);
2257         return cp;
2258 }
2259
2260 /** setup http request headers in buffer for sending query to destination */
2261 static int
2262 setup_http_request(sldns_buffer* buf, char* host, char* path)
2263 {
2264         sldns_buffer_clear(buf);
2265         sldns_buffer_printf(buf, "GET /%s HTTP/1.1\r\n", path);
2266         sldns_buffer_printf(buf, "Host: %s\r\n", host);
2267         sldns_buffer_printf(buf, "User-Agent: unbound/%s\r\n",
2268                 PACKAGE_VERSION);
2269         /* We do not really do multiple queries per connection,
2270          * but this header setting is also not needed.
2271          * sldns_buffer_printf(buf, "Connection: close\r\n") */
2272         sldns_buffer_printf(buf, "\r\n");
2273         if(sldns_buffer_position(buf)+10 > sldns_buffer_capacity(buf))
2274                 return 0; /* somehow buffer too short, but it is about 60K
2275                 and the request is only a couple bytes long. */
2276         sldns_buffer_flip(buf);
2277         return 1;
2278 }
2279
2280 struct comm_point*
2281 outnet_comm_point_for_http(struct outside_network* outnet,
2282         comm_point_callback_type* cb, void* cb_arg,
2283         struct sockaddr_storage* to_addr, socklen_t to_addrlen, int timeout,
2284         int ssl, char* host, char* path)
2285 {
2286         /* cp calls cb with err=NETEVENT_DONE when transfer is done */
2287         struct comm_point* cp;
2288         int fd = outnet_get_tcp_fd(to_addr, to_addrlen, outnet->tcp_mss);
2289         if(fd == -1) {
2290                 return 0;
2291         }
2292         fd_set_nonblock(fd);
2293         if(!outnet_tcp_connect(fd, to_addr, to_addrlen)) {
2294                 /* outnet_tcp_connect has closed fd on error for us */
2295                 return 0;
2296         }
2297         cp = comm_point_create_http_out(outnet->base, 65552, cb, cb_arg,
2298                 outnet->udp_buff);
2299         if(!cp) {
2300                 log_err("malloc failure");
2301                 close(fd);
2302                 return 0;
2303         }
2304         cp->repinfo.addrlen = to_addrlen;
2305         memcpy(&cp->repinfo.addr, to_addr, to_addrlen);
2306
2307         /* setup for SSL (if needed) */
2308         if(ssl) {
2309                 cp->ssl = outgoing_ssl_fd(outnet->sslctx, fd);
2310                 if(!cp->ssl) {
2311                         log_err("cannot setup https");
2312                         comm_point_delete(cp);
2313                         return NULL;
2314                 }
2315 #ifdef USE_WINSOCK
2316                 comm_point_tcp_win_bio_cb(cp, cp->ssl);
2317 #endif
2318                 cp->ssl_shake_state = comm_ssl_shake_write;
2319                 /* https verification */
2320 #ifdef HAVE_SSL_SET1_HOST
2321                 if((SSL_CTX_get_verify_mode(outnet->sslctx)&SSL_VERIFY_PEER)) {
2322                         /* because we set SSL_VERIFY_PEER, in netevent in
2323                          * ssl_handshake, it'll check if the certificate
2324                          * verification has succeeded */
2325                         /* SSL_VERIFY_PEER is set on the sslctx */
2326                         /* and the certificates to verify with are loaded into
2327                          * it with SSL_load_verify_locations or
2328                          * SSL_CTX_set_default_verify_paths */
2329                         /* setting the hostname makes openssl verify the
2330                          * host name in the x509 certificate in the
2331                          * SSL connection*/
2332                         if(!SSL_set1_host(cp->ssl, host)) {
2333                                 log_err("SSL_set1_host failed");
2334                                 comm_point_delete(cp);
2335                                 return NULL;
2336                         }
2337                 }
2338 #endif /* HAVE_SSL_SET1_HOST */
2339         }
2340
2341         /* set timeout on TCP connection */
2342         comm_point_start_listening(cp, fd, timeout);
2343
2344         /* setup http request in cp->buffer */
2345         if(!setup_http_request(cp->buffer, host, path)) {
2346                 log_err("error setting up http request");
2347                 comm_point_delete(cp);
2348                 return NULL;
2349         }
2350         return cp;
2351 }
2352
2353 /** get memory used by waiting tcp entry (in use or not) */
2354 static size_t
2355 waiting_tcp_get_mem(struct waiting_tcp* w)
2356 {
2357         size_t s;
2358         if(!w) return 0;
2359         s = sizeof(*w) + w->pkt_len;
2360         if(w->timer)
2361                 s += comm_timer_get_mem(w->timer);
2362         return s;
2363 }
2364
2365 /** get memory used by port if */
2366 static size_t
2367 if_get_mem(struct port_if* pif)
2368 {
2369         size_t s;
2370         int i;
2371         s = sizeof(*pif) + sizeof(int)*pif->avail_total +
2372                 sizeof(struct port_comm*)*pif->maxout;
2373         for(i=0; i<pif->inuse; i++)
2374                 s += sizeof(*pif->out[i]) + 
2375                         comm_point_get_mem(pif->out[i]->cp);
2376         return s;
2377 }
2378
2379 /** get memory used by waiting udp */
2380 static size_t
2381 waiting_udp_get_mem(struct pending* w)
2382 {
2383         size_t s;
2384         s = sizeof(*w) + comm_timer_get_mem(w->timer) + w->pkt_len;
2385         return s;
2386 }
2387
2388 size_t outnet_get_mem(struct outside_network* outnet)
2389 {
2390         size_t i;
2391         int k;
2392         struct waiting_tcp* w;
2393         struct pending* u;
2394         struct serviced_query* sq;
2395         struct service_callback* sb;
2396         struct port_comm* pc;
2397         size_t s = sizeof(*outnet) + sizeof(*outnet->base) + 
2398                 sizeof(*outnet->udp_buff) + 
2399                 sldns_buffer_capacity(outnet->udp_buff);
2400         /* second buffer is not ours */
2401         for(pc = outnet->unused_fds; pc; pc = pc->next) {
2402                 s += sizeof(*pc) + comm_point_get_mem(pc->cp);
2403         }
2404         for(k=0; k<outnet->num_ip4; k++)
2405                 s += if_get_mem(&outnet->ip4_ifs[k]);
2406         for(k=0; k<outnet->num_ip6; k++)
2407                 s += if_get_mem(&outnet->ip6_ifs[k]);
2408         for(u=outnet->udp_wait_first; u; u=u->next_waiting)
2409                 s += waiting_udp_get_mem(u);
2410         
2411         s += sizeof(struct pending_tcp*)*outnet->num_tcp;
2412         for(i=0; i<outnet->num_tcp; i++) {
2413                 s += sizeof(struct pending_tcp);
2414                 s += comm_point_get_mem(outnet->tcp_conns[i]->c);
2415                 if(outnet->tcp_conns[i]->query)
2416                         s += waiting_tcp_get_mem(outnet->tcp_conns[i]->query);
2417         }
2418         for(w=outnet->tcp_wait_first; w; w = w->next_waiting)
2419                 s += waiting_tcp_get_mem(w);
2420         s += sizeof(*outnet->pending);
2421         s += (sizeof(struct pending) + comm_timer_get_mem(NULL)) * 
2422                 outnet->pending->count;
2423         s += sizeof(*outnet->serviced);
2424         s += outnet->svcd_overhead;
2425         RBTREE_FOR(sq, struct serviced_query*, outnet->serviced) {
2426                 s += sizeof(*sq) + sq->qbuflen;
2427                 for(sb = sq->cblist; sb; sb = sb->next)
2428                         s += sizeof(*sb);
2429         }
2430         return s;
2431 }
2432
2433 size_t 
2434 serviced_get_mem(struct serviced_query* sq)
2435 {
2436         struct service_callback* sb;
2437         size_t s;
2438         s = sizeof(*sq) + sq->qbuflen;
2439         for(sb = sq->cblist; sb; sb = sb->next)
2440                 s += sizeof(*sb);
2441         if(sq->status == serviced_query_UDP_EDNS ||
2442                 sq->status == serviced_query_UDP ||
2443                 sq->status == serviced_query_PROBE_EDNS ||
2444                 sq->status == serviced_query_UDP_EDNS_FRAG ||
2445                 sq->status == serviced_query_UDP_EDNS_fallback) {
2446                 s += sizeof(struct pending);
2447                 s += comm_timer_get_mem(NULL);
2448         } else {
2449                 /* does not have size of the pkt pointer */
2450                 /* always has a timer except on malloc failures */
2451
2452                 /* these sizes are part of the main outside network mem */
2453                 /*
2454                 s += sizeof(struct waiting_tcp);
2455                 s += comm_timer_get_mem(NULL);
2456                 */
2457         }
2458         return s;
2459 }
2460