]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/services/outside_network.c
MFV r302218: file 5.28.
[FreeBSD/FreeBSD.git] / contrib / unbound / services / outside_network.c
1 /*
2  * services/outside_network.c - implement sending of queries and wait answer.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  * 
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  * 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * 
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  * 
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file has functions to send queries to authoritative servers and
40  * wait for the pending answer events.
41  */
42 #include "config.h"
43 #include <ctype.h>
44 #ifdef HAVE_SYS_TYPES_H
45 #  include <sys/types.h>
46 #endif
47 #include <sys/time.h>
48 #include "services/outside_network.h"
49 #include "services/listen_dnsport.h"
50 #include "services/cache/infra.h"
51 #include "util/data/msgparse.h"
52 #include "util/data/msgreply.h"
53 #include "util/data/msgencode.h"
54 #include "util/data/dname.h"
55 #include "util/netevent.h"
56 #include "util/log.h"
57 #include "util/net_help.h"
58 #include "util/random.h"
59 #include "util/fptr_wlist.h"
60 #include "sldns/sbuffer.h"
61 #include "dnstap/dnstap.h"
62 #ifdef HAVE_OPENSSL_SSL_H
63 #include <openssl/ssl.h>
64 #endif
65
66 #ifdef HAVE_NETDB_H
67 #include <netdb.h>
68 #endif
69 #include <fcntl.h>
70
71 /** number of times to retry making a random ID that is unique. */
72 #define MAX_ID_RETRY 1000
73 /** number of times to retry finding interface, port that can be opened. */
74 #define MAX_PORT_RETRY 10000
75 /** number of retries on outgoing UDP queries */
76 #define OUTBOUND_UDP_RETRY 1
77
78 /** initiate TCP transaction for serviced query */
79 static void serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff);
80 /** with a fd available, randomize and send UDP */
81 static int randomize_and_send_udp(struct pending* pend, sldns_buffer* packet,
82         int timeout);
83
84 /** remove waiting tcp from the outnet waiting list */
85 static void waiting_list_remove(struct outside_network* outnet,
86         struct waiting_tcp* w);
87
88 int 
89 pending_cmp(const void* key1, const void* key2)
90 {
91         struct pending *p1 = (struct pending*)key1;
92         struct pending *p2 = (struct pending*)key2;
93         if(p1->id < p2->id)
94                 return -1;
95         if(p1->id > p2->id)
96                 return 1;
97         log_assert(p1->id == p2->id);
98         return sockaddr_cmp(&p1->addr, p1->addrlen, &p2->addr, p2->addrlen);
99 }
100
101 int 
102 serviced_cmp(const void* key1, const void* key2)
103 {
104         struct serviced_query* q1 = (struct serviced_query*)key1;
105         struct serviced_query* q2 = (struct serviced_query*)key2;
106         int r;
107         if(q1->qbuflen < q2->qbuflen)
108                 return -1;
109         if(q1->qbuflen > q2->qbuflen)
110                 return 1;
111         log_assert(q1->qbuflen == q2->qbuflen);
112         log_assert(q1->qbuflen >= 15 /* 10 header, root, type, class */);
113         /* alternate casing of qname is still the same query */
114         if((r = memcmp(q1->qbuf, q2->qbuf, 10)) != 0)
115                 return r;
116         if((r = memcmp(q1->qbuf+q1->qbuflen-4, q2->qbuf+q2->qbuflen-4, 4)) != 0)
117                 return r;
118         if(q1->dnssec != q2->dnssec) {
119                 if(q1->dnssec < q2->dnssec)
120                         return -1;
121                 return 1;
122         }
123         if((r = query_dname_compare(q1->qbuf+10, q2->qbuf+10)) != 0)
124                 return r;
125         return sockaddr_cmp(&q1->addr, q1->addrlen, &q2->addr, q2->addrlen);
126 }
127
128 /** delete waiting_tcp entry. Does not unlink from waiting list. 
129  * @param w: to delete.
130  */
131 static void
132 waiting_tcp_delete(struct waiting_tcp* w)
133 {
134         if(!w) return;
135         if(w->timer)
136                 comm_timer_delete(w->timer);
137         free(w);
138 }
139
140 /** 
141  * Pick random outgoing-interface of that family, and bind it.
142  * port set to 0 so OS picks a port number for us.
143  * if it is the ANY address, do not bind.
144  * @param w: tcp structure with destination address.
145  * @param s: socket fd.
146  * @return false on error, socket closed.
147  */
148 static int
149 pick_outgoing_tcp(struct waiting_tcp* w, int s)
150 {
151         struct port_if* pi = NULL;
152         int num;
153 #ifdef INET6
154         if(addr_is_ip6(&w->addr, w->addrlen))
155                 num = w->outnet->num_ip6;
156         else
157 #endif
158                 num = w->outnet->num_ip4;
159         if(num == 0) {
160                 log_err("no TCP outgoing interfaces of family");
161                 log_addr(VERB_OPS, "for addr", &w->addr, w->addrlen);
162 #ifndef USE_WINSOCK
163                 close(s);
164 #else
165                 closesocket(s);
166 #endif
167                 return 0;
168         }
169 #ifdef INET6
170         if(addr_is_ip6(&w->addr, w->addrlen))
171                 pi = &w->outnet->ip6_ifs[ub_random_max(w->outnet->rnd, num)];
172         else
173 #endif
174                 pi = &w->outnet->ip4_ifs[ub_random_max(w->outnet->rnd, num)];
175         log_assert(pi);
176         if(addr_is_any(&pi->addr, pi->addrlen)) {
177                 /* binding to the ANY interface is for listening sockets */
178                 return 1;
179         }
180         /* set port to 0 */
181         if(addr_is_ip6(&pi->addr, pi->addrlen))
182                 ((struct sockaddr_in6*)&pi->addr)->sin6_port = 0;
183         else    ((struct sockaddr_in*)&pi->addr)->sin_port = 0;
184         if(bind(s, (struct sockaddr*)&pi->addr, pi->addrlen) != 0) {
185 #ifndef USE_WINSOCK
186                 log_err("outgoing tcp: bind: %s", strerror(errno));
187                 close(s);
188 #else
189                 log_err("outgoing tcp: bind: %s", 
190                         wsa_strerror(WSAGetLastError()));
191                 closesocket(s);
192 #endif
193                 return 0;
194         }
195         log_addr(VERB_ALGO, "tcp bound to src", &pi->addr, pi->addrlen);
196         return 1;
197 }
198
199 /** use next free buffer to service a tcp query */
200 static int
201 outnet_tcp_take_into_use(struct waiting_tcp* w, uint8_t* pkt, size_t pkt_len)
202 {
203         struct pending_tcp* pend = w->outnet->tcp_free;
204         int s;
205         log_assert(pend);
206         log_assert(pkt);
207         log_assert(w->addrlen > 0);
208         /* open socket */
209 #ifdef INET6
210         if(addr_is_ip6(&w->addr, w->addrlen))
211                 s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
212         else
213 #endif
214                 s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
215         if(s == -1) {
216 #ifndef USE_WINSOCK
217                 log_err_addr("outgoing tcp: socket", strerror(errno),
218                         &w->addr, w->addrlen);
219 #else
220                 log_err_addr("outgoing tcp: socket", 
221                         wsa_strerror(WSAGetLastError()), &w->addr, w->addrlen);
222 #endif
223                 return 0;
224         }
225
226         if (w->outnet->tcp_mss > 0) {
227 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
228                 if(setsockopt(s, IPPROTO_TCP, TCP_MAXSEG,
229                         (void*)&w->outnet->tcp_mss,
230                         (socklen_t)sizeof(w->outnet->tcp_mss)) < 0) {
231                         verbose(VERB_ALGO, "outgoing tcp:"
232                                 " setsockopt(.. SO_REUSEADDR ..) failed");
233                 }
234 #else
235                 verbose(VERB_ALGO, "outgoing tcp:"
236                         " setsockopt(TCP_MAXSEG) unsupported");
237 #endif /* defined(IPPROTO_TCP) && defined(TCP_MAXSEG) */
238         }
239
240         if(!pick_outgoing_tcp(w, s))
241                 return 0;
242
243         fd_set_nonblock(s);
244         if(connect(s, (struct sockaddr*)&w->addr, w->addrlen) == -1) {
245 #ifndef USE_WINSOCK
246 #ifdef EINPROGRESS
247                 if(errno != EINPROGRESS) {
248 #else
249                 if(1) {
250 #endif
251                         if(tcp_connect_errno_needs_log(
252                                 (struct sockaddr*)&w->addr, w->addrlen))
253                                 log_err_addr("outgoing tcp: connect",
254                                         strerror(errno), &w->addr, w->addrlen);
255                         close(s);
256 #else /* USE_WINSOCK */
257                 if(WSAGetLastError() != WSAEINPROGRESS &&
258                         WSAGetLastError() != WSAEWOULDBLOCK) {
259                         closesocket(s);
260 #endif
261                         return 0;
262                 }
263         }
264         if(w->outnet->sslctx && w->ssl_upstream) {
265                 pend->c->ssl = outgoing_ssl_fd(w->outnet->sslctx, s);
266                 if(!pend->c->ssl) {
267                         pend->c->fd = s;
268                         comm_point_close(pend->c);
269                         return 0;
270                 }
271 #ifdef USE_WINSOCK
272                 comm_point_tcp_win_bio_cb(pend->c, pend->c->ssl);
273 #endif
274                 pend->c->ssl_shake_state = comm_ssl_shake_write;
275         }
276         w->pkt = NULL;
277         w->next_waiting = (void*)pend;
278         pend->id = LDNS_ID_WIRE(pkt);
279         w->outnet->num_tcp_outgoing++;
280         w->outnet->tcp_free = pend->next_free;
281         pend->next_free = NULL;
282         pend->query = w;
283         pend->c->repinfo.addrlen = w->addrlen;
284         memcpy(&pend->c->repinfo.addr, &w->addr, w->addrlen);
285         sldns_buffer_clear(pend->c->buffer);
286         sldns_buffer_write(pend->c->buffer, pkt, pkt_len);
287         sldns_buffer_flip(pend->c->buffer);
288         pend->c->tcp_is_reading = 0;
289         pend->c->tcp_byte_count = 0;
290         comm_point_start_listening(pend->c, s, -1);
291         return 1;
292 }
293
294 /** see if buffers can be used to service TCP queries */
295 static void
296 use_free_buffer(struct outside_network* outnet)
297 {
298         struct waiting_tcp* w;
299         while(outnet->tcp_free && outnet->tcp_wait_first 
300                 && !outnet->want_to_quit) {
301                 w = outnet->tcp_wait_first;
302                 outnet->tcp_wait_first = w->next_waiting;
303                 if(outnet->tcp_wait_last == w)
304                         outnet->tcp_wait_last = NULL;
305                 if(!outnet_tcp_take_into_use(w, w->pkt, w->pkt_len)) {
306                         comm_point_callback_t* cb = w->cb;
307                         void* cb_arg = w->cb_arg;
308                         waiting_tcp_delete(w);
309                         fptr_ok(fptr_whitelist_pending_tcp(cb));
310                         (void)(*cb)(NULL, cb_arg, NETEVENT_CLOSED, NULL);
311                 }
312         }
313 }
314
315 /** decomission a tcp buffer, closes commpoint and frees waiting_tcp entry */
316 static void
317 decomission_pending_tcp(struct outside_network* outnet, 
318         struct pending_tcp* pend)
319 {
320         if(pend->c->ssl) {
321 #ifdef HAVE_SSL
322                 SSL_shutdown(pend->c->ssl);
323                 SSL_free(pend->c->ssl);
324                 pend->c->ssl = NULL;
325 #endif
326         }
327         comm_point_close(pend->c);
328         pend->next_free = outnet->tcp_free;
329         outnet->tcp_free = pend;
330         waiting_tcp_delete(pend->query);
331         pend->query = NULL;
332         use_free_buffer(outnet);
333 }
334
335 int 
336 outnet_tcp_cb(struct comm_point* c, void* arg, int error,
337         struct comm_reply *reply_info)
338 {
339         struct pending_tcp* pend = (struct pending_tcp*)arg;
340         struct outside_network* outnet = pend->query->outnet;
341         verbose(VERB_ALGO, "outnettcp cb");
342         if(error != NETEVENT_NOERROR) {
343                 verbose(VERB_QUERY, "outnettcp got tcp error %d", error);
344                 /* pass error below and exit */
345         } else {
346                 /* check ID */
347                 if(sldns_buffer_limit(c->buffer) < sizeof(uint16_t) ||
348                         LDNS_ID_WIRE(sldns_buffer_begin(c->buffer))!=pend->id) {
349                         log_addr(VERB_QUERY, 
350                                 "outnettcp: bad ID in reply, from:",
351                                 &pend->query->addr, pend->query->addrlen);
352                         error = NETEVENT_CLOSED;
353                 }
354         }
355         fptr_ok(fptr_whitelist_pending_tcp(pend->query->cb));
356         (void)(*pend->query->cb)(c, pend->query->cb_arg, error, reply_info);
357         decomission_pending_tcp(outnet, pend);
358         return 0;
359 }
360
361 /** lower use count on pc, see if it can be closed */
362 static void
363 portcomm_loweruse(struct outside_network* outnet, struct port_comm* pc)
364 {
365         struct port_if* pif;
366         pc->num_outstanding--;
367         if(pc->num_outstanding > 0) {
368                 return;
369         }
370         /* close it and replace in unused list */
371         verbose(VERB_ALGO, "close of port %d", pc->number);
372         comm_point_close(pc->cp);
373         pif = pc->pif;
374         log_assert(pif->inuse > 0);
375         pif->avail_ports[pif->avail_total - pif->inuse] = pc->number;
376         pif->inuse--;
377         pif->out[pc->index] = pif->out[pif->inuse];
378         pif->out[pc->index]->index = pc->index;
379         pc->next = outnet->unused_fds;
380         outnet->unused_fds = pc;
381 }
382
383 /** try to send waiting UDP queries */
384 static void
385 outnet_send_wait_udp(struct outside_network* outnet)
386 {
387         struct pending* pend;
388         /* process waiting queries */
389         while(outnet->udp_wait_first && outnet->unused_fds 
390                 && !outnet->want_to_quit) {
391                 pend = outnet->udp_wait_first;
392                 outnet->udp_wait_first = pend->next_waiting;
393                 if(!pend->next_waiting) outnet->udp_wait_last = NULL;
394                 sldns_buffer_clear(outnet->udp_buff);
395                 sldns_buffer_write(outnet->udp_buff, pend->pkt, pend->pkt_len);
396                 sldns_buffer_flip(outnet->udp_buff);
397                 free(pend->pkt); /* freeing now makes get_mem correct */
398                 pend->pkt = NULL; 
399                 pend->pkt_len = 0;
400                 if(!randomize_and_send_udp(pend, outnet->udp_buff,
401                         pend->timeout)) {
402                         /* callback error on pending */
403                         if(pend->cb) {
404                                 fptr_ok(fptr_whitelist_pending_udp(pend->cb));
405                                 (void)(*pend->cb)(outnet->unused_fds->cp, pend->cb_arg, 
406                                         NETEVENT_CLOSED, NULL);
407                         }
408                         pending_delete(outnet, pend);
409                 }
410         }
411 }
412
413 int 
414 outnet_udp_cb(struct comm_point* c, void* arg, int error,
415         struct comm_reply *reply_info)
416 {
417         struct outside_network* outnet = (struct outside_network*)arg;
418         struct pending key;
419         struct pending* p;
420         verbose(VERB_ALGO, "answer cb");
421
422         if(error != NETEVENT_NOERROR) {
423                 verbose(VERB_QUERY, "outnetudp got udp error %d", error);
424                 return 0;
425         }
426         if(sldns_buffer_limit(c->buffer) < LDNS_HEADER_SIZE) {
427                 verbose(VERB_QUERY, "outnetudp udp too short");
428                 return 0;
429         }
430         log_assert(reply_info);
431
432         /* setup lookup key */
433         key.id = (unsigned)LDNS_ID_WIRE(sldns_buffer_begin(c->buffer));
434         memcpy(&key.addr, &reply_info->addr, reply_info->addrlen);
435         key.addrlen = reply_info->addrlen;
436         verbose(VERB_ALGO, "Incoming reply id = %4.4x", key.id);
437         log_addr(VERB_ALGO, "Incoming reply addr =", 
438                 &reply_info->addr, reply_info->addrlen);
439
440         /* find it, see if this thing is a valid query response */
441         verbose(VERB_ALGO, "lookup size is %d entries", (int)outnet->pending->count);
442         p = (struct pending*)rbtree_search(outnet->pending, &key);
443         if(!p) {
444                 verbose(VERB_QUERY, "received unwanted or unsolicited udp reply dropped.");
445                 log_buf(VERB_ALGO, "dropped message", c->buffer);
446                 outnet->unwanted_replies++;
447                 if(outnet->unwanted_threshold && ++outnet->unwanted_total 
448                         >= outnet->unwanted_threshold) {
449                         log_warn("unwanted reply total reached threshold (%u)"
450                                 " you may be under attack."
451                                 " defensive action: clearing the cache",
452                                 (unsigned)outnet->unwanted_threshold);
453                         fptr_ok(fptr_whitelist_alloc_cleanup(
454                                 outnet->unwanted_action));
455                         (*outnet->unwanted_action)(outnet->unwanted_param);
456                         outnet->unwanted_total = 0;
457                 }
458                 return 0;
459         }
460
461         verbose(VERB_ALGO, "received udp reply.");
462         log_buf(VERB_ALGO, "udp message", c->buffer);
463         if(p->pc->cp != c) {
464                 verbose(VERB_QUERY, "received reply id,addr on wrong port. "
465                         "dropped.");
466                 outnet->unwanted_replies++;
467                 if(outnet->unwanted_threshold && ++outnet->unwanted_total 
468                         >= outnet->unwanted_threshold) {
469                         log_warn("unwanted reply total reached threshold (%u)"
470                                 " you may be under attack."
471                                 " defensive action: clearing the cache",
472                                 (unsigned)outnet->unwanted_threshold);
473                         fptr_ok(fptr_whitelist_alloc_cleanup(
474                                 outnet->unwanted_action));
475                         (*outnet->unwanted_action)(outnet->unwanted_param);
476                         outnet->unwanted_total = 0;
477                 }
478                 return 0;
479         }
480         comm_timer_disable(p->timer);
481         verbose(VERB_ALGO, "outnet handle udp reply");
482         /* delete from tree first in case callback creates a retry */
483         (void)rbtree_delete(outnet->pending, p->node.key);
484         if(p->cb) {
485                 fptr_ok(fptr_whitelist_pending_udp(p->cb));
486                 (void)(*p->cb)(p->pc->cp, p->cb_arg, NETEVENT_NOERROR, reply_info);
487         }
488         portcomm_loweruse(outnet, p->pc);
489         pending_delete(NULL, p);
490         outnet_send_wait_udp(outnet);
491         return 0;
492 }
493
/**
 * Calculate the number of ip4 and ip6 interfaces.
 * Without interfaces (num_ifs <= 0) the count is one for every enabled
 * address family. Otherwise every interface string is counted for its
 * family, if that family is enabled.
 * @param ifs: array of interface address strings.
 * @param num_ifs: number of entries in ifs.
 * @param do_ip4: if nonzero, ip4 is enabled.
 * @param do_ip6: if nonzero, ip6 is enabled.
 * @param num_ip4: returns the number of ip4 interfaces.
 * @param num_ip6: returns the number of ip6 interfaces.
 */
static void 
calc_num46(char** ifs, int num_ifs, int do_ip4, int do_ip6, 
	int* num_ip4, int* num_ip6)
{
	int idx;
	*num_ip4 = 0;
	*num_ip6 = 0;
	if(num_ifs <= 0) {
		/* no list: one default interface per enabled family */
		if(do_ip4)
			*num_ip4 = 1;
		if(do_ip6)
			*num_ip6 = 1;
		return;
	}
	for(idx = 0; idx < num_ifs; idx++) {
		if(str_is_ip6(ifs[idx])) {
			if(do_ip6)
				*num_ip6 += 1;
		} else if(do_ip4) {
			*num_ip4 += 1;
		}
	}
}
521
522 void
523 pending_udp_timer_delay_cb(void* arg)
524 {
525         struct pending* p = (struct pending*)arg;
526         struct outside_network* outnet = p->outnet;
527         verbose(VERB_ALGO, "timeout udp with delay");
528         portcomm_loweruse(outnet, p->pc);
529         pending_delete(outnet, p);
530         outnet_send_wait_udp(outnet);
531 }
532
533 void 
534 pending_udp_timer_cb(void *arg)
535 {
536         struct pending* p = (struct pending*)arg;
537         struct outside_network* outnet = p->outnet;
538         /* it timed out */
539         verbose(VERB_ALGO, "timeout udp");
540         if(p->cb) {
541                 fptr_ok(fptr_whitelist_pending_udp(p->cb));
542                 (void)(*p->cb)(p->pc->cp, p->cb_arg, NETEVENT_TIMEOUT, NULL);
543         }
544         /* if delayclose, keep port open for a longer time.
545          * But if the udpwaitlist exists, then we are struggling to
546          * keep up with demand for sockets, so do not wait, but service
547          * the customer (customer service more important than portICMPs) */
548         if(outnet->delayclose && !outnet->udp_wait_first) {
549                 p->cb = NULL;
550                 p->timer->callback = &pending_udp_timer_delay_cb;
551                 comm_timer_set(p->timer, &outnet->delay_tv);
552                 return;
553         }
554         portcomm_loweruse(outnet, p->pc);
555         pending_delete(outnet, p);
556         outnet_send_wait_udp(outnet);
557 }
558
559 /** create pending_tcp buffers */
560 static int
561 create_pending_tcp(struct outside_network* outnet, size_t bufsize)
562 {
563         size_t i;
564         if(outnet->num_tcp == 0)
565                 return 1; /* no tcp needed, nothing to do */
566         if(!(outnet->tcp_conns = (struct pending_tcp **)calloc(
567                         outnet->num_tcp, sizeof(struct pending_tcp*))))
568                 return 0;
569         for(i=0; i<outnet->num_tcp; i++) {
570                 if(!(outnet->tcp_conns[i] = (struct pending_tcp*)calloc(1, 
571                         sizeof(struct pending_tcp))))
572                         return 0;
573                 outnet->tcp_conns[i]->next_free = outnet->tcp_free;
574                 outnet->tcp_free = outnet->tcp_conns[i];
575                 outnet->tcp_conns[i]->c = comm_point_create_tcp_out(
576                         outnet->base, bufsize, outnet_tcp_cb, 
577                         outnet->tcp_conns[i]);
578                 if(!outnet->tcp_conns[i]->c)
579                         return 0;
580         }
581         return 1;
582 }
583
584 /** setup an outgoing interface, ready address */
585 static int setup_if(struct port_if* pif, const char* addrstr, 
586         int* avail, int numavail, size_t numfd)
587 {
588         pif->avail_total = numavail;
589         pif->avail_ports = (int*)memdup(avail, (size_t)numavail*sizeof(int));
590         if(!pif->avail_ports)
591                 return 0;
592         if(!ipstrtoaddr(addrstr, UNBOUND_DNS_PORT, &pif->addr, &pif->addrlen))
593                 return 0;
594         pif->maxout = (int)numfd;
595         pif->inuse = 0;
596         pif->out = (struct port_comm**)calloc(numfd, 
597                 sizeof(struct port_comm*));
598         if(!pif->out)
599                 return 0;
600         return 1;
601 }
602
603 struct outside_network* 
604 outside_network_create(struct comm_base *base, size_t bufsize, 
605         size_t num_ports, char** ifs, int num_ifs, int do_ip4, 
606         int do_ip6, size_t num_tcp, struct infra_cache* infra,
607         struct ub_randstate* rnd, int use_caps_for_id, int* availports, 
608         int numavailports, size_t unwanted_threshold, int tcp_mss,
609         void (*unwanted_action)(void*), void* unwanted_param, int do_udp,
610         void* sslctx, int delayclose, struct dt_env* dtenv)
611 {
612         struct outside_network* outnet = (struct outside_network*)
613                 calloc(1, sizeof(struct outside_network));
614         size_t k;
615         if(!outnet) {
616                 log_err("malloc failed");
617                 return NULL;
618         }
619         comm_base_timept(base, &outnet->now_secs, &outnet->now_tv);
620         outnet->base = base;
621         outnet->num_tcp = num_tcp;
622         outnet->num_tcp_outgoing = 0;
623         outnet->infra = infra;
624         outnet->rnd = rnd;
625         outnet->sslctx = sslctx;
626 #ifdef USE_DNSTAP
627         outnet->dtenv = dtenv;
628 #else
629         (void)dtenv;
630 #endif
631         outnet->svcd_overhead = 0;
632         outnet->want_to_quit = 0;
633         outnet->unwanted_threshold = unwanted_threshold;
634         outnet->unwanted_action = unwanted_action;
635         outnet->unwanted_param = unwanted_param;
636         outnet->use_caps_for_id = use_caps_for_id;
637         outnet->do_udp = do_udp;
638         outnet->tcp_mss = tcp_mss;
639 #ifndef S_SPLINT_S
640         if(delayclose) {
641                 outnet->delayclose = 1;
642                 outnet->delay_tv.tv_sec = delayclose/1000;
643                 outnet->delay_tv.tv_usec = (delayclose%1000)*1000;
644         }
645 #endif
646         if(numavailports == 0) {
647                 log_err("no outgoing ports available");
648                 outside_network_delete(outnet);
649                 return NULL;
650         }
651 #ifndef INET6
652         do_ip6 = 0;
653 #endif
654         calc_num46(ifs, num_ifs, do_ip4, do_ip6, 
655                 &outnet->num_ip4, &outnet->num_ip6);
656         if(outnet->num_ip4 != 0) {
657                 if(!(outnet->ip4_ifs = (struct port_if*)calloc(
658                         (size_t)outnet->num_ip4, sizeof(struct port_if)))) {
659                         log_err("malloc failed");
660                         outside_network_delete(outnet);
661                         return NULL;
662                 }
663         }
664         if(outnet->num_ip6 != 0) {
665                 if(!(outnet->ip6_ifs = (struct port_if*)calloc(
666                         (size_t)outnet->num_ip6, sizeof(struct port_if)))) {
667                         log_err("malloc failed");
668                         outside_network_delete(outnet);
669                         return NULL;
670                 }
671         }
672         if(     !(outnet->udp_buff = sldns_buffer_new(bufsize)) ||
673                 !(outnet->pending = rbtree_create(pending_cmp)) ||
674                 !(outnet->serviced = rbtree_create(serviced_cmp)) ||
675                 !create_pending_tcp(outnet, bufsize)) {
676                 log_err("malloc failed");
677                 outside_network_delete(outnet);
678                 return NULL;
679         }
680
681         /* allocate commpoints */
682         for(k=0; k<num_ports; k++) {
683                 struct port_comm* pc;
684                 pc = (struct port_comm*)calloc(1, sizeof(*pc));
685                 if(!pc) {
686                         log_err("malloc failed");
687                         outside_network_delete(outnet);
688                         return NULL;
689                 }
690                 pc->cp = comm_point_create_udp(outnet->base, -1, 
691                         outnet->udp_buff, outnet_udp_cb, outnet);
692                 if(!pc->cp) {
693                         log_err("malloc failed");
694                         free(pc);
695                         outside_network_delete(outnet);
696                         return NULL;
697                 }
698                 pc->next = outnet->unused_fds;
699                 outnet->unused_fds = pc;
700         }
701
702         /* allocate interfaces */
703         if(num_ifs == 0) {
704                 if(do_ip4 && !setup_if(&outnet->ip4_ifs[0], "0.0.0.0", 
705                         availports, numavailports, num_ports)) {
706                         log_err("malloc failed");
707                         outside_network_delete(outnet);
708                         return NULL;
709                 }
710                 if(do_ip6 && !setup_if(&outnet->ip6_ifs[0], "::", 
711                         availports, numavailports, num_ports)) {
712                         log_err("malloc failed");
713                         outside_network_delete(outnet);
714                         return NULL;
715                 }
716         } else {
717                 size_t done_4 = 0, done_6 = 0;
718                 int i;
719                 for(i=0; i<num_ifs; i++) {
720                         if(str_is_ip6(ifs[i]) && do_ip6) {
721                                 if(!setup_if(&outnet->ip6_ifs[done_6], ifs[i],
722                                         availports, numavailports, num_ports)){
723                                         log_err("malloc failed");
724                                         outside_network_delete(outnet);
725                                         return NULL;
726                                 }
727                                 done_6++;
728                         }
729                         if(!str_is_ip6(ifs[i]) && do_ip4) {
730                                 if(!setup_if(&outnet->ip4_ifs[done_4], ifs[i],
731                                         availports, numavailports, num_ports)){
732                                         log_err("malloc failed");
733                                         outside_network_delete(outnet);
734                                         return NULL;
735                                 }
736                                 done_4++;
737                         }
738                 }
739         }
740         return outnet;
741 }
742
743 /** helper pending delete */
744 static void
745 pending_node_del(rbnode_t* node, void* arg)
746 {
747         struct pending* pend = (struct pending*)node;
748         struct outside_network* outnet = (struct outside_network*)arg;
749         pending_delete(outnet, pend);
750 }
751
752 /** helper serviced delete */
753 static void
754 serviced_node_del(rbnode_t* node, void* ATTR_UNUSED(arg))
755 {
756         struct serviced_query* sq = (struct serviced_query*)node;
757         struct service_callback* p = sq->cblist, *np;
758         free(sq->qbuf);
759         free(sq->zone);
760         while(p) {
761                 np = p->next;
762                 free(p);
763                 p = np;
764         }
765         free(sq);
766 }
767
768 void 
769 outside_network_quit_prepare(struct outside_network* outnet)
770 {
771         if(!outnet)
772                 return;
773         /* prevent queued items from being sent */
774         outnet->want_to_quit = 1; 
775 }
776
777 void 
778 outside_network_delete(struct outside_network* outnet)
779 {
780         if(!outnet)
781                 return;
782         outnet->want_to_quit = 1;
783         /* check every element, since we can be called on malloc error */
784         if(outnet->pending) {
785                 /* free pending elements, but do no unlink from tree. */
786                 traverse_postorder(outnet->pending, pending_node_del, NULL);
787                 free(outnet->pending);
788         }
789         if(outnet->serviced) {
790                 traverse_postorder(outnet->serviced, serviced_node_del, NULL);
791                 free(outnet->serviced);
792         }
793         if(outnet->udp_buff)
794                 sldns_buffer_free(outnet->udp_buff);
795         if(outnet->unused_fds) {
796                 struct port_comm* p = outnet->unused_fds, *np;
797                 while(p) {
798                         np = p->next;
799                         comm_point_delete(p->cp);
800                         free(p);
801                         p = np;
802                 }
803                 outnet->unused_fds = NULL;
804         }
805         if(outnet->ip4_ifs) {
806                 int i, k;
807                 for(i=0; i<outnet->num_ip4; i++) {
808                         for(k=0; k<outnet->ip4_ifs[i].inuse; k++) {
809                                 struct port_comm* pc = outnet->ip4_ifs[i].
810                                         out[k];
811                                 comm_point_delete(pc->cp);
812                                 free(pc);
813                         }
814                         free(outnet->ip4_ifs[i].avail_ports);
815                         free(outnet->ip4_ifs[i].out);
816                 }
817                 free(outnet->ip4_ifs);
818         }
819         if(outnet->ip6_ifs) {
820                 int i, k;
821                 for(i=0; i<outnet->num_ip6; i++) {
822                         for(k=0; k<outnet->ip6_ifs[i].inuse; k++) {
823                                 struct port_comm* pc = outnet->ip6_ifs[i].
824                                         out[k];
825                                 comm_point_delete(pc->cp);
826                                 free(pc);
827                         }
828                         free(outnet->ip6_ifs[i].avail_ports);
829                         free(outnet->ip6_ifs[i].out);
830                 }
831                 free(outnet->ip6_ifs);
832         }
833         if(outnet->tcp_conns) {
834                 size_t i;
835                 for(i=0; i<outnet->num_tcp; i++)
836                         if(outnet->tcp_conns[i]) {
837                                 comm_point_delete(outnet->tcp_conns[i]->c);
838                                 waiting_tcp_delete(outnet->tcp_conns[i]->query);
839                                 free(outnet->tcp_conns[i]);
840                         }
841                 free(outnet->tcp_conns);
842         }
843         if(outnet->tcp_wait_first) {
844                 struct waiting_tcp* p = outnet->tcp_wait_first, *np;
845                 while(p) {
846                         np = p->next_waiting;
847                         waiting_tcp_delete(p);
848                         p = np;
849                 }
850         }
851         if(outnet->udp_wait_first) {
852                 struct pending* p = outnet->udp_wait_first, *np;
853                 while(p) {
854                         np = p->next_waiting;
855                         pending_delete(NULL, p);
856                         p = np;
857                 }
858         }
859         free(outnet);
860 }
861
862 void 
863 pending_delete(struct outside_network* outnet, struct pending* p)
864 {
865         if(!p)
866                 return;
867         if(outnet && outnet->udp_wait_first &&
868                 (p->next_waiting || p == outnet->udp_wait_last) ) {
869                 /* delete from waiting list, if it is in the waiting list */
870                 struct pending* prev = NULL, *x = outnet->udp_wait_first;
871                 while(x && x != p) {
872                         prev = x;
873                         x = x->next_waiting;
874                 }
875                 if(x) {
876                         log_assert(x == p);
877                         if(prev)
878                                 prev->next_waiting = p->next_waiting;
879                         else    outnet->udp_wait_first = p->next_waiting;
880                         if(outnet->udp_wait_last == p)
881                                 outnet->udp_wait_last = prev;
882                 }
883         }
884         if(outnet) {
885                 (void)rbtree_delete(outnet->pending, p->node.key);
886         }
887         if(p->timer)
888                 comm_timer_delete(p->timer);
889         free(p->pkt);
890         free(p);
891 }
892
893 /**
894  * Try to open a UDP socket for outgoing communication.
895  * Sets sockets options as needed.
896  * @param addr: socket address.
897  * @param addrlen: length of address.
898  * @param port: port override for addr.
899  * @param inuse: if -1 is returned, this bool means the port was in use.
900  * @return fd or -1
901  */
902 static int
903 udp_sockport(struct sockaddr_storage* addr, socklen_t addrlen, int port, 
904         int* inuse)
905 {
906         int fd, noproto;
907         if(addr_is_ip6(addr, addrlen)) {
908                 struct sockaddr_in6* sa = (struct sockaddr_in6*)addr;
909                 sa->sin6_port = (in_port_t)htons((uint16_t)port);
910                 fd = create_udp_sock(AF_INET6, SOCK_DGRAM, 
911                         (struct sockaddr*)addr, addrlen, 1, inuse, &noproto,
912                         0, 0, 0, NULL, 0);
913         } else {
914                 struct sockaddr_in* sa = (struct sockaddr_in*)addr;
915                 sa->sin_port = (in_port_t)htons((uint16_t)port);
916                 fd = create_udp_sock(AF_INET, SOCK_DGRAM, 
917                         (struct sockaddr*)addr, addrlen, 1, inuse, &noproto,
918                         0, 0, 0, NULL, 0);
919         }
920         return fd;
921 }
922
923 /** Select random ID */
924 static int
925 select_id(struct outside_network* outnet, struct pending* pend,
926         sldns_buffer* packet)
927 {
928         int id_tries = 0;
929         pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff;
930         LDNS_ID_SET(sldns_buffer_begin(packet), pend->id);
931
932         /* insert in tree */
933         pend->node.key = pend;
934         while(!rbtree_insert(outnet->pending, &pend->node)) {
935                 /* change ID to avoid collision */
936                 pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff;
937                 LDNS_ID_SET(sldns_buffer_begin(packet), pend->id);
938                 id_tries++;
939                 if(id_tries == MAX_ID_RETRY) {
940                         pend->id=99999; /* non existant ID */
941                         log_err("failed to generate unique ID, drop msg");
942                         return 0;
943                 }
944         }
945         verbose(VERB_ALGO, "inserted new pending reply id=%4.4x", pend->id);
946         return 1;
947 }
948
949 /** Select random interface and port */
950 static int
951 select_ifport(struct outside_network* outnet, struct pending* pend,
952         int num_if, struct port_if* ifs)
953 {
954         int my_if, my_port, fd, portno, inuse, tries=0;
955         struct port_if* pif;
956         /* randomly select interface and port */
957         if(num_if == 0) {
958                 verbose(VERB_QUERY, "Need to send query but have no "
959                         "outgoing interfaces of that family");
960                 return 0;
961         }
962         log_assert(outnet->unused_fds);
963         tries = 0;
964         while(1) {
965                 my_if = ub_random_max(outnet->rnd, num_if);
966                 pif = &ifs[my_if];
967                 my_port = ub_random_max(outnet->rnd, pif->avail_total);
968                 if(my_port < pif->inuse) {
969                         /* port already open */
970                         pend->pc = pif->out[my_port];
971                         verbose(VERB_ALGO, "using UDP if=%d port=%d", 
972                                 my_if, pend->pc->number);
973                         break;
974                 }
975                 /* try to open new port, if fails, loop to try again */
976                 log_assert(pif->inuse < pif->maxout);
977                 portno = pif->avail_ports[my_port - pif->inuse];
978                 fd = udp_sockport(&pif->addr, pif->addrlen, portno, &inuse);
979                 if(fd == -1 && !inuse) {
980                         /* nonrecoverable error making socket */
981                         return 0;
982                 }
983                 if(fd != -1) {
984                         verbose(VERB_ALGO, "opened UDP if=%d port=%d", 
985                                 my_if, portno);
986                         /* grab fd */
987                         pend->pc = outnet->unused_fds;
988                         outnet->unused_fds = pend->pc->next;
989
990                         /* setup portcomm */
991                         pend->pc->next = NULL;
992                         pend->pc->number = portno;
993                         pend->pc->pif = pif;
994                         pend->pc->index = pif->inuse;
995                         pend->pc->num_outstanding = 0;
996                         comm_point_start_listening(pend->pc->cp, fd, -1);
997
998                         /* grab port in interface */
999                         pif->out[pif->inuse] = pend->pc;
1000                         pif->avail_ports[my_port - pif->inuse] =
1001                                 pif->avail_ports[pif->avail_total-pif->inuse-1];
1002                         pif->inuse++;
1003                         break;
1004                 }
1005                 /* failed, already in use */
1006                 verbose(VERB_QUERY, "port %d in use, trying another", portno);
1007                 tries++;
1008                 if(tries == MAX_PORT_RETRY) {
1009                         log_err("failed to find an open port, drop msg");
1010                         return 0;
1011                 }
1012         }
1013         log_assert(pend->pc);
1014         pend->pc->num_outstanding++;
1015
1016         return 1;
1017 }
1018
1019 static int
1020 randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, int timeout)
1021 {
1022         struct timeval tv;
1023         struct outside_network* outnet = pend->sq->outnet;
1024
1025         /* select id */
1026         if(!select_id(outnet, pend, packet)) {
1027                 return 0;
1028         }
1029
1030         /* select src_if, port */
1031         if(addr_is_ip6(&pend->addr, pend->addrlen)) {
1032                 if(!select_ifport(outnet, pend, 
1033                         outnet->num_ip6, outnet->ip6_ifs))
1034                         return 0;
1035         } else {
1036                 if(!select_ifport(outnet, pend, 
1037                         outnet->num_ip4, outnet->ip4_ifs))
1038                         return 0;
1039         }
1040         log_assert(pend->pc && pend->pc->cp);
1041
1042         /* send it over the commlink */
1043         if(!comm_point_send_udp_msg(pend->pc->cp, packet, 
1044                 (struct sockaddr*)&pend->addr, pend->addrlen)) {
1045                 portcomm_loweruse(outnet, pend->pc);
1046                 return 0;
1047         }
1048
1049         /* system calls to set timeout after sending UDP to make roundtrip
1050            smaller. */
1051 #ifndef S_SPLINT_S
1052         tv.tv_sec = timeout/1000;
1053         tv.tv_usec = (timeout%1000)*1000;
1054 #endif
1055         comm_timer_set(pend->timer, &tv);
1056
1057 #ifdef USE_DNSTAP
1058         if(outnet->dtenv &&
1059            (outnet->dtenv->log_resolver_query_messages ||
1060             outnet->dtenv->log_forwarder_query_messages))
1061                 dt_msg_send_outside_query(outnet->dtenv, &pend->addr, comm_udp,
1062                 pend->sq->zone, pend->sq->zonelen, packet);
1063 #endif
1064         return 1;
1065 }
1066
1067 struct pending* 
1068 pending_udp_query(struct serviced_query* sq, struct sldns_buffer* packet,
1069         int timeout, comm_point_callback_t* cb, void* cb_arg)
1070 {
1071         struct pending* pend = (struct pending*)calloc(1, sizeof(*pend));
1072         if(!pend) return NULL;
1073         pend->outnet = sq->outnet;
1074         pend->sq = sq;
1075         pend->addrlen = sq->addrlen;
1076         memmove(&pend->addr, &sq->addr, sq->addrlen);
1077         pend->cb = cb;
1078         pend->cb_arg = cb_arg;
1079         pend->node.key = pend;
1080         pend->timer = comm_timer_create(sq->outnet->base, pending_udp_timer_cb,
1081                 pend);
1082         if(!pend->timer) {
1083                 free(pend);
1084                 return NULL;
1085         }
1086
1087         if(sq->outnet->unused_fds == NULL) {
1088                 /* no unused fd, cannot create a new port (randomly) */
1089                 verbose(VERB_ALGO, "no fds available, udp query waiting");
1090                 pend->timeout = timeout;
1091                 pend->pkt_len = sldns_buffer_limit(packet);
1092                 pend->pkt = (uint8_t*)memdup(sldns_buffer_begin(packet),
1093                         pend->pkt_len);
1094                 if(!pend->pkt) {
1095                         comm_timer_delete(pend->timer);
1096                         free(pend);
1097                         return NULL;
1098                 }
1099                 /* put at end of waiting list */
1100                 if(sq->outnet->udp_wait_last)
1101                         sq->outnet->udp_wait_last->next_waiting = pend;
1102                 else 
1103                         sq->outnet->udp_wait_first = pend;
1104                 sq->outnet->udp_wait_last = pend;
1105                 return pend;
1106         }
1107         if(!randomize_and_send_udp(pend, packet, timeout)) {
1108                 pending_delete(sq->outnet, pend);
1109                 return NULL;
1110         }
1111         return pend;
1112 }
1113
1114 void
1115 outnet_tcptimer(void* arg)
1116 {
1117         struct waiting_tcp* w = (struct waiting_tcp*)arg;
1118         struct outside_network* outnet = w->outnet;
1119         comm_point_callback_t* cb;
1120         void* cb_arg;
1121         if(w->pkt) {
1122                 /* it is on the waiting list */
1123                 waiting_list_remove(outnet, w);
1124         } else {
1125                 /* it was in use */
1126                 struct pending_tcp* pend=(struct pending_tcp*)w->next_waiting;
1127                 comm_point_close(pend->c);
1128                 pend->query = NULL;
1129                 pend->next_free = outnet->tcp_free;
1130                 outnet->tcp_free = pend;
1131         }
1132         cb = w->cb;
1133         cb_arg = w->cb_arg;
1134         waiting_tcp_delete(w);
1135         fptr_ok(fptr_whitelist_pending_tcp(cb));
1136         (void)(*cb)(NULL, cb_arg, NETEVENT_TIMEOUT, NULL);
1137         use_free_buffer(outnet);
1138 }
1139
1140 struct waiting_tcp*
1141 pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet,
1142         int timeout, comm_point_callback_t* callback, void* callback_arg)
1143 {
1144         struct pending_tcp* pend = sq->outnet->tcp_free;
1145         struct waiting_tcp* w;
1146         struct timeval tv;
1147         uint16_t id;
1148         /* if no buffer is free allocate space to store query */
1149         w = (struct waiting_tcp*)malloc(sizeof(struct waiting_tcp) 
1150                 + (pend?0:sldns_buffer_limit(packet)));
1151         if(!w) {
1152                 return NULL;
1153         }
1154         if(!(w->timer = comm_timer_create(sq->outnet->base, outnet_tcptimer, w))) {
1155                 free(w);
1156                 return NULL;
1157         }
1158         w->pkt = NULL;
1159         w->pkt_len = 0;
1160         id = ((unsigned)ub_random(sq->outnet->rnd)>>8) & 0xffff;
1161         LDNS_ID_SET(sldns_buffer_begin(packet), id);
1162         memcpy(&w->addr, &sq->addr, sq->addrlen);
1163         w->addrlen = sq->addrlen;
1164         w->outnet = sq->outnet;
1165         w->cb = callback;
1166         w->cb_arg = callback_arg;
1167         w->ssl_upstream = sq->ssl_upstream;
1168 #ifndef S_SPLINT_S
1169         tv.tv_sec = timeout;
1170         tv.tv_usec = 0;
1171 #endif
1172         comm_timer_set(w->timer, &tv);
1173         if(pend) {
1174                 /* we have a buffer available right now */
1175                 if(!outnet_tcp_take_into_use(w, sldns_buffer_begin(packet),
1176                         sldns_buffer_limit(packet))) {
1177                         waiting_tcp_delete(w);
1178                         return NULL;
1179                 }
1180 #ifdef USE_DNSTAP
1181                 if(sq->outnet->dtenv &&
1182                    (sq->outnet->dtenv->log_resolver_query_messages ||
1183                     sq->outnet->dtenv->log_forwarder_query_messages))
1184                 dt_msg_send_outside_query(sq->outnet->dtenv, &sq->addr,
1185                 comm_tcp, sq->zone, sq->zonelen, packet);
1186 #endif
1187         } else {
1188                 /* queue up */
1189                 w->pkt = (uint8_t*)w + sizeof(struct waiting_tcp);
1190                 w->pkt_len = sldns_buffer_limit(packet);
1191                 memmove(w->pkt, sldns_buffer_begin(packet), w->pkt_len);
1192                 w->next_waiting = NULL;
1193                 if(sq->outnet->tcp_wait_last)
1194                         sq->outnet->tcp_wait_last->next_waiting = w;
1195                 else    sq->outnet->tcp_wait_first = w;
1196                 sq->outnet->tcp_wait_last = w;
1197         }
1198         return w;
1199 }
1200
1201 /** create query for serviced queries */
1202 static void
1203 serviced_gen_query(sldns_buffer* buff, uint8_t* qname, size_t qnamelen, 
1204         uint16_t qtype, uint16_t qclass, uint16_t flags)
1205 {
1206         sldns_buffer_clear(buff);
1207         /* skip id */
1208         sldns_buffer_write_u16(buff, flags);
1209         sldns_buffer_write_u16(buff, 1); /* qdcount */
1210         sldns_buffer_write_u16(buff, 0); /* ancount */
1211         sldns_buffer_write_u16(buff, 0); /* nscount */
1212         sldns_buffer_write_u16(buff, 0); /* arcount */
1213         sldns_buffer_write(buff, qname, qnamelen);
1214         sldns_buffer_write_u16(buff, qtype);
1215         sldns_buffer_write_u16(buff, qclass);
1216         sldns_buffer_flip(buff);
1217 }
1218
1219 /** lookup serviced query in serviced query rbtree */
1220 static struct serviced_query*
1221 lookup_serviced(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
1222         struct sockaddr_storage* addr, socklen_t addrlen)
1223 {
1224         struct serviced_query key;
1225         key.node.key = &key;
1226         key.qbuf = sldns_buffer_begin(buff);
1227         key.qbuflen = sldns_buffer_limit(buff);
1228         key.dnssec = dnssec;
1229         memcpy(&key.addr, addr, addrlen);
1230         key.addrlen = addrlen;
1231         key.outnet = outnet;
1232         return (struct serviced_query*)rbtree_search(outnet->serviced, &key);
1233 }
1234
1235 /** Create new serviced entry */
1236 static struct serviced_query*
1237 serviced_create(struct outside_network* outnet, sldns_buffer* buff, int dnssec,
1238         int want_dnssec, int nocaps, int tcp_upstream, int ssl_upstream,
1239         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone,
1240         size_t zonelen, int qtype)
1241 {
1242         struct serviced_query* sq = (struct serviced_query*)malloc(sizeof(*sq));
1243 #ifdef UNBOUND_DEBUG
1244         rbnode_t* ins;
1245 #endif
1246         if(!sq) 
1247                 return NULL;
1248         sq->node.key = sq;
1249         sq->qbuf = memdup(sldns_buffer_begin(buff), sldns_buffer_limit(buff));
1250         if(!sq->qbuf) {
1251                 free(sq);
1252                 return NULL;
1253         }
1254         sq->qbuflen = sldns_buffer_limit(buff);
1255         sq->zone = memdup(zone, zonelen);
1256         if(!sq->zone) {
1257                 free(sq->qbuf);
1258                 free(sq);
1259                 return NULL;
1260         }
1261         sq->zonelen = zonelen;
1262         sq->qtype = qtype;
1263         sq->dnssec = dnssec;
1264         sq->want_dnssec = want_dnssec;
1265         sq->nocaps = nocaps;
1266         sq->tcp_upstream = tcp_upstream;
1267         sq->ssl_upstream = ssl_upstream;
1268         memcpy(&sq->addr, addr, addrlen);
1269         sq->addrlen = addrlen;
1270         sq->outnet = outnet;
1271         sq->cblist = NULL;
1272         sq->pending = NULL;
1273         sq->status = serviced_initial;
1274         sq->retry = 0;
1275         sq->to_be_deleted = 0;
1276 #ifdef UNBOUND_DEBUG
1277         ins = 
1278 #else
1279         (void)
1280 #endif
1281         rbtree_insert(outnet->serviced, &sq->node);
1282         log_assert(ins != NULL); /* must not be already present */
1283         return sq;
1284 }
1285
1286 /** remove waiting tcp from the outnet waiting list */
1287 static void
1288 waiting_list_remove(struct outside_network* outnet, struct waiting_tcp* w)
1289 {
1290         struct waiting_tcp* p = outnet->tcp_wait_first, *prev = NULL;
1291         while(p) {
1292                 if(p == w) {
1293                         /* remove w */
1294                         if(prev)
1295                                 prev->next_waiting = w->next_waiting;
1296                         else    outnet->tcp_wait_first = w->next_waiting;
1297                         if(outnet->tcp_wait_last == w)
1298                                 outnet->tcp_wait_last = prev;
1299                         return;
1300                 }
1301                 prev = p;
1302                 p = p->next_waiting;
1303         }
1304 }
1305
1306 /** cleanup serviced query entry */
1307 static void
1308 serviced_delete(struct serviced_query* sq)
1309 {
1310         if(sq->pending) {
1311                 /* clear up the pending query */
1312                 if(sq->status == serviced_query_UDP_EDNS ||
1313                         sq->status == serviced_query_UDP ||
1314                         sq->status == serviced_query_PROBE_EDNS ||
1315                         sq->status == serviced_query_UDP_EDNS_FRAG ||
1316                         sq->status == serviced_query_UDP_EDNS_fallback) {
1317                         struct pending* p = (struct pending*)sq->pending;
1318                         if(p->pc)
1319                                 portcomm_loweruse(sq->outnet, p->pc);
1320                         pending_delete(sq->outnet, p);
1321                         /* this call can cause reentrant calls back into the
1322                          * mesh */
1323                         outnet_send_wait_udp(sq->outnet);
1324                 } else {
1325                         struct waiting_tcp* p = (struct waiting_tcp*)
1326                                 sq->pending;
1327                         if(p->pkt == NULL) {
1328                                 decomission_pending_tcp(sq->outnet, 
1329                                         (struct pending_tcp*)p->next_waiting);
1330                         } else {
1331                                 waiting_list_remove(sq->outnet, p);
1332                                 waiting_tcp_delete(p);
1333                         }
1334                 }
1335         }
1336         /* does not delete from tree, caller has to do that */
1337         serviced_node_del(&sq->node, NULL);
1338 }
1339
1340 /** perturb a dname capitalization randomly */
1341 static void
1342 serviced_perturb_qname(struct ub_randstate* rnd, uint8_t* qbuf, size_t len)
1343 {
1344         uint8_t lablen;
1345         uint8_t* d = qbuf + 10;
1346         long int random = 0;
1347         int bits = 0;
1348         log_assert(len >= 10 + 5 /* offset qname, root, qtype, qclass */);
1349         lablen = *d++;
1350         while(lablen) {
1351                 while(lablen--) {
1352                         /* only perturb A-Z, a-z */
1353                         if(isalpha((unsigned char)*d)) {
1354                                 /* get a random bit */  
1355                                 if(bits == 0) {
1356                                         random = ub_random(rnd);
1357                                         bits = 30;
1358                                 }
1359                                 if(random & 0x1) {
1360                                         *d = (uint8_t)toupper((unsigned char)*d);
1361                                 } else {
1362                                         *d = (uint8_t)tolower((unsigned char)*d);
1363                                 }
1364                                 random >>= 1;
1365                                 bits--;
1366                         }
1367                         d++;
1368                 }
1369                 lablen = *d++;
1370         }
1371         if(verbosity >= VERB_ALGO) {
1372                 char buf[LDNS_MAX_DOMAINLEN+1];
1373                 dname_str(qbuf+10, buf);
1374                 verbose(VERB_ALGO, "qname perturbed to %s", buf);
1375         }
1376 }
1377
1378 /** put serviced query into a buffer */
1379 static void
1380 serviced_encode(struct serviced_query* sq, sldns_buffer* buff, int with_edns)
1381 {
1382         /* if we are using 0x20 bits for ID randomness, perturb them */
1383         if(sq->outnet->use_caps_for_id && !sq->nocaps) {
1384                 serviced_perturb_qname(sq->outnet->rnd, sq->qbuf, sq->qbuflen);
1385         }
1386         /* generate query */
1387         sldns_buffer_clear(buff);
1388         sldns_buffer_write_u16(buff, 0); /* id placeholder */
1389         sldns_buffer_write(buff, sq->qbuf, sq->qbuflen);
1390         sldns_buffer_flip(buff);
1391         if(with_edns) {
1392                 /* add edns section */
1393                 struct edns_data edns;
1394                 edns.edns_present = 1;
1395                 edns.ext_rcode = 0;
1396                 edns.edns_version = EDNS_ADVERTISED_VERSION;
1397                 if(sq->status == serviced_query_UDP_EDNS_FRAG) {
1398                         if(addr_is_ip6(&sq->addr, sq->addrlen)) {
1399                                 if(EDNS_FRAG_SIZE_IP6 < EDNS_ADVERTISED_SIZE)
1400                                         edns.udp_size = EDNS_FRAG_SIZE_IP6;
1401                                 else    edns.udp_size = EDNS_ADVERTISED_SIZE;
1402                         } else {
1403                                 if(EDNS_FRAG_SIZE_IP4 < EDNS_ADVERTISED_SIZE)
1404                                         edns.udp_size = EDNS_FRAG_SIZE_IP4;
1405                                 else    edns.udp_size = EDNS_ADVERTISED_SIZE;
1406                         }
1407                 } else {
1408                         edns.udp_size = EDNS_ADVERTISED_SIZE;
1409                 }
1410                 edns.bits = 0;
1411                 if(sq->dnssec & EDNS_DO)
1412                         edns.bits = EDNS_DO;
1413                 if(sq->dnssec & BIT_CD)
1414                         LDNS_CD_SET(sldns_buffer_begin(buff));
1415                 attach_edns_record(buff, &edns);
1416         }
1417 }
1418
1419 /**
1420  * Perform serviced query UDP sending operation.
1421  * Sends UDP with EDNS, unless infra host marked non EDNS.
1422  * @param sq: query to send.
1423  * @param buff: buffer scratch space.
1424  * @return 0 on error.
1425  */
1426 static int
1427 serviced_udp_send(struct serviced_query* sq, sldns_buffer* buff)
1428 {
1429         int rtt, vs;
1430         uint8_t edns_lame_known;
1431         time_t now = *sq->outnet->now_secs;
1432
1433         if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone,
1434                 sq->zonelen, now, &vs, &edns_lame_known, &rtt))
1435                 return 0;
1436         sq->last_rtt = rtt;
1437         verbose(VERB_ALGO, "EDNS lookup known=%d vs=%d", edns_lame_known, vs);
1438         if(sq->status == serviced_initial) {
1439                 if(edns_lame_known == 0 && rtt > 5000 && rtt < 10001) {
1440                         /* perform EDNS lame probe - check if server is
1441                          * EDNS lame (EDNS queries to it are dropped) */
1442                         verbose(VERB_ALGO, "serviced query: send probe to see "
1443                                 " if use of EDNS causes timeouts");
1444                         /* even 700 msec may be too small */
1445                         rtt = 1000;
1446                         sq->status = serviced_query_PROBE_EDNS;
1447                 } else if(vs != -1) {
1448                         sq->status = serviced_query_UDP_EDNS;
1449                 } else {        
1450                         sq->status = serviced_query_UDP; 
1451                 }
1452         }
1453         serviced_encode(sq, buff, (sq->status == serviced_query_UDP_EDNS) ||
1454                 (sq->status == serviced_query_UDP_EDNS_FRAG));
1455         sq->last_sent_time = *sq->outnet->now_tv;
1456         sq->edns_lame_known = (int)edns_lame_known;
1457         verbose(VERB_ALGO, "serviced query UDP timeout=%d msec", rtt);
1458         sq->pending = pending_udp_query(sq, buff, rtt,
1459                 serviced_udp_callback, sq);
1460         if(!sq->pending)
1461                 return 0;
1462         return 1;
1463 }
1464
1465 /** check that perturbed qname is identical */
1466 static int
1467 serviced_check_qname(sldns_buffer* pkt, uint8_t* qbuf, size_t qbuflen)
1468 {
1469         uint8_t* d1 = sldns_buffer_at(pkt, 12);
1470         uint8_t* d2 = qbuf+10;
1471         uint8_t len1, len2;
1472         int count = 0;
1473         log_assert(qbuflen >= 15 /* 10 header, root, type, class */);
1474         len1 = *d1++;
1475         len2 = *d2++;
1476         if(sldns_buffer_limit(pkt) < 12+1+4) /* packet too small for qname */
1477                 return 0;
1478         while(len1 != 0 || len2 != 0) {
1479                 if(LABEL_IS_PTR(len1)) {
1480                         d1 = sldns_buffer_at(pkt, PTR_OFFSET(len1, *d1));
1481                         if(d1 >= sldns_buffer_at(pkt, sldns_buffer_limit(pkt)))
1482                                 return 0;
1483                         len1 = *d1++;
1484                         if(count++ > MAX_COMPRESS_PTRS)
1485                                 return 0;
1486                         continue;
1487                 }
1488                 if(d2 > qbuf+qbuflen)
1489                         return 0;
1490                 if(len1 != len2)
1491                         return 0;
1492                 if(len1 > LDNS_MAX_LABELLEN)
1493                         return 0;
1494                 log_assert(len1 <= LDNS_MAX_LABELLEN);
1495                 log_assert(len2 <= LDNS_MAX_LABELLEN);
1496                 log_assert(len1 == len2 && len1 != 0);
1497                 /* compare the labels - bitwise identical */
1498                 if(memcmp(d1, d2, len1) != 0)
1499                         return 0;
1500                 d1 += len1;
1501                 d2 += len2;
1502                 len1 = *d1++;
1503                 len2 = *d2++;
1504         }
1505         return 1;
1506 }
1507
1508 /** call the callbacks for a serviced query */
1509 static void
1510 serviced_callbacks(struct serviced_query* sq, int error, struct comm_point* c,
1511         struct comm_reply* rep)
1512 {
1513         struct service_callback* p;
1514         int dobackup = (sq->cblist && sq->cblist->next); /* >1 cb*/
1515         uint8_t *backup_p = NULL;
1516         size_t backlen = 0;
1517 #ifdef UNBOUND_DEBUG
1518         rbnode_t* rem =
1519 #else
1520         (void)
1521 #endif
1522         /* remove from tree, and schedule for deletion, so that callbacks
1523          * can safely deregister themselves and even create new serviced
1524          * queries that are identical to this one. */
1525         rbtree_delete(sq->outnet->serviced, sq);
1526         log_assert(rem); /* should have been present */
1527         sq->to_be_deleted = 1; 
1528         verbose(VERB_ALGO, "svcd callbacks start");
1529         if(sq->outnet->use_caps_for_id && error == NETEVENT_NOERROR && c &&
1530                 !sq->nocaps) {
1531                 /* noerror and nxdomain must have a qname in reply */
1532                 if(sldns_buffer_read_u16_at(c->buffer, 4) == 0 &&
1533                         (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
1534                                 == LDNS_RCODE_NOERROR || 
1535                          LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer))
1536                                 == LDNS_RCODE_NXDOMAIN)) {
1537                         verbose(VERB_DETAIL, "no qname in reply to check 0x20ID");
1538                         log_addr(VERB_DETAIL, "from server", 
1539                                 &sq->addr, sq->addrlen);
1540                         log_buf(VERB_DETAIL, "for packet", c->buffer);
1541                         error = NETEVENT_CLOSED;
1542                         c = NULL;
1543                 } else if(sldns_buffer_read_u16_at(c->buffer, 4) > 0 &&
1544                         !serviced_check_qname(c->buffer, sq->qbuf, 
1545                         sq->qbuflen)) {
1546                         verbose(VERB_DETAIL, "wrong 0x20-ID in reply qname");
1547                         log_addr(VERB_DETAIL, "from server", 
1548                                 &sq->addr, sq->addrlen);
1549                         log_buf(VERB_DETAIL, "for packet", c->buffer);
1550                         error = NETEVENT_CAPSFAIL;
1551                         /* and cleanup too */
1552                         pkt_dname_tolower(c->buffer, 
1553                                 sldns_buffer_at(c->buffer, 12));
1554                 } else {
1555                         verbose(VERB_ALGO, "good 0x20-ID in reply qname");
1556                         /* cleanup caps, prettier cache contents. */
1557                         pkt_dname_tolower(c->buffer, 
1558                                 sldns_buffer_at(c->buffer, 12));
1559                 }
1560         }
1561         if(dobackup && c) {
1562                 /* make a backup of the query, since the querystate processing
1563                  * may send outgoing queries that overwrite the buffer.
1564                  * use secondary buffer to store the query.
1565                  * This is a data copy, but faster than packet to server */
1566                 backlen = sldns_buffer_limit(c->buffer);
1567                 backup_p = memdup(sldns_buffer_begin(c->buffer), backlen);
1568                 if(!backup_p) {
1569                         log_err("malloc failure in serviced query callbacks");
1570                         error = NETEVENT_CLOSED;
1571                         c = NULL;
1572                 }
1573                 sq->outnet->svcd_overhead = backlen;
1574         }
1575         /* test the actual sq->cblist, because the next elem could be deleted*/
1576         while((p=sq->cblist) != NULL) {
1577                 sq->cblist = p->next; /* remove this element */
1578                 if(dobackup && c) {
1579                         sldns_buffer_clear(c->buffer);
1580                         sldns_buffer_write(c->buffer, backup_p, backlen);
1581                         sldns_buffer_flip(c->buffer);
1582                 }
1583                 fptr_ok(fptr_whitelist_serviced_query(p->cb));
1584                 (void)(*p->cb)(c, p->cb_arg, error, rep);
1585                 free(p);
1586         }
1587         if(backup_p) {
1588                 free(backup_p);
1589                 sq->outnet->svcd_overhead = 0;
1590         }
1591         verbose(VERB_ALGO, "svcd callbacks end");
1592         log_assert(sq->cblist == NULL);
1593         serviced_delete(sq);
1594 }
1595
1596 int 
1597 serviced_tcp_callback(struct comm_point* c, void* arg, int error,
1598         struct comm_reply* rep)
1599 {
1600         struct serviced_query* sq = (struct serviced_query*)arg;
1601         struct comm_reply r2;
1602         sq->pending = NULL; /* removed after this callback */
1603         if(error != NETEVENT_NOERROR)
1604                 log_addr(VERB_QUERY, "tcp error for address", 
1605                         &sq->addr, sq->addrlen);
1606         if(error==NETEVENT_NOERROR)
1607                 infra_update_tcp_works(sq->outnet->infra, &sq->addr,
1608                         sq->addrlen, sq->zone, sq->zonelen);
1609 #ifdef USE_DNSTAP
1610         if(error==NETEVENT_NOERROR && sq->outnet->dtenv &&
1611            (sq->outnet->dtenv->log_resolver_response_messages ||
1612             sq->outnet->dtenv->log_forwarder_response_messages))
1613                 dt_msg_send_outside_response(sq->outnet->dtenv, &sq->addr,
1614                 c->type, sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen,
1615                 &sq->last_sent_time, sq->outnet->now_tv, c->buffer);
1616 #endif
1617         if(error==NETEVENT_NOERROR && sq->status == serviced_query_TCP_EDNS &&
1618                 (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == 
1619                 LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(sldns_buffer_begin(
1620                 c->buffer)) == LDNS_RCODE_NOTIMPL) ) {
1621                 /* attempt to fallback to nonEDNS */
1622                 sq->status = serviced_query_TCP_EDNS_fallback;
1623                 serviced_tcp_initiate(sq, c->buffer);
1624                 return 0;
1625         } else if(error==NETEVENT_NOERROR && 
1626                 sq->status == serviced_query_TCP_EDNS_fallback &&
1627                         (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == 
1628                         LDNS_RCODE_NOERROR || LDNS_RCODE_WIRE(
1629                         sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NXDOMAIN 
1630                         || LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) 
1631                         == LDNS_RCODE_YXDOMAIN)) {
1632                 /* the fallback produced a result that looks promising, note
1633                  * that this server should be approached without EDNS */
1634                 /* only store noEDNS in cache if domain is noDNSSEC */
1635                 if(!sq->want_dnssec)
1636                   if(!infra_edns_update(sq->outnet->infra, &sq->addr, 
1637                         sq->addrlen, sq->zone, sq->zonelen, -1,
1638                         *sq->outnet->now_secs))
1639                         log_err("Out of memory caching no edns for host");
1640                 sq->status = serviced_query_TCP;
1641         }
1642         if(sq->tcp_upstream || sq->ssl_upstream) {
1643             struct timeval now = *sq->outnet->now_tv;
1644             if(now.tv_sec > sq->last_sent_time.tv_sec ||
1645                 (now.tv_sec == sq->last_sent_time.tv_sec &&
1646                 now.tv_usec > sq->last_sent_time.tv_usec)) {
1647                 /* convert from microseconds to milliseconds */
1648                 int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000
1649                   + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000;
1650                 verbose(VERB_ALGO, "measured TCP-time at %d msec", roundtime);
1651                 log_assert(roundtime >= 0);
1652                 /* only store if less then AUTH_TIMEOUT seconds, it could be
1653                  * huge due to system-hibernated and we woke up */
1654                 if(roundtime < TCP_AUTH_QUERY_TIMEOUT*1000) {
1655                     if(!infra_rtt_update(sq->outnet->infra, &sq->addr,
1656                         sq->addrlen, sq->zone, sq->zonelen, sq->qtype,
1657                         roundtime, sq->last_rtt, (time_t)now.tv_sec))
1658                         log_err("out of memory noting rtt.");
1659                 }
1660             }
1661         }
1662         /* insert address into reply info */
1663         if(!rep) {
1664                 /* create one if there isn't (on errors) */
1665                 rep = &r2;
1666                 r2.c = c;
1667         }
1668         memcpy(&rep->addr, &sq->addr, sq->addrlen);
1669         rep->addrlen = sq->addrlen;
1670         serviced_callbacks(sq, error, c, rep);
1671         return 0;
1672 }
1673
1674 static void
1675 serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff)
1676 {
1677         verbose(VERB_ALGO, "initiate TCP query %s", 
1678                 sq->status==serviced_query_TCP_EDNS?"EDNS":"");
1679         serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS);
1680         sq->last_sent_time = *sq->outnet->now_tv;
1681         sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT,
1682                 serviced_tcp_callback, sq);
1683         if(!sq->pending) {
1684                 /* delete from tree so that a retry by above layer does not
1685                  * clash with this entry */
1686                 log_err("serviced_tcp_initiate: failed to send tcp query");
1687                 serviced_callbacks(sq, NETEVENT_CLOSED, NULL, NULL);
1688         }
1689 }
1690
1691 /** Send serviced query over TCP return false on initial failure */
1692 static int
1693 serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff)
1694 {
1695         int vs, rtt;
1696         uint8_t edns_lame_known;
1697         if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone,
1698                 sq->zonelen, *sq->outnet->now_secs, &vs, &edns_lame_known,
1699                 &rtt))
1700                 return 0;
1701         if(vs != -1)
1702                 sq->status = serviced_query_TCP_EDNS;
1703         else    sq->status = serviced_query_TCP;
1704         serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS);
1705         sq->last_sent_time = *sq->outnet->now_tv;
1706         sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT,
1707                 serviced_tcp_callback, sq);
1708         return sq->pending != NULL;
1709 }
1710
1711 int 
1712 serviced_udp_callback(struct comm_point* c, void* arg, int error,
1713         struct comm_reply* rep)
1714 {
1715         struct serviced_query* sq = (struct serviced_query*)arg;
1716         struct outside_network* outnet = sq->outnet;
1717         struct timeval now = *sq->outnet->now_tv;
1718         int fallback_tcp = 0;
1719
1720         sq->pending = NULL; /* removed after callback */
1721         if(error == NETEVENT_TIMEOUT) {
1722                 int rto = 0;
1723                 if(sq->status == serviced_query_PROBE_EDNS) {
1724                         /* non-EDNS probe failed; we do not know its status,
1725                          * keep trying with EDNS, timeout may not be caused
1726                          * by EDNS. */
1727                         sq->status = serviced_query_UDP_EDNS;
1728                 }
1729                 if(sq->status == serviced_query_UDP_EDNS && sq->last_rtt < 5000) {
1730                         /* fallback to 1480/1280 */
1731                         sq->status = serviced_query_UDP_EDNS_FRAG;
1732                         log_name_addr(VERB_ALGO, "try edns1xx0", sq->qbuf+10,
1733                                 &sq->addr, sq->addrlen);
1734                         if(!serviced_udp_send(sq, c->buffer)) {
1735                                 serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
1736                         }
1737                         return 0;
1738                 }
1739                 if(sq->status == serviced_query_UDP_EDNS_FRAG) {
1740                         /* fragmentation size did not fix it */
1741                         sq->status = serviced_query_UDP_EDNS;
1742                 }
1743                 sq->retry++;
1744                 if(!(rto=infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
1745                         sq->zone, sq->zonelen, sq->qtype, -1, sq->last_rtt,
1746                         (time_t)now.tv_sec)))
1747                         log_err("out of memory in UDP exponential backoff");
1748                 if(sq->retry < OUTBOUND_UDP_RETRY) {
1749                         log_name_addr(VERB_ALGO, "retry query", sq->qbuf+10,
1750                                 &sq->addr, sq->addrlen);
1751                         if(!serviced_udp_send(sq, c->buffer)) {
1752                                 serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
1753                         }
1754                         return 0;
1755                 }
1756                 if(rto >= RTT_MAX_TIMEOUT) {
1757                         fallback_tcp = 1;
1758                         /* UDP does not work, fallback to TCP below */
1759                 } else {
1760                         serviced_callbacks(sq, NETEVENT_TIMEOUT, c, rep);
1761                         return 0;
1762                 }
1763         } else if(error != NETEVENT_NOERROR) {
1764                 /* udp returns error (due to no ID or interface available) */
1765                 serviced_callbacks(sq, error, c, rep);
1766                 return 0;
1767         }
1768 #ifdef USE_DNSTAP
1769         if(outnet->dtenv &&
1770            (outnet->dtenv->log_resolver_response_messages ||
1771             outnet->dtenv->log_forwarder_response_messages))
1772                 dt_msg_send_outside_response(outnet->dtenv, &sq->addr, c->type,
1773                 sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen,
1774                 &sq->last_sent_time, sq->outnet->now_tv, c->buffer);
1775 #endif
1776         if(!fallback_tcp) {
1777             if( (sq->status == serviced_query_UDP_EDNS 
1778                 ||sq->status == serviced_query_UDP_EDNS_FRAG)
1779                 && (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) 
1780                         == LDNS_RCODE_FORMERR || LDNS_RCODE_WIRE(
1781                         sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOTIMPL)) {
1782                 /* try to get an answer by falling back without EDNS */
1783                 verbose(VERB_ALGO, "serviced query: attempt without EDNS");
1784                 sq->status = serviced_query_UDP_EDNS_fallback;
1785                 sq->retry = 0;
1786                 if(!serviced_udp_send(sq, c->buffer)) {
1787                         serviced_callbacks(sq, NETEVENT_CLOSED, c, rep);
1788                 }
1789                 return 0;
1790             } else if(sq->status == serviced_query_PROBE_EDNS) {
1791                 /* probe without EDNS succeeds, so we conclude that this
1792                  * host likely has EDNS packets dropped */
1793                 log_addr(VERB_DETAIL, "timeouts, concluded that connection to "
1794                         "host drops EDNS packets", &sq->addr, sq->addrlen);
1795                 /* only store noEDNS in cache if domain is noDNSSEC */
1796                 if(!sq->want_dnssec)
1797                   if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
1798                         sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) {
1799                         log_err("Out of memory caching no edns for host");
1800                   }
1801                 sq->status = serviced_query_UDP;
1802             } else if(sq->status == serviced_query_UDP_EDNS && 
1803                 !sq->edns_lame_known) {
1804                 /* now we know that edns queries received answers store that */
1805                 log_addr(VERB_ALGO, "serviced query: EDNS works for",
1806                         &sq->addr, sq->addrlen);
1807                 if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen, 
1808                         sq->zone, sq->zonelen, 0, (time_t)now.tv_sec)) {
1809                         log_err("Out of memory caching edns works");
1810                 }
1811                 sq->edns_lame_known = 1;
1812             } else if(sq->status == serviced_query_UDP_EDNS_fallback &&
1813                 !sq->edns_lame_known && (LDNS_RCODE_WIRE(
1814                 sldns_buffer_begin(c->buffer)) == LDNS_RCODE_NOERROR || 
1815                 LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == 
1816                 LDNS_RCODE_NXDOMAIN || LDNS_RCODE_WIRE(sldns_buffer_begin(
1817                 c->buffer)) == LDNS_RCODE_YXDOMAIN)) {
1818                 /* the fallback produced a result that looks promising, note
1819                  * that this server should be approached without EDNS */
1820                 /* only store noEDNS in cache if domain is noDNSSEC */
1821                 if(!sq->want_dnssec) {
1822                   log_addr(VERB_ALGO, "serviced query: EDNS fails for",
1823                         &sq->addr, sq->addrlen);
1824                   if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
1825                         sq->zone, sq->zonelen, -1, (time_t)now.tv_sec)) {
1826                         log_err("Out of memory caching no edns for host");
1827                   }
1828                 } else {
1829                   log_addr(VERB_ALGO, "serviced query: EDNS fails, but "
1830                         "not stored because need DNSSEC for", &sq->addr,
1831                         sq->addrlen);
1832                 }
1833                 sq->status = serviced_query_UDP;
1834             }
1835             if(now.tv_sec > sq->last_sent_time.tv_sec ||
1836                 (now.tv_sec == sq->last_sent_time.tv_sec &&
1837                 now.tv_usec > sq->last_sent_time.tv_usec)) {
1838                 /* convert from microseconds to milliseconds */
1839                 int roundtime = ((int)(now.tv_sec - sq->last_sent_time.tv_sec))*1000
1840                   + ((int)now.tv_usec - (int)sq->last_sent_time.tv_usec)/1000;
1841                 verbose(VERB_ALGO, "measured roundtrip at %d msec", roundtime);
1842                 log_assert(roundtime >= 0);
1843                 /* in case the system hibernated, do not enter a huge value,
1844                  * above this value gives trouble with server selection */
1845                 if(roundtime < 60000) {
1846                     if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen, 
1847                         sq->zone, sq->zonelen, sq->qtype, roundtime,
1848                         sq->last_rtt, (time_t)now.tv_sec))
1849                         log_err("out of memory noting rtt.");
1850                 }
1851             }
1852         } /* end of if_!fallback_tcp */
1853         /* perform TC flag check and TCP fallback after updating our
1854          * cache entries for EDNS status and RTT times */
1855         if(LDNS_TC_WIRE(sldns_buffer_begin(c->buffer)) || fallback_tcp) {
1856                 /* fallback to TCP */
1857                 /* this discards partial UDP contents */
1858                 if(sq->status == serviced_query_UDP_EDNS ||
1859                         sq->status == serviced_query_UDP_EDNS_FRAG ||
1860                         sq->status == serviced_query_UDP_EDNS_fallback)
1861                         /* if we have unfinished EDNS_fallback, start again */
1862                         sq->status = serviced_query_TCP_EDNS;
1863                 else    sq->status = serviced_query_TCP;
1864                 serviced_tcp_initiate(sq, c->buffer);
1865                 return 0;
1866         }
1867         /* yay! an answer */
1868         serviced_callbacks(sq, error, c, rep);
1869         return 0;
1870 }
1871
1872 struct serviced_query* 
1873 outnet_serviced_query(struct outside_network* outnet,
1874         uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass,
1875         uint16_t flags, int dnssec, int want_dnssec, int nocaps,
1876         int tcp_upstream, int ssl_upstream, struct sockaddr_storage* addr,
1877         socklen_t addrlen, uint8_t* zone, size_t zonelen,
1878         comm_point_callback_t* callback, void* callback_arg,
1879         sldns_buffer* buff)
1880 {
1881         struct serviced_query* sq;
1882         struct service_callback* cb;
1883         serviced_gen_query(buff, qname, qnamelen, qtype, qclass, flags);
1884         sq = lookup_serviced(outnet, buff, dnssec, addr, addrlen);
1885         /* duplicate entries are included in the callback list, because
1886          * there is a counterpart registration by our caller that needs to
1887          * be doubly-removed (with callbacks perhaps). */
1888         if(!(cb = (struct service_callback*)malloc(sizeof(*cb))))
1889                 return NULL;
1890         if(!sq) {
1891                 /* make new serviced query entry */
1892                 sq = serviced_create(outnet, buff, dnssec, want_dnssec, nocaps,
1893                         tcp_upstream, ssl_upstream, addr, addrlen, zone,
1894                         zonelen, (int)qtype);
1895                 if(!sq) {
1896                         free(cb);
1897                         return NULL;
1898                 }
1899                 /* perform first network action */
1900                 if(outnet->do_udp && !(tcp_upstream || ssl_upstream)) {
1901                         if(!serviced_udp_send(sq, buff)) {
1902                                 (void)rbtree_delete(outnet->serviced, sq);
1903                                 free(sq->qbuf);
1904                                 free(sq->zone);
1905                                 free(sq);
1906                                 free(cb);
1907                                 return NULL;
1908                         }
1909                 } else {
1910                         if(!serviced_tcp_send(sq, buff)) {
1911                                 (void)rbtree_delete(outnet->serviced, sq);
1912                                 free(sq->qbuf);
1913                                 free(sq->zone);
1914                                 free(sq);
1915                                 free(cb);
1916                                 return NULL;
1917                         }
1918                 }
1919         }
1920         /* add callback to list of callbacks */
1921         cb->cb = callback;
1922         cb->cb_arg = callback_arg;
1923         cb->next = sq->cblist;
1924         sq->cblist = cb;
1925         return sq;
1926 }
1927
1928 /** remove callback from list */
1929 static void
1930 callback_list_remove(struct serviced_query* sq, void* cb_arg)
1931 {
1932         struct service_callback** pp = &sq->cblist;
1933         while(*pp) {
1934                 if((*pp)->cb_arg == cb_arg) {
1935                         struct service_callback* del = *pp;
1936                         *pp = del->next;
1937                         free(del);
1938                         return;
1939                 }
1940                 pp = &(*pp)->next;
1941         }
1942 }
1943
1944 void outnet_serviced_query_stop(struct serviced_query* sq, void* cb_arg)
1945 {
1946         if(!sq) 
1947                 return;
1948         callback_list_remove(sq, cb_arg);
1949         /* if callbacks() routine scheduled deletion, let it do that */
1950         if(!sq->cblist && !sq->to_be_deleted) {
1951 #ifdef UNBOUND_DEBUG
1952                 rbnode_t* rem =
1953 #else
1954                 (void)
1955 #endif
1956                 rbtree_delete(sq->outnet->serviced, sq);
1957                 log_assert(rem); /* should be present */
1958                 serviced_delete(sq); 
1959         }
1960 }
1961
1962 /** get memory used by waiting tcp entry (in use or not) */
1963 static size_t
1964 waiting_tcp_get_mem(struct waiting_tcp* w)
1965 {
1966         size_t s;
1967         if(!w) return 0;
1968         s = sizeof(*w) + w->pkt_len;
1969         if(w->timer)
1970                 s += comm_timer_get_mem(w->timer);
1971         return s;
1972 }
1973
1974 /** get memory used by port if */
1975 static size_t
1976 if_get_mem(struct port_if* pif)
1977 {
1978         size_t s;
1979         int i;
1980         s = sizeof(*pif) + sizeof(int)*pif->avail_total +
1981                 sizeof(struct port_comm*)*pif->maxout;
1982         for(i=0; i<pif->inuse; i++)
1983                 s += sizeof(*pif->out[i]) + 
1984                         comm_point_get_mem(pif->out[i]->cp);
1985         return s;
1986 }
1987
1988 /** get memory used by waiting udp */
1989 static size_t
1990 waiting_udp_get_mem(struct pending* w)
1991 {
1992         size_t s;
1993         s = sizeof(*w) + comm_timer_get_mem(w->timer) + w->pkt_len;
1994         return s;
1995 }
1996
1997 size_t outnet_get_mem(struct outside_network* outnet)
1998 {
1999         size_t i;
2000         int k;
2001         struct waiting_tcp* w;
2002         struct pending* u;
2003         struct serviced_query* sq;
2004         struct service_callback* sb;
2005         struct port_comm* pc;
2006         size_t s = sizeof(*outnet) + sizeof(*outnet->base) + 
2007                 sizeof(*outnet->udp_buff) + 
2008                 sldns_buffer_capacity(outnet->udp_buff);
2009         /* second buffer is not ours */
2010         for(pc = outnet->unused_fds; pc; pc = pc->next) {
2011                 s += sizeof(*pc) + comm_point_get_mem(pc->cp);
2012         }
2013         for(k=0; k<outnet->num_ip4; k++)
2014                 s += if_get_mem(&outnet->ip4_ifs[k]);
2015         for(k=0; k<outnet->num_ip6; k++)
2016                 s += if_get_mem(&outnet->ip6_ifs[k]);
2017         for(u=outnet->udp_wait_first; u; u=u->next_waiting)
2018                 s += waiting_udp_get_mem(u);
2019         
2020         s += sizeof(struct pending_tcp*)*outnet->num_tcp;
2021         for(i=0; i<outnet->num_tcp; i++) {
2022                 s += sizeof(struct pending_tcp);
2023                 s += comm_point_get_mem(outnet->tcp_conns[i]->c);
2024                 if(outnet->tcp_conns[i]->query)
2025                         s += waiting_tcp_get_mem(outnet->tcp_conns[i]->query);
2026         }
2027         for(w=outnet->tcp_wait_first; w; w = w->next_waiting)
2028                 s += waiting_tcp_get_mem(w);
2029         s += sizeof(*outnet->pending);
2030         s += (sizeof(struct pending) + comm_timer_get_mem(NULL)) * 
2031                 outnet->pending->count;
2032         s += sizeof(*outnet->serviced);
2033         s += outnet->svcd_overhead;
2034         RBTREE_FOR(sq, struct serviced_query*, outnet->serviced) {
2035                 s += sizeof(*sq) + sq->qbuflen;
2036                 for(sb = sq->cblist; sb; sb = sb->next)
2037                         s += sizeof(*sb);
2038         }
2039         return s;
2040 }
2041
2042 size_t 
2043 serviced_get_mem(struct serviced_query* sq)
2044 {
2045         struct service_callback* sb;
2046         size_t s;
2047         s = sizeof(*sq) + sq->qbuflen;
2048         for(sb = sq->cblist; sb; sb = sb->next)
2049                 s += sizeof(*sb);
2050         if(sq->status == serviced_query_UDP_EDNS ||
2051                 sq->status == serviced_query_UDP ||
2052                 sq->status == serviced_query_PROBE_EDNS ||
2053                 sq->status == serviced_query_UDP_EDNS_FRAG ||
2054                 sq->status == serviced_query_UDP_EDNS_fallback) {
2055                 s += sizeof(struct pending);
2056                 s += comm_timer_get_mem(NULL);
2057         } else {
2058                 /* does not have size of the pkt pointer */
2059                 /* always has a timer except on malloc failures */
2060
2061                 /* these sizes are part of the main outside network mem */
2062                 /*
2063                 s += sizeof(struct waiting_tcp);
2064                 s += comm_timer_get_mem(NULL);
2065                 */
2066         }
2067         return s;
2068 }
2069