]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/unbound/iterator/iter_utils.c
Merge llvm-project release/15.x llvmorg-15.0.0-rc2-40-gfbd2950d8d0d
[FreeBSD/FreeBSD.git] / contrib / unbound / iterator / iter_utils.c
1 /*
2  * iterator/iter_utils.c - iterative resolver module utility functions.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35
36 /**
37  * \file
38  *
39  * This file contains functions to assist the iterator module.
40  * Configuration options. Forward zones.
41  */
42 #include "config.h"
43 #include "iterator/iter_utils.h"
44 #include "iterator/iterator.h"
45 #include "iterator/iter_hints.h"
46 #include "iterator/iter_fwd.h"
47 #include "iterator/iter_donotq.h"
48 #include "iterator/iter_delegpt.h"
49 #include "iterator/iter_priv.h"
50 #include "services/cache/infra.h"
51 #include "services/cache/dns.h"
52 #include "services/cache/rrset.h"
53 #include "services/outside_network.h"
54 #include "util/net_help.h"
55 #include "util/module.h"
56 #include "util/log.h"
57 #include "util/config_file.h"
58 #include "util/regional.h"
59 #include "util/data/msgparse.h"
60 #include "util/data/dname.h"
61 #include "util/random.h"
62 #include "util/fptr_wlist.h"
63 #include "validator/val_anchor.h"
64 #include "validator/val_kcache.h"
65 #include "validator/val_kentry.h"
66 #include "validator/val_utils.h"
67 #include "validator/val_sigcrypt.h"
68 #include "sldns/sbuffer.h"
69 #include "sldns/str2wire.h"
70
71 /** time when nameserver glue is said to be 'recent' */
72 #define SUSPICION_RECENT_EXPIRY 86400
73
74 /** fillup fetch policy array */
75 static void
76 fetch_fill(struct iter_env* ie, const char* str)
77 {
78         char* s = (char*)str, *e;
79         int i;
80         for(i=0; i<ie->max_dependency_depth+1; i++) {
81                 ie->target_fetch_policy[i] = strtol(s, &e, 10);
82                 if(s == e)
83                         fatal_exit("cannot parse fetch policy number %s", s);
84                 s = e;
85         }
86 }
87
88 /** Read config string that represents the target fetch policy */
89 static int
90 read_fetch_policy(struct iter_env* ie, const char* str)
91 {
92         int count = cfg_count_numbers(str);
93         if(count < 1) {
94                 log_err("Cannot parse target fetch policy: \"%s\"", str);
95                 return 0;
96         }
97         ie->max_dependency_depth = count - 1;
98         ie->target_fetch_policy = (int*)calloc(
99                 (size_t)ie->max_dependency_depth+1, sizeof(int));
100         if(!ie->target_fetch_policy) {
101                 log_err("alloc fetch policy: out of memory");
102                 return 0;
103         }
104         fetch_fill(ie, str);
105         return 1;
106 }
107
108 /** apply config caps whitelist items to name tree */
109 static int
110 caps_white_apply_cfg(rbtree_type* ntree, struct config_file* cfg)
111 {
112         struct config_strlist* p;
113         for(p=cfg->caps_whitelist; p; p=p->next) {
114                 struct name_tree_node* n;
115                 size_t len;
116                 uint8_t* nm = sldns_str2wire_dname(p->str, &len);
117                 if(!nm) {
118                         log_err("could not parse %s", p->str);
119                         return 0;
120                 }
121                 n = (struct name_tree_node*)calloc(1, sizeof(*n));
122                 if(!n) {
123                         log_err("out of memory");
124                         free(nm);
125                         return 0;
126                 }
127                 n->node.key = n;
128                 n->name = nm;
129                 n->len = len;
130                 n->labs = dname_count_labels(nm);
131                 n->dclass = LDNS_RR_CLASS_IN;
132                 if(!name_tree_insert(ntree, n, nm, len, n->labs, n->dclass)) {
133                         /* duplicate element ignored, idempotent */
134                         free(n->name);
135                         free(n);
136                 }
137         }
138         name_tree_init_parents(ntree);
139         return 1;
140 }
141
142 int
143 iter_apply_cfg(struct iter_env* iter_env, struct config_file* cfg)
144 {
145         int i;
146         /* target fetch policy */
147         if(!read_fetch_policy(iter_env, cfg->target_fetch_policy))
148                 return 0;
149         for(i=0; i<iter_env->max_dependency_depth+1; i++)
150                 verbose(VERB_QUERY, "target fetch policy for level %d is %d",
151                         i, iter_env->target_fetch_policy[i]);
152
153         if(!iter_env->donotq)
154                 iter_env->donotq = donotq_create();
155         if(!iter_env->donotq || !donotq_apply_cfg(iter_env->donotq, cfg)) {
156                 log_err("Could not set donotqueryaddresses");
157                 return 0;
158         }
159         if(!iter_env->priv)
160                 iter_env->priv = priv_create();
161         if(!iter_env->priv || !priv_apply_cfg(iter_env->priv, cfg)) {
162                 log_err("Could not set private addresses");
163                 return 0;
164         }
165         if(cfg->caps_whitelist) {
166                 if(!iter_env->caps_white)
167                         iter_env->caps_white = rbtree_create(name_tree_compare);
168                 if(!iter_env->caps_white || !caps_white_apply_cfg(
169                         iter_env->caps_white, cfg)) {
170                         log_err("Could not set capsforid whitelist");
171                         return 0;
172                 }
173
174         }
175         iter_env->supports_ipv6 = cfg->do_ip6;
176         iter_env->supports_ipv4 = cfg->do_ip4;
177         iter_env->outbound_msg_retry = cfg->outbound_msg_retry;
178         iter_env->max_sent_count = cfg->max_sent_count;
179         iter_env->max_query_restarts = cfg->max_query_restarts;
180         return 1;
181 }
182
183 /** filter out unsuitable targets
184  * @param iter_env: iterator environment with ipv6-support flag.
185  * @param env: module environment with infra cache.
186  * @param name: zone name
187  * @param namelen: length of name
188  * @param qtype: query type (host order).
189  * @param now: current time
190  * @param a: address in delegation point we are examining.
191  * @return an integer that signals the target suitability.
192  *      as follows:
193  *      -1: The address should be omitted from the list.
194  *          Because:
195  *              o The address is bogus (DNSSEC validation failure).
196  *              o Listed as donotquery
197  *              o is ipv6 but no ipv6 support (in operating system).
198  *              o is ipv4 but no ipv4 support (in operating system).
199  *              o is lame
200  *      Otherwise, an rtt in milliseconds.
201  *      0 .. USEFUL_SERVER_TOP_TIMEOUT-1
202  *              The roundtrip time timeout estimate. less than 2 minutes.
203  *              Note that util/rtt.c has a MIN_TIMEOUT of 50 msec, thus
204  *              values 0 .. 49 are not used, unless that is changed.
205  *      USEFUL_SERVER_TOP_TIMEOUT
206  *              This value exactly is given for unresponsive blacklisted.
207  *      USEFUL_SERVER_TOP_TIMEOUT+1
208  *              For non-blacklisted servers: huge timeout, but has traffic.
209  *      USEFUL_SERVER_TOP_TIMEOUT*1 ..
210  *              parent-side lame servers get this penalty. A dispreferential
211  *              server. (lame in delegpt).
212  *      USEFUL_SERVER_TOP_TIMEOUT*2 ..
213  *              dnsseclame servers get penalty
214  *      USEFUL_SERVER_TOP_TIMEOUT*3 ..
215  *              recursion lame servers get penalty
216  *      UNKNOWN_SERVER_NICENESS
217  *              If no information is known about the server, this is
218  *              returned. 376 msec or so.
219  *      +BLACKLIST_PENALTY (of USEFUL_TOP_TIMEOUT*4) for dnssec failed IPs.
220  *
221  * When a final value is chosen that is dnsseclame ; dnsseclameness checking
222  * is turned off (so we do not discard the reply).
223  * When a final value is chosen that is recursionlame; RD bit is set on query.
224  * Because of the numbers this means recursionlame also have dnssec lameness
225  * checking turned off.
226  */
227 static int
228 iter_filter_unsuitable(struct iter_env* iter_env, struct module_env* env,
229         uint8_t* name, size_t namelen, uint16_t qtype, time_t now,
230         struct delegpt_addr* a)
231 {
232         int rtt, lame, reclame, dnsseclame;
233         if(a->bogus)
234                 return -1; /* address of server is bogus */
235         if(donotq_lookup(iter_env->donotq, &a->addr, a->addrlen)) {
236                 log_addr(VERB_ALGO, "skip addr on the donotquery list",
237                         &a->addr, a->addrlen);
238                 return -1; /* server is on the donotquery list */
239         }
240         if(!iter_env->supports_ipv6 && addr_is_ip6(&a->addr, a->addrlen)) {
241                 return -1; /* there is no ip6 available */
242         }
243         if(!iter_env->supports_ipv4 && !addr_is_ip6(&a->addr, a->addrlen)) {
244                 return -1; /* there is no ip4 available */
245         }
246         /* check lameness - need zone , class info */
247         if(infra_get_lame_rtt(env->infra_cache, &a->addr, a->addrlen,
248                 name, namelen, qtype, &lame, &dnsseclame, &reclame,
249                 &rtt, now)) {
250                 log_addr(VERB_ALGO, "servselect", &a->addr, a->addrlen);
251                 verbose(VERB_ALGO, "   rtt=%d%s%s%s%s", rtt,
252                         lame?" LAME":"",
253                         dnsseclame?" DNSSEC_LAME":"",
254                         reclame?" REC_LAME":"",
255                         a->lame?" ADDR_LAME":"");
256                 if(lame)
257                         return -1; /* server is lame */
258                 else if(rtt >= USEFUL_SERVER_TOP_TIMEOUT)
259                         /* server is unresponsive,
260                          * we used to return TOP_TIMEOUT, but fairly useless,
261                          * because if == TOP_TIMEOUT is dropped because
262                          * blacklisted later, instead, remove it here, so
263                          * other choices (that are not blacklisted) can be
264                          * tried */
265                         return -1;
266                 /* select remainder from worst to best */
267                 else if(reclame)
268                         return rtt+USEFUL_SERVER_TOP_TIMEOUT*3; /* nonpref */
269                 else if(dnsseclame || a->dnsseclame)
270                         return rtt+USEFUL_SERVER_TOP_TIMEOUT*2; /* nonpref */
271                 else if(a->lame)
272                         return rtt+USEFUL_SERVER_TOP_TIMEOUT+1; /* nonpref */
273                 else    return rtt;
274         }
275         /* no server information present */
276         if(a->dnsseclame)
277                 return UNKNOWN_SERVER_NICENESS+USEFUL_SERVER_TOP_TIMEOUT*2; /* nonpref */
278         else if(a->lame)
279                 return USEFUL_SERVER_TOP_TIMEOUT+1+UNKNOWN_SERVER_NICENESS; /* nonpref */
280         return UNKNOWN_SERVER_NICENESS;
281 }
282
283 /** lookup RTT information, and also store fastest rtt (if any) */
284 static int
285 iter_fill_rtt(struct iter_env* iter_env, struct module_env* env,
286         uint8_t* name, size_t namelen, uint16_t qtype, time_t now,
287         struct delegpt* dp, int* best_rtt, struct sock_list* blacklist,
288         size_t* num_suitable_results)
289 {
290         int got_it = 0;
291         struct delegpt_addr* a;
292         *num_suitable_results = 0;
293
294         if(dp->bogus)
295                 return 0; /* NS bogus, all bogus, nothing found */
296         for(a=dp->result_list; a; a = a->next_result) {
297                 a->sel_rtt = iter_filter_unsuitable(iter_env, env,
298                         name, namelen, qtype, now, a);
299                 if(a->sel_rtt != -1) {
300                         if(sock_list_find(blacklist, &a->addr, a->addrlen))
301                                 a->sel_rtt += BLACKLIST_PENALTY;
302
303                         if(!got_it) {
304                                 *best_rtt = a->sel_rtt;
305                                 got_it = 1;
306                         } else if(a->sel_rtt < *best_rtt) {
307                                 *best_rtt = a->sel_rtt;
308                         }
309                         (*num_suitable_results)++;
310                 }
311         }
312         return got_it;
313 }
314
/** qsort comparison routine for two rtt values (ints),
 * return -1, 0 or 1 for smaller, equal or larger */
static int
rtt_compare(const void* x, const void* y)
{
	int left = *(int*)x;
	int right = *(int*)y;
	return (left > right) - (left < right);
}
325
326 /** get RTT for the Nth fastest server */
327 static int
328 nth_rtt(struct delegpt_addr* result_list, size_t num_results, size_t n)
329 {
330         int rtt_band;
331         size_t i;
332         int* rtt_list, *rtt_index;
333
334         if(num_results < 1 || n >= num_results) {
335                 return -1;
336         }
337
338         rtt_list = calloc(num_results, sizeof(int));
339         if(!rtt_list) {
340                 log_err("malloc failure: allocating rtt_list");
341                 return -1;
342         }
343         rtt_index = rtt_list;
344
345         for(i=0; i<num_results && result_list; i++) {
346                 if(result_list->sel_rtt != -1) {
347                         *rtt_index = result_list->sel_rtt;
348                         rtt_index++;
349                 }
350                 result_list=result_list->next_result;
351         }
352         qsort(rtt_list, num_results, sizeof(*rtt_list), rtt_compare);
353
354         log_assert(n > 0);
355         rtt_band = rtt_list[n-1];
356         free(rtt_list);
357
358         return rtt_band;
359 }
360
361 /** filter the address list, putting best targets at front,
362  * returns number of best targets (or 0, no suitable targets) */
363 static int
364 iter_filter_order(struct iter_env* iter_env, struct module_env* env,
365         uint8_t* name, size_t namelen, uint16_t qtype, time_t now,
366         struct delegpt* dp, int* selected_rtt, int open_target,
367         struct sock_list* blacklist, time_t prefetch)
368 {
369         int got_num = 0, low_rtt = 0, swap_to_front, rtt_band = RTT_BAND, nth;
370         int alllame = 0;
371         size_t num_results;
372         struct delegpt_addr* a, *n, *prev=NULL;
373
374         /* fillup sel_rtt and find best rtt in the bunch */
375         got_num = iter_fill_rtt(iter_env, env, name, namelen, qtype, now, dp,
376                 &low_rtt, blacklist, &num_results);
377         if(got_num == 0)
378                 return 0;
379         if(low_rtt >= USEFUL_SERVER_TOP_TIMEOUT &&
380                 /* If all missing (or not fully resolved) targets are lame,
381                  * then use the remaining lame address. */
382                 ((delegpt_count_missing_targets(dp, &alllame) > 0 && !alllame) ||
383                 open_target > 0)) {
384                 verbose(VERB_ALGO, "Bad choices, trying to get more choice");
385                 return 0; /* we want more choice. The best choice is a bad one.
386                              return 0 to force the caller to fetch more */
387         }
388
389         if(env->cfg->fast_server_permil != 0 && prefetch == 0 &&
390                 num_results > env->cfg->fast_server_num &&
391                 ub_random_max(env->rnd, 1000) < env->cfg->fast_server_permil) {
392                 /* the query is not prefetch, but for a downstream client,
393                  * there are more servers available then the fastest N we want
394                  * to choose from. Limit our choice to the fastest servers. */
395                 nth = nth_rtt(dp->result_list, num_results,
396                         env->cfg->fast_server_num);
397                 if(nth > 0) {
398                         rtt_band = nth - low_rtt;
399                         if(rtt_band > RTT_BAND)
400                                 rtt_band = RTT_BAND;
401                 }
402         }
403
404         got_num = 0;
405         a = dp->result_list;
406         while(a) {
407                 /* skip unsuitable targets */
408                 if(a->sel_rtt == -1) {
409                         prev = a;
410                         a = a->next_result;
411                         continue;
412                 }
413                 /* classify the server address and determine what to do */
414                 swap_to_front = 0;
415                 if(a->sel_rtt >= low_rtt && a->sel_rtt - low_rtt <= rtt_band) {
416                         got_num++;
417                         swap_to_front = 1;
418                 } else if(a->sel_rtt<low_rtt && low_rtt-a->sel_rtt<=rtt_band) {
419                         got_num++;
420                         swap_to_front = 1;
421                 }
422                 /* swap to front if necessary, or move to next result */
423                 if(swap_to_front && prev) {
424                         n = a->next_result;
425                         prev->next_result = n;
426                         a->next_result = dp->result_list;
427                         dp->result_list = a;
428                         a = n;
429                 } else {
430                         prev = a;
431                         a = a->next_result;
432                 }
433         }
434         *selected_rtt = low_rtt;
435
436         if (env->cfg->prefer_ip6) {
437                 int got_num6 = 0;
438                 int low_rtt6 = 0;
439                 int i;
440                 int attempt = -1; /* filter to make sure addresses have
441                   less attempts on them than the first, to force round
442                   robin when all the IPv6 addresses fail */
443                 int num4ok = 0; /* number ip4 at low attempt count */
444                 int num4_lowrtt = 0;
445                 prev = NULL;
446                 a = dp->result_list;
447                 for(i = 0; i < got_num; i++) {
448                         if(!a) break; /* robustness */
449                         swap_to_front = 0;
450                         if(a->addr.ss_family != AF_INET6 && attempt == -1) {
451                                 /* if we only have ip4 at low attempt count,
452                                  * then ip6 is failing, and we need to
453                                  * select one of the remaining IPv4 addrs */
454                                 attempt = a->attempts;
455                                 num4ok++;
456                                 num4_lowrtt = a->sel_rtt;
457                         } else if(a->addr.ss_family != AF_INET6 && attempt == a->attempts) {
458                                 num4ok++;
459                                 if(num4_lowrtt == 0 || a->sel_rtt < num4_lowrtt) {
460                                         num4_lowrtt = a->sel_rtt;
461                                 }
462                         }
463                         if(a->addr.ss_family == AF_INET6) {
464                                 if(attempt == -1) {
465                                         attempt = a->attempts;
466                                 } else if(a->attempts > attempt) {
467                                         break;
468                                 }
469                                 got_num6++;
470                                 swap_to_front = 1;
471                                 if(low_rtt6 == 0 || a->sel_rtt < low_rtt6) {
472                                         low_rtt6 = a->sel_rtt;
473                                 }
474                         }
475                         /* swap to front if IPv6, or move to next result */
476                         if(swap_to_front && prev) {
477                                 n = a->next_result;
478                                 prev->next_result = n;
479                                 a->next_result = dp->result_list;
480                                 dp->result_list = a;
481                                 a = n;
482                         } else {
483                                 prev = a;
484                                 a = a->next_result;
485                         }
486                 }
487                 if(got_num6 > 0) {
488                         got_num = got_num6;
489                         *selected_rtt = low_rtt6;
490                 } else if(num4ok > 0) {
491                         got_num = num4ok;
492                         *selected_rtt = num4_lowrtt;
493                 }
494         } else if (env->cfg->prefer_ip4) {
495                 int got_num4 = 0;
496                 int low_rtt4 = 0;
497                 int i;
498                 int attempt = -1; /* filter to make sure addresses have
499                   less attempts on them than the first, to force round
500                   robin when all the IPv4 addresses fail */
501                 int num6ok = 0; /* number ip6 at low attempt count */
502                 int num6_lowrtt = 0;
503                 prev = NULL;
504                 a = dp->result_list;
505                 for(i = 0; i < got_num; i++) {
506                         if(!a) break; /* robustness */
507                         swap_to_front = 0;
508                         if(a->addr.ss_family != AF_INET && attempt == -1) {
509                                 /* if we only have ip6 at low attempt count,
510                                  * then ip4 is failing, and we need to
511                                  * select one of the remaining IPv6 addrs */
512                                 attempt = a->attempts;
513                                 num6ok++;
514                                 num6_lowrtt = a->sel_rtt;
515                         } else if(a->addr.ss_family != AF_INET && attempt == a->attempts) {
516                                 num6ok++;
517                                 if(num6_lowrtt == 0 || a->sel_rtt < num6_lowrtt) {
518                                         num6_lowrtt = a->sel_rtt;
519                                 }
520                         }
521                         if(a->addr.ss_family == AF_INET) {
522                                 if(attempt == -1) {
523                                         attempt = a->attempts;
524                                 } else if(a->attempts > attempt) {
525                                         break;
526                                 }
527                                 got_num4++;
528                                 swap_to_front = 1;
529                                 if(low_rtt4 == 0 || a->sel_rtt < low_rtt4) {
530                                         low_rtt4 = a->sel_rtt;
531                                 }
532                         }
533                         /* swap to front if IPv4, or move to next result */
534                         if(swap_to_front && prev) {
535                                 n = a->next_result;
536                                 prev->next_result = n;
537                                 a->next_result = dp->result_list;
538                                 dp->result_list = a;
539                                 a = n;
540                         } else {
541                                 prev = a;
542                                 a = a->next_result;
543                         }
544                 }
545                 if(got_num4 > 0) {
546                         got_num = got_num4;
547                         *selected_rtt = low_rtt4;
548                 } else if(num6ok > 0) {
549                         got_num = num6ok;
550                         *selected_rtt = num6_lowrtt;
551                 }
552         }
553         return got_num;
554 }
555
556 struct delegpt_addr*
557 iter_server_selection(struct iter_env* iter_env,
558         struct module_env* env, struct delegpt* dp,
559         uint8_t* name, size_t namelen, uint16_t qtype, int* dnssec_lame,
560         int* chase_to_rd, int open_target, struct sock_list* blacklist,
561         time_t prefetch)
562 {
563         int sel;
564         int selrtt;
565         struct delegpt_addr* a, *prev;
566         int num = iter_filter_order(iter_env, env, name, namelen, qtype,
567                 *env->now, dp, &selrtt, open_target, blacklist, prefetch);
568
569         if(num == 0)
570                 return NULL;
571         verbose(VERB_ALGO, "selrtt %d", selrtt);
572         if(selrtt > BLACKLIST_PENALTY) {
573                 if(selrtt-BLACKLIST_PENALTY > USEFUL_SERVER_TOP_TIMEOUT*3) {
574                         verbose(VERB_ALGO, "chase to "
575                                 "blacklisted recursion lame server");
576                         *chase_to_rd = 1;
577                 }
578                 if(selrtt-BLACKLIST_PENALTY > USEFUL_SERVER_TOP_TIMEOUT*2) {
579                         verbose(VERB_ALGO, "chase to "
580                                 "blacklisted dnssec lame server");
581                         *dnssec_lame = 1;
582                 }
583         } else {
584                 if(selrtt > USEFUL_SERVER_TOP_TIMEOUT*3) {
585                         verbose(VERB_ALGO, "chase to recursion lame server");
586                         *chase_to_rd = 1;
587                 }
588                 if(selrtt > USEFUL_SERVER_TOP_TIMEOUT*2) {
589                         verbose(VERB_ALGO, "chase to dnssec lame server");
590                         *dnssec_lame = 1;
591                 }
592                 if(selrtt == USEFUL_SERVER_TOP_TIMEOUT) {
593                         verbose(VERB_ALGO, "chase to blacklisted lame server");
594                         return NULL;
595                 }
596         }
597
598         if(num == 1) {
599                 a = dp->result_list;
600                 if(++a->attempts < iter_env->outbound_msg_retry)
601                         return a;
602                 dp->result_list = a->next_result;
603                 return a;
604         }
605
606         /* randomly select a target from the list */
607         log_assert(num > 1);
608         /* grab secure random number, to pick unexpected server.
609          * also we need it to be threadsafe. */
610         sel = ub_random_max(env->rnd, num);
611         a = dp->result_list;
612         prev = NULL;
613         while(sel > 0 && a) {
614                 prev = a;
615                 a = a->next_result;
616                 sel--;
617         }
618         if(!a)  /* robustness */
619                 return NULL;
620         if(++a->attempts < iter_env->outbound_msg_retry)
621                 return a;
622         /* remove it from the delegation point result list */
623         if(prev)
624                 prev->next_result = a->next_result;
625         else    dp->result_list = a->next_result;
626         return a;
627 }
628
629 struct dns_msg*
630 dns_alloc_msg(sldns_buffer* pkt, struct msg_parse* msg,
631         struct regional* region)
632 {
633         struct dns_msg* m = (struct dns_msg*)regional_alloc(region,
634                 sizeof(struct dns_msg));
635         if(!m)
636                 return NULL;
637         memset(m, 0, sizeof(*m));
638         if(!parse_create_msg(pkt, msg, NULL, &m->qinfo, &m->rep, region)) {
639                 log_err("malloc failure: allocating incoming dns_msg");
640                 return NULL;
641         }
642         return m;
643 }
644
645 struct dns_msg*
646 dns_copy_msg(struct dns_msg* from, struct regional* region)
647 {
648         struct dns_msg* m = (struct dns_msg*)regional_alloc(region,
649                 sizeof(struct dns_msg));
650         if(!m)
651                 return NULL;
652         m->qinfo = from->qinfo;
653         if(!(m->qinfo.qname = regional_alloc_init(region, from->qinfo.qname,
654                 from->qinfo.qname_len)))
655                 return NULL;
656         if(!(m->rep = reply_info_copy(from->rep, NULL, region)))
657                 return NULL;
658         return m;
659 }
660
/* Store a message in the dns cache by means of dns_cache_store; a
 * failure to store is logged and otherwise ignored. */
void
iter_dns_store(struct module_env* env, struct query_info* msgqinf,
	struct reply_info* msgrep, int is_referral, time_t leeway, int pside,
	struct regional* region, uint16_t flags, time_t qstarttime)
{
	int stored = dns_cache_store(env, msgqinf, msgrep, is_referral,
		leeway, pside, region, flags, qstarttime);
	if(stored)
		return;
	log_err("out of memory: cannot store data in cache");
}
670
/* Random decision with a chance of n out of m.
 * Returns 1 if n equals m (no random number is drawn then), otherwise
 * 1 if a random number drawn with ub_random_max(rnd, m) is below n,
 * and 0 if it is not. */
int
iter_ns_probability(struct ub_randstate* rnd, int n, int m)
{
	/* 100% chance */
	if(n == m)
		return 1;
	/* we do not need secure random numbers here, but
	 * we do need it to be threadsafe, so we use this */
	return ub_random_max(rnd, m) < n;
}
682
683 /** detect dependency cycle for query and target */
684 static int
685 causes_cycle(struct module_qstate* qstate, uint8_t* name, size_t namelen,
686         uint16_t t, uint16_t c)
687 {
688         struct query_info qinf;
689         qinf.qname = name;
690         qinf.qname_len = namelen;
691         qinf.qtype = t;
692         qinf.qclass = c;
693         qinf.local_alias = NULL;
694         fptr_ok(fptr_whitelist_modenv_detect_cycle(
695                 qstate->env->detect_cycle));
696         return (*qstate->env->detect_cycle)(qstate, &qinf,
697                 (uint16_t)(BIT_RD|BIT_CD), qstate->is_priming,
698                 qstate->is_valrec);
699 }
700
701 void
702 iter_mark_cycle_targets(struct module_qstate* qstate, struct delegpt* dp)
703 {
704         struct delegpt_ns* ns;
705         for(ns = dp->nslist; ns; ns = ns->next) {
706                 if(ns->resolved)
707                         continue;
708                 /* see if this ns as target causes dependency cycle */
709                 if(causes_cycle(qstate, ns->name, ns->namelen,
710                         LDNS_RR_TYPE_AAAA, qstate->qinfo.qclass) ||
711                    causes_cycle(qstate, ns->name, ns->namelen,
712                         LDNS_RR_TYPE_A, qstate->qinfo.qclass)) {
713                         log_nametypeclass(VERB_QUERY, "skipping target due "
714                                 "to dependency cycle (harden-glue: no may "
715                                 "fix some of the cycles)",
716                                 ns->name, LDNS_RR_TYPE_A,
717                                 qstate->qinfo.qclass);
718                         ns->resolved = 1;
719                 }
720         }
721 }
722
723 void
724 iter_mark_pside_cycle_targets(struct module_qstate* qstate, struct delegpt* dp)
725 {
726         struct delegpt_ns* ns;
727         for(ns = dp->nslist; ns; ns = ns->next) {
728                 if(ns->done_pside4 && ns->done_pside6)
729                         continue;
730                 /* see if this ns as target causes dependency cycle */
731                 if(causes_cycle(qstate, ns->name, ns->namelen,
732                         LDNS_RR_TYPE_A, qstate->qinfo.qclass)) {
733                         log_nametypeclass(VERB_QUERY, "skipping target due "
734                                 "to dependency cycle", ns->name,
735                                 LDNS_RR_TYPE_A, qstate->qinfo.qclass);
736                         ns->done_pside4 = 1;
737                 }
738                 if(causes_cycle(qstate, ns->name, ns->namelen,
739                         LDNS_RR_TYPE_AAAA, qstate->qinfo.qclass)) {
740                         log_nametypeclass(VERB_QUERY, "skipping target due "
741                                 "to dependency cycle", ns->name,
742                                 LDNS_RR_TYPE_AAAA, qstate->qinfo.qclass);
743                         ns->done_pside6 = 1;
744                 }
745         }
746 }
747
748 int
749 iter_dp_is_useless(struct query_info* qinfo, uint16_t qflags,
750         struct delegpt* dp, int supports_ipv4, int supports_ipv6)
751 {
752         struct delegpt_ns* ns;
753         struct delegpt_addr* a;
754         /* check:
755          *      o RD qflag is on.
756          *      o no addresses are provided.
757          *      o all NS items are required glue.
758          * OR
759          *      o RD qflag is on.
760          *      o no addresses are provided.
761          *      o the query is for one of the nameservers in dp,
762          *        and that nameserver is a glue-name for this dp.
763          */
764         if(!(qflags&BIT_RD))
765                 return 0;
766         /* either available or unused targets,
767          * if they exist, the dp is not useless. */
768         for(a = dp->usable_list; a; a = a->next_usable) {
769                 if(!addr_is_ip6(&a->addr, a->addrlen) && supports_ipv4)
770                         return 0;
771                 else if(addr_is_ip6(&a->addr, a->addrlen) && supports_ipv6)
772                         return 0;
773         }
774         for(a = dp->result_list; a; a = a->next_result) {
775                 if(!addr_is_ip6(&a->addr, a->addrlen) && supports_ipv4)
776                         return 0;
777                 else if(addr_is_ip6(&a->addr, a->addrlen) && supports_ipv6)
778                         return 0;
779         }
780
781         /* see if query is for one of the nameservers, which is glue */
782         if( ((qinfo->qtype == LDNS_RR_TYPE_A && supports_ipv4) ||
783                 (qinfo->qtype == LDNS_RR_TYPE_AAAA && supports_ipv6)) &&
784                 dname_subdomain_c(qinfo->qname, dp->name) &&
785                 delegpt_find_ns(dp, qinfo->qname, qinfo->qname_len))
786                 return 1;
787
788         for(ns = dp->nslist; ns; ns = ns->next) {
789                 if(ns->resolved) /* skip failed targets */
790                         continue;
791                 if(!dname_subdomain_c(ns->name, dp->name))
792                         return 0; /* one address is not required glue */
793         }
794         return 1;
795 }
796
797 int
798 iter_qname_indicates_dnssec(struct module_env* env, struct query_info *qinfo)
799 {
800         struct trust_anchor* a;
801         if(!env || !env->anchors || !qinfo || !qinfo->qname)
802                 return 0;
803         /* a trust anchor exists above the name? */
804         if((a=anchors_lookup(env->anchors, qinfo->qname, qinfo->qname_len,
805                 qinfo->qclass))) {
806                 if(a->numDS == 0 && a->numDNSKEY == 0) {
807                         /* insecure trust point */
808                         lock_basic_unlock(&a->lock);
809                         return 0;
810                 }
811                 lock_basic_unlock(&a->lock);
812                 return 1;
813         }
814         /* no trust anchor above it. */
815         return 0;
816 }
817
818 int
819 iter_indicates_dnssec(struct module_env* env, struct delegpt* dp,
820         struct dns_msg* msg, uint16_t dclass)
821 {
822         struct trust_anchor* a;
823         /* information not available, !env->anchors can be common */
824         if(!env || !env->anchors || !dp || !dp->name)
825                 return 0;
826         /* a trust anchor exists with this name, RRSIGs expected */
827         if((a=anchor_find(env->anchors, dp->name, dp->namelabs, dp->namelen,
828                 dclass))) {
829                 if(a->numDS == 0 && a->numDNSKEY == 0) {
830                         /* insecure trust point */
831                         lock_basic_unlock(&a->lock);
832                         return 0;
833                 }
834                 lock_basic_unlock(&a->lock);
835                 return 1;
836         }
837         /* see if DS rrset was given, in AUTH section */
838         if(msg && msg->rep &&
839                 reply_find_rrset_section_ns(msg->rep, dp->name, dp->namelen,
840                 LDNS_RR_TYPE_DS, dclass))
841                 return 1;
842         /* look in key cache */
843         if(env->key_cache) {
844                 struct key_entry_key* kk = key_cache_obtain(env->key_cache,
845                         dp->name, dp->namelen, dclass, env->scratch, *env->now);
846                 if(kk) {
847                         if(query_dname_compare(kk->name, dp->name) == 0) {
848                           if(key_entry_isgood(kk) || key_entry_isbad(kk)) {
849                                 regional_free_all(env->scratch);
850                                 return 1;
851                           } else if(key_entry_isnull(kk)) {
852                                 regional_free_all(env->scratch);
853                                 return 0;
854                           }
855                         }
856                         regional_free_all(env->scratch);
857                 }
858         }
859         return 0;
860 }
861
862 int
863 iter_msg_has_dnssec(struct dns_msg* msg)
864 {
865         size_t i;
866         if(!msg || !msg->rep)
867                 return 0;
868         for(i=0; i<msg->rep->an_numrrsets + msg->rep->ns_numrrsets; i++) {
869                 if(((struct packed_rrset_data*)msg->rep->rrsets[i]->
870                         entry.data)->rrsig_count > 0)
871                         return 1;
872         }
873         /* empty message has no DNSSEC info, with DNSSEC the reply is
874          * not empty (NSEC) */
875         return 0;
876 }
877
878 int iter_msg_from_zone(struct dns_msg* msg, struct delegpt* dp,
879         enum response_type type, uint16_t dclass)
880 {
881         if(!msg || !dp || !msg->rep || !dp->name)
882                 return 0;
883         /* SOA RRset - always from reply zone */
884         if(reply_find_rrset_section_an(msg->rep, dp->name, dp->namelen,
885                 LDNS_RR_TYPE_SOA, dclass) ||
886            reply_find_rrset_section_ns(msg->rep, dp->name, dp->namelen,
887                 LDNS_RR_TYPE_SOA, dclass))
888                 return 1;
889         if(type == RESPONSE_TYPE_REFERRAL) {
890                 size_t i;
891                 /* if it adds a single label, i.e. we expect .com,
892                  * and referral to example.com. NS ... , then origin zone
893                  * is .com. For a referral to sub.example.com. NS ... then
894                  * we do not know, since example.com. may be in between. */
895                 for(i=0; i<msg->rep->an_numrrsets+msg->rep->ns_numrrsets;
896                         i++) {
897                         struct ub_packed_rrset_key* s = msg->rep->rrsets[i];
898                         if(ntohs(s->rk.type) == LDNS_RR_TYPE_NS &&
899                                 ntohs(s->rk.rrset_class) == dclass) {
900                                 int l = dname_count_labels(s->rk.dname);
901                                 if(l == dp->namelabs + 1 &&
902                                         dname_strict_subdomain(s->rk.dname,
903                                         l, dp->name, dp->namelabs))
904                                         return 1;
905                         }
906                 }
907                 return 0;
908         }
909         log_assert(type==RESPONSE_TYPE_ANSWER || type==RESPONSE_TYPE_CNAME);
910         /* not a referral, and not lame delegation (upwards), so,
911          * any NS rrset must be from the zone itself */
912         if(reply_find_rrset_section_an(msg->rep, dp->name, dp->namelen,
913                 LDNS_RR_TYPE_NS, dclass) ||
914            reply_find_rrset_section_ns(msg->rep, dp->name, dp->namelen,
915                 LDNS_RR_TYPE_NS, dclass))
916                 return 1;
917         /* a DNSKEY set is expected at the zone apex as well */
918         /* this is for 'minimal responses' for DNSKEYs */
919         if(reply_find_rrset_section_an(msg->rep, dp->name, dp->namelen,
920                 LDNS_RR_TYPE_DNSKEY, dclass))
921                 return 1;
922         return 0;
923 }
924
925 /**
926  * check equality of two rrsets
927  * @param k1: rrset
928  * @param k2: rrset
929  * @return true if equal
930  */
931 static int
932 rrset_equal(struct ub_packed_rrset_key* k1, struct ub_packed_rrset_key* k2)
933 {
934         struct packed_rrset_data* d1 = (struct packed_rrset_data*)
935                 k1->entry.data;
936         struct packed_rrset_data* d2 = (struct packed_rrset_data*)
937                 k2->entry.data;
938         size_t i, t;
939         if(k1->rk.dname_len != k2->rk.dname_len ||
940                 k1->rk.flags != k2->rk.flags ||
941                 k1->rk.type != k2->rk.type ||
942                 k1->rk.rrset_class != k2->rk.rrset_class ||
943                 query_dname_compare(k1->rk.dname, k2->rk.dname) != 0)
944                 return 0;
945         if(     /* do not check ttl: d1->ttl != d2->ttl || */
946                 d1->count != d2->count ||
947                 d1->rrsig_count != d2->rrsig_count ||
948                 d1->trust != d2->trust ||
949                 d1->security != d2->security)
950                 return 0;
951         t = d1->count + d1->rrsig_count;
952         for(i=0; i<t; i++) {
953                 if(d1->rr_len[i] != d2->rr_len[i] ||
954                         /* no ttl check: d1->rr_ttl[i] != d2->rr_ttl[i] ||*/
955                         memcmp(d1->rr_data[i], d2->rr_data[i],
956                                 d1->rr_len[i]) != 0)
957                         return 0;
958         }
959         return 1;
960 }
961
962 /** compare rrsets and sort canonically.  Compares rrset name, type, class.
963  * return 0 if equal, +1 if x > y, and -1 if x < y.
964  */
965 static int
966 rrset_canonical_sort_cmp(const void* x, const void* y)
967 {
968         struct ub_packed_rrset_key* rrx = *(struct ub_packed_rrset_key**)x;
969         struct ub_packed_rrset_key* rry = *(struct ub_packed_rrset_key**)y;
970         int r = dname_canonical_compare(rrx->rk.dname, rry->rk.dname);
971         if(r != 0)
972                 return r;
973         if(rrx->rk.type != rry->rk.type) {
974                 if(ntohs(rrx->rk.type) > ntohs(rry->rk.type))
975                         return 1;
976                 else    return -1;
977         }
978         if(rrx->rk.rrset_class != rry->rk.rrset_class) {
979                 if(ntohs(rrx->rk.rrset_class) > ntohs(rry->rk.rrset_class))
980                         return 1;
981                 else    return -1;
982         }
983         return 0;
984 }
985
986 int
987 reply_equal(struct reply_info* p, struct reply_info* q, struct regional* region)
988 {
989         size_t i;
990         struct ub_packed_rrset_key** sorted_p, **sorted_q;
991         if(p->flags != q->flags ||
992                 p->qdcount != q->qdcount ||
993                 /* do not check TTL, this may differ */
994                 /*
995                 p->ttl != q->ttl ||
996                 p->prefetch_ttl != q->prefetch_ttl ||
997                 */
998                 p->security != q->security ||
999                 p->an_numrrsets != q->an_numrrsets ||
1000                 p->ns_numrrsets != q->ns_numrrsets ||
1001                 p->ar_numrrsets != q->ar_numrrsets ||
1002                 p->rrset_count != q->rrset_count)
1003                 return 0;
1004         /* sort the rrsets in the authority and additional sections before
1005          * compare, the query and answer sections are ordered in the sequence
1006          * they should have (eg. one after the other for aliases). */
1007         sorted_p = (struct ub_packed_rrset_key**)regional_alloc_init(
1008                 region, p->rrsets, sizeof(*sorted_p)*p->rrset_count);
1009         if(!sorted_p) return 0;
1010         log_assert(p->an_numrrsets + p->ns_numrrsets + p->ar_numrrsets <=
1011                 p->rrset_count);
1012         qsort(sorted_p + p->an_numrrsets, p->ns_numrrsets,
1013                 sizeof(*sorted_p), rrset_canonical_sort_cmp);
1014         qsort(sorted_p + p->an_numrrsets + p->ns_numrrsets, p->ar_numrrsets,
1015                 sizeof(*sorted_p), rrset_canonical_sort_cmp);
1016
1017         sorted_q = (struct ub_packed_rrset_key**)regional_alloc_init(
1018                 region, q->rrsets, sizeof(*sorted_q)*q->rrset_count);
1019         if(!sorted_q) {
1020                 regional_free_all(region);
1021                 return 0;
1022         }
1023         log_assert(q->an_numrrsets + q->ns_numrrsets + q->ar_numrrsets <=
1024                 q->rrset_count);
1025         qsort(sorted_q + q->an_numrrsets, q->ns_numrrsets,
1026                 sizeof(*sorted_q), rrset_canonical_sort_cmp);
1027         qsort(sorted_q + q->an_numrrsets + q->ns_numrrsets, q->ar_numrrsets,
1028                 sizeof(*sorted_q), rrset_canonical_sort_cmp);
1029
1030         /* compare the rrsets */
1031         for(i=0; i<p->rrset_count; i++) {
1032                 if(!rrset_equal(sorted_p[i], sorted_q[i])) {
1033                         if(!rrset_canonical_equal(region, sorted_p[i],
1034                                 sorted_q[i])) {
1035                                 regional_free_all(region);
1036                                 return 0;
1037                         }
1038                 }
1039         }
1040         regional_free_all(region);
1041         return 1;
1042 }
1043
1044 void
1045 caps_strip_reply(struct reply_info* rep)
1046 {
1047         size_t i;
1048         if(!rep) return;
1049         /* see if message is a referral, in which case the additional and
1050          * NS record cannot be removed */
1051         /* referrals have the AA flag unset (strict check, not elsewhere in
1052          * unbound, but for 0x20 this is very convenient). */
1053         if(!(rep->flags&BIT_AA))
1054                 return;
1055         /* remove the additional section from the reply */
1056         if(rep->ar_numrrsets != 0) {
1057                 verbose(VERB_ALGO, "caps fallback: removing additional section");
1058                 rep->rrset_count -= rep->ar_numrrsets;
1059                 rep->ar_numrrsets = 0;
1060         }
1061         /* is there an NS set in the authority section to remove? */
1062         /* the failure case (Cisco firewalls) only has one rrset in authsec */
1063         for(i=rep->an_numrrsets; i<rep->an_numrrsets+rep->ns_numrrsets; i++) {
1064                 struct ub_packed_rrset_key* s = rep->rrsets[i];
1065                 if(ntohs(s->rk.type) == LDNS_RR_TYPE_NS) {
1066                         /* remove NS rrset and break from loop (loop limits
1067                          * have changed) */
1068                         /* move last rrset into this position (there is no
1069                          * additional section any more) */
1070                         verbose(VERB_ALGO, "caps fallback: removing NS rrset");
1071                         if(i < rep->rrset_count-1)
1072                                 rep->rrsets[i]=rep->rrsets[rep->rrset_count-1];
1073                         rep->rrset_count --;
1074                         rep->ns_numrrsets --;
1075                         break;
1076                 }
1077         }
1078 }
1079
1080 int caps_failed_rcode(struct reply_info* rep)
1081 {
1082         return !(FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NOERROR ||
1083                 FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NXDOMAIN);
1084 }
1085
1086 void
1087 iter_store_parentside_rrset(struct module_env* env,
1088         struct ub_packed_rrset_key* rrset)
1089 {
1090         struct rrset_ref ref;
1091         rrset = packed_rrset_copy_alloc(rrset, env->alloc, *env->now);
1092         if(!rrset) {
1093                 log_err("malloc failure in store_parentside_rrset");
1094                 return;
1095         }
1096         rrset->rk.flags |= PACKED_RRSET_PARENT_SIDE;
1097         rrset->entry.hash = rrset_key_hash(&rrset->rk);
1098         ref.key = rrset;
1099         ref.id = rrset->id;
1100         /* ignore ret: if it was in the cache, ref updated */
1101         (void)rrset_cache_update(env->rrset_cache, &ref, env->alloc, *env->now);
1102 }
1103
1104 /** fetch NS record from reply, if any */
1105 static struct ub_packed_rrset_key*
1106 reply_get_NS_rrset(struct reply_info* rep)
1107 {
1108         size_t i;
1109         for(i=0; i<rep->rrset_count; i++) {
1110                 if(rep->rrsets[i]->rk.type == htons(LDNS_RR_TYPE_NS)) {
1111                         return rep->rrsets[i];
1112                 }
1113         }
1114         return NULL;
1115 }
1116
1117 void
1118 iter_store_parentside_NS(struct module_env* env, struct reply_info* rep)
1119 {
1120         struct ub_packed_rrset_key* rrset = reply_get_NS_rrset(rep);
1121         if(rrset) {
1122                 log_rrset_key(VERB_ALGO, "store parent-side NS", rrset);
1123                 iter_store_parentside_rrset(env, rrset);
1124         }
1125 }
1126
1127 void iter_store_parentside_neg(struct module_env* env,
1128         struct query_info* qinfo, struct reply_info* rep)
1129 {
1130         /* TTL: NS from referral in iq->deleg_msg,
1131          *      or first RR from iq->response,
1132          *      or servfail5secs if !iq->response */
1133         time_t ttl = NORR_TTL;
1134         struct ub_packed_rrset_key* neg;
1135         struct packed_rrset_data* newd;
1136         if(rep) {
1137                 struct ub_packed_rrset_key* rrset = reply_get_NS_rrset(rep);
1138                 if(!rrset && rep->rrset_count != 0) rrset = rep->rrsets[0];
1139                 if(rrset) ttl = ub_packed_rrset_ttl(rrset);
1140         }
1141         /* create empty rrset to store */
1142         neg = (struct ub_packed_rrset_key*)regional_alloc(env->scratch,
1143                         sizeof(struct ub_packed_rrset_key));
1144         if(!neg) {
1145                 log_err("out of memory in store_parentside_neg");
1146                 return;
1147         }
1148         memset(&neg->entry, 0, sizeof(neg->entry));
1149         neg->entry.key = neg;
1150         neg->rk.type = htons(qinfo->qtype);
1151         neg->rk.rrset_class = htons(qinfo->qclass);
1152         neg->rk.flags = 0;
1153         neg->rk.dname = regional_alloc_init(env->scratch, qinfo->qname,
1154                 qinfo->qname_len);
1155         if(!neg->rk.dname) {
1156                 log_err("out of memory in store_parentside_neg");
1157                 return;
1158         }
1159         neg->rk.dname_len = qinfo->qname_len;
1160         neg->entry.hash = rrset_key_hash(&neg->rk);
1161         newd = (struct packed_rrset_data*)regional_alloc_zero(env->scratch,
1162                 sizeof(struct packed_rrset_data) + sizeof(size_t) +
1163                 sizeof(uint8_t*) + sizeof(time_t) + sizeof(uint16_t));
1164         if(!newd) {
1165                 log_err("out of memory in store_parentside_neg");
1166                 return;
1167         }
1168         neg->entry.data = newd;
1169         newd->ttl = ttl;
1170         /* entry must have one RR, otherwise not valid in cache.
1171          * put in one RR with empty rdata: those are ignored as nameserver */
1172         newd->count = 1;
1173         newd->rrsig_count = 0;
1174         newd->trust = rrset_trust_ans_noAA;
1175         newd->rr_len = (size_t*)((uint8_t*)newd +
1176                 sizeof(struct packed_rrset_data));
1177         newd->rr_len[0] = 0 /* zero len rdata */ + sizeof(uint16_t);
1178         packed_rrset_ptr_fixup(newd);
1179         newd->rr_ttl[0] = newd->ttl;
1180         sldns_write_uint16(newd->rr_data[0], 0 /* zero len rdata */);
1181         /* store it */
1182         log_rrset_key(VERB_ALGO, "store parent-side negative", neg);
1183         iter_store_parentside_rrset(env, neg);
1184 }
1185
1186 int
1187 iter_lookup_parent_NS_from_cache(struct module_env* env, struct delegpt* dp,
1188         struct regional* region, struct query_info* qinfo)
1189 {
1190         struct ub_packed_rrset_key* akey;
1191         akey = rrset_cache_lookup(env->rrset_cache, dp->name,
1192                 dp->namelen, LDNS_RR_TYPE_NS, qinfo->qclass,
1193                 PACKED_RRSET_PARENT_SIDE, *env->now, 0);
1194         if(akey) {
1195                 log_rrset_key(VERB_ALGO, "found parent-side NS in cache", akey);
1196                 dp->has_parent_side_NS = 1;
1197                 /* and mark the new names as lame */
1198                 if(!delegpt_rrset_add_ns(dp, region, akey, 1)) {
1199                         lock_rw_unlock(&akey->entry.lock);
1200                         return 0;
1201                 }
1202                 lock_rw_unlock(&akey->entry.lock);
1203         }
1204         return 1;
1205 }
1206
1207 int iter_lookup_parent_glue_from_cache(struct module_env* env,
1208         struct delegpt* dp, struct regional* region, struct query_info* qinfo)
1209 {
1210         struct ub_packed_rrset_key* akey;
1211         struct delegpt_ns* ns;
1212         size_t num = delegpt_count_targets(dp);
1213         for(ns = dp->nslist; ns; ns = ns->next) {
1214                 if(ns->cache_lookup_count > ITERATOR_NAME_CACHELOOKUP_MAX_PSIDE)
1215                         continue;
1216                 ns->cache_lookup_count++;
1217                 /* get cached parentside A */
1218                 akey = rrset_cache_lookup(env->rrset_cache, ns->name,
1219                         ns->namelen, LDNS_RR_TYPE_A, qinfo->qclass,
1220                         PACKED_RRSET_PARENT_SIDE, *env->now, 0);
1221                 if(akey) {
1222                         log_rrset_key(VERB_ALGO, "found parent-side", akey);
1223                         ns->done_pside4 = 1;
1224                         /* a negative-cache-element has no addresses it adds */
1225                         if(!delegpt_add_rrset_A(dp, region, akey, 1, NULL))
1226                                 log_err("malloc failure in lookup_parent_glue");
1227                         lock_rw_unlock(&akey->entry.lock);
1228                 }
1229                 /* get cached parentside AAAA */
1230                 akey = rrset_cache_lookup(env->rrset_cache, ns->name,
1231                         ns->namelen, LDNS_RR_TYPE_AAAA, qinfo->qclass,
1232                         PACKED_RRSET_PARENT_SIDE, *env->now, 0);
1233                 if(akey) {
1234                         log_rrset_key(VERB_ALGO, "found parent-side", akey);
1235                         ns->done_pside6 = 1;
1236                         /* a negative-cache-element has no addresses it adds */
1237                         if(!delegpt_add_rrset_AAAA(dp, region, akey, 1, NULL))
1238                                 log_err("malloc failure in lookup_parent_glue");
1239                         lock_rw_unlock(&akey->entry.lock);
1240                 }
1241         }
1242         /* see if new (but lame) addresses have become available */
1243         return delegpt_count_targets(dp) != num;
1244 }
1245
/**
 * Get the next root class from the hints and the forwards together.
 * Both hints_next_root and forwards_next_root are queried, starting
 * from the class in *c.
 * @param hints: root hints.
 * @param fwd: forward zones.
 * @param c: in: class to continue from; out: the class found, which is
 *      the smaller one when both lists deliver one. Unchanged when 0 is
 *      returned.
 * @return 0 if neither list has a next root, 1 if *c was set.
 */
int
iter_get_next_root(struct iter_hints* hints, struct iter_forwards* fwd,
        uint16_t* c)
{
        uint16_t hint_c = *c, fwd_c = *c;
        int have_hint = hints_next_root(hints, &hint_c);
        int have_fwd = forwards_next_root(fwd, &fwd_c);

        if(!have_hint && !have_fwd) /* end of both lists */
                return 0;
        /* the hint class wins when it is the only one or the smallest */
        if(have_hint && (!have_fwd || hint_c < fwd_c))
                *c = hint_c;
        else    *c = fwd_c;
        return 1;
}
1264
1265 void
1266 iter_scrub_ds(struct dns_msg* msg, struct ub_packed_rrset_key* ns, uint8_t* z)
1267 {
1268         /* Only the DS record for the delegation itself is expected.
1269          * We allow DS for everything between the bailiwick and the
1270          * zonecut, thus DS records must be at or above the zonecut.
1271          * And the DS records must be below the server authority zone.
1272          * The answer section is already scrubbed. */
1273         size_t i = msg->rep->an_numrrsets;
1274         while(i < (msg->rep->an_numrrsets + msg->rep->ns_numrrsets)) {
1275                 struct ub_packed_rrset_key* s = msg->rep->rrsets[i];
1276                 if(ntohs(s->rk.type) == LDNS_RR_TYPE_DS &&
1277                         (!ns || !dname_subdomain_c(ns->rk.dname, s->rk.dname)
1278                         || query_dname_compare(z, s->rk.dname) == 0)) {
1279                         log_nametypeclass(VERB_ALGO, "removing irrelevant DS",
1280                                 s->rk.dname, ntohs(s->rk.type),
1281                                 ntohs(s->rk.rrset_class));
1282                         memmove(msg->rep->rrsets+i, msg->rep->rrsets+i+1,
1283                                 sizeof(struct ub_packed_rrset_key*) *
1284                                 (msg->rep->rrset_count-i-1));
1285                         msg->rep->ns_numrrsets--;
1286                         msg->rep->rrset_count--;
1287                         /* stay at same i, but new record */
1288                         continue;
1289                 }
1290                 i++;
1291         }
1292 }
1293
1294 void
1295 iter_scrub_nxdomain(struct dns_msg* msg)
1296 {
1297         if(msg->rep->an_numrrsets == 0)
1298                 return;
1299
1300         memmove(msg->rep->rrsets, msg->rep->rrsets+msg->rep->an_numrrsets,
1301                 sizeof(struct ub_packed_rrset_key*) *
1302                 (msg->rep->rrset_count-msg->rep->an_numrrsets));
1303         msg->rep->rrset_count -= msg->rep->an_numrrsets;
1304         msg->rep->an_numrrsets = 0;
1305 }
1306
1307 void iter_dec_attempts(struct delegpt* dp, int d, int outbound_msg_retry)
1308 {
1309         struct delegpt_addr* a;
1310         for(a=dp->target_list; a; a = a->next_target) {
1311                 if(a->attempts >= outbound_msg_retry) {
1312                         /* add back to result list */
1313                         a->next_result = dp->result_list;
1314                         dp->result_list = a;
1315                 }
1316                 if(a->attempts > d)
1317                         a->attempts -= d;
1318                 else a->attempts = 0;
1319         }
1320 }
1321
1322 void iter_merge_retry_counts(struct delegpt* dp, struct delegpt* old,
1323         int outbound_msg_retry)
1324 {
1325         struct delegpt_addr* a, *o, *prev;
1326         for(a=dp->target_list; a; a = a->next_target) {
1327                 o = delegpt_find_addr(old, &a->addr, a->addrlen);
1328                 if(o) {
1329                         log_addr(VERB_ALGO, "copy attempt count previous dp",
1330                                 &a->addr, a->addrlen);
1331                         a->attempts = o->attempts;
1332                 }
1333         }
1334         prev = NULL;
1335         a = dp->usable_list;
1336         while(a) {
1337                 if(a->attempts >= outbound_msg_retry) {
1338                         log_addr(VERB_ALGO, "remove from usable list dp",
1339                                 &a->addr, a->addrlen);
1340                         /* remove from result list */
1341                         if(prev)
1342                                 prev->next_usable = a->next_usable;
1343                         else    dp->usable_list = a->next_usable;
1344                         /* prev stays the same */
1345                         a = a->next_usable;
1346                         continue;
1347                 }
1348                 prev = a;
1349                 a = a->next_usable;
1350         }
1351 }
1352
1353 int
1354 iter_ds_toolow(struct dns_msg* msg, struct delegpt* dp)
1355 {
1356         /* if for query example.com, there is example.com SOA or a subdomain
1357          * of example.com, then we are too low and need to fetch NS. */
1358         size_t i;
1359         /* if we have a DNAME or CNAME we are probably wrong */
1360         /* if we have a qtype DS in the answer section, its fine */
1361         for(i=0; i < msg->rep->an_numrrsets; i++) {
1362                 struct ub_packed_rrset_key* s = msg->rep->rrsets[i];
1363                 if(ntohs(s->rk.type) == LDNS_RR_TYPE_DNAME ||
1364                         ntohs(s->rk.type) == LDNS_RR_TYPE_CNAME) {
1365                         /* not the right answer, maybe too low, check the
1366                          * RRSIG signer name (if there is any) for a hint
1367                          * that it is from the dp zone anyway */
1368                         uint8_t* sname;
1369                         size_t slen;
1370                         val_find_rrset_signer(s, &sname, &slen);
1371                         if(sname && query_dname_compare(dp->name, sname)==0)
1372                                 return 0; /* it is fine, from the right dp */
1373                         return 1;
1374                 }
1375                 if(ntohs(s->rk.type) == LDNS_RR_TYPE_DS)
1376                         return 0; /* fine, we have a DS record */
1377         }
1378         for(i=msg->rep->an_numrrsets;
1379                 i < msg->rep->an_numrrsets + msg->rep->ns_numrrsets; i++) {
1380                 struct ub_packed_rrset_key* s = msg->rep->rrsets[i];
1381                 if(ntohs(s->rk.type) == LDNS_RR_TYPE_SOA) {
1382                         if(dname_subdomain_c(s->rk.dname, msg->qinfo.qname))
1383                                 return 1; /* point is too low */
1384                         if(query_dname_compare(s->rk.dname, dp->name)==0)
1385                                 return 0; /* right dp */
1386                 }
1387                 if(ntohs(s->rk.type) == LDNS_RR_TYPE_NSEC ||
1388                         ntohs(s->rk.type) == LDNS_RR_TYPE_NSEC3) {
1389                         uint8_t* sname;
1390                         size_t slen;
1391                         val_find_rrset_signer(s, &sname, &slen);
1392                         if(sname && query_dname_compare(dp->name, sname)==0)
1393                                 return 0; /* it is fine, from the right dp */
1394                         return 1;
1395                 }
1396         }
1397         /* we do not know */
1398         return 1;
1399 }
1400
1401 int iter_dp_cangodown(struct query_info* qinfo, struct delegpt* dp)
1402 {
1403         /* no delegation point, do not see how we can go down,
1404          * robust check, it should really exist */
1405         if(!dp) return 0;
1406
1407         /* see if dp equals the qname, then we cannot go down further */
1408         if(query_dname_compare(qinfo->qname, dp->name) == 0)
1409                 return 0;
1410         /* if dp is one label above the name we also cannot go down further */
1411         if(dname_count_labels(qinfo->qname) == dp->namelabs+1)
1412                 return 0;
1413         return 1;
1414 }
1415
1416 int
1417 iter_stub_fwd_no_cache(struct module_qstate *qstate, struct query_info *qinf,
1418         uint8_t** retdpname, size_t* retdpnamelen)
1419 {
1420         struct iter_hints_stub *stub;
1421         struct delegpt *dp;
1422
1423         /* Check for stub. */
1424         stub = hints_lookup_stub(qstate->env->hints, qinf->qname,
1425             qinf->qclass, NULL);
1426         dp = forwards_lookup(qstate->env->fwds, qinf->qname, qinf->qclass);
1427
1428         /* see if forward or stub is more pertinent */
1429         if(stub && stub->dp && dp) {
1430                 if(dname_strict_subdomain(dp->name, dp->namelabs,
1431                         stub->dp->name, stub->dp->namelabs)) {
1432                         stub = NULL; /* ignore stub, forward is lower */
1433                 } else {
1434                         dp = NULL; /* ignore forward, stub is lower */
1435                 }
1436         }
1437
1438         /* check stub */
1439         if (stub != NULL && stub->dp != NULL) {
1440                 if(stub->dp->no_cache) {
1441                         char qname[255+1];
1442                         char dpname[255+1];
1443                         dname_str(qinf->qname, qname);
1444                         dname_str(stub->dp->name, dpname);
1445                         verbose(VERB_ALGO, "stub for %s %s has no_cache", qname, dpname);
1446                 }
1447                 if(retdpname) {
1448                         *retdpname = stub->dp->name;
1449                         *retdpnamelen = stub->dp->namelen;
1450                 }
1451                 return (stub->dp->no_cache);
1452         }
1453
1454         /* Check for forward. */
1455         if (dp) {
1456                 if(dp->no_cache) {
1457                         char qname[255+1];
1458                         char dpname[255+1];
1459                         dname_str(qinf->qname, qname);
1460                         dname_str(dp->name, dpname);
1461                         verbose(VERB_ALGO, "forward for %s %s has no_cache", qname, dpname);
1462                 }
1463                 if(retdpname) {
1464                         *retdpname = dp->name;
1465                         *retdpnamelen = dp->namelen;
1466                 }
1467                 return (dp->no_cache);
1468         }
1469         if(retdpname) {
1470                 *retdpname = NULL;
1471                 *retdpnamelen = 0;
1472         }
1473         return 0;
1474 }
1475
1476 void iterator_set_ip46_support(struct module_stack* mods,
1477         struct module_env* env, struct outside_network* outnet)
1478 {
1479         int m = modstack_find(mods, "iterator");
1480         struct iter_env* ie = NULL;
1481         if(m == -1)
1482                 return;
1483         ie = (struct iter_env*)env->modinfo[m];
1484         if(outnet->pending == NULL)
1485                 return; /* we are in testbound, no rbtree for UDP */
1486         if(outnet->num_ip4 == 0)
1487                 ie->supports_ipv4 = 0;
1488         if(outnet->num_ip6 == 0)
1489                 ie->supports_ipv6 = 0;
1490 }