]> CyberLeo.Net >> Repos - FreeBSD/releng/7.2.git/blob - contrib/bind9/lib/dns/resolver.c
Fix BIND named(8) cache poisoning with DNSSEC validation.
[FreeBSD/releng/7.2.git] / contrib / bind9 / lib / dns / resolver.c
1 /*
2  * Copyright (C) 2004-2008  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: resolver.c,v 1.284.18.79 2008/10/17 22:02:13 jinmei Exp $ */
19
20 /*! \file */
21
22 #include <config.h>
23
24 #include <isc/print.h>
25 #include <isc/string.h>
26 #include <isc/task.h>
27 #include <isc/timer.h>
28 #include <isc/util.h>
29
30 #include <dns/acl.h>
31 #include <dns/adb.h>
32 #include <dns/cache.h>
33 #include <dns/db.h>
34 #include <dns/dispatch.h>
35 #include <dns/ds.h>
36 #include <dns/events.h>
37 #include <dns/forward.h>
38 #include <dns/keytable.h>
39 #include <dns/log.h>
40 #include <dns/message.h>
41 #include <dns/ncache.h>
42 #include <dns/opcode.h>
43 #include <dns/peer.h>
44 #include <dns/rbt.h>
45 #include <dns/rcode.h>
46 #include <dns/rdata.h>
47 #include <dns/rdataclass.h>
48 #include <dns/rdatalist.h>
49 #include <dns/rdataset.h>
50 #include <dns/rdatastruct.h>
51 #include <dns/rdatatype.h>
52 #include <dns/resolver.h>
53 #include <dns/result.h>
54 #include <dns/rootns.h>
55 #include <dns/tsig.h>
56 #include <dns/validator.h>
57
/*
 * Resolver trace logging.
 *
 * With DNS_RESOLVER_TRACE defined, each macro below expands to an
 * isc_log_write() call at ISC_LOG_DEBUG(3) in the resolver category and
 * module.  Most of them pick up a variable from the caller's scope
 * instead of taking it as an argument:
 *
 *	RTRACE			uses 'res'
 *	FCTXTRACE, FCTXTRACE2	use 'fctx'
 *	FTRACE			uses 'fetch'
 *	QTRACE			uses 'query'
 *
 * Without DNS_RESOLVER_TRACE every macro, FCTXTRACE2 included, expands
 * to nothing so that callers still compile.
 */
#define DNS_RESOLVER_TRACE
#ifdef DNS_RESOLVER_TRACE
#define RTRACE(m)	isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "res %p: %s", res, (m))
#define RRTRACE(r, m)	isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "res %p: %s", (r), (m))
#define FCTXTRACE(m)	isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fctx %p(%s): %s", fctx, fctx->info, (m))
#define FCTXTRACE2(m1, m2) \
			isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fctx %p(%s): %s %s", \
				      fctx, fctx->info, (m1), (m2))
#define FTRACE(m)	isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fetch %p (fctx %p(%s)): %s", \
				      fetch, fetch->private, \
				      fetch->private->info, (m))
#define QTRACE(m)	isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "resquery %p (fctx %p(%s)): %s", \
				      query, query->fctx, \
				      query->fctx->info, (m))
#else
#define RTRACE(m)
#define RRTRACE(r, m)
#define FCTXTRACE(m)
#define FCTXTRACE2(m1, m2)
#define FTRACE(m)
#define QTRACE(m)
#endif
103
/*%
 * Largest EDNS0 packet, in bytes, that we are prepared to receive.
 */
#define RECV_BUFFER_SIZE	4096	/* XXXRTH  Constant. */

/*%
 * How many timeouts are tolerated on a query before EDNS0 is
 * switched off for it.
 */
#define MAX_EDNS0_TIMEOUTS	3
114
115 typedef struct fetchctx fetchctx_t;
116
117 typedef struct query {
118         /* Locked by task event serialization. */
119         unsigned int                    magic;
120         fetchctx_t *                    fctx;
121         isc_mem_t *                     mctx;
122         dns_dispatchmgr_t *             dispatchmgr;
123         dns_dispatch_t *                dispatch;
124         isc_boolean_t                   exclusivesocket;
125         dns_adbaddrinfo_t *             addrinfo;
126         isc_socket_t *                  tcpsocket;
127         isc_time_t                      start;
128         dns_messageid_t                 id;
129         dns_dispentry_t *               dispentry;
130         ISC_LINK(struct query)          link;
131         isc_buffer_t                    buffer;
132         isc_buffer_t                    *tsig;
133         dns_tsigkey_t                   *tsigkey;
134         unsigned int                    options;
135         unsigned int                    attributes;
136         unsigned int                    sends;
137         unsigned int                    connects;
138         unsigned char                   data[512];
139 } resquery_t;
140
/* Magic number identifying a live resquery_t. */
#define QUERY_MAGIC		ISC_MAGIC('Q', '!', '!', '!')
#define VALID_QUERY(query)	ISC_MAGIC_VALID(query, QUERY_MAGIC)

/* Bit in resquery_t.attributes: the query has been cancelled. */
#define RESQUERY_ATTR_CANCELED	0x02

/* Predicates on the state of a resquery_t. */
#define RESQUERY_CONNECTING(q)	((q)->connects > 0)
#define RESQUERY_CANCELED(q) \
	(((q)->attributes & RESQUERY_ATTR_CANCELED) != 0)
#define RESQUERY_SENDING(q)	((q)->sends > 0)
150
/*% Lifecycle of a fetch context. */
typedef enum {
	fetchstate_init = 0,	/*%< The start event has not yet run. */
	fetchstate_active,
	fetchstate_done		/*%< FETCHDONE events have been posted. */
} fetchstate;
156
157 struct fetchctx {
158         /*% Not locked. */
159         unsigned int                    magic;
160         dns_resolver_t *                res;
161         dns_name_t                      name;
162         dns_rdatatype_t                 type;
163         unsigned int                    options;
164         unsigned int                    bucketnum;
165         char *                          info;
166         /*% Locked by appropriate bucket lock. */
167         fetchstate                      state;
168         isc_boolean_t                   want_shutdown;
169         isc_boolean_t                   cloned;
170         isc_boolean_t                   spilled;
171         unsigned int                    references;
172         isc_event_t                     control_event;
173         ISC_LINK(struct fetchctx)       link;
174         ISC_LIST(dns_fetchevent_t)      events;
175         /*% Locked by task event serialization. */
176         dns_name_t                      domain;
177         dns_rdataset_t                  nameservers;
178         unsigned int                    attributes;
179         isc_timer_t *                   timer;
180         isc_time_t                      expires;
181         isc_interval_t                  interval;
182         dns_message_t *                 qmessage;
183         dns_message_t *                 rmessage;
184         ISC_LIST(resquery_t)            queries;
185         dns_adbfindlist_t               finds;
186         dns_adbfind_t *                 find;
187         dns_adbfindlist_t               altfinds;
188         dns_adbfind_t *                 altfind;
189         dns_adbaddrinfolist_t           forwaddrs;
190         dns_adbaddrinfolist_t           altaddrs;
191         isc_sockaddrlist_t              forwarders;
192         dns_fwdpolicy_t                 fwdpolicy;
193         isc_sockaddrlist_t              bad;
194         isc_sockaddrlist_t              edns;
195         isc_sockaddrlist_t              edns512;
196         dns_validator_t                 *validator;
197         ISC_LIST(dns_validator_t)       validators;
198         dns_db_t *                      cache;
199         dns_adb_t *                     adb;
200
201         /*%
202          * The number of events we're waiting for.
203          */
204         unsigned int                    pending;
205
206         /*%
207          * The number of times we've "restarted" the current
208          * nameserver set.  This acts as a failsafe to prevent
209          * us from pounding constantly on a particular set of
210          * servers that, for whatever reason, are not giving
211          * us useful responses, but are responding in such a
212          * way that they are not marked "bad".
213          */
214         unsigned int                    restarts;
215
216         /*%
217          * The number of timeouts that have occurred since we
218          * last successfully received a response packet.  This
219          * is used for EDNS0 black hole detection.
220          */
221         unsigned int                    timeouts;
222         /*%
223          * Look aside state for DS lookups.
224          */
225         dns_name_t                      nsname;
226         dns_fetch_t *                   nsfetch;
227         dns_rdataset_t                  nsrrset;
228
229         /*%
230          * Number of queries that reference this context.
231          */
232         unsigned int                    nqueries;
233 };
234
/* Magic number identifying a live fetchctx_t. */
#define FCTX_MAGIC		ISC_MAGIC('F', '!', '!', '!')
#define VALID_FCTX(fctx)	ISC_MAGIC_VALID(fctx, FCTX_MAGIC)

/* Bits stored in fetchctx_t.attributes. */
#define FCTX_ATTR_HAVEANSWER	0x0001
#define FCTX_ATTR_GLUING	0x0002
#define FCTX_ATTR_ADDRWAIT	0x0004
#define FCTX_ATTR_SHUTTINGDOWN	0x0008
#define FCTX_ATTR_WANTCACHE	0x0010
#define FCTX_ATTR_WANTNCACHE	0x0020
#define FCTX_ATTR_NEEDEDNS0	0x0040
#define FCTX_ATTR_TRIEDFIND	0x0080
#define FCTX_ATTR_TRIEDALT	0x0100

/* True when attribute bit 'bit' is set on fetch context 'f'. */
#define FCTX_HASATTR(f, bit)	(((f)->attributes & (bit)) != 0)

/* One predicate per attribute bit. */
#define HAVE_ANSWER(f)		FCTX_HASATTR(f, FCTX_ATTR_HAVEANSWER)
#define GLUING(f)		FCTX_HASATTR(f, FCTX_ATTR_GLUING)
#define ADDRWAIT(f)		FCTX_HASATTR(f, FCTX_ATTR_ADDRWAIT)
#define SHUTTINGDOWN(f)		FCTX_HASATTR(f, FCTX_ATTR_SHUTTINGDOWN)
#define WANTCACHE(f)		FCTX_HASATTR(f, FCTX_ATTR_WANTCACHE)
#define WANTNCACHE(f)		FCTX_HASATTR(f, FCTX_ATTR_WANTNCACHE)
#define NEEDEDNS0(f)		FCTX_HASATTR(f, FCTX_ATTR_NEEDEDNS0)
#define TRIEDFIND(f)		FCTX_HASATTR(f, FCTX_ATTR_TRIEDFIND)
#define TRIEDALT(f)		FCTX_HASATTR(f, FCTX_ATTR_TRIEDALT)
261
262 typedef struct {
263         dns_adbaddrinfo_t *             addrinfo;
264         fetchctx_t *                    fctx;
265 } dns_valarg_t;
266
267 struct dns_fetch {
268         unsigned int                    magic;
269         fetchctx_t *                    private;
270 };
271
272 #define DNS_FETCH_MAGIC                 ISC_MAGIC('F', 't', 'c', 'h')
273 #define DNS_FETCH_VALID(fetch)          ISC_MAGIC_VALID(fetch, DNS_FETCH_MAGIC)
274
275 typedef struct fctxbucket {
276         isc_task_t *                    task;
277         isc_mutex_t                     lock;
278         ISC_LIST(fetchctx_t)            fctxs;
279         isc_boolean_t                   exiting;
280         isc_mem_t *                     mctx;
281 } fctxbucket_t;
282
283 typedef struct alternate {
284         isc_boolean_t                   isaddress;
285         union   {
286                 isc_sockaddr_t          addr;
287                 struct {
288                         dns_name_t      name;
289                         in_port_t       port;
290                 } _n;
291         } _u;
292         ISC_LINK(struct alternate)      link;
293 } alternate_t;
294
295 struct dns_resolver {
296         /* Unlocked. */
297         unsigned int                    magic;
298         isc_mem_t *                     mctx;
299         isc_mutex_t                     lock;
300         isc_mutex_t                     nlock;
301         isc_mutex_t                     primelock;
302         dns_rdataclass_t                rdclass;
303         isc_socketmgr_t *               socketmgr;
304         isc_timermgr_t *                timermgr;
305         isc_taskmgr_t *                 taskmgr;
306         dns_view_t *                    view;
307         isc_boolean_t                   frozen;
308         unsigned int                    options;
309         dns_dispatchmgr_t *             dispatchmgr;
310         dns_dispatch_t *                dispatchv4;
311         isc_boolean_t                   exclusivev4;
312         dns_dispatch_t *                dispatchv6;
313         isc_boolean_t                   exclusivev6;
314         unsigned int                    nbuckets;
315         fctxbucket_t *                  buckets;
316         isc_uint32_t                    lame_ttl;
317         ISC_LIST(alternate_t)           alternates;
318         isc_uint16_t                    udpsize;
319 #if USE_ALGLOCK
320         isc_rwlock_t                    alglock;
321 #endif
322         dns_rbt_t *                     algorithms;
323 #if USE_MBSLOCK
324         isc_rwlock_t                    mbslock;
325 #endif
326         dns_rbt_t *                     mustbesecure;
327         unsigned int                    spillatmax;
328         unsigned int                    spillatmin;
329         isc_timer_t *                   spillattimer;
330         isc_boolean_t                   zero_no_soa_ttl;
331         /* Locked by lock. */
332         unsigned int                    references;
333         isc_boolean_t                   exiting;
334         isc_eventlist_t                 whenshutdown;
335         unsigned int                    activebuckets;
336         isc_boolean_t                   priming;
337         unsigned int                    spillat;        /* clients-per-query */
338         /* Locked by primelock. */
339         dns_fetch_t *                   primefetch;
340         /* Locked by nlock. */
341         unsigned int                    nfctx;
342 };
343
344 #define RES_MAGIC                       ISC_MAGIC('R', 'e', 's', '!')
345 #define VALID_RESOLVER(res)             ISC_MAGIC_VALID(res, RES_MAGIC)
346
/*%
 * Addrinfo flags private to the resolver.  DNS_FETCHOPT_NOEDNS0 is also
 * stored as an addrinfo flag, so these values must not collide with it.
 */
#define FCTX_ADDRINFO_MARK	0x0001
#define FCTX_ADDRINFO_FORWARDER	0x1000

/* Flag tests on a dns_adbaddrinfo_t. */
#define UNMARKED(a)	(((a)->flags & FCTX_ADDRINFO_MARK) == 0)
#define ISFORWARDER(a)	(((a)->flags & FCTX_ADDRINFO_FORWARDER) != 0)

/* True when rdataset 'r' carries the NXDOMAIN attribute. */
#define NXDOMAIN(r) (((r)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
359
/*
 * Move a node reference from *b to *c and clear *b.  Argument 'a' is
 * accepted for call-site symmetry but never evaluated.  'b' and 'c' are
 * parenthesized so that expressions such as 'array + 1' expand
 * correctly.
 */
#define dns_db_transfernode(a, b, c) \
	do { (*(c)) = (*(b)); (*(b)) = NULL; } while (0)
361
362 static void destroy(dns_resolver_t *res);
363 static void empty_bucket(dns_resolver_t *res);
364 static isc_result_t resquery_send(resquery_t *query);
365 static void resquery_response(isc_task_t *task, isc_event_t *event);
366 static void resquery_connected(isc_task_t *task, isc_event_t *event);
367 static void fctx_try(fetchctx_t *fctx);
368 static isc_boolean_t fctx_destroy(fetchctx_t *fctx);
369 static isc_result_t ncache_adderesult(dns_message_t *message,
370                                       dns_db_t *cache, dns_dbnode_t *node,
371                                       dns_rdatatype_t covers,
372                                       isc_stdtime_t now, dns_ttl_t maxttl,
373                                       dns_rdataset_t *ardataset,
374                                       isc_result_t *eresultp);
375 static void validated(isc_task_t *task, isc_event_t *event);
376 static void maybe_destroy(fetchctx_t *fctx);
377 static void add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
378                     isc_result_t reason);
379
380 static isc_result_t
381 valcreate(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, dns_name_t *name,
382           dns_rdatatype_t type, dns_rdataset_t *rdataset,
383           dns_rdataset_t *sigrdataset, unsigned int valoptions,
384           isc_task_t *task)
385 {
386         dns_validator_t *validator = NULL;
387         dns_valarg_t *valarg;
388         isc_result_t result;
389
390         valarg = isc_mem_get(fctx->res->buckets[fctx->bucketnum].mctx,
391                              sizeof(*valarg));
392         if (valarg == NULL)
393                 return (ISC_R_NOMEMORY);
394
395         valarg->fctx = fctx;
396         valarg->addrinfo = addrinfo;
397
398         if (!ISC_LIST_EMPTY(fctx->validators))
399                 INSIST((valoptions & DNS_VALIDATOR_DEFER) != 0);
400
401         result = dns_validator_create(fctx->res->view, name, type, rdataset,
402                                       sigrdataset, fctx->rmessage,
403                                       valoptions, task, validated, valarg,
404                                       &validator);
405         if (result == ISC_R_SUCCESS) {
406                 if ((valoptions & DNS_VALIDATOR_DEFER) == 0) {
407                         INSIST(fctx->validator == NULL);
408                         fctx->validator  = validator;
409                 }
410                 ISC_LIST_APPEND(fctx->validators, validator, link);
411         } else
412                 isc_mem_put(fctx->res->buckets[fctx->bucketnum].mctx,
413                             valarg, sizeof(*valarg));
414         return (result);
415 }
416
417 static isc_boolean_t
418 fix_mustbedelegationornxdomain(dns_message_t *message, fetchctx_t *fctx) {
419         dns_name_t *name;
420         dns_name_t *domain = &fctx->domain;
421         dns_rdataset_t *rdataset;
422         dns_rdatatype_t type;
423         isc_result_t result;
424         isc_boolean_t keep_auth = ISC_FALSE;
425
426         if (message->rcode == dns_rcode_nxdomain)
427                 return (ISC_FALSE);
428
429         /*
430          * Look for BIND 8 style delegations.
431          * Also look for answers to ANY queries where the duplicate NS RRset
432          * may have been stripped from the authority section.
433          */
434         if (message->counts[DNS_SECTION_ANSWER] != 0 &&
435             (fctx->type == dns_rdatatype_ns ||
436              fctx->type == dns_rdatatype_any)) {
437                 result = dns_message_firstname(message, DNS_SECTION_ANSWER);
438                 while (result == ISC_R_SUCCESS) {
439                         name = NULL;
440                         dns_message_currentname(message, DNS_SECTION_ANSWER,
441                                                 &name);
442                         for (rdataset = ISC_LIST_HEAD(name->list);
443                              rdataset != NULL;
444                              rdataset = ISC_LIST_NEXT(rdataset, link)) {
445                                 type = rdataset->type;
446                                 if (type != dns_rdatatype_ns)
447                                         continue;
448                                 if (dns_name_issubdomain(name, domain))
449                                         return (ISC_FALSE);
450                         }
451                         result = dns_message_nextname(message,
452                                                       DNS_SECTION_ANSWER);
453                 }
454         }
455
456         /* Look for referral. */
457         if (message->counts[DNS_SECTION_AUTHORITY] == 0)
458                 goto munge;
459
460         result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
461         while (result == ISC_R_SUCCESS) {
462                 name = NULL;
463                 dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
464                 for (rdataset = ISC_LIST_HEAD(name->list);
465                      rdataset != NULL;
466                      rdataset = ISC_LIST_NEXT(rdataset, link)) {
467                         type = rdataset->type;
468                         if (type == dns_rdatatype_soa &&
469                             dns_name_equal(name, domain))
470                                 keep_auth = ISC_TRUE;
471                         if (type != dns_rdatatype_ns &&
472                             type != dns_rdatatype_soa)
473                                 continue;
474                         if (dns_name_equal(name, domain))
475                                 goto munge;
476                         if (dns_name_issubdomain(name, domain))
477                                 return (ISC_FALSE);
478                 }
479                 result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
480         }
481
482  munge:
483         message->rcode = dns_rcode_nxdomain;
484         message->counts[DNS_SECTION_ANSWER] = 0;
485         if (!keep_auth)
486                 message->counts[DNS_SECTION_AUTHORITY] = 0;
487         message->counts[DNS_SECTION_ADDITIONAL] = 0;
488         return (ISC_TRUE);
489 }
490
491 static inline isc_result_t
492 fctx_starttimer(fetchctx_t *fctx) {
493         /*
494          * Start the lifetime timer for fctx.
495          *
496          * This is also used for stopping the idle timer; in that
497          * case we must purge events already posted to ensure that
498          * no further idle events are delivered.
499          */
500         return (isc_timer_reset(fctx->timer, isc_timertype_once,
501                                 &fctx->expires, NULL, ISC_TRUE));
502 }
503
504 static inline void
505 fctx_stoptimer(fetchctx_t *fctx) {
506         isc_result_t result;
507
508         /*
509          * We don't return a result if resetting the timer to inactive fails
510          * since there's nothing to be done about it.  Resetting to inactive
511          * should never fail anyway, since the code as currently written
512          * cannot fail in that case.
513          */
514         result = isc_timer_reset(fctx->timer, isc_timertype_inactive,
515                                   NULL, NULL, ISC_TRUE);
516         if (result != ISC_R_SUCCESS) {
517                 UNEXPECTED_ERROR(__FILE__, __LINE__,
518                                  "isc_timer_reset(): %s",
519                                  isc_result_totext(result));
520         }
521 }
522
523
524 static inline isc_result_t
525 fctx_startidletimer(fetchctx_t *fctx) {
526         /*
527          * Start the idle timer for fctx.  The lifetime timer continues
528          * to be in effect.
529          */
530         return (isc_timer_reset(fctx->timer, isc_timertype_once,
531                                 &fctx->expires, &fctx->interval,
532                                 ISC_FALSE));
533 }
534
535 /*
536  * Stopping the idle timer is equivalent to calling fctx_starttimer(), but
537  * we use fctx_stopidletimer for readability in the code below.
538  */
539 #define fctx_stopidletimer      fctx_starttimer
540
541
542 static inline void
543 resquery_destroy(resquery_t **queryp) {
544         resquery_t *query;
545
546         REQUIRE(queryp != NULL);
547         query = *queryp;
548         REQUIRE(!ISC_LINK_LINKED(query, link));
549
550         INSIST(query->tcpsocket == NULL);
551
552         query->fctx->nqueries--;
553         if (SHUTTINGDOWN(query->fctx))
554                 maybe_destroy(query->fctx);     /* Locks bucket. */
555         query->magic = 0;
556         isc_mem_put(query->mctx, query, sizeof(*query));
557         *queryp = NULL;
558 }
559
560 static void
561 fctx_cancelquery(resquery_t **queryp, dns_dispatchevent_t **deventp,
562                  isc_time_t *finish, isc_boolean_t no_response)
563 {
564         fetchctx_t *fctx;
565         resquery_t *query;
566         unsigned int rtt;
567         unsigned int factor;
568         dns_adbfind_t *find;
569         dns_adbaddrinfo_t *addrinfo;
570         isc_socket_t *socket;
571
572         query = *queryp;
573         fctx = query->fctx;
574
575         FCTXTRACE("cancelquery");
576
577         REQUIRE(!RESQUERY_CANCELED(query));
578
579         query->attributes |= RESQUERY_ATTR_CANCELED;
580
581         /*
582          * Should we update the RTT?
583          */
584         if (finish != NULL || no_response) {
585                 if (finish != NULL) {
586                         /*
587                          * We have both the start and finish times for this
588                          * packet, so we can compute a real RTT.
589                          */
590                         rtt = (unsigned int)isc_time_microdiff(finish,
591                                                                &query->start);
592                         factor = DNS_ADB_RTTADJDEFAULT;
593                 } else {
594                         /*
595                          * We don't have an RTT for this query.  Maybe the
596                          * packet was lost, or maybe this server is very
597                          * slow.  We don't know.  Increase the RTT.
598                          */
599                         INSIST(no_response);
600                         rtt = query->addrinfo->srtt + 200000;
601                         if (rtt > 10000000)
602                                 rtt = 10000000;
603                         /*
604                          * Replace the current RTT with our value.
605                          */
606                         factor = DNS_ADB_RTTADJREPLACE;
607                 }
608                 dns_adb_adjustsrtt(fctx->adb, query->addrinfo, rtt, factor);
609         }
610
611         /*
612          * Age RTTs of servers not tried.
613          */
614         factor = DNS_ADB_RTTADJAGE;
615         if (finish != NULL)
616                 for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
617                      addrinfo != NULL;
618                      addrinfo = ISC_LIST_NEXT(addrinfo, publink))
619                         if (UNMARKED(addrinfo))
620                                 dns_adb_adjustsrtt(fctx->adb, addrinfo,
621                                                    0, factor);
622
623         if (finish != NULL && TRIEDFIND(fctx))
624                 for (find = ISC_LIST_HEAD(fctx->finds);
625                      find != NULL;
626                      find = ISC_LIST_NEXT(find, publink))
627                         for (addrinfo = ISC_LIST_HEAD(find->list);
628                              addrinfo != NULL;
629                              addrinfo = ISC_LIST_NEXT(addrinfo, publink))
630                                 if (UNMARKED(addrinfo))
631                                         dns_adb_adjustsrtt(fctx->adb, addrinfo,
632                                                            0, factor);
633
634         if (finish != NULL && TRIEDALT(fctx)) {
635                 for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
636                      addrinfo != NULL;
637                      addrinfo = ISC_LIST_NEXT(addrinfo, publink))
638                         if (UNMARKED(addrinfo))
639                                 dns_adb_adjustsrtt(fctx->adb, addrinfo,
640                                                    0, factor);
641                 for (find = ISC_LIST_HEAD(fctx->altfinds);
642                      find != NULL;
643                      find = ISC_LIST_NEXT(find, publink))
644                         for (addrinfo = ISC_LIST_HEAD(find->list);
645                              addrinfo != NULL;
646                              addrinfo = ISC_LIST_NEXT(addrinfo, publink))
647                                 if (UNMARKED(addrinfo))
648                                         dns_adb_adjustsrtt(fctx->adb, addrinfo,
649                                                            0, factor);
650         }
651
652         /*
653          * Check for any outstanding socket events.  If they exist, cancel
654          * them and let the event handlers finish the cleanup.  The resolver
655          * only needs to worry about managing the connect and send events;
656          * the dispatcher manages the recv events.
657          */
658         if (RESQUERY_CONNECTING(query)) {
659                 /*
660                  * Cancel the connect.
661                  */
662                 if (query->tcpsocket != NULL) {
663                         isc_socket_cancel(query->tcpsocket, NULL,
664                                           ISC_SOCKCANCEL_CONNECT);
665                 } else if (query->dispentry != NULL) {
666                         INSIST(query->exclusivesocket);
667                         socket = dns_dispatch_getentrysocket(query->dispentry);
668                         if (socket != NULL)
669                                 isc_socket_cancel(socket, NULL,
670                                                   ISC_SOCKCANCEL_CONNECT);
671                 }
672         } else if (RESQUERY_SENDING(query)) {
673                 /*
674                  * Cancel the pending send.
675                  */
676                 if (query->exclusivesocket && query->dispentry != NULL)
677                         socket = dns_dispatch_getentrysocket(query->dispentry);
678                 else
679                         socket = dns_dispatch_getsocket(query->dispatch);
680                 if (socket != NULL)
681                         isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND);
682         }
683
684         if (query->dispentry != NULL)
685                 dns_dispatch_removeresponse(&query->dispentry, deventp);
686
687         ISC_LIST_UNLINK(fctx->queries, query, link);
688
689         if (query->tsig != NULL)
690                 isc_buffer_free(&query->tsig);
691
692         if (query->tsigkey != NULL)
693                 dns_tsigkey_detach(&query->tsigkey);
694
695         if (query->dispatch != NULL)
696                 dns_dispatch_detach(&query->dispatch);
697
698         if (! (RESQUERY_CONNECTING(query) || RESQUERY_SENDING(query)))
699                 /*
700                  * It's safe to destroy the query now.
701                  */
702                 resquery_destroy(&query);
703 }
704
705 static void
706 fctx_cancelqueries(fetchctx_t *fctx, isc_boolean_t no_response) {
707         resquery_t *query, *next_query;
708
709         FCTXTRACE("cancelqueries");
710
711         for (query = ISC_LIST_HEAD(fctx->queries);
712              query != NULL;
713              query = next_query) {
714                 next_query = ISC_LIST_NEXT(query, link);
715                 fctx_cancelquery(&query, NULL, NULL, no_response);
716         }
717 }
718
719 static void
720 fctx_cleanupfinds(fetchctx_t *fctx) {
721         dns_adbfind_t *find, *next_find;
722
723         REQUIRE(ISC_LIST_EMPTY(fctx->queries));
724
725         for (find = ISC_LIST_HEAD(fctx->finds);
726              find != NULL;
727              find = next_find) {
728                 next_find = ISC_LIST_NEXT(find, publink);
729                 ISC_LIST_UNLINK(fctx->finds, find, publink);
730                 dns_adb_destroyfind(&find);
731         }
732         fctx->find = NULL;
733 }
734
735 static void
736 fctx_cleanupaltfinds(fetchctx_t *fctx) {
737         dns_adbfind_t *find, *next_find;
738
739         REQUIRE(ISC_LIST_EMPTY(fctx->queries));
740
741         for (find = ISC_LIST_HEAD(fctx->altfinds);
742              find != NULL;
743              find = next_find) {
744                 next_find = ISC_LIST_NEXT(find, publink);
745                 ISC_LIST_UNLINK(fctx->altfinds, find, publink);
746                 dns_adb_destroyfind(&find);
747         }
748         fctx->altfind = NULL;
749 }
750
751 static void
752 fctx_cleanupforwaddrs(fetchctx_t *fctx) {
753         dns_adbaddrinfo_t *addr, *next_addr;
754
755         REQUIRE(ISC_LIST_EMPTY(fctx->queries));
756
757         for (addr = ISC_LIST_HEAD(fctx->forwaddrs);
758              addr != NULL;
759              addr = next_addr) {
760                 next_addr = ISC_LIST_NEXT(addr, publink);
761                 ISC_LIST_UNLINK(fctx->forwaddrs, addr, publink);
762                 dns_adb_freeaddrinfo(fctx->adb, &addr);
763         }
764 }
765
766 static void
767 fctx_cleanupaltaddrs(fetchctx_t *fctx) {
768         dns_adbaddrinfo_t *addr, *next_addr;
769
770         REQUIRE(ISC_LIST_EMPTY(fctx->queries));
771
772         for (addr = ISC_LIST_HEAD(fctx->altaddrs);
773              addr != NULL;
774              addr = next_addr) {
775                 next_addr = ISC_LIST_NEXT(addr, publink);
776                 ISC_LIST_UNLINK(fctx->altaddrs, addr, publink);
777                 dns_adb_freeaddrinfo(fctx->adb, &addr);
778         }
779 }
780
781 static inline void
782 fctx_stopeverything(fetchctx_t *fctx, isc_boolean_t no_response) {
783         FCTXTRACE("stopeverything");
784         fctx_cancelqueries(fctx, no_response);
785         fctx_cleanupfinds(fctx);
786         fctx_cleanupaltfinds(fctx);
787         fctx_cleanupforwaddrs(fctx);
788         fctx_cleanupaltaddrs(fctx);
789         fctx_stoptimer(fctx);
790 }
791
792 static inline void
793 fctx_sendevents(fetchctx_t *fctx, isc_result_t result) {
794         dns_fetchevent_t *event, *next_event;
795         isc_task_t *task;
796         unsigned int count = 0;
797         isc_interval_t i;
798         isc_boolean_t logit = ISC_FALSE;
799         unsigned int old_spillat;
800         unsigned int new_spillat = 0;   /* initialized to silence compiler warnings */
801
802         /*
803          * Caller must be holding the appropriate bucket lock.
804          */
805         REQUIRE(fctx->state == fetchstate_done);
806
807         FCTXTRACE("sendevents");
808
809         for (event = ISC_LIST_HEAD(fctx->events);
810              event != NULL;
811              event = next_event) {
812                 next_event = ISC_LIST_NEXT(event, ev_link);
813                 ISC_LIST_UNLINK(fctx->events, event, ev_link);
814                 task = event->ev_sender;
815                 event->ev_sender = fctx;
816                 if (!HAVE_ANSWER(fctx))
817                         event->result = result;
818
819                 INSIST(result != ISC_R_SUCCESS ||
820                        dns_rdataset_isassociated(event->rdataset) ||
821                        fctx->type == dns_rdatatype_any ||
822                        fctx->type == dns_rdatatype_rrsig ||
823                        fctx->type == dns_rdatatype_sig);
824
825                 /*
826                  * Negative results must be indicated in event->result.
827                  */
828                 if (dns_rdataset_isassociated(event->rdataset) &&
829                     event->rdataset->type == dns_rdatatype_none) {
830                         INSIST(event->result == DNS_R_NCACHENXDOMAIN ||
831                                event->result == DNS_R_NCACHENXRRSET);
832                 }
833
834                 isc_task_sendanddetach(&task, ISC_EVENT_PTR(&event));
835                 count++;
836         }
837
838         if ((fctx->attributes & FCTX_ATTR_HAVEANSWER) != 0 &&
839             fctx->spilled &&
840             (count < fctx->res->spillatmax || fctx->res->spillatmax == 0)) {
841                 LOCK(&fctx->res->lock);
842                 if (count == fctx->res->spillat && !fctx->res->exiting) {
843                         old_spillat = fctx->res->spillat;
844                         fctx->res->spillat += 5;
845                         if (fctx->res->spillat > fctx->res->spillatmax &&
846                             fctx->res->spillatmax != 0)
847                                 fctx->res->spillat = fctx->res->spillatmax;
848                         new_spillat = fctx->res->spillat;
849                         if (new_spillat != old_spillat) {
850                                 logit = ISC_TRUE;
851                         }
852                         isc_interval_set(&i, 20 * 60, 0);
853                         result = isc_timer_reset(fctx->res->spillattimer,
854                                                  isc_timertype_ticker, NULL,
855                                                  &i, ISC_TRUE);
856                         RUNTIME_CHECK(result == ISC_R_SUCCESS);
857                 }
858                 UNLOCK(&fctx->res->lock);
859                 if (logit)
860                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
861                                       DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
862                                       "clients-per-query increased to %u",
863                                       new_spillat);
864         }
865 }
866
867 static void
868 fctx_done(fetchctx_t *fctx, isc_result_t result) {
869         dns_resolver_t *res;
870         isc_boolean_t no_response;
871
872         FCTXTRACE("done");
873
874         res = fctx->res;
875
876         if (result == ISC_R_SUCCESS)
877                 no_response = ISC_TRUE;
878         else
879                 no_response = ISC_FALSE;
880         fctx_stopeverything(fctx, no_response);
881
882         LOCK(&res->buckets[fctx->bucketnum].lock);
883
884         fctx->state = fetchstate_done;
885         fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
886         fctx_sendevents(fctx, result);
887
888         UNLOCK(&res->buckets[fctx->bucketnum].lock);
889 }
890
891 static void
892 process_sendevent(resquery_t *query, isc_event_t *event) {
893         isc_socketevent_t *sevent = (isc_socketevent_t *)event;
894         isc_boolean_t retry = ISC_FALSE;
895         isc_result_t result;
896         fetchctx_t *fctx;
897
898         fctx = query->fctx;
899
900         if (RESQUERY_CANCELED(query)) {
901                 if (query->sends == 0 && query->connects == 0) {
902                         /*
903                          * This query was canceled while the
904                          * isc_socket_sendto/connect() was in progress.
905                          */
906                         if (query->tcpsocket != NULL)
907                                 isc_socket_detach(&query->tcpsocket);
908                         resquery_destroy(&query);
909                 }
910         } else {
911                 switch (sevent->result) {
912                 case ISC_R_SUCCESS:
913                         break;
914
915                 case ISC_R_HOSTUNREACH:
916                 case ISC_R_NETUNREACH:
917                 case ISC_R_NOPERM:
918                 case ISC_R_ADDRNOTAVAIL:
919                 case ISC_R_CONNREFUSED:
920
921                         /*
922                          * No route to remote.
923                          */
924                         add_bad(fctx, query->addrinfo, sevent->result);
925                         fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
926                         retry = ISC_TRUE;
927                         break;
928
929                 default:
930                         fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
931                         break;
932                 }
933         }
934
935         isc_event_free(&event);
936
937         if (retry) {
938                 /*
939                  * Behave as if the idle timer has expired.  For TCP
940                  * this may not actually reflect the latest timer.
941                  */
942                 fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
943                 result = fctx_stopidletimer(fctx);
944                 if (result != ISC_R_SUCCESS)
945                         fctx_done(fctx, result);
946                 else
947                         fctx_try(fctx);
948         }
949 }
950
951 static void
952 resquery_udpconnected(isc_task_t *task, isc_event_t *event) {
953         resquery_t *query = event->ev_arg;
954
955         REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
956
957         QTRACE("udpconnected");
958
959         UNUSED(task);
960
961         INSIST(RESQUERY_CONNECTING(query));
962
963         query->connects--;
964
965         process_sendevent(query, event);
966 }
967
968 static void
969 resquery_senddone(isc_task_t *task, isc_event_t *event) {
970         resquery_t *query = event->ev_arg;
971
972         REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE);
973
974         QTRACE("senddone");
975
976         /*
977          * XXXRTH
978          *
979          * Currently we don't wait for the senddone event before retrying
980          * a query.  This means that if we get really behind, we may end
981          * up doing extra work!
982          */
983
984         UNUSED(task);
985
986         INSIST(RESQUERY_SENDING(query));
987
988         query->sends--;
989
990         process_sendevent(query, event);
991 }
992
993 static inline isc_result_t
994 fctx_addopt(dns_message_t *message, unsigned int version, isc_uint16_t udpsize)
995 {
996         dns_rdataset_t *rdataset;
997         dns_rdatalist_t *rdatalist;
998         dns_rdata_t *rdata;
999         isc_result_t result;
1000
1001         rdatalist = NULL;
1002         result = dns_message_gettemprdatalist(message, &rdatalist);
1003         if (result != ISC_R_SUCCESS)
1004                 return (result);
1005         rdata = NULL;
1006         result = dns_message_gettemprdata(message, &rdata);
1007         if (result != ISC_R_SUCCESS)
1008                 return (result);
1009         rdataset = NULL;
1010         result = dns_message_gettemprdataset(message, &rdataset);
1011         if (result != ISC_R_SUCCESS)
1012                 return (result);
1013         dns_rdataset_init(rdataset);
1014
1015         rdatalist->type = dns_rdatatype_opt;
1016         rdatalist->covers = 0;
1017
1018         /*
1019          * Set Maximum UDP buffer size.
1020          */
1021         rdatalist->rdclass = udpsize;
1022
1023         /*
1024          * Set EXTENDED-RCODE and Z to 0, DO to 1.
1025          */
1026         rdatalist->ttl = (version << 16);
1027         rdatalist->ttl |= DNS_MESSAGEEXTFLAG_DO;
1028
1029         /*
1030          * No EDNS options.
1031          */
1032         rdata->data = NULL;
1033         rdata->length = 0;
1034         rdata->rdclass = rdatalist->rdclass;
1035         rdata->type = rdatalist->type;
1036         rdata->flags = 0;
1037
1038         ISC_LIST_INIT(rdatalist->rdata);
1039         ISC_LIST_APPEND(rdatalist->rdata, rdata, link);
1040         RUNTIME_CHECK(dns_rdatalist_tordataset(rdatalist, rdataset) == ISC_R_SUCCESS);
1041
1042         return (dns_message_setopt(message, rdataset));
1043 }
1044
1045 static inline void
1046 fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
1047         unsigned int seconds;
1048         unsigned int us;
1049
1050         /*
1051          * We retry every .5 seconds the first two times through the address
1052          * list, and then we do exponential back-off.
1053          */
1054         if (fctx->restarts < 3)
1055                 us = 800000;
1056         else
1057                 us = (800000 << (fctx->restarts - 2));
1058
1059         /*
1060          * Double the round-trip time.
1061          */
1062         rtt *= 2;
1063
1064         /*
1065          * Always wait for at least the doubled round-trip time.
1066          */
1067         if (us < rtt)
1068                 us = rtt;
1069
1070         /*
1071          * But don't ever wait for more than 10 seconds.
1072          */
1073         if (us > 10000000)
1074                 us = 10000000;
1075
1076         seconds = us / 1000000;
1077         us -= seconds * 1000000;
1078         isc_interval_set(&fctx->interval, seconds, us * 1000);
1079 }
1080
1081 static isc_result_t
1082 fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
1083            unsigned int options)
1084 {
1085         dns_resolver_t *res;
1086         isc_task_t *task;
1087         isc_result_t result;
1088         resquery_t *query;
1089         isc_sockaddr_t addr;
1090         isc_boolean_t have_addr = ISC_FALSE;
1091
1092         FCTXTRACE("query");
1093
1094         res = fctx->res;
1095         task = res->buckets[fctx->bucketnum].task;
1096
1097         fctx_setretryinterval(fctx, addrinfo->srtt);
1098         result = fctx_startidletimer(fctx);
1099         if (result != ISC_R_SUCCESS)
1100                 return (result);
1101
1102         INSIST(ISC_LIST_EMPTY(fctx->validators));
1103
1104         dns_message_reset(fctx->rmessage, DNS_MESSAGE_INTENTPARSE);
1105
1106         query = isc_mem_get(res->buckets[fctx->bucketnum].mctx,
1107                             sizeof(*query));
1108         if (query == NULL) {
1109                 result = ISC_R_NOMEMORY;
1110                 goto stop_idle_timer;
1111         }
1112         query->mctx = res->buckets[fctx->bucketnum].mctx;
1113         query->options = options;
1114         query->attributes = 0;
1115         query->sends = 0;
1116         query->connects = 0;
1117         /*
1118          * Note that the caller MUST guarantee that 'addrinfo' will remain
1119          * valid until this query is canceled.
1120          */
1121         query->addrinfo = addrinfo;
1122         TIME_NOW(&query->start);
1123
1124         /*
1125          * If this is a TCP query, then we need to make a socket and
1126          * a dispatch for it here.  Otherwise we use the resolver's
1127          * shared dispatch.
1128          */
1129         query->dispatchmgr = res->dispatchmgr;
1130         query->dispatch = NULL;
1131         query->exclusivesocket = ISC_FALSE;
1132         query->tcpsocket = NULL;
1133         if (res->view->peers != NULL) {
1134                 dns_peer_t *peer = NULL;
1135                 isc_netaddr_t dstip;
1136                 isc_netaddr_fromsockaddr(&dstip, &addrinfo->sockaddr);
1137                 result = dns_peerlist_peerbyaddr(res->view->peers,
1138                                                  &dstip, &peer);
1139                 if (result == ISC_R_SUCCESS) {
1140                         result = dns_peer_getquerysource(peer, &addr);
1141                         if (result == ISC_R_SUCCESS)
1142                                 have_addr = ISC_TRUE;
1143                 }
1144         }
1145
1146         if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1147                 int pf;
1148
1149                 pf = isc_sockaddr_pf(&addrinfo->sockaddr);
1150                 if (!have_addr) {
1151                         switch (pf) {
1152                         case PF_INET:
1153                                 result =
1154                                   dns_dispatch_getlocaladdress(res->dispatchv4,
1155                                                                &addr);
1156                                 break;
1157                         case PF_INET6:
1158                                 result =
1159                                   dns_dispatch_getlocaladdress(res->dispatchv6,
1160                                                                &addr);
1161                                 break;
1162                         default:
1163                                 result = ISC_R_NOTIMPLEMENTED;
1164                                 break;
1165                         }
1166                         if (result != ISC_R_SUCCESS)
1167                                 goto cleanup_query;
1168                 }
1169                 isc_sockaddr_setport(&addr, 0);
1170
1171                 result = isc_socket_create(res->socketmgr, pf,
1172                                            isc_sockettype_tcp,
1173                                            &query->tcpsocket);
1174                 if (result != ISC_R_SUCCESS)
1175                         goto cleanup_query;
1176
1177 #ifndef BROKEN_TCP_BIND_BEFORE_CONNECT
1178                 result = isc_socket_bind(query->tcpsocket, &addr, 0);
1179                 if (result != ISC_R_SUCCESS)
1180                         goto cleanup_socket;
1181 #endif
1182
1183                 /*
1184                  * A dispatch will be created once the connect succeeds.
1185                  */
1186         } else {
1187                 if (have_addr) {
1188                         unsigned int attrs, attrmask;
1189                         attrs = DNS_DISPATCHATTR_UDP;
1190                         switch (isc_sockaddr_pf(&addr)) {
1191                         case AF_INET:
1192                                 attrs |= DNS_DISPATCHATTR_IPV4;
1193                                 break;
1194                         case AF_INET6:
1195                                 attrs |= DNS_DISPATCHATTR_IPV6;
1196                                 break;
1197                         default:
1198                                 result = ISC_R_NOTIMPLEMENTED;
1199                                 goto cleanup_query;
1200                         }
1201                         attrmask = DNS_DISPATCHATTR_UDP;
1202                         attrmask |= DNS_DISPATCHATTR_TCP;
1203                         attrmask |= DNS_DISPATCHATTR_IPV4;
1204                         attrmask |= DNS_DISPATCHATTR_IPV6;
1205                         result = dns_dispatch_getudp(res->dispatchmgr,
1206                                                      res->socketmgr,
1207                                                      res->taskmgr, &addr,
1208                                                      4096, 1000, 32768, 16411,
1209                                                      16433, attrs, attrmask,
1210                                                      &query->dispatch);
1211                         if (result != ISC_R_SUCCESS)
1212                                 goto cleanup_query;
1213                 } else {
1214                         switch (isc_sockaddr_pf(&addrinfo->sockaddr)) {
1215                         case PF_INET:
1216                                 dns_dispatch_attach(res->dispatchv4,
1217                                                     &query->dispatch);
1218                                 query->exclusivesocket = res->exclusivev4;
1219                                 break;
1220                         case PF_INET6:
1221                                 dns_dispatch_attach(res->dispatchv6,
1222                                                     &query->dispatch);
1223                                 query->exclusivesocket = res->exclusivev6;
1224                                 break;
1225                         default:
1226                                 result = ISC_R_NOTIMPLEMENTED;
1227                                 goto cleanup_query;
1228                         }
1229                 }
1230                 /*
1231                  * We should always have a valid dispatcher here.  If we
1232                  * don't support a protocol family, then its dispatcher
1233                  * will be NULL, but we shouldn't be finding addresses for
1234                  * protocol types we don't support, so the dispatcher
1235                  * we found should never be NULL.
1236                  */
1237                 INSIST(query->dispatch != NULL);
1238         }
1239
1240         query->dispentry = NULL;
1241         query->fctx = fctx;
1242         query->tsig = NULL;
1243         query->tsigkey = NULL;
1244         ISC_LINK_INIT(query, link);
1245         query->magic = QUERY_MAGIC;
1246
1247         if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1248                 /*
1249                  * Connect to the remote server.
1250                  *
1251                  * XXXRTH  Should we attach to the socket?
1252                  */
1253                 result = isc_socket_connect(query->tcpsocket,
1254                                             &addrinfo->sockaddr, task,
1255                                             resquery_connected, query);
1256                 if (result != ISC_R_SUCCESS)
1257                         goto cleanup_socket;
1258                 query->connects++;
1259                 QTRACE("connecting via TCP");
1260         } else {
1261                 result = resquery_send(query);
1262                 if (result != ISC_R_SUCCESS)
1263                         goto cleanup_dispatch;
1264         }
1265
1266         ISC_LIST_APPEND(fctx->queries, query, link);
1267         query->fctx->nqueries++;
1268
1269         return (ISC_R_SUCCESS);
1270
1271  cleanup_socket:
1272         isc_socket_detach(&query->tcpsocket);
1273
1274  cleanup_dispatch:
1275         if (query->dispatch != NULL)
1276                 dns_dispatch_detach(&query->dispatch);
1277
1278  cleanup_query:
1279         query->magic = 0;
1280         isc_mem_put(res->buckets[fctx->bucketnum].mctx,
1281                     query, sizeof(*query));
1282
1283  stop_idle_timer:
1284         RUNTIME_CHECK(fctx_stopidletimer(fctx) == ISC_R_SUCCESS);
1285
1286         return (result);
1287 }
1288
1289 static isc_boolean_t
1290 triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1291         isc_sockaddr_t *sa;
1292
1293         for (sa = ISC_LIST_HEAD(fctx->edns);
1294              sa != NULL;
1295              sa = ISC_LIST_NEXT(sa, link)) {
1296                 if (isc_sockaddr_equal(sa, address))
1297                         return (ISC_TRUE);
1298         }
1299
1300         return (ISC_FALSE);
1301 }
1302
1303 static void
1304 add_triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1305         isc_sockaddr_t *sa;
1306
1307         if (triededns(fctx, address))
1308                 return;
1309
1310         sa = isc_mem_get(fctx->res->buckets[fctx->bucketnum].mctx,
1311                          sizeof(*sa));
1312         if (sa == NULL)
1313                 return;
1314
1315         *sa = *address;
1316         ISC_LIST_INITANDAPPEND(fctx->edns, sa, link);
1317 }
1318
1319 static isc_boolean_t
1320 triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1321         isc_sockaddr_t *sa;
1322
1323         for (sa = ISC_LIST_HEAD(fctx->edns512);
1324              sa != NULL;
1325              sa = ISC_LIST_NEXT(sa, link)) {
1326                 if (isc_sockaddr_equal(sa, address))
1327                         return (ISC_TRUE);
1328         }
1329
1330         return (ISC_FALSE);
1331 }
1332
1333 static void
1334 add_triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1335         isc_sockaddr_t *sa;
1336
1337         if (triededns512(fctx, address))
1338                 return;
1339
1340         sa = isc_mem_get(fctx->res->buckets[fctx->bucketnum].mctx,
1341                          sizeof(*sa));
1342         if (sa == NULL)
1343                 return;
1344
1345         *sa = *address;
1346         ISC_LIST_INITANDAPPEND(fctx->edns512, sa, link);
1347 }
1348
1349 static isc_result_t
1350 resquery_send(resquery_t *query) {
1351         fetchctx_t *fctx;
1352         isc_result_t result;
1353         dns_name_t *qname = NULL;
1354         dns_rdataset_t *qrdataset = NULL;
1355         isc_region_t r;
1356         dns_resolver_t *res;
1357         isc_task_t *task;
1358         isc_socket_t *socket;
1359         isc_buffer_t tcpbuffer;
1360         isc_sockaddr_t *address;
1361         isc_buffer_t *buffer;
1362         isc_netaddr_t ipaddr;
1363         dns_tsigkey_t *tsigkey = NULL;
1364         dns_peer_t *peer = NULL;
1365         isc_boolean_t useedns;
1366         dns_compress_t cctx;
1367         isc_boolean_t cleanup_cctx = ISC_FALSE;
1368         isc_boolean_t secure_domain;
1369
1370         fctx = query->fctx;
1371         QTRACE("send");
1372
1373         res = fctx->res;
1374         task = res->buckets[fctx->bucketnum].task;
1375         address = NULL;
1376
1377         if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1378                 /*
1379                  * Reserve space for the TCP message length.
1380                  */
1381                 isc_buffer_init(&tcpbuffer, query->data, sizeof(query->data));
1382                 isc_buffer_init(&query->buffer, query->data + 2,
1383                                 sizeof(query->data) - 2);
1384                 buffer = &tcpbuffer;
1385         } else {
1386                 isc_buffer_init(&query->buffer, query->data,
1387                                 sizeof(query->data));
1388                 buffer = &query->buffer;
1389         }
1390
1391         result = dns_message_gettempname(fctx->qmessage, &qname);
1392         if (result != ISC_R_SUCCESS)
1393                 goto cleanup_temps;
1394         result = dns_message_gettemprdataset(fctx->qmessage, &qrdataset);
1395         if (result != ISC_R_SUCCESS)
1396                 goto cleanup_temps;
1397
1398         /*
1399          * Get a query id from the dispatch.
1400          */
1401         result = dns_dispatch_addresponse2(query->dispatch,
1402                                            &query->addrinfo->sockaddr,
1403                                            task,
1404                                            resquery_response,
1405                                            query,
1406                                            &query->id,
1407                                            &query->dispentry,
1408                                            res->socketmgr);
1409         if (result != ISC_R_SUCCESS)
1410                 goto cleanup_temps;
1411
1412         fctx->qmessage->opcode = dns_opcode_query;
1413
1414         /*
1415          * Set up question.
1416          */
1417         dns_name_init(qname, NULL);
1418         dns_name_clone(&fctx->name, qname);
1419         dns_rdataset_init(qrdataset);
1420         dns_rdataset_makequestion(qrdataset, res->rdclass, fctx->type);
1421         ISC_LIST_APPEND(qname->list, qrdataset, link);
1422         dns_message_addname(fctx->qmessage, qname, DNS_SECTION_QUESTION);
1423         qname = NULL;
1424         qrdataset = NULL;
1425
1426         /*
1427          * Set RD if the client has requested that we do a recursive query,
1428          * or if we're sending to a forwarder.
1429          */
1430         if ((query->options & DNS_FETCHOPT_RECURSIVE) != 0 ||
1431             ISFORWARDER(query->addrinfo))
1432                 fctx->qmessage->flags |= DNS_MESSAGEFLAG_RD;
1433
1434         /*
1435          * Set CD if the client says don't validate or the question is
1436          * under a secure entry point.
1437          */
1438         if ((query->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
1439                 fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1440         } else if (res->view->enablevalidation) {
1441                 result = dns_keytable_issecuredomain(res->view->secroots,
1442                                                      &fctx->name,
1443                                                      &secure_domain);
1444                 if (result != ISC_R_SUCCESS)
1445                         secure_domain = ISC_FALSE;
1446                 if (res->view->dlv != NULL)
1447                         secure_domain = ISC_TRUE;
1448                 if (secure_domain)
1449                         fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1450         }
1451
1452         /*
1453          * We don't have to set opcode because it defaults to query.
1454          */
1455         fctx->qmessage->id = query->id;
1456
1457         /*
1458          * Convert the question to wire format.
1459          */
1460         result = dns_compress_init(&cctx, -1, fctx->res->mctx);
1461         if (result != ISC_R_SUCCESS)
1462                 goto cleanup_message;
1463         cleanup_cctx = ISC_TRUE;
1464
1465         result = dns_message_renderbegin(fctx->qmessage, &cctx,
1466                                          &query->buffer);
1467         if (result != ISC_R_SUCCESS)
1468                 goto cleanup_message;
1469
1470         result = dns_message_rendersection(fctx->qmessage,
1471                                            DNS_SECTION_QUESTION, 0);
1472         if (result != ISC_R_SUCCESS)
1473                 goto cleanup_message;
1474
1475         peer = NULL;
1476         isc_netaddr_fromsockaddr(&ipaddr, &query->addrinfo->sockaddr);
1477         (void) dns_peerlist_peerbyaddr(fctx->res->view->peers, &ipaddr, &peer);
1478
1479         /*
1480          * The ADB does not know about servers with "edns no".  Check this,
1481          * and then inform the ADB for future use.
1482          */
1483         if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0 &&
1484             peer != NULL &&
1485             dns_peer_getsupportedns(peer, &useedns) == ISC_R_SUCCESS &&
1486             !useedns)
1487         {
1488                 query->options |= DNS_FETCHOPT_NOEDNS0;
1489                 dns_adb_changeflags(fctx->adb,
1490                                     query->addrinfo,
1491                                     DNS_FETCHOPT_NOEDNS0,
1492                                     DNS_FETCHOPT_NOEDNS0);
1493         }
1494
1495         /*
1496          * Use EDNS0, unless the caller doesn't want it, or we know that
1497          * the remote server doesn't like it.
1498          */
1499
1500         if ((triededns512(fctx, &query->addrinfo->sockaddr) ||
1501              fctx->timeouts >= (MAX_EDNS0_TIMEOUTS * 2)) &&
1502             (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1503                 query->options |= DNS_FETCHOPT_NOEDNS0;
1504                 FCTXTRACE("too many timeouts, disabling EDNS0");
1505         } else if ((triededns(fctx, &query->addrinfo->sockaddr) ||
1506                     fctx->timeouts >= MAX_EDNS0_TIMEOUTS) &&
1507                    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1508                 query->options |= DNS_FETCHOPT_EDNS512;
1509                 FCTXTRACE("too many timeouts, setting EDNS size to 512");
1510         }
1511
1512         if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1513                 if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0) {
1514                         unsigned int version = 0;       /* Default version. */
1515                         unsigned int flags;
1516                         isc_uint16_t udpsize = res->udpsize;
1517
1518                         flags = query->addrinfo->flags;
1519                         if ((flags & DNS_FETCHOPT_EDNSVERSIONSET) != 0) {
1520                                 version = flags & DNS_FETCHOPT_EDNSVERSIONMASK;
1521                                 version >>= DNS_FETCHOPT_EDNSVERSIONSHIFT;
1522                         }
1523                         if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
1524                                 udpsize = 512;
1525                         else if (peer != NULL)
1526                                 (void)dns_peer_getudpsize(peer, &udpsize);
1527                         result = fctx_addopt(fctx->qmessage, version, udpsize);
1528                         if (result != ISC_R_SUCCESS) {
1529                                 /*
1530                                  * We couldn't add the OPT, but we'll press on.
1531                                  * We're not using EDNS0, so set the NOEDNS0
1532                                  * bit.
1533                                  */
1534                                 query->options |= DNS_FETCHOPT_NOEDNS0;
1535                         }
1536                 } else {
1537                         /*
1538                          * We know this server doesn't like EDNS0, so we
1539                          * won't use it.  Set the NOEDNS0 bit since we're
1540                          * not using EDNS0.
1541                          */
1542                         query->options |= DNS_FETCHOPT_NOEDNS0;
1543                 }
1544         }
1545
1546         /*
1547          * If we need EDNS0 to do this query and aren't using it, we lose.
1548          */
1549         if (NEEDEDNS0(fctx) && (query->options & DNS_FETCHOPT_NOEDNS0) != 0) {
1550                 result = DNS_R_SERVFAIL;
1551                 goto cleanup_message;
1552         }
1553
1554         if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0)
1555                 add_triededns(fctx, &query->addrinfo->sockaddr);
1556
1557         if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
1558                 add_triededns512(fctx, &query->addrinfo->sockaddr);
1559
1560         /*
1561          * Clear CD if EDNS is not in use.
1562          */
1563         if ((query->options & DNS_FETCHOPT_NOEDNS0) != 0)
1564                 fctx->qmessage->flags &= ~DNS_MESSAGEFLAG_CD;
1565
1566         /*
1567          * Add TSIG record tailored to the current recipient.
1568          */
1569         result = dns_view_getpeertsig(fctx->res->view, &ipaddr, &tsigkey);
1570         if (result != ISC_R_SUCCESS && result != ISC_R_NOTFOUND)
1571                 goto cleanup_message;
1572
1573         if (tsigkey != NULL) {
1574                 result = dns_message_settsigkey(fctx->qmessage, tsigkey);
1575                 dns_tsigkey_detach(&tsigkey);
1576                 if (result != ISC_R_SUCCESS)
1577                         goto cleanup_message;
1578         }
1579
1580         result = dns_message_rendersection(fctx->qmessage,
1581                                            DNS_SECTION_ADDITIONAL, 0);
1582         if (result != ISC_R_SUCCESS)
1583                 goto cleanup_message;
1584
1585         result = dns_message_renderend(fctx->qmessage);
1586         if (result != ISC_R_SUCCESS)
1587                 goto cleanup_message;
1588
1589         dns_compress_invalidate(&cctx);
1590         cleanup_cctx = ISC_FALSE;
1591
1592         if (dns_message_gettsigkey(fctx->qmessage) != NULL) {
1593                 dns_tsigkey_attach(dns_message_gettsigkey(fctx->qmessage),
1594                                    &query->tsigkey);
1595                 result = dns_message_getquerytsig(fctx->qmessage,
1596                                                   fctx->res->mctx,
1597                                                   &query->tsig);
1598                 if (result != ISC_R_SUCCESS)
1599                         goto cleanup_message;
1600         }
1601
1602         /*
1603          * If using TCP, write the length of the message at the beginning
1604          * of the buffer.
1605          */
1606         if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1607                 isc_buffer_usedregion(&query->buffer, &r);
1608                 isc_buffer_putuint16(&tcpbuffer, (isc_uint16_t)r.length);
1609                 isc_buffer_add(&tcpbuffer, r.length);
1610         }
1611
1612         /*
1613          * We're now done with the query message.
1614          */
1615         dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
1616
1617         if (query->exclusivesocket)
1618                 socket = dns_dispatch_getentrysocket(query->dispentry);
1619         else
1620                 socket = dns_dispatch_getsocket(query->dispatch);
1621         /*
1622          * Send the query!
1623          */
1624         if ((query->options & DNS_FETCHOPT_TCP) == 0) {
1625                 address = &query->addrinfo->sockaddr;
1626                 if (query->exclusivesocket) {
1627                         result = isc_socket_connect(socket, address, task,
1628                                                     resquery_udpconnected,
1629                                                     query);
1630                         if (result != ISC_R_SUCCESS)
1631                                 goto cleanup_message;
1632                         query->connects++;
1633                 }
1634         }
1635         isc_buffer_usedregion(buffer, &r);
1636
1637         /*
1638          * XXXRTH  Make sure we don't send to ourselves!  We should probably
1639          *         prune out these addresses when we get them from the ADB.
1640          */
1641         result = isc_socket_sendto(socket, &r, task, resquery_senddone,
1642                                    query, address, NULL);
1643         if (result != ISC_R_SUCCESS)
1644                 goto cleanup_message;
1645         query->sends++;
1646         QTRACE("sent");
1647
1648         return (ISC_R_SUCCESS);
1649
1650  cleanup_message:
1651         if (cleanup_cctx)
1652                 dns_compress_invalidate(&cctx);
1653
1654         dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
1655
1656         /*
1657          * Stop the dispatcher from listening.
1658          */
1659         dns_dispatch_removeresponse(&query->dispentry, NULL);
1660
1661  cleanup_temps:
1662         if (qname != NULL)
1663                 dns_message_puttempname(fctx->qmessage, &qname);
1664         if (qrdataset != NULL)
1665                 dns_message_puttemprdataset(fctx->qmessage, &qrdataset);
1666
1667         return (result);
1668 }
1669
1670 static void
1671 resquery_connected(isc_task_t *task, isc_event_t *event) {
1672         isc_socketevent_t *sevent = (isc_socketevent_t *)event;
1673         resquery_t *query = event->ev_arg;
1674         isc_boolean_t retry = ISC_FALSE;
1675         isc_result_t result;
1676         unsigned int attrs;
1677         fetchctx_t *fctx;
1678
1679         REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
1680         REQUIRE(VALID_QUERY(query));
1681
1682         QTRACE("connected");
1683
1684         UNUSED(task);
1685
1686         /*
1687          * XXXRTH
1688          *
1689          * Currently we don't wait for the connect event before retrying
1690          * a query.  This means that if we get really behind, we may end
1691          * up doing extra work!
1692          */
1693
1694         query->connects--;
1695         fctx = query->fctx;
1696
1697         if (RESQUERY_CANCELED(query)) {
1698                 /*
1699                  * This query was canceled while the connect() was in
1700                  * progress.
1701                  */
1702                 isc_socket_detach(&query->tcpsocket);
1703                 resquery_destroy(&query);
1704         } else {
1705                 switch (sevent->result) {
1706                 case ISC_R_SUCCESS:
1707                         /*
1708                          * We are connected.  Create a dispatcher and
1709                          * send the query.
1710                          */
1711                         attrs = 0;
1712                         attrs |= DNS_DISPATCHATTR_TCP;
1713                         attrs |= DNS_DISPATCHATTR_PRIVATE;
1714                         attrs |= DNS_DISPATCHATTR_CONNECTED;
1715                         if (isc_sockaddr_pf(&query->addrinfo->sockaddr) ==
1716                             AF_INET)
1717                                 attrs |= DNS_DISPATCHATTR_IPV4;
1718                         else
1719                                 attrs |= DNS_DISPATCHATTR_IPV6;
1720                         attrs |= DNS_DISPATCHATTR_MAKEQUERY;
1721
1722                         result = dns_dispatch_createtcp(query->dispatchmgr,
1723                                                      query->tcpsocket,
1724                                                      query->fctx->res->taskmgr,
1725                                                      4096, 2, 1, 1, 3, attrs,
1726                                                      &query->dispatch);
1727
1728                         /*
1729                          * Regardless of whether dns_dispatch_create()
1730                          * succeeded or not, we don't need our reference
1731                          * to the socket anymore.
1732                          */
1733                         isc_socket_detach(&query->tcpsocket);
1734
1735                         if (result == ISC_R_SUCCESS)
1736                                 result = resquery_send(query);
1737
1738                         if (result != ISC_R_SUCCESS) {
1739                                 fctx_cancelquery(&query, NULL, NULL,
1740                                                  ISC_FALSE);
1741                                 fctx_done(fctx, result);
1742                         }
1743                         break;
1744
1745                 case ISC_R_NETUNREACH:
1746                 case ISC_R_HOSTUNREACH:
1747                 case ISC_R_CONNREFUSED:
1748                 case ISC_R_NOPERM:
1749                 case ISC_R_ADDRNOTAVAIL:
1750                 case ISC_R_CONNECTIONRESET:
1751                         /*
1752                          * No route to remote.
1753                          */
1754                         isc_socket_detach(&query->tcpsocket);
1755                         fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
1756                         retry = ISC_TRUE;
1757                         break;
1758
1759                 default:
1760                         isc_socket_detach(&query->tcpsocket);
1761                         fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
1762                         break;
1763                 }
1764         }
1765
1766         isc_event_free(&event);
1767
1768         if (retry) {
1769                 /*
1770                  * Behave as if the idle timer has expired.  For TCP
1771                  * connections this may not actually reflect the latest timer.
1772                  */
1773                 fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1774                 result = fctx_stopidletimer(fctx);
1775                 if (result != ISC_R_SUCCESS)
1776                         fctx_done(fctx, result);
1777                 else
1778                         fctx_try(fctx);
1779         }
1780 }
1781
1782 static void
1783 fctx_finddone(isc_task_t *task, isc_event_t *event) {
1784         fetchctx_t *fctx;
1785         dns_adbfind_t *find;
1786         dns_resolver_t *res;
1787         isc_boolean_t want_try = ISC_FALSE;
1788         isc_boolean_t want_done = ISC_FALSE;
1789         isc_boolean_t bucket_empty = ISC_FALSE;
1790         unsigned int bucketnum;
1791
1792         find = event->ev_sender;
1793         fctx = event->ev_arg;
1794         REQUIRE(VALID_FCTX(fctx));
1795         res = fctx->res;
1796
1797         UNUSED(task);
1798
1799         FCTXTRACE("finddone");
1800
1801         INSIST(fctx->pending > 0);
1802         fctx->pending--;
1803
1804         if (ADDRWAIT(fctx)) {
1805                 /*
1806                  * The fetch is waiting for a name to be found.
1807                  */
1808                 INSIST(!SHUTTINGDOWN(fctx));
1809                 fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1810                 if (event->ev_type == DNS_EVENT_ADBMOREADDRESSES)
1811                         want_try = ISC_TRUE;
1812                 else if (fctx->pending == 0) {
1813                         /*
1814                          * We've got nothing else to wait for and don't
1815                          * know the answer.  There's nothing to do but
1816                          * fail the fctx.
1817                          */
1818                         want_done = ISC_TRUE;
1819                 }
1820         } else if (SHUTTINGDOWN(fctx) && fctx->pending == 0 &&
1821                    fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) {
1822                 bucketnum = fctx->bucketnum;
1823                 LOCK(&res->buckets[bucketnum].lock);
1824                 /*
1825                  * Note that we had to wait until we had the lock before
1826                  * looking at fctx->references.
1827                  */
1828                 if (fctx->references == 0)
1829                         bucket_empty = fctx_destroy(fctx);
1830                 UNLOCK(&res->buckets[bucketnum].lock);
1831         }
1832
1833         isc_event_free(&event);
1834         dns_adb_destroyfind(&find);
1835
1836         if (want_try)
1837                 fctx_try(fctx);
1838         else if (want_done)
1839                 fctx_done(fctx, ISC_R_FAILURE);
1840         else if (bucket_empty)
1841                 empty_bucket(res);
1842 }
1843
1844
1845 static inline isc_boolean_t
1846 bad_server(fetchctx_t *fctx, isc_sockaddr_t *address) {
1847         isc_sockaddr_t *sa;
1848
1849         for (sa = ISC_LIST_HEAD(fctx->bad);
1850              sa != NULL;
1851              sa = ISC_LIST_NEXT(sa, link)) {
1852                 if (isc_sockaddr_equal(sa, address))
1853                         return (ISC_TRUE);
1854         }
1855
1856         return (ISC_FALSE);
1857 }
1858
1859 static inline isc_boolean_t
1860 mark_bad(fetchctx_t *fctx) {
1861         dns_adbfind_t *curr;
1862         dns_adbaddrinfo_t *addrinfo;
1863         isc_boolean_t all_bad = ISC_TRUE;
1864
1865         /*
1866          * Mark all known bad servers, so we don't try to talk to them
1867          * again.
1868          */
1869
1870         /*
1871          * Mark any bad nameservers.
1872          */
1873         for (curr = ISC_LIST_HEAD(fctx->finds);
1874              curr != NULL;
1875              curr = ISC_LIST_NEXT(curr, publink)) {
1876                 for (addrinfo = ISC_LIST_HEAD(curr->list);
1877                      addrinfo != NULL;
1878                      addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
1879                         if (bad_server(fctx, &addrinfo->sockaddr))
1880                                 addrinfo->flags |= FCTX_ADDRINFO_MARK;
1881                         else
1882                                 all_bad = ISC_FALSE;
1883                 }
1884         }
1885
1886         /*
1887          * Mark any bad forwarders.
1888          */
1889         for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
1890              addrinfo != NULL;
1891              addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
1892                 if (bad_server(fctx, &addrinfo->sockaddr))
1893                         addrinfo->flags |= FCTX_ADDRINFO_MARK;
1894                 else
1895                         all_bad = ISC_FALSE;
1896         }
1897
1898         /*
1899          * Mark any bad alternates.
1900          */
1901         for (curr = ISC_LIST_HEAD(fctx->altfinds);
1902              curr != NULL;
1903              curr = ISC_LIST_NEXT(curr, publink)) {
1904                 for (addrinfo = ISC_LIST_HEAD(curr->list);
1905                      addrinfo != NULL;
1906                      addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
1907                         if (bad_server(fctx, &addrinfo->sockaddr))
1908                                 addrinfo->flags |= FCTX_ADDRINFO_MARK;
1909                         else
1910                                 all_bad = ISC_FALSE;
1911                 }
1912         }
1913
1914         for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
1915              addrinfo != NULL;
1916              addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
1917                 if (bad_server(fctx, &addrinfo->sockaddr))
1918                         addrinfo->flags |= FCTX_ADDRINFO_MARK;
1919                 else
1920                         all_bad = ISC_FALSE;
1921         }
1922
1923         return (all_bad);
1924 }
1925
1926 static void
1927 add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_result_t reason) {
1928         char namebuf[DNS_NAME_FORMATSIZE];
1929         char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1930         char classbuf[64];
1931         char typebuf[64];
1932         char code[64];
1933         isc_buffer_t b;
1934         isc_sockaddr_t *sa;
1935         const char *sep1, *sep2;
1936         isc_sockaddr_t *address = &addrinfo->sockaddr;
1937
1938         if (bad_server(fctx, address)) {
1939                 /*
1940                  * We already know this server is bad.
1941                  */
1942                 return;
1943         }
1944
1945         FCTXTRACE("add_bad");
1946
1947         sa = isc_mem_get(fctx->res->buckets[fctx->bucketnum].mctx,
1948                          sizeof(*sa));
1949         if (sa == NULL)
1950                 return;
1951         *sa = *address;
1952         ISC_LIST_INITANDAPPEND(fctx->bad, sa, link);
1953
1954         if (reason == DNS_R_LAME)       /* already logged */
1955                 return;
1956
1957         if (reason == DNS_R_UNEXPECTEDRCODE &&
1958             fctx->rmessage->rcode == dns_rcode_servfail &&
1959             ISFORWARDER(addrinfo))
1960                 return;
1961
1962         if (reason == DNS_R_UNEXPECTEDRCODE) {
1963                 isc_buffer_init(&b, code, sizeof(code) - 1);
1964                 dns_rcode_totext(fctx->rmessage->rcode, &b);
1965                 code[isc_buffer_usedlength(&b)] = '\0';
1966                 sep1 = "(";
1967                 sep2 = ") ";
1968         } else if (reason == DNS_R_UNEXPECTEDOPCODE) {
1969                 isc_buffer_init(&b, code, sizeof(code) - 1);
1970                 dns_opcode_totext((dns_opcode_t)fctx->rmessage->opcode, &b);
1971                 code[isc_buffer_usedlength(&b)] = '\0';
1972                 sep1 = "(";
1973                 sep2 = ") ";
1974         } else {
1975                 code[0] = '\0';
1976                 sep1 = "";
1977                 sep2 = "";
1978         }
1979         dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
1980         dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
1981         dns_rdataclass_format(fctx->res->rdclass, classbuf, sizeof(classbuf));
1982         isc_sockaddr_format(address, addrbuf, sizeof(addrbuf));
1983         isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
1984                       DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1985                       "%s %s%s%sresolving '%s/%s/%s': %s",
1986                       dns_result_totext(reason), sep1, code, sep2,
1987                       namebuf, typebuf, classbuf, addrbuf);
1988 }
1989
1990 static void
1991 sort_adbfind(dns_adbfind_t *find) {
1992         dns_adbaddrinfo_t *best, *curr;
1993         dns_adbaddrinfolist_t sorted;
1994
1995         /*
1996          * Lame N^2 bubble sort.
1997          */
1998
1999         ISC_LIST_INIT(sorted);
2000         while (!ISC_LIST_EMPTY(find->list)) {
2001                 best = ISC_LIST_HEAD(find->list);
2002                 curr = ISC_LIST_NEXT(best, publink);
2003                 while (curr != NULL) {
2004                         if (curr->srtt < best->srtt)
2005                                 best = curr;
2006                         curr = ISC_LIST_NEXT(curr, publink);
2007                 }
2008                 ISC_LIST_UNLINK(find->list, best, publink);
2009                 ISC_LIST_APPEND(sorted, best, publink);
2010         }
2011         find->list = sorted;
2012 }
2013
2014 static void
2015 sort_finds(fetchctx_t *fctx) {
2016         dns_adbfind_t *best, *curr;
2017         dns_adbfindlist_t sorted;
2018         dns_adbaddrinfo_t *addrinfo, *bestaddrinfo;
2019
2020         /*
2021          * Lame N^2 bubble sort.
2022          */
2023
2024         ISC_LIST_INIT(sorted);
2025         while (!ISC_LIST_EMPTY(fctx->finds)) {
2026                 best = ISC_LIST_HEAD(fctx->finds);
2027                 bestaddrinfo = ISC_LIST_HEAD(best->list);
2028                 INSIST(bestaddrinfo != NULL);
2029                 curr = ISC_LIST_NEXT(best, publink);
2030                 while (curr != NULL) {
2031                         addrinfo = ISC_LIST_HEAD(curr->list);
2032                         INSIST(addrinfo != NULL);
2033                         if (addrinfo->srtt < bestaddrinfo->srtt) {
2034                                 best = curr;
2035                                 bestaddrinfo = addrinfo;
2036                         }
2037                         curr = ISC_LIST_NEXT(curr, publink);
2038                 }
2039                 ISC_LIST_UNLINK(fctx->finds, best, publink);
2040                 ISC_LIST_APPEND(sorted, best, publink);
2041         }
2042         fctx->finds = sorted;
2043
2044         ISC_LIST_INIT(sorted);
2045         while (!ISC_LIST_EMPTY(fctx->altfinds)) {
2046                 best = ISC_LIST_HEAD(fctx->altfinds);
2047                 bestaddrinfo = ISC_LIST_HEAD(best->list);
2048                 INSIST(bestaddrinfo != NULL);
2049                 curr = ISC_LIST_NEXT(best, publink);
2050                 while (curr != NULL) {
2051                         addrinfo = ISC_LIST_HEAD(curr->list);
2052                         INSIST(addrinfo != NULL);
2053                         if (addrinfo->srtt < bestaddrinfo->srtt) {
2054                                 best = curr;
2055                                 bestaddrinfo = addrinfo;
2056                         }
2057                         curr = ISC_LIST_NEXT(curr, publink);
2058                 }
2059                 ISC_LIST_UNLINK(fctx->altfinds, best, publink);
2060                 ISC_LIST_APPEND(sorted, best, publink);
2061         }
2062         fctx->altfinds = sorted;
2063 }
2064
2065 static void
2066 findname(fetchctx_t *fctx, dns_name_t *name, in_port_t port,
2067          unsigned int options, unsigned int flags, isc_stdtime_t now,
2068          isc_boolean_t *need_alternate)
2069 {
2070         dns_adbaddrinfo_t *ai;
2071         dns_adbfind_t *find;
2072         dns_resolver_t *res;
2073         isc_boolean_t unshared;
2074         isc_result_t result;
2075
2076         res = fctx->res;
2077         unshared = ISC_TF((fctx->options | DNS_FETCHOPT_UNSHARED) != 0);
2078         /*
2079          * If this name is a subdomain of the query domain, tell
2080          * the ADB to start looking using zone/hint data. This keeps us
2081          * from getting stuck if the nameserver is beneath the zone cut
2082          * and we don't know its address (e.g. because the A record has
2083          * expired).
2084          */
2085         if (dns_name_issubdomain(name, &fctx->domain))
2086                 options |= DNS_ADBFIND_STARTATZONE;
2087         options |= DNS_ADBFIND_GLUEOK;
2088         options |= DNS_ADBFIND_HINTOK;
2089
2090         /*
2091          * See what we know about this address.
2092          */
2093         find = NULL;
2094         result = dns_adb_createfind(fctx->adb,
2095                                     res->buckets[fctx->bucketnum].task,
2096                                     fctx_finddone, fctx, name,
2097                                     &fctx->name, fctx->type,
2098                                     options, now, NULL,
2099                                     res->view->dstport, &find);
2100         if (result != ISC_R_SUCCESS) {
2101                 if (result == DNS_R_ALIAS) {
2102                         /*
2103                          * XXXRTH  Follow the CNAME/DNAME chain?
2104                          */
2105                         dns_adb_destroyfind(&find);
2106                 }
2107         } else if (!ISC_LIST_EMPTY(find->list)) {
2108                 /*
2109                  * We have at least some of the addresses for the
2110                  * name.
2111                  */
2112                 INSIST((find->options & DNS_ADBFIND_WANTEVENT) == 0);
2113                 sort_adbfind(find);
2114                 if (flags != 0 || port != 0) {
2115                         for (ai = ISC_LIST_HEAD(find->list);
2116                              ai != NULL;
2117                              ai = ISC_LIST_NEXT(ai, publink)) {
2118                                 ai->flags |= flags;
2119                                 if (port != 0)
2120                                         isc_sockaddr_setport(&ai->sockaddr,
2121                                                              port);
2122                         }
2123                 }
2124                 if ((flags & FCTX_ADDRINFO_FORWARDER) != 0)
2125                         ISC_LIST_APPEND(fctx->altfinds, find, publink);
2126                 else
2127                         ISC_LIST_APPEND(fctx->finds, find, publink);
2128         } else {
2129                 /*
2130                  * We don't know any of the addresses for this
2131                  * name.
2132                  */
2133                 if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
2134                         /*
2135                          * We're looking for them and will get an
2136                          * event about it later.
2137                          */
2138                         fctx->pending++;
2139                         /*
2140                          * Bootstrap.
2141                          */
2142                         if (need_alternate != NULL &&
2143                             !*need_alternate && unshared &&
2144                             ((res->dispatchv4 == NULL &&
2145                               find->result_v6 != DNS_R_NXDOMAIN) ||
2146                              (res->dispatchv6 == NULL &&
2147                               find->result_v4 != DNS_R_NXDOMAIN)))
2148                                 *need_alternate = ISC_TRUE;
2149                 } else {
2150                         /*
2151                          * If we know there are no addresses for
2152                          * the family we are using then try to add
2153                          * an alternative server.
2154                          */
2155                         if (need_alternate != NULL && !*need_alternate &&
2156                             ((res->dispatchv4 == NULL &&
2157                               find->result_v6 == DNS_R_NXRRSET) ||
2158                              (res->dispatchv6 == NULL &&
2159                               find->result_v4 == DNS_R_NXRRSET)))
2160                                 *need_alternate = ISC_TRUE;
2161                         dns_adb_destroyfind(&find);
2162                 }
2163         }
2164 }
2165
2166 static isc_result_t
2167 fctx_getaddresses(fetchctx_t *fctx) {
2168         dns_rdata_t rdata = DNS_RDATA_INIT;
2169         isc_result_t result;
2170         dns_resolver_t *res;
2171         isc_stdtime_t now;
2172         unsigned int stdoptions;
2173         isc_sockaddr_t *sa;
2174         dns_adbaddrinfo_t *ai;
2175         isc_boolean_t all_bad;
2176         dns_rdata_ns_t ns;
2177         isc_boolean_t need_alternate = ISC_FALSE;
2178
2179         FCTXTRACE("getaddresses");
2180
2181         /*
2182          * Don't pound on remote servers.  (Failsafe!)
2183          */
2184         fctx->restarts++;
2185         if (fctx->restarts > 10) {
2186                 FCTXTRACE("too many restarts");
2187                 return (DNS_R_SERVFAIL);
2188         }
2189
2190         res = fctx->res;
2191         stdoptions = 0;         /* Keep compiler happy. */
2192
2193         /*
2194          * Forwarders.
2195          */
2196
2197         INSIST(ISC_LIST_EMPTY(fctx->forwaddrs));
2198         INSIST(ISC_LIST_EMPTY(fctx->altaddrs));
2199
2200         /*
2201          * If this fctx has forwarders, use them; otherwise use any
2202          * selective forwarders specified in the view; otherwise use the
2203          * resolver's forwarders (if any).
2204          */
2205         sa = ISC_LIST_HEAD(fctx->forwarders);
2206         if (sa == NULL) {
2207                 dns_forwarders_t *forwarders = NULL;
2208                 dns_name_t *name = &fctx->name;
2209                 dns_name_t suffix;
2210                 unsigned int labels;
2211
2212                 /*
2213                  * DS records are found in the parent server.
2214                  * Strip label to get the correct forwarder (if any).
2215                  */
2216                 if (fctx->type == dns_rdatatype_ds &&
2217                     dns_name_countlabels(name) > 1) {
2218                         dns_name_init(&suffix, NULL);
2219                         labels = dns_name_countlabels(name);
2220                         dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
2221                         name = &suffix;
2222                 }
2223                 result = dns_fwdtable_find(fctx->res->view->fwdtable, name,
2224                                            &forwarders);
2225                 if (result == ISC_R_SUCCESS) {
2226                         sa = ISC_LIST_HEAD(forwarders->addrs);
2227                         fctx->fwdpolicy = forwarders->fwdpolicy;
2228                 }
2229         }
2230
2231         while (sa != NULL) {
2232                 if ((isc_sockaddr_pf(sa) == AF_INET &&
2233                          fctx->res->dispatchv4 == NULL) ||
2234                     (isc_sockaddr_pf(sa) == AF_INET6 &&
2235                         fctx->res->dispatchv6 == NULL)) {
2236                                 sa = ISC_LIST_NEXT(sa, link);
2237                                 continue;
2238                 }
2239                 ai = NULL;
2240                 result = dns_adb_findaddrinfo(fctx->adb,
2241                                               sa, &ai, 0);  /* XXXMLG */
2242                 if (result == ISC_R_SUCCESS) {
2243                         dns_adbaddrinfo_t *cur;
2244                         ai->flags |= FCTX_ADDRINFO_FORWARDER;
2245                         cur = ISC_LIST_HEAD(fctx->forwaddrs);
2246                         while (cur != NULL && cur->srtt < ai->srtt)
2247                                 cur = ISC_LIST_NEXT(cur, publink);
2248                         if (cur != NULL)
2249                                 ISC_LIST_INSERTBEFORE(fctx->forwaddrs, cur,
2250                                                       ai, publink);
2251                         else
2252                                 ISC_LIST_APPEND(fctx->forwaddrs, ai, publink);
2253                 }
2254                 sa = ISC_LIST_NEXT(sa, link);
2255         }
2256
2257         /*
2258          * If the forwarding policy is "only", we don't need the addresses
2259          * of the nameservers.
2260          */
2261         if (fctx->fwdpolicy == dns_fwdpolicy_only)
2262                 goto out;
2263
2264         /*
2265          * Normal nameservers.
2266          */
2267
2268         stdoptions = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_EMPTYEVENT;
2269         if (fctx->restarts == 1) {
2270                 /*
2271                  * To avoid sending out a flood of queries likely to
2272                  * result in NXRRSET, we suppress fetches for address
2273                  * families we don't have the first time through,
2274                  * provided that we have addresses in some family we
2275                  * can use.
2276                  *
2277                  * We don't want to set this option all the time, since
2278                  * if fctx->restarts > 1, we've clearly been having trouble
2279                  * with the addresses we had, so getting more could help.
2280                  */
2281                 stdoptions |= DNS_ADBFIND_AVOIDFETCHES;
2282         }
2283         if (res->dispatchv4 != NULL)
2284                 stdoptions |= DNS_ADBFIND_INET;
2285         if (res->dispatchv6 != NULL)
2286                 stdoptions |= DNS_ADBFIND_INET6;
2287         isc_stdtime_get(&now);
2288
2289         INSIST(ISC_LIST_EMPTY(fctx->finds));
2290         INSIST(ISC_LIST_EMPTY(fctx->altfinds));
2291
2292         for (result = dns_rdataset_first(&fctx->nameservers);
2293              result == ISC_R_SUCCESS;
2294              result = dns_rdataset_next(&fctx->nameservers))
2295         {
2296                 dns_rdataset_current(&fctx->nameservers, &rdata);
2297                 /*
2298                  * Extract the name from the NS record.
2299                  */
2300                 result = dns_rdata_tostruct(&rdata, &ns, NULL);
2301                 if (result != ISC_R_SUCCESS)
2302                         continue;
2303
2304                 findname(fctx, &ns.name, 0, stdoptions, 0, now,
2305                          &need_alternate);
2306                 dns_rdata_reset(&rdata);
2307                 dns_rdata_freestruct(&ns);
2308         }
2309         if (result != ISC_R_NOMORE)
2310                 return (result);
2311
2312         /*
2313          * Do we need to use 6 to 4?
2314          */
2315         if (need_alternate) {
2316                 int family;
2317                 alternate_t *a;
2318                 family = (res->dispatchv6 != NULL) ? AF_INET6 : AF_INET;
2319                 for (a = ISC_LIST_HEAD(fctx->res->alternates);
2320                      a != NULL;
2321                      a = ISC_LIST_NEXT(a, link)) {
2322                         if (!a->isaddress) {
2323                                 findname(fctx, &a->_u._n.name, a->_u._n.port,
2324                                          stdoptions, FCTX_ADDRINFO_FORWARDER,
2325                                          now, NULL);
2326                                 continue;
2327                         }
2328                         if (isc_sockaddr_pf(&a->_u.addr) != family)
2329                                 continue;
2330                         ai = NULL;
2331                         result = dns_adb_findaddrinfo(fctx->adb, &a->_u.addr,
2332                                                       &ai, 0);
2333                         if (result == ISC_R_SUCCESS) {
2334                                 dns_adbaddrinfo_t *cur;
2335                                 ai->flags |= FCTX_ADDRINFO_FORWARDER;
2336                                 cur = ISC_LIST_HEAD(fctx->altaddrs);
2337                                 while (cur != NULL && cur->srtt < ai->srtt)
2338                                         cur = ISC_LIST_NEXT(cur, publink);
2339                                 if (cur != NULL)
2340                                         ISC_LIST_INSERTBEFORE(fctx->altaddrs,
2341                                                               cur, ai, publink);
2342                                 else
2343                                         ISC_LIST_APPEND(fctx->altaddrs, ai,
2344                                                         publink);
2345                         }
2346                 }
2347         }
2348
2349  out:
2350         /*
2351          * Mark all known bad servers.
2352          */
2353         all_bad = mark_bad(fctx);
2354
2355         /*
2356          * How are we doing?
2357          */
2358         if (all_bad) {
2359                 /*
2360                  * We've got no addresses.
2361                  */
2362                 if (fctx->pending > 0) {
2363                         /*
2364                          * We're fetching the addresses, but don't have any
2365                          * yet.   Tell the caller to wait for an answer.
2366                          */
2367                         result = DNS_R_WAIT;
2368                 } else {
2369                         /*
2370                          * We've lost completely.  We don't know any
2371                          * addresses, and the ADB has told us it can't get
2372                          * them.
2373                          */
2374                         FCTXTRACE("no addresses");
2375                         result = ISC_R_FAILURE;
2376                 }
2377         } else {
2378                 /*
2379                  * We've found some addresses.  We might still be looking
2380                  * for more addresses.
2381                  */
2382                 sort_finds(fctx);
2383                 result = ISC_R_SUCCESS;
2384         }
2385
2386         return (result);
2387 }
2388
2389 static inline void
2390 possibly_mark(fetchctx_t *fctx, dns_adbaddrinfo_t *addr)
2391 {
2392         isc_netaddr_t na;
2393         char buf[ISC_NETADDR_FORMATSIZE];
2394         isc_sockaddr_t *sa;
2395         isc_boolean_t aborted = ISC_FALSE;
2396         isc_boolean_t bogus;
2397         dns_acl_t *blackhole;
2398         isc_netaddr_t ipaddr;
2399         dns_peer_t *peer = NULL;
2400         dns_resolver_t *res;
2401         const char *msg = NULL;
2402
2403         sa = &addr->sockaddr;
2404
2405         res = fctx->res;
2406         isc_netaddr_fromsockaddr(&ipaddr, sa);
2407         blackhole = dns_dispatchmgr_getblackhole(res->dispatchmgr);
2408         (void) dns_peerlist_peerbyaddr(res->view->peers, &ipaddr, &peer);
2409
2410         if (blackhole != NULL) {
2411                 int match;
2412
2413                 if (dns_acl_match(&ipaddr, NULL, blackhole,
2414                                   &res->view->aclenv,
2415                                   &match, NULL) == ISC_R_SUCCESS &&
2416                     match > 0)
2417                         aborted = ISC_TRUE;
2418         }
2419
2420         if (peer != NULL &&
2421             dns_peer_getbogus(peer, &bogus) == ISC_R_SUCCESS &&
2422             bogus)
2423                 aborted = ISC_TRUE;
2424
2425         if (aborted) {
2426                 addr->flags |= FCTX_ADDRINFO_MARK;
2427                 msg = "ignoring blackholed / bogus server: ";
2428         } else if (isc_sockaddr_ismulticast(sa)) {
2429                 addr->flags |= FCTX_ADDRINFO_MARK;
2430                 msg = "ignoring multicast address: ";
2431         } else if (isc_sockaddr_isexperimental(sa)) {
2432                 addr->flags |= FCTX_ADDRINFO_MARK;
2433                 msg = "ignoring experimental address: ";
2434         } else if (sa->type.sa.sa_family != AF_INET6) {
2435                 return;
2436         } else if (IN6_IS_ADDR_V4MAPPED(&sa->type.sin6.sin6_addr)) {
2437                 addr->flags |= FCTX_ADDRINFO_MARK;
2438                 msg = "ignoring IPv6 mapped IPV4 address: ";
2439         } else if (IN6_IS_ADDR_V4COMPAT(&sa->type.sin6.sin6_addr)) {
2440                 addr->flags |= FCTX_ADDRINFO_MARK;
2441                 msg = "ignoring IPv6 compatibility IPV4 address: ";
2442         } else
2443                 return;
2444
2445         if (!isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(3)))
2446                 return;
2447
2448         isc_netaddr_fromsockaddr(&na, sa);
2449         isc_netaddr_format(&na, buf, sizeof(buf));
2450         FCTXTRACE2(msg, buf);
2451 }
2452
2453 static inline dns_adbaddrinfo_t *
2454 fctx_nextaddress(fetchctx_t *fctx) {
2455         dns_adbfind_t *find, *start;
2456         dns_adbaddrinfo_t *addrinfo;
2457         dns_adbaddrinfo_t *faddrinfo;
2458
2459         /*
2460          * Return the next untried address, if any.
2461          */
2462
2463         /*
2464          * Find the first unmarked forwarder (if any).
2465          */
2466         for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
2467              addrinfo != NULL;
2468              addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2469                 if (!UNMARKED(addrinfo))
2470                         continue;
2471                 possibly_mark(fctx, addrinfo);
2472                 if (UNMARKED(addrinfo)) {
2473                         addrinfo->flags |= FCTX_ADDRINFO_MARK;
2474                         fctx->find = NULL;
2475                         return (addrinfo);
2476                 }
2477         }
2478
2479         /*
2480          * No forwarders.  Move to the next find.
2481          */
2482
2483         fctx->attributes |= FCTX_ATTR_TRIEDFIND;
2484
2485         find = fctx->find;
2486         if (find == NULL)
2487                 find = ISC_LIST_HEAD(fctx->finds);
2488         else {
2489                 find = ISC_LIST_NEXT(find, publink);
2490                 if (find == NULL)
2491                         find = ISC_LIST_HEAD(fctx->finds);
2492         }
2493
2494         /*
2495          * Find the first unmarked addrinfo.
2496          */
2497         addrinfo = NULL;
2498         if (find != NULL) {
2499                 start = find;
2500                 do {
2501                         for (addrinfo = ISC_LIST_HEAD(find->list);
2502                              addrinfo != NULL;
2503                              addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2504                                 if (!UNMARKED(addrinfo))
2505                                         continue;
2506                                 possibly_mark(fctx, addrinfo);
2507                                 if (UNMARKED(addrinfo)) {
2508                                         addrinfo->flags |= FCTX_ADDRINFO_MARK;
2509                                         break;
2510                                 }
2511                         }
2512                         if (addrinfo != NULL)
2513                                 break;
2514                         find = ISC_LIST_NEXT(find, publink);
2515                         if (find == NULL)
2516                                 find = ISC_LIST_HEAD(fctx->finds);
2517                 } while (find != start);
2518         }
2519
2520         fctx->find = find;
2521         if (addrinfo != NULL)
2522                 return (addrinfo);
2523
2524         /*
2525          * No nameservers left.  Try alternates.
2526          */
2527
2528         fctx->attributes |= FCTX_ATTR_TRIEDALT;
2529
2530         find = fctx->altfind;
2531         if (find == NULL)
2532                 find = ISC_LIST_HEAD(fctx->altfinds);
2533         else {
2534                 find = ISC_LIST_NEXT(find, publink);
2535                 if (find == NULL)
2536                         find = ISC_LIST_HEAD(fctx->altfinds);
2537         }
2538
2539         /*
2540          * Find the first unmarked addrinfo.
2541          */
2542         addrinfo = NULL;
2543         if (find != NULL) {
2544                 start = find;
2545                 do {
2546                         for (addrinfo = ISC_LIST_HEAD(find->list);
2547                              addrinfo != NULL;
2548                              addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2549                                 if (!UNMARKED(addrinfo))
2550                                         continue;
2551                                 possibly_mark(fctx, addrinfo);
2552                                 if (UNMARKED(addrinfo)) {
2553                                         addrinfo->flags |= FCTX_ADDRINFO_MARK;
2554                                         break;
2555                                 }
2556                         }
2557                         if (addrinfo != NULL)
2558                                 break;
2559                         find = ISC_LIST_NEXT(find, publink);
2560                         if (find == NULL)
2561                                 find = ISC_LIST_HEAD(fctx->altfinds);
2562                 } while (find != start);
2563         }
2564
2565         faddrinfo = addrinfo;
2566
2567         /*
2568          * See if we have a better alternate server by address.
2569          */
2570
2571         for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
2572              addrinfo != NULL;
2573              addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2574                 if (!UNMARKED(addrinfo))
2575                         continue;
2576                 possibly_mark(fctx, addrinfo);
2577                 if (UNMARKED(addrinfo) &&
2578                     (faddrinfo == NULL ||
2579                      addrinfo->srtt < faddrinfo->srtt)) {
2580                         if (faddrinfo != NULL)
2581                                 faddrinfo->flags &= ~FCTX_ADDRINFO_MARK;
2582                         addrinfo->flags |= FCTX_ADDRINFO_MARK;
2583                         break;
2584                 }
2585         }
2586
2587         if (addrinfo == NULL) {
2588                 addrinfo = faddrinfo;
2589                 fctx->altfind = find;
2590         }
2591
2592         return (addrinfo);
2593 }
2594
2595 static void
2596 fctx_try(fetchctx_t *fctx) {
2597         isc_result_t result;
2598         dns_adbaddrinfo_t *addrinfo;
2599
2600         FCTXTRACE("try");
2601
2602         REQUIRE(!ADDRWAIT(fctx));
2603
2604         addrinfo = fctx_nextaddress(fctx);
2605         if (addrinfo == NULL) {
2606                 /*
2607                  * We have no more addresses.  Start over.
2608                  */
2609                 fctx_cancelqueries(fctx, ISC_TRUE);
2610                 fctx_cleanupfinds(fctx);
2611                 fctx_cleanupaltfinds(fctx);
2612                 fctx_cleanupforwaddrs(fctx);
2613                 fctx_cleanupaltaddrs(fctx);
2614                 result = fctx_getaddresses(fctx);
2615                 if (result == DNS_R_WAIT) {
2616                         /*
2617                          * Sleep waiting for addresses.
2618                          */
2619                         FCTXTRACE("addrwait");
2620                         fctx->attributes |= FCTX_ATTR_ADDRWAIT;
2621                         return;
2622                 } else if (result != ISC_R_SUCCESS) {
2623                         /*
2624                          * Something bad happened.
2625                          */
2626                         fctx_done(fctx, result);
2627                         return;
2628                 }
2629
2630                 addrinfo = fctx_nextaddress(fctx);
2631                 /*
2632                  * While we may have addresses from the ADB, they
2633                  * might be bad ones.  In this case, return SERVFAIL.
2634                  */
2635                 if (addrinfo == NULL) {
2636                         fctx_done(fctx, DNS_R_SERVFAIL);
2637                         return;
2638                 }
2639         }
2640
2641         result = fctx_query(fctx, addrinfo, fctx->options);
2642         if (result != ISC_R_SUCCESS)
2643                 fctx_done(fctx, result);
2644 }
2645
2646 static isc_boolean_t
2647 fctx_destroy(fetchctx_t *fctx) {
2648         dns_resolver_t *res;
2649         unsigned int bucketnum;
2650         isc_sockaddr_t *sa, *next_sa;
2651
2652         /*
2653          * Caller must be holding the bucket lock.
2654          */
2655
2656         REQUIRE(VALID_FCTX(fctx));
2657         REQUIRE(fctx->state == fetchstate_done ||
2658                 fctx->state == fetchstate_init);
2659         REQUIRE(ISC_LIST_EMPTY(fctx->events));
2660         REQUIRE(ISC_LIST_EMPTY(fctx->queries));
2661         REQUIRE(ISC_LIST_EMPTY(fctx->finds));
2662         REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
2663         REQUIRE(fctx->pending == 0);
2664         REQUIRE(fctx->references == 0);
2665         REQUIRE(ISC_LIST_EMPTY(fctx->validators));
2666
2667         FCTXTRACE("destroy");
2668
2669         res = fctx->res;
2670         bucketnum = fctx->bucketnum;
2671
2672         ISC_LIST_UNLINK(res->buckets[bucketnum].fctxs, fctx, link);
2673
2674         /*
2675          * Free bad.
2676          */
2677         for (sa = ISC_LIST_HEAD(fctx->bad);
2678              sa != NULL;
2679              sa = next_sa) {
2680                 next_sa = ISC_LIST_NEXT(sa, link);
2681                 ISC_LIST_UNLINK(fctx->bad, sa, link);
2682                 isc_mem_put(res->buckets[bucketnum].mctx, sa, sizeof(*sa));
2683         }
2684
2685         for (sa = ISC_LIST_HEAD(fctx->edns);
2686              sa != NULL;
2687              sa = next_sa) {
2688                 next_sa = ISC_LIST_NEXT(sa, link);
2689                 ISC_LIST_UNLINK(fctx->edns, sa, link);
2690                 isc_mem_put(res->buckets[bucketnum].mctx, sa, sizeof(*sa));
2691         }
2692
2693         for (sa = ISC_LIST_HEAD(fctx->edns512);
2694              sa != NULL;
2695              sa = next_sa) {
2696                 next_sa = ISC_LIST_NEXT(sa, link);
2697                 ISC_LIST_UNLINK(fctx->edns512, sa, link);
2698                 isc_mem_put(res->buckets[bucketnum].mctx, sa, sizeof(*sa));
2699         }
2700
2701         isc_timer_detach(&fctx->timer);
2702         dns_message_destroy(&fctx->rmessage);
2703         dns_message_destroy(&fctx->qmessage);
2704         if (dns_name_countlabels(&fctx->domain) > 0)
2705                 dns_name_free(&fctx->domain, res->buckets[bucketnum].mctx);
2706         if (dns_rdataset_isassociated(&fctx->nameservers))
2707                 dns_rdataset_disassociate(&fctx->nameservers);
2708         dns_name_free(&fctx->name, res->buckets[bucketnum].mctx);
2709         dns_db_detach(&fctx->cache);
2710         dns_adb_detach(&fctx->adb);
2711         isc_mem_free(res->buckets[bucketnum].mctx, fctx->info);
2712         isc_mem_put(res->buckets[bucketnum].mctx, fctx, sizeof(*fctx));
2713
2714         LOCK(&res->nlock);
2715         res->nfctx--;
2716         UNLOCK(&res->nlock);
2717
2718         if (res->buckets[bucketnum].exiting &&
2719             ISC_LIST_EMPTY(res->buckets[bucketnum].fctxs))
2720                 return (ISC_TRUE);
2721
2722         return (ISC_FALSE);
2723 }
2724
2725 /*
2726  * Fetch event handlers.
2727  */
2728
2729 static void
2730 fctx_timeout(isc_task_t *task, isc_event_t *event) {
2731         fetchctx_t *fctx = event->ev_arg;
2732         isc_timerevent_t *tevent = (isc_timerevent_t *)event;
2733         resquery_t *query;
2734
2735         REQUIRE(VALID_FCTX(fctx));
2736
2737         UNUSED(task);
2738
2739         FCTXTRACE("timeout");
2740
2741         if (event->ev_type == ISC_TIMEREVENT_LIFE) {
2742                 fctx_done(fctx, ISC_R_TIMEDOUT);
2743         } else {
2744                 isc_result_t result;
2745
2746                 fctx->timeouts++;
2747                 /*
2748                  * We could cancel the running queries here, or we could let
2749                  * them keep going.  Since we normally use separate sockets for
2750                  * different queries, we adopt the former approach to reduce
2751                  * the number of open sockets: cancel the oldest query if it
2752                  * expired after the query had started (this is usually the
2753                  * case but is not always so, depending on the task schedule
2754                  * timing).
2755                  */
2756                 query = ISC_LIST_HEAD(fctx->queries);
2757                 if (query != NULL &&
2758                     isc_time_compare(&tevent->due, &query->start) >= 0) {
2759                         fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
2760                 }
2761                 fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
2762                 /*
2763                  * Our timer has triggered.  Reestablish the fctx lifetime
2764                  * timer.
2765                  */
2766                 result = fctx_starttimer(fctx);
2767                 if (result != ISC_R_SUCCESS)
2768                         fctx_done(fctx, result);
2769                 else
2770                         /*
2771                          * Keep trying.
2772                          */
2773                         fctx_try(fctx);
2774         }
2775
2776         isc_event_free(&event);
2777 }
2778
2779 static void
2780 fctx_shutdown(fetchctx_t *fctx) {
2781         isc_event_t *cevent;
2782
2783         /*
2784          * Start the shutdown process for fctx, if it isn't already underway.
2785          */
2786
2787         FCTXTRACE("shutdown");
2788
2789         /*
2790          * The caller must be holding the appropriate bucket lock.
2791          */
2792
2793         if (fctx->want_shutdown)
2794                 return;
2795
2796         fctx->want_shutdown = ISC_TRUE;
2797
2798         /*
2799          * Unless we're still initializing (in which case the
2800          * control event is still outstanding), we need to post
2801          * the control event to tell the fetch we want it to
2802          * exit.
2803          */
2804         if (fctx->state != fetchstate_init) {
2805                 cevent = &fctx->control_event;
2806                 isc_task_send(fctx->res->buckets[fctx->bucketnum].task,
2807                               &cevent);
2808         }
2809 }
2810
2811 static void
2812 fctx_doshutdown(isc_task_t *task, isc_event_t *event) {
2813         fetchctx_t *fctx = event->ev_arg;
2814         isc_boolean_t bucket_empty = ISC_FALSE;
2815         dns_resolver_t *res;
2816         unsigned int bucketnum;
2817         dns_validator_t *validator;
2818
2819         REQUIRE(VALID_FCTX(fctx));
2820
2821         UNUSED(task);
2822
2823         res = fctx->res;
2824         bucketnum = fctx->bucketnum;
2825
2826         FCTXTRACE("doshutdown");
2827
2828         /*
2829          * An fctx that is shutting down is no longer in ADDRWAIT mode.
2830          */
2831         fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
2832
2833         /*
2834          * Cancel all pending validators.  Note that this must be done
2835          * without the bucket lock held, since that could cause deadlock.
2836          */
2837         validator = ISC_LIST_HEAD(fctx->validators);
2838         while (validator != NULL) {
2839                 dns_validator_cancel(validator);
2840                 validator = ISC_LIST_NEXT(validator, link);
2841         }
2842
2843         if (fctx->nsfetch != NULL)
2844                 dns_resolver_cancelfetch(fctx->nsfetch);
2845
2846         /*
2847          * Shut down anything that is still running on behalf of this
2848          * fetch.  To avoid deadlock with the ADB, we must do this
2849          * before we lock the bucket lock.
2850          */
2851         fctx_stopeverything(fctx, ISC_FALSE);
2852
2853         LOCK(&res->buckets[bucketnum].lock);
2854
2855         fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
2856
2857         INSIST(fctx->state == fetchstate_active ||
2858                fctx->state == fetchstate_done);
2859         INSIST(fctx->want_shutdown);
2860
2861         if (fctx->state != fetchstate_done) {
2862                 fctx->state = fetchstate_done;
2863                 fctx_sendevents(fctx, ISC_R_CANCELED);
2864         }
2865
2866         if (fctx->references == 0 && fctx->pending == 0 &&
2867             fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators))
2868                 bucket_empty = fctx_destroy(fctx);
2869
2870         UNLOCK(&res->buckets[bucketnum].lock);
2871
2872         if (bucket_empty)
2873                 empty_bucket(res);
2874 }
2875
2876 static void
2877 fctx_start(isc_task_t *task, isc_event_t *event) {
2878         fetchctx_t *fctx = event->ev_arg;
2879         isc_boolean_t done = ISC_FALSE, bucket_empty = ISC_FALSE;
2880         dns_resolver_t *res;
2881         unsigned int bucketnum;
2882
2883         REQUIRE(VALID_FCTX(fctx));
2884
2885         UNUSED(task);
2886
2887         res = fctx->res;
2888         bucketnum = fctx->bucketnum;
2889
2890         FCTXTRACE("start");
2891
2892         LOCK(&res->buckets[bucketnum].lock);
2893
2894         INSIST(fctx->state == fetchstate_init);
2895         if (fctx->want_shutdown) {
2896                 /*
2897                  * We haven't started this fctx yet, and we've been requested
2898                  * to shut it down.
2899                  */
2900                 fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
2901                 fctx->state = fetchstate_done;
2902                 fctx_sendevents(fctx, ISC_R_CANCELED);
2903                 /*
2904                  * Since we haven't started, we INSIST that we have no
2905                  * pending ADB finds and no pending validations.
2906                  */
2907                 INSIST(fctx->pending == 0);
2908                 INSIST(fctx->nqueries == 0);
2909                 INSIST(ISC_LIST_EMPTY(fctx->validators));
2910                 if (fctx->references == 0) {
2911                         /*
2912                          * It's now safe to destroy this fctx.
2913                          */
2914                         bucket_empty = fctx_destroy(fctx);
2915                 }
2916                 done = ISC_TRUE;
2917         } else {
2918                 /*
2919                  * Normal fctx startup.
2920                  */
2921                 fctx->state = fetchstate_active;
2922                 /*
2923                  * Reset the control event for later use in shutting down
2924                  * the fctx.
2925                  */
2926                 ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
2927                                DNS_EVENT_FETCHCONTROL, fctx_doshutdown, fctx,
2928                                NULL, NULL, NULL);
2929         }
2930
2931         UNLOCK(&res->buckets[bucketnum].lock);
2932
2933         if (!done) {
2934                 isc_result_t result;
2935
2936                 /*
2937                  * All is well.  Start working on the fetch.
2938                  */
2939                 result = fctx_starttimer(fctx);
2940                 if (result != ISC_R_SUCCESS)
2941                         fctx_done(fctx, result);
2942                 else
2943                         fctx_try(fctx);
2944         } else if (bucket_empty)
2945                 empty_bucket(res);
2946 }
2947
2948 /*
2949  * Fetch Creation, Joining, and Cancelation.
2950  */
2951
2952 static inline isc_result_t
2953 fctx_join(fetchctx_t *fctx, isc_task_t *task, isc_sockaddr_t *client,
2954           dns_messageid_t id, isc_taskaction_t action, void *arg,
2955           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
2956           dns_fetch_t *fetch)
2957 {
2958         isc_task_t *clone;
2959         dns_fetchevent_t *event;
2960
2961         FCTXTRACE("join");
2962
2963         /*
2964          * We store the task we're going to send this event to in the
2965          * sender field.  We'll make the fetch the sender when we actually
2966          * send the event.
2967          */
2968         clone = NULL;
2969         isc_task_attach(task, &clone);
2970         event = (dns_fetchevent_t *)
2971                 isc_event_allocate(fctx->res->mctx, clone, DNS_EVENT_FETCHDONE,
2972                                    action, arg, sizeof(*event));
2973         if (event == NULL) {
2974                 isc_task_detach(&clone);
2975                 return (ISC_R_NOMEMORY);
2976         }
2977         event->result = DNS_R_SERVFAIL;
2978         event->qtype = fctx->type;
2979         event->db = NULL;
2980         event->node = NULL;
2981         event->rdataset = rdataset;
2982         event->sigrdataset = sigrdataset;
2983         event->fetch = fetch;
2984         event->client = client;
2985         event->id = id;
2986         dns_fixedname_init(&event->foundname);
2987
2988         /*
2989          * Make sure that we can store the sigrdataset in the
2990          * first event if it is needed by any of the events.
2991          */
2992         if (event->sigrdataset != NULL)
2993                 ISC_LIST_PREPEND(fctx->events, event, ev_link);
2994         else
2995                 ISC_LIST_APPEND(fctx->events, event, ev_link);
2996         fctx->references++;
2997
2998         fetch->magic = DNS_FETCH_MAGIC;
2999         fetch->private = fctx;
3000
3001         return (ISC_R_SUCCESS);
3002 }
3003
3004 static isc_result_t
3005 fctx_create(dns_resolver_t *res, dns_name_t *name, dns_rdatatype_t type,
3006             dns_name_t *domain, dns_rdataset_t *nameservers,
3007             unsigned int options, unsigned int bucketnum, fetchctx_t **fctxp)
3008 {
3009         fetchctx_t *fctx;
3010         isc_result_t result;
3011         isc_result_t iresult;
3012         isc_interval_t interval;
3013         dns_fixedname_t fixed;
3014         unsigned int findoptions = 0;
3015         char buf[DNS_NAME_FORMATSIZE + DNS_RDATATYPE_FORMATSIZE];
3016         char typebuf[DNS_RDATATYPE_FORMATSIZE];
3017         dns_name_t suffix;
3018
3019         /*
3020          * Caller must be holding the lock for bucket number 'bucketnum'.
3021          */
3022         REQUIRE(fctxp != NULL && *fctxp == NULL);
3023
3024         fctx = isc_mem_get(res->buckets[bucketnum].mctx, sizeof(*fctx));
3025         if (fctx == NULL)
3026                 return (ISC_R_NOMEMORY);
3027         dns_name_format(name, buf, sizeof(buf));
3028         dns_rdatatype_format(type, typebuf, sizeof(typebuf));
3029         strcat(buf, "/");       /* checked */
3030         strcat(buf, typebuf);   /* checked */
3031         fctx->info = isc_mem_strdup(res->buckets[bucketnum].mctx, buf);
3032         if (fctx->info == NULL) {
3033                 result = ISC_R_NOMEMORY;
3034                 goto cleanup_fetch;
3035         }
3036         FCTXTRACE("create");
3037         dns_name_init(&fctx->name, NULL);
3038         result = dns_name_dup(name, res->buckets[bucketnum].mctx, &fctx->name);
3039         if (result != ISC_R_SUCCESS)
3040                 goto cleanup_info;
3041         dns_name_init(&fctx->domain, NULL);
3042         dns_rdataset_init(&fctx->nameservers);
3043
3044         fctx->type = type;
3045         fctx->options = options;
3046         /*
3047          * Note!  We do not attach to the task.  We are relying on the
3048          * resolver to ensure that this task doesn't go away while we are
3049          * using it.
3050          */
3051         fctx->res = res;
3052         fctx->references = 0;
3053         fctx->bucketnum = bucketnum;
3054         fctx->state = fetchstate_init;
3055         fctx->want_shutdown = ISC_FALSE;
3056         fctx->cloned = ISC_FALSE;
3057         ISC_LIST_INIT(fctx->queries);
3058         ISC_LIST_INIT(fctx->finds);
3059         ISC_LIST_INIT(fctx->altfinds);
3060         ISC_LIST_INIT(fctx->forwaddrs);
3061         ISC_LIST_INIT(fctx->altaddrs);
3062         ISC_LIST_INIT(fctx->forwarders);
3063         fctx->fwdpolicy = dns_fwdpolicy_none;
3064         ISC_LIST_INIT(fctx->bad);
3065         ISC_LIST_INIT(fctx->edns);
3066         ISC_LIST_INIT(fctx->edns512);
3067         ISC_LIST_INIT(fctx->validators);
3068         fctx->validator = NULL;
3069         fctx->find = NULL;
3070         fctx->altfind = NULL;
3071         fctx->pending = 0;
3072         fctx->restarts = 0;
3073         fctx->timeouts = 0;
3074         fctx->attributes = 0;
3075         fctx->spilled = ISC_FALSE;
3076         fctx->nqueries = 0;
3077
3078         dns_name_init(&fctx->nsname, NULL);
3079         fctx->nsfetch = NULL;
3080         dns_rdataset_init(&fctx->nsrrset);
3081
3082         if (domain == NULL) {
3083                 dns_forwarders_t *forwarders = NULL;
3084                 unsigned int labels;
3085
3086                 /*
3087                  * DS records are found in the parent server.
3088                  * Strip label to get the correct forwarder (if any).
3089                  */
3090                 if (fctx->type == dns_rdatatype_ds &&
3091                     dns_name_countlabels(name) > 1) {
3092                         dns_name_init(&suffix, NULL);
3093                         labels = dns_name_countlabels(name);
3094                         dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
3095                         name = &suffix;
3096                 }
3097                 dns_fixedname_init(&fixed);
3098                 domain = dns_fixedname_name(&fixed);
3099                 result = dns_fwdtable_find2(fctx->res->view->fwdtable, name,
3100                                             domain, &forwarders);
3101                 if (result == ISC_R_SUCCESS)
3102                         fctx->fwdpolicy = forwarders->fwdpolicy;
3103
3104                 if (fctx->fwdpolicy != dns_fwdpolicy_only) {
3105                         /*
3106                          * The caller didn't supply a query domain and
3107                          * nameservers, and we're not in forward-only mode,
3108                          * so find the best nameservers to use.
3109                          */
3110                         if (dns_rdatatype_atparent(type))
3111                                 findoptions |= DNS_DBFIND_NOEXACT;
3112                         result = dns_view_findzonecut(res->view, name, domain,
3113                                                       0, findoptions, ISC_TRUE,
3114                                                       &fctx->nameservers,
3115                                                       NULL);
3116                         if (result != ISC_R_SUCCESS)
3117                                 goto cleanup_name;
3118                         result = dns_name_dup(domain,
3119                                               res->buckets[bucketnum].mctx,
3120                                               &fctx->domain);
3121                         if (result != ISC_R_SUCCESS) {
3122                                 dns_rdataset_disassociate(&fctx->nameservers);
3123                                 goto cleanup_name;
3124                         }
3125                 } else {
3126                         /*
3127                          * We're in forward-only mode.  Set the query domain.
3128                          */
3129                         result = dns_name_dup(domain,
3130                                               res->buckets[bucketnum].mctx,
3131                                               &fctx->domain);
3132                         if (result != ISC_R_SUCCESS)
3133                                 goto cleanup_name;
3134                 }
3135         } else {
3136                 result = dns_name_dup(domain,
3137                                       res->buckets[bucketnum].mctx,
3138                                       &fctx->domain);
3139                 if (result != ISC_R_SUCCESS)
3140                         goto cleanup_name;
3141                 dns_rdataset_clone(nameservers, &fctx->nameservers);
3142         }
3143
3144         INSIST(dns_name_issubdomain(&fctx->name, &fctx->domain));
3145
3146         fctx->qmessage = NULL;
3147         result = dns_message_create(res->buckets[bucketnum].mctx,
3148                                     DNS_MESSAGE_INTENTRENDER,
3149                                     &fctx->qmessage);
3150
3151         if (result != ISC_R_SUCCESS)
3152                 goto cleanup_domain;
3153
3154         fctx->rmessage = NULL;
3155         result = dns_message_create(res->buckets[bucketnum].mctx,
3156                                     DNS_MESSAGE_INTENTPARSE,
3157                                     &fctx->rmessage);
3158
3159         if (result != ISC_R_SUCCESS)
3160                 goto cleanup_qmessage;
3161
3162         /*
3163          * Compute an expiration time for the entire fetch.
3164          */
3165         isc_interval_set(&interval, 30, 0);             /* XXXRTH constant */
3166         iresult = isc_time_nowplusinterval(&fctx->expires, &interval);
3167         if (iresult != ISC_R_SUCCESS) {
3168                 UNEXPECTED_ERROR(__FILE__, __LINE__,
3169                                  "isc_time_nowplusinterval: %s",
3170                                  isc_result_totext(iresult));
3171                 result = ISC_R_UNEXPECTED;
3172                 goto cleanup_rmessage;
3173         }
3174
3175         /*
3176          * Default retry interval initialization.  We set the interval now
3177          * mostly so it won't be uninitialized.  It will be set to the
3178          * correct value before a query is issued.
3179          */
3180         isc_interval_set(&fctx->interval, 2, 0);
3181
3182         /*
3183          * Create an inactive timer.  It will be made active when the fetch
3184          * is actually started.
3185          */
3186         fctx->timer = NULL;
3187         iresult = isc_timer_create(res->timermgr, isc_timertype_inactive,
3188                                    NULL, NULL,
3189                                    res->buckets[bucketnum].task, fctx_timeout,
3190                                    fctx, &fctx->timer);
3191         if (iresult != ISC_R_SUCCESS) {
3192                 UNEXPECTED_ERROR(__FILE__, __LINE__,
3193                                  "isc_timer_create: %s",
3194                                  isc_result_totext(iresult));
3195                 result = ISC_R_UNEXPECTED;
3196                 goto cleanup_rmessage;
3197         }
3198
3199         /*
3200          * Attach to the view's cache and adb.
3201          */
3202         fctx->cache = NULL;
3203         dns_db_attach(res->view->cachedb, &fctx->cache);
3204         fctx->adb = NULL;
3205         dns_adb_attach(res->view->adb, &fctx->adb);
3206
3207         ISC_LIST_INIT(fctx->events);
3208         ISC_LINK_INIT(fctx, link);
3209         fctx->magic = FCTX_MAGIC;
3210
3211         ISC_LIST_APPEND(res->buckets[bucketnum].fctxs, fctx, link);
3212
3213         LOCK(&res->nlock);
3214         res->nfctx++;
3215         UNLOCK(&res->nlock);
3216
3217         *fctxp = fctx;
3218
3219         return (ISC_R_SUCCESS);
3220
3221  cleanup_rmessage:
3222         dns_message_destroy(&fctx->rmessage);
3223
3224  cleanup_qmessage:
3225         dns_message_destroy(&fctx->qmessage);
3226
3227  cleanup_domain:
3228         if (dns_name_countlabels(&fctx->domain) > 0)
3229                 dns_name_free(&fctx->domain, res->buckets[bucketnum].mctx);
3230         if (dns_rdataset_isassociated(&fctx->nameservers))
3231                 dns_rdataset_disassociate(&fctx->nameservers);
3232
3233  cleanup_name:
3234         dns_name_free(&fctx->name, res->buckets[bucketnum].mctx);
3235
3236  cleanup_info:
3237         isc_mem_free(res->buckets[bucketnum].mctx, fctx->info);
3238
3239  cleanup_fetch:
3240         isc_mem_put(res->buckets[bucketnum].mctx, fctx, sizeof(*fctx));
3241
3242         return (result);
3243 }
3244
3245 /*
3246  * Handle Responses
3247  */
3248 static inline isc_boolean_t
3249 is_lame(fetchctx_t *fctx) {
3250         dns_message_t *message = fctx->rmessage;
3251         dns_name_t *name;
3252         dns_rdataset_t *rdataset;
3253         isc_result_t result;
3254
3255         if (message->rcode != dns_rcode_noerror &&
3256             message->rcode != dns_rcode_nxdomain)
3257                 return (ISC_FALSE);
3258
3259         if (message->counts[DNS_SECTION_ANSWER] != 0)
3260                 return (ISC_FALSE);
3261
3262         if (message->counts[DNS_SECTION_AUTHORITY] == 0)
3263                 return (ISC_FALSE);
3264
3265         result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
3266         while (result == ISC_R_SUCCESS) {
3267                 name = NULL;
3268                 dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
3269                 for (rdataset = ISC_LIST_HEAD(name->list);
3270                      rdataset != NULL;
3271                      rdataset = ISC_LIST_NEXT(rdataset, link)) {
3272                         dns_namereln_t namereln;
3273                         int order;
3274                         unsigned int labels;
3275                         if (rdataset->type != dns_rdatatype_ns)
3276                                 continue;
3277                         namereln = dns_name_fullcompare(name, &fctx->domain,
3278                                                         &order, &labels);
3279                         if (namereln == dns_namereln_equal &&
3280                             (message->flags & DNS_MESSAGEFLAG_AA) != 0)
3281                                 return (ISC_FALSE);
3282                         if (namereln == dns_namereln_subdomain)
3283                                 return (ISC_FALSE);
3284                         return (ISC_TRUE);
3285                 }
3286                 result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
3287         }
3288
3289         return (ISC_FALSE);
3290 }
3291
3292 static inline void
3293 log_lame(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo) {
3294         char namebuf[DNS_NAME_FORMATSIZE];
3295         char domainbuf[DNS_NAME_FORMATSIZE];
3296         char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3297
3298         dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3299         dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
3300         isc_sockaddr_format(&addrinfo->sockaddr, addrbuf, sizeof(addrbuf));
3301         isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
3302                       DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
3303                       "lame server resolving '%s' (in '%s'?): %s",
3304                       namebuf, domainbuf, addrbuf);
3305 }
3306
3307 static inline isc_result_t
3308 same_question(fetchctx_t *fctx) {
3309         isc_result_t result;
3310         dns_message_t *message = fctx->rmessage;
3311         dns_name_t *name;
3312         dns_rdataset_t *rdataset;
3313
3314         /*
3315          * Caller must be holding the fctx lock.
3316          */
3317
3318         /*
3319          * XXXRTH  Currently we support only one question.
3320          */
3321         if (message->counts[DNS_SECTION_QUESTION] != 1)
3322                 return (DNS_R_FORMERR);
3323
3324         result = dns_message_firstname(message, DNS_SECTION_QUESTION);
3325         if (result != ISC_R_SUCCESS)
3326                 return (result);
3327         name = NULL;
3328         dns_message_currentname(message, DNS_SECTION_QUESTION, &name);
3329         rdataset = ISC_LIST_HEAD(name->list);
3330         INSIST(rdataset != NULL);
3331         INSIST(ISC_LIST_NEXT(rdataset, link) == NULL);
3332         if (fctx->type != rdataset->type ||
3333             fctx->res->rdclass != rdataset->rdclass ||
3334             !dns_name_equal(&fctx->name, name))
3335                 return (DNS_R_FORMERR);
3336
3337         return (ISC_R_SUCCESS);
3338 }
3339
3340 static void
3341 clone_results(fetchctx_t *fctx) {
3342         dns_fetchevent_t *event, *hevent;
3343         isc_result_t result;
3344         dns_name_t *name, *hname;
3345
3346         FCTXTRACE("clone_results");
3347
3348         /*
3349          * Set up any other events to have the same data as the first
3350          * event.
3351          *
3352          * Caller must be holding the appropriate lock.
3353          */
3354
3355         fctx->cloned = ISC_TRUE;
3356         hevent = ISC_LIST_HEAD(fctx->events);
3357         if (hevent == NULL)
3358                 return;
3359         hname = dns_fixedname_name(&hevent->foundname);
3360         for (event = ISC_LIST_NEXT(hevent, ev_link);
3361              event != NULL;
3362              event = ISC_LIST_NEXT(event, ev_link)) {
3363                 name = dns_fixedname_name(&event->foundname);
3364                 result = dns_name_copy(hname, name, NULL);
3365                 if (result != ISC_R_SUCCESS)
3366                         event->result = result;
3367                 else
3368                         event->result = hevent->result;
3369                 dns_db_attach(hevent->db, &event->db);
3370                 dns_db_attachnode(hevent->db, hevent->node, &event->node);
3371                 INSIST(hevent->rdataset != NULL);
3372                 INSIST(event->rdataset != NULL);
3373                 if (dns_rdataset_isassociated(hevent->rdataset))
3374                         dns_rdataset_clone(hevent->rdataset, event->rdataset);
3375                 INSIST(! (hevent->sigrdataset == NULL &&
3376                           event->sigrdataset != NULL));
3377                 if (hevent->sigrdataset != NULL &&
3378                     dns_rdataset_isassociated(hevent->sigrdataset) &&
3379                     event->sigrdataset != NULL)
3380                         dns_rdataset_clone(hevent->sigrdataset,
3381                                            event->sigrdataset);
3382         }
3383 }
3384
/*
 * Predicates on the attribute bits of an rdataset pointer 'r'.  Each
 * evaluates to non-zero when the corresponding DNS_RDATASETATTR_* bit
 * is set in (r)->attributes.
 */
#define RDATASET_HASATTR(r, a)  (((r)->attributes & (a)) != 0)

#define CACHE(r)        RDATASET_HASATTR(r, DNS_RDATASETATTR_CACHE)
#define ANSWER(r)       RDATASET_HASATTR(r, DNS_RDATASETATTR_ANSWER)
#define ANSWERSIG(r)    RDATASET_HASATTR(r, DNS_RDATASETATTR_ANSWERSIG)
#define EXTERNAL(r)     RDATASET_HASATTR(r, DNS_RDATASETATTR_EXTERNAL)
#define CHAINING(r)     RDATASET_HASATTR(r, DNS_RDATASETATTR_CHAINING)
#define CHASE(r)        RDATASET_HASATTR(r, DNS_RDATASETATTR_CHASE)
#define CHECKNAMES(r)   RDATASET_HASATTR(r, DNS_RDATASETATTR_CHECKNAMES)
3392
3393
3394 /*
3395  * Destroy '*fctx' if it is ready to be destroyed (i.e., if it has
3396  * no references and is no longer waiting for any events).  If this
3397  * was the last fctx in the resolver, destroy the resolver.
3398  *
3399  * Requires:
3400  *      '*fctx' is shutting down.
3401  */
3402 static void
3403 maybe_destroy(fetchctx_t *fctx) {
3404         unsigned int bucketnum;
3405         isc_boolean_t bucket_empty = ISC_FALSE;
3406         dns_resolver_t *res = fctx->res;
3407         dns_validator_t *validator, *next_validator;
3408
3409         REQUIRE(SHUTTINGDOWN(fctx));
3410
3411         if (fctx->pending != 0 || fctx->nqueries != 0)
3412                 return;
3413
3414         for (validator = ISC_LIST_HEAD(fctx->validators);
3415              validator != NULL; validator = next_validator) {
3416                 next_validator = ISC_LIST_NEXT(validator, link);
3417                 dns_validator_cancel(validator);
3418                 /*
3419                  * If this is a active validator wait for the cancel
3420                  * to complete before calling dns_validator_destroy().
3421                  */
3422                 if (validator == fctx->validator)
3423                         continue;
3424                 ISC_LIST_UNLINK(fctx->validators, validator, link);
3425                 dns_validator_destroy(&validator);
3426         }
3427
3428         bucketnum = fctx->bucketnum;
3429         LOCK(&res->buckets[bucketnum].lock);
3430         if (fctx->references == 0 && ISC_LIST_EMPTY(fctx->validators))
3431                 bucket_empty = fctx_destroy(fctx);
3432         UNLOCK(&res->buckets[bucketnum].lock);
3433
3434         if (bucket_empty)
3435                 empty_bucket(res);
3436 }
3437
3438 /*
3439  * The validator has finished.
3440  */
3441 static void
3442 validated(isc_task_t *task, isc_event_t *event) {
3443         isc_result_t result = ISC_R_SUCCESS;
3444         isc_result_t eresult = ISC_R_SUCCESS;
3445         isc_stdtime_t now;
3446         fetchctx_t *fctx;
3447         dns_validatorevent_t *vevent;
3448         dns_fetchevent_t *hevent;
3449         dns_rdataset_t *ardataset = NULL;
3450         dns_rdataset_t *asigrdataset = NULL;
3451         dns_dbnode_t *node = NULL;
3452         isc_boolean_t negative;
3453         isc_boolean_t chaining;
3454         isc_boolean_t sentresponse;
3455         isc_uint32_t ttl;
3456         dns_dbnode_t *nsnode = NULL;
3457         dns_name_t *name;
3458         dns_rdataset_t *rdataset;
3459         dns_rdataset_t *sigrdataset;
3460         dns_valarg_t *valarg;
3461         dns_adbaddrinfo_t *addrinfo;
3462
3463         UNUSED(task); /* for now */
3464
3465         REQUIRE(event->ev_type == DNS_EVENT_VALIDATORDONE);
3466         valarg = event->ev_arg;
3467         fctx = valarg->fctx;
3468         addrinfo = valarg->addrinfo;
3469         REQUIRE(VALID_FCTX(fctx));
3470         REQUIRE(!ISC_LIST_EMPTY(fctx->validators));
3471
3472         vevent = (dns_validatorevent_t *)event;
3473
3474         FCTXTRACE("received validation completion event");
3475
3476         ISC_LIST_UNLINK(fctx->validators, vevent->validator, link);
3477         fctx->validator = NULL;
3478
3479         /*
3480          * Destroy the validator early so that we can
3481          * destroy the fctx if necessary.
3482          */
3483         dns_validator_destroy(&vevent->validator);
3484         isc_mem_put(fctx->res->buckets[fctx->bucketnum].mctx,
3485                     valarg, sizeof(*valarg));
3486
3487         negative = ISC_TF(vevent->rdataset == NULL);
3488
3489         sentresponse = ISC_TF((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0);
3490
3491         /*
3492          * If shutting down, ignore the results.  Check to see if we're
3493          * done waiting for validator completions and ADB pending events; if
3494          * so, destroy the fctx.
3495          */
3496         if (SHUTTINGDOWN(fctx) && !sentresponse) {
3497                 maybe_destroy(fctx);    /* Locks bucket. */
3498                 goto cleanup_event;
3499         }
3500
3501         LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
3502
3503         /*
3504          * If chaining, we need to make sure that the right result code is
3505          * returned, and that the rdatasets are bound.
3506          */
3507         if (vevent->result == ISC_R_SUCCESS &&
3508             !negative &&
3509             vevent->rdataset != NULL &&
3510             CHAINING(vevent->rdataset))
3511         {
3512                 if (vevent->rdataset->type == dns_rdatatype_cname)
3513                         eresult = DNS_R_CNAME;
3514                 else {
3515                         INSIST(vevent->rdataset->type == dns_rdatatype_dname);
3516                         eresult = DNS_R_DNAME;
3517                 }
3518                 chaining = ISC_TRUE;
3519         } else
3520                 chaining = ISC_FALSE;
3521
3522         /*
3523          * Either we're not shutting down, or we are shutting down but want
3524          * to cache the result anyway (if this was a validation started by
3525          * a query with cd set)
3526          */
3527
3528         hevent = ISC_LIST_HEAD(fctx->events);
3529         if (hevent != NULL) {
3530                 if (!negative && !chaining &&
3531                     (fctx->type == dns_rdatatype_any ||
3532                      fctx->type == dns_rdatatype_rrsig ||
3533                      fctx->type == dns_rdatatype_sig)) {
3534                         /*
3535                          * Don't bind rdatasets; the caller
3536                          * will iterate the node.
3537                          */
3538                 } else {
3539                         ardataset = hevent->rdataset;
3540                         asigrdataset = hevent->sigrdataset;
3541                 }
3542         }
3543
3544         if (vevent->result != ISC_R_SUCCESS) {
3545                 FCTXTRACE("validation failed");
3546                 result = ISC_R_NOTFOUND;
3547                 if (vevent->rdataset != NULL)
3548                         result = dns_db_findnode(fctx->cache, vevent->name,
3549                                                  ISC_TRUE, &node);
3550                 if (result == ISC_R_SUCCESS)
3551                         (void)dns_db_deleterdataset(fctx->cache, node, NULL,
3552                                                     vevent->type, 0);
3553                 if (result == ISC_R_SUCCESS && vevent->sigrdataset != NULL)
3554                         (void)dns_db_deleterdataset(fctx->cache, node, NULL,
3555                                                     dns_rdatatype_rrsig,
3556                                                     vevent->type);
3557                 if (result == ISC_R_SUCCESS)
3558                         dns_db_detachnode(fctx->cache, &node);
3559                 result = vevent->result;
3560                 add_bad(fctx, addrinfo, result);
3561                 isc_event_free(&event);
3562                 UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
3563                 INSIST(fctx->validator == NULL);
3564                 fctx->validator = ISC_LIST_HEAD(fctx->validators);
3565                 if (fctx->validator != NULL) {
3566                         dns_validator_send(fctx->validator);
3567                 } else if (sentresponse)
3568                         fctx_done(fctx, result);        /* Locks bucket. */
3569                 else
3570                         fctx_try(fctx);                 /* Locks bucket. */
3571                 return;
3572         }
3573
3574         isc_stdtime_get(&now);
3575
3576         if (negative) {
3577                 dns_rdatatype_t covers;
3578                 FCTXTRACE("nonexistence validation OK");
3579
3580                 if (fctx->rmessage->rcode == dns_rcode_nxdomain)
3581                         covers = dns_rdatatype_any;
3582                 else
3583                         covers = fctx->type;
3584
3585                 result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE,
3586                                          &node);
3587                 if (result != ISC_R_SUCCESS)
3588                         goto noanswer_response;
3589
3590                 /*
3591                  * If we are asking for a SOA record set the cache time
3592                  * to zero to facilitate locating the containing zone of
3593                  * a arbitary zone.
3594                  */
3595                 ttl = fctx->res->view->maxncachettl;
3596                 if (fctx->type == dns_rdatatype_soa &&
3597                     covers == dns_rdatatype_any &&
3598                     fctx->res->zero_no_soa_ttl)
3599                         ttl = 0;
3600
3601                 result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
3602                                            covers, now, ttl,
3603                                            ardataset, &eresult);
3604                 if (result != ISC_R_SUCCESS)
3605                         goto noanswer_response;
3606                 goto answer_response;
3607         }
3608
3609         FCTXTRACE("validation OK");
3610
3611         if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL) {
3612
3613                 result = dns_rdataset_addnoqname(vevent->rdataset,
3614                                    vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF]);
3615                 RUNTIME_CHECK(result == ISC_R_SUCCESS);
3616                 INSIST(vevent->sigrdataset != NULL);
3617                 vevent->sigrdataset->ttl = vevent->rdataset->ttl;
3618         }
3619
3620         /*
3621          * The data was already cached as pending data.
3622          * Re-cache it as secure and bind the cached
3623          * rdatasets to the first event on the fetch
3624          * event list.
3625          */
3626         result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE, &node);
3627         if (result != ISC_R_SUCCESS)
3628                 goto noanswer_response;
3629
3630         result = dns_db_addrdataset(fctx->cache, node, NULL, now,
3631                                     vevent->rdataset, 0, ardataset);
3632         if (result != ISC_R_SUCCESS &&
3633             result != DNS_R_UNCHANGED)
3634                 goto noanswer_response;
3635         if (ardataset != NULL && ardataset->type == 0) {
3636                 if (NXDOMAIN(ardataset))
3637                         eresult = DNS_R_NCACHENXDOMAIN;
3638                 else
3639                         eresult = DNS_R_NCACHENXRRSET;
3640         } else if (vevent->sigrdataset != NULL) {
3641                 result = dns_db_addrdataset(fctx->cache, node, NULL, now,
3642                                             vevent->sigrdataset, 0,
3643                                             asigrdataset);
3644                 if (result != ISC_R_SUCCESS &&
3645                     result != DNS_R_UNCHANGED)
3646                         goto noanswer_response;
3647         }
3648
3649         if (sentresponse) {
3650                 /*
3651                  * If we only deferred the destroy because we wanted to cache
3652                  * the data, destroy now.
3653                  */
3654                 dns_db_detachnode(fctx->cache, &node);
3655                 UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
3656                 if (SHUTTINGDOWN(fctx))
3657                         maybe_destroy(fctx);    /* Locks bucket. */
3658                 goto cleanup_event;
3659         }
3660
3661         if (!ISC_LIST_EMPTY(fctx->validators)) {
3662                 INSIST(!negative);
3663                 INSIST(fctx->type == dns_rdatatype_any ||
3664                        fctx->type == dns_rdatatype_rrsig ||
3665                        fctx->type == dns_rdatatype_sig);
3666                 /*
3667                  * Don't send a response yet - we have
3668                  * more rdatasets that still need to
3669                  * be validated.
3670                  */
3671                 dns_db_detachnode(fctx->cache, &node);
3672                 UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
3673                 dns_validator_send(ISC_LIST_HEAD(fctx->validators));
3674                 goto cleanup_event;
3675         }
3676
3677  answer_response:
3678         /*
3679          * Cache any NS/NSEC records that happened to be validated.
3680          */
3681         result = dns_message_firstname(fctx->rmessage, DNS_SECTION_AUTHORITY);
3682         while (result == ISC_R_SUCCESS) {
3683                 name = NULL;
3684                 dns_message_currentname(fctx->rmessage, DNS_SECTION_AUTHORITY,
3685                                         &name);
3686                 for (rdataset = ISC_LIST_HEAD(name->list);
3687                      rdataset != NULL;
3688                      rdataset = ISC_LIST_NEXT(rdataset, link)) {
3689                         if ((rdataset->type != dns_rdatatype_ns &&
3690                              rdataset->type != dns_rdatatype_nsec) ||
3691                             rdataset->trust != dns_trust_secure)
3692                                 continue;
3693                         for (sigrdataset = ISC_LIST_HEAD(name->list);
3694                              sigrdataset != NULL;
3695                              sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
3696                                 if (sigrdataset->type != dns_rdatatype_rrsig ||
3697                                     sigrdataset->covers != rdataset->type)
3698                                         continue;
3699                                 break;
3700                         }
3701                         if (sigrdataset == NULL ||
3702                             sigrdataset->trust != dns_trust_secure)
3703                                 continue;
3704                         result = dns_db_findnode(fctx->cache, name, ISC_TRUE,
3705                                                  &nsnode);
3706                         if (result != ISC_R_SUCCESS)
3707                                 continue;
3708
3709                         result = dns_db_addrdataset(fctx->cache, nsnode, NULL,
3710                                                     now, rdataset, 0, NULL);
3711                         if (result == ISC_R_SUCCESS)
3712                                 result = dns_db_addrdataset(fctx->cache, nsnode,
3713                                                             NULL, now,
3714                                                             sigrdataset, 0,
3715                                                             NULL);
3716                         dns_db_detachnode(fctx->cache, &nsnode);
3717                 }
3718                 result = dns_message_nextname(fctx->rmessage,
3719                                               DNS_SECTION_AUTHORITY);
3720         }
3721
3722         result = ISC_R_SUCCESS;
3723
3724         /*
3725          * Respond with an answer, positive or negative,
3726          * as opposed to an error.  'node' must be non-NULL.
3727          */
3728
3729         fctx->attributes |= FCTX_ATTR_HAVEANSWER;
3730
3731         if (hevent != NULL) {
3732                 hevent->result = eresult;
3733                 RUNTIME_CHECK(dns_name_copy(vevent->name,
3734                               dns_fixedname_name(&hevent->foundname), NULL)
3735                               == ISC_R_SUCCESS);
3736                 dns_db_attach(fctx->cache, &hevent->db);
3737                 dns_db_transfernode(fctx->cache, &node, &hevent->node);
3738                 clone_results(fctx);
3739         }
3740
3741  noanswer_response:
3742         if (node != NULL)
3743                 dns_db_detachnode(fctx->cache, &node);
3744
3745         UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
3746
3747         fctx_done(fctx, result);        /* Locks bucket. */
3748
3749  cleanup_event:
3750         INSIST(node == NULL);
3751         isc_event_free(&event);
3752 }
3753
3754 static inline isc_result_t
3755 cache_name(fetchctx_t *fctx, dns_name_t *name, dns_adbaddrinfo_t *addrinfo,
3756            isc_stdtime_t now)
3757 {
3758         dns_rdataset_t *rdataset, *sigrdataset;
3759         dns_rdataset_t *addedrdataset, *ardataset, *asigrdataset;
3760         dns_rdataset_t *valrdataset = NULL, *valsigrdataset = NULL;
3761         dns_dbnode_t *node, **anodep;
3762         dns_db_t **adbp;
3763         dns_name_t *aname;
3764         dns_resolver_t *res;
3765         isc_boolean_t need_validation, secure_domain, have_answer;
3766         isc_result_t result, eresult;
3767         dns_fetchevent_t *event;
3768         unsigned int options;
3769         isc_task_t *task;
3770         isc_boolean_t fail;
3771         unsigned int valoptions = 0;
3772
3773         /*
3774          * The appropriate bucket lock must be held.
3775          */
3776
3777         res = fctx->res;
3778         need_validation = ISC_FALSE;
3779         secure_domain = ISC_FALSE;
3780         have_answer = ISC_FALSE;
3781         eresult = ISC_R_SUCCESS;
3782         task = res->buckets[fctx->bucketnum].task;
3783
3784         /*
3785          * Is DNSSEC validation required for this name?
3786          */
3787         if (res->view->enablevalidation) {
3788                 result = dns_keytable_issecuredomain(res->view->secroots, name,
3789                                                      &secure_domain);
3790                 if (result != ISC_R_SUCCESS)
3791                         return (result);
3792
3793                 if (!secure_domain && res->view->dlv != NULL) {
3794                         valoptions = DNS_VALIDATOR_DLV;
3795                         secure_domain = ISC_TRUE;
3796                 }
3797         }
3798
3799         if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
3800                 need_validation = ISC_FALSE;
3801         else
3802                 need_validation = secure_domain;
3803
3804         adbp = NULL;
3805         aname = NULL;
3806         anodep = NULL;
3807         ardataset = NULL;
3808         asigrdataset = NULL;
3809         event = NULL;
3810         if ((name->attributes & DNS_NAMEATTR_ANSWER) != 0 &&
3811             !need_validation) {
3812                 have_answer = ISC_TRUE;
3813                 event = ISC_LIST_HEAD(fctx->events);
3814                 if (event != NULL) {
3815                         adbp = &event->db;
3816                         aname = dns_fixedname_name(&event->foundname);
3817                         result = dns_name_copy(name, aname, NULL);
3818                         if (result != ISC_R_SUCCESS)
3819                                 return (result);
3820                         anodep = &event->node;
3821                         /*
3822                          * If this is an ANY, SIG or RRSIG query, we're not
3823                          * going to return any rdatasets, unless we encountered
3824                          * a CNAME or DNAME as "the answer".  In this case,
3825                          * we're going to return DNS_R_CNAME or DNS_R_DNAME
3826                          * and we must set up the rdatasets.
3827                          */
3828                         if ((fctx->type != dns_rdatatype_any &&
3829                              fctx->type != dns_rdatatype_rrsig &&
3830                              fctx->type != dns_rdatatype_sig) ||
3831                             (name->attributes & DNS_NAMEATTR_CHAINING) != 0) {
3832                                 ardataset = event->rdataset;
3833                                 asigrdataset = event->sigrdataset;
3834                         }
3835                 }
3836         }
3837
3838         /*
3839          * Find or create the cache node.
3840          */
3841         node = NULL;
3842         result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
3843         if (result != ISC_R_SUCCESS)
3844                 return (result);
3845
3846         /*
3847          * Cache or validate each cacheable rdataset.
3848          */
3849         fail = ISC_TF((fctx->res->options & DNS_RESOLVER_CHECKNAMESFAIL) != 0);
3850         for (rdataset = ISC_LIST_HEAD(name->list);
3851              rdataset != NULL;
3852              rdataset = ISC_LIST_NEXT(rdataset, link)) {
3853                 if (!CACHE(rdataset))
3854                         continue;
3855                 if (CHECKNAMES(rdataset)) {
3856                         char namebuf[DNS_NAME_FORMATSIZE];
3857                         char typebuf[DNS_RDATATYPE_FORMATSIZE];
3858                         char classbuf[DNS_RDATATYPE_FORMATSIZE];
3859
3860                         dns_name_format(name, namebuf, sizeof(namebuf));
3861                         dns_rdatatype_format(rdataset->type, typebuf,
3862                                              sizeof(typebuf));
3863                         dns_rdataclass_format(rdataset->rdclass, classbuf,
3864                                               sizeof(classbuf));
3865                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3866                                       DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
3867                                       "check-names %s %s/%s/%s",
3868                                       fail ? "failure" : "warning",
3869                                       namebuf, typebuf, classbuf);
3870                         if (fail) {
3871                                 if (ANSWER(rdataset)) {
3872                                         dns_db_detachnode(fctx->cache, &node);
3873                                         return (DNS_R_BADNAME);
3874                                 }
3875                                 continue;
3876                         }
3877                 }
3878
3879                 /*
3880                  * Enforce the configure maximum cache TTL.
3881                  */
3882                 if (rdataset->ttl > res->view->maxcachettl)
3883                         rdataset->ttl = res->view->maxcachettl;
3884
3885                 /*
3886                  * If this rrset is in a secure domain, do DNSSEC validation
3887                  * for it, unless it is glue.
3888                  */
3889                 if (secure_domain && rdataset->trust != dns_trust_glue) {
3890                         dns_trust_t trust;
3891                         /*
3892                          * RRSIGs are validated as part of validating the
3893                          * type they cover.
3894                          */
3895                         if (rdataset->type == dns_rdatatype_rrsig)
3896                                 continue;
3897                         /*
3898                          * Find the SIG for this rdataset, if we have it.
3899                          */
3900                         for (sigrdataset = ISC_LIST_HEAD(name->list);
3901                              sigrdataset != NULL;
3902                              sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
3903                                 if (sigrdataset->type == dns_rdatatype_rrsig &&
3904                                     sigrdataset->covers == rdataset->type)
3905                                         break;
3906                         }
3907                         if (sigrdataset == NULL) {
3908                                 if (!ANSWER(rdataset) && need_validation) {
3909                                         /*
3910                                          * Ignore non-answer rdatasets that
3911                                          * are missing signatures.
3912                                          */
3913                                         continue;
3914                                 }
3915                         }
3916
3917                         /*
3918                          * Normalize the rdataset and sigrdataset TTLs.
3919                          */
3920                         if (sigrdataset != NULL) {
3921                                 rdataset->ttl = ISC_MIN(rdataset->ttl,
3922                                                         sigrdataset->ttl);
3923                                 sigrdataset->ttl = rdataset->ttl;
3924                         }
3925
3926                         /*
3927                          * Reject out of bailiwick additional records
3928                          * without RRSIGs as they can't possibly validate
3929                          * as "secure" and as we will never never want to
3930                          * store these as "answers" after validation.
3931                          */
3932                         if (rdataset->trust == dns_trust_additional &&
3933                             sigrdataset == NULL && EXTERNAL(rdataset))
3934                                 continue;
3935  
3936                         /*
3937                          * XXXMPA: If we store as "answer" after validating
3938                          * then we need to do bailiwick processing and
3939                          * also need to track whether RRsets are in or
3940                          * out of bailiwick.  This will require another
3941                          * pending trust level.
3942                          *
3943                          * Cache this rdataset/sigrdataset pair as
3944                          * pending data.  Track whether it was additional
3945                          * or not.
3946                          */
3947                         if (rdataset->trust == dns_trust_additional)
3948                                 trust = dns_trust_pending_additional;
3949                         else
3950                                 trust = dns_trust_pending_answer;
3951  
3952                         rdataset->trust = trust;
3953                         if (sigrdataset != NULL)
3954                                 sigrdataset->trust = trust;
3955                         if (!need_validation)
3956                                 addedrdataset = ardataset;
3957                         else
3958                                 addedrdataset = NULL;
3959                         result = dns_db_addrdataset(fctx->cache, node, NULL,
3960                                                     now, rdataset, 0,
3961                                                     addedrdataset);
3962                         if (result == DNS_R_UNCHANGED) {
3963                                 result = ISC_R_SUCCESS;
3964                                 if (!need_validation &&
3965                                     ardataset != NULL &&
3966                                     ardataset->type == 0) {
3967                                         /*
3968                                          * The answer in the cache is better
3969                                          * than the answer we found, and is
3970                                          * a negative cache entry, so we
3971                                          * must set eresult appropriately.
3972                                          */
3973                                         if (NXDOMAIN(ardataset))
3974                                                 eresult = DNS_R_NCACHENXDOMAIN;
3975                                         else
3976                                                 eresult = DNS_R_NCACHENXRRSET;
3977                                         /*
3978                                          * We have a negative response from
3979                                          * the cache so don't attempt to
3980                                          * add the RRSIG rrset.
3981                                          */
3982                                         continue;
3983                                 }
3984                         }
3985                         if (result != ISC_R_SUCCESS)
3986                                 break;
3987                         if (sigrdataset != NULL) {
3988                                 if (!need_validation)
3989                                         addedrdataset = asigrdataset;
3990                                 else
3991                                         addedrdataset = NULL;
3992                                 result = dns_db_addrdataset(fctx->cache,
3993                                                             node, NULL, now,
3994                                                             sigrdataset, 0,
3995                                                             addedrdataset);
3996                                 if (result == DNS_R_UNCHANGED)
3997                                         result = ISC_R_SUCCESS;
3998                                 if (result != ISC_R_SUCCESS)
3999                                         break;
4000                         } else if (!ANSWER(rdataset))
4001                                 continue;
4002
4003                         if (ANSWER(rdataset) && need_validation) {
4004                                 if (fctx->type != dns_rdatatype_any &&
4005                                     fctx->type != dns_rdatatype_rrsig &&
4006                                     fctx->type != dns_rdatatype_sig) {
4007                                         /*
4008                                          * This is The Answer.  We will
4009                                          * validate it, but first we cache
4010                                          * the rest of the response - it may
4011                                          * contain useful keys.
4012                                          */
4013                                         INSIST(valrdataset == NULL &&
4014                                                valsigrdataset == NULL);
4015                                         valrdataset = rdataset;
4016                                         valsigrdataset = sigrdataset;
4017                                 } else {
4018                                         /*
4019                                          * This is one of (potentially)
4020                                          * multiple answers to an ANY
4021                                          * or SIG query.  To keep things
4022                                          * simple, we just start the
4023                                          * validator right away rather
4024                                          * than caching first and
4025                                          * having to remember which
4026                                          * rdatasets needed validation.
4027                                          */
4028                                         result = valcreate(fctx, addrinfo,
4029                                                            name, rdataset->type,
4030                                                            rdataset,
4031                                                            sigrdataset,
4032                                                            valoptions, task);
4033                                         /*
4034                                          * Defer any further validations.
4035                                          * This prevents multiple validators
4036                                          * from manipulating fctx->rmessage
4037                                          * simultaneously.
4038                                          */
4039                                         valoptions |= DNS_VALIDATOR_DEFER;
4040                                 }
4041                         } else if (CHAINING(rdataset)) {
4042                                 if (rdataset->type == dns_rdatatype_cname)
4043                                         eresult = DNS_R_CNAME;
4044                                 else {
4045                                         INSIST(rdataset->type ==
4046                                                dns_rdatatype_dname);
4047                                         eresult = DNS_R_DNAME;
4048                                 }
4049                         }
4050                 } else if (!EXTERNAL(rdataset)) {
4051                         /*
4052                          * It's OK to cache this rdataset now.
4053                          */
4054                         if (ANSWER(rdataset))
4055                                 addedrdataset = ardataset;
4056                         else if (ANSWERSIG(rdataset))
4057                                 addedrdataset = asigrdataset;
4058                         else
4059                                 addedrdataset = NULL;
4060                         if (CHAINING(rdataset)) {
4061                                 if (rdataset->type == dns_rdatatype_cname)
4062                                         eresult = DNS_R_CNAME;
4063                                 else {
4064                                         INSIST(rdataset->type ==
4065                                                dns_rdatatype_dname);
4066                                         eresult = DNS_R_DNAME;
4067                                 }
4068                         }
4069                         if (rdataset->trust == dns_trust_glue &&
4070                             (rdataset->type == dns_rdatatype_ns ||
4071                              (rdataset->type == dns_rdatatype_rrsig &&
4072                               rdataset->covers == dns_rdatatype_ns))) {
4073                                 /*
4074                                  * If the trust level is 'dns_trust_glue'
4075                                  * then we are adding data from a referral
4076                                  * we got while executing the search algorithm.
4077                                  * New referral data always takes precedence
4078                                  * over the existing cache contents.
4079                                  */
4080                                 options = DNS_DBADD_FORCE;
4081                         } else
4082                                 options = 0;
4083                         /*
4084                          * Now we can add the rdataset.
4085                          */
4086                         result = dns_db_addrdataset(fctx->cache,
4087                                                     node, NULL, now,
4088                                                     rdataset,
4089                                                     options,
4090                                                     addedrdataset);
4091                         if (result == DNS_R_UNCHANGED) {
4092                                 if (ANSWER(rdataset) &&
4093                                     ardataset != NULL &&
4094                                     ardataset->type == 0) {
4095                                         /*
4096                                          * The answer in the cache is better
4097                                          * than the answer we found, and is
4098                                          * a negative cache entry, so we
4099                                          * must set eresult appropriately.
4100                                          */
4101                                         if (NXDOMAIN(ardataset))
4102                                                 eresult = DNS_R_NCACHENXDOMAIN;
4103                                         else
4104                                                 eresult = DNS_R_NCACHENXRRSET;
4105                                 }
4106                                 result = ISC_R_SUCCESS;
4107                         } else if (result != ISC_R_SUCCESS)
4108                                 break;
4109                 }
4110         }
4111
4112         if (valrdataset != NULL)
4113                 result = valcreate(fctx, addrinfo, name, fctx->type,
4114                                    valrdataset, valsigrdataset, valoptions,
4115                                    task);
4116
4117         if (result == ISC_R_SUCCESS && have_answer) {
4118                 fctx->attributes |= FCTX_ATTR_HAVEANSWER;
4119                 if (event != NULL) {
4120                         /*
4121                          * Negative results must be indicated in event->result.
4122                          */
4123                         if (dns_rdataset_isassociated(event->rdataset) &&
4124                             event->rdataset->type == dns_rdatatype_none) {
4125                                 INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
4126                                        eresult == DNS_R_NCACHENXRRSET);
4127                         }
4128                         event->result = eresult;
4129                         dns_db_attach(fctx->cache, adbp);
4130                         dns_db_transfernode(fctx->cache, &node, anodep);
4131                         clone_results(fctx);
4132                 }
4133         }
4134
4135         if (node != NULL)
4136                 dns_db_detachnode(fctx->cache, &node);
4137
4138         return (result);
4139 }
4140
4141 static inline isc_result_t
4142 cache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_stdtime_t now)
4143 {
4144         isc_result_t result;
4145         dns_section_t section;
4146         dns_name_t *name;
4147
4148         FCTXTRACE("cache_message");
4149
4150         fctx->attributes &= ~FCTX_ATTR_WANTCACHE;
4151
4152         LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4153
4154         for (section = DNS_SECTION_ANSWER;
4155              section <= DNS_SECTION_ADDITIONAL;
4156              section++) {
4157                 result = dns_message_firstname(fctx->rmessage, section);
4158                 while (result == ISC_R_SUCCESS) {
4159                         name = NULL;
4160                         dns_message_currentname(fctx->rmessage, section,
4161                                                 &name);
4162                         if ((name->attributes & DNS_NAMEATTR_CACHE) != 0) {
4163                                 result = cache_name(fctx, name, addrinfo, now);
4164                                 if (result != ISC_R_SUCCESS)
4165                                         break;
4166                         }
4167                         result = dns_message_nextname(fctx->rmessage, section);
4168                 }
4169                 if (result != ISC_R_NOMORE)
4170                         break;
4171         }
4172         if (result == ISC_R_NOMORE)
4173                 result = ISC_R_SUCCESS;
4174
4175         UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4176
4177         return (result);
4178 }
4179
4180 /*
4181  * Do what dns_ncache_add() does, and then compute an appropriate eresult.
4182  */
4183 static isc_result_t
4184 ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node,
4185                   dns_rdatatype_t covers, isc_stdtime_t now, dns_ttl_t maxttl,
4186                   dns_rdataset_t *ardataset,
4187                   isc_result_t *eresultp)
4188 {
4189         isc_result_t result;
4190         dns_rdataset_t rdataset;
4191
4192         if (ardataset == NULL) {
4193                 dns_rdataset_init(&rdataset);
4194                 ardataset = &rdataset;
4195         }
4196         result = dns_ncache_add(message, cache, node, covers, now,
4197                                 maxttl, ardataset);
4198         if (result == DNS_R_UNCHANGED || result == ISC_R_SUCCESS) {
4199                 /*
4200                  * If the cache now contains a negative entry and we
4201                  * care about whether it is DNS_R_NCACHENXDOMAIN or
4202                  * DNS_R_NCACHENXRRSET then extract it.
4203                  */
4204                 if (ardataset->type == 0) {
4205                         /*
4206                          * The cache data is a negative cache entry.
4207                          */
4208                         if (NXDOMAIN(ardataset))
4209                                 *eresultp = DNS_R_NCACHENXDOMAIN;
4210                         else
4211                                 *eresultp = DNS_R_NCACHENXRRSET;
4212                 } else {
4213                         /*
4214                          * Either we don't care about the nature of the
4215                          * cache rdataset (because no fetch is interested
4216                          * in the outcome), or the cache rdataset is not
4217                          * a negative cache entry.  Whichever case it is,
4218                          * we can return success.
4219                          *
4220                          * XXXRTH  There's a CNAME/DNAME problem here.
4221                          */
4222                         *eresultp = ISC_R_SUCCESS;
4223                 }
4224                 result = ISC_R_SUCCESS;
4225         }
4226         if (ardataset == &rdataset && dns_rdataset_isassociated(ardataset))
4227                 dns_rdataset_disassociate(ardataset);
4228
4229         return (result);
4230 }
4231
4232 static inline isc_result_t
4233 ncache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
4234                dns_rdatatype_t covers, isc_stdtime_t now)
4235 {
4236         isc_result_t result, eresult;
4237         dns_name_t *name;
4238         dns_resolver_t *res;
4239         dns_db_t **adbp;
4240         dns_dbnode_t *node, **anodep;
4241         dns_rdataset_t *ardataset;
4242         isc_boolean_t need_validation, secure_domain;
4243         dns_name_t *aname;
4244         dns_fetchevent_t *event;
4245         isc_uint32_t ttl;
4246         unsigned int valoptions = 0;
4247
4248         FCTXTRACE("ncache_message");
4249
4250         fctx->attributes &= ~FCTX_ATTR_WANTNCACHE;
4251
4252         res = fctx->res;
4253         need_validation = ISC_FALSE;
4254         secure_domain = ISC_FALSE;
4255         eresult = ISC_R_SUCCESS;
4256         name = &fctx->name;
4257         node = NULL;
4258
4259         /*
4260          * XXXMPA remove when we follow cnames and adjust the setting
4261          * of FCTX_ATTR_WANTNCACHE in noanswer_response().
4262          */
4263         INSIST(fctx->rmessage->counts[DNS_SECTION_ANSWER] == 0);
4264
4265         /*
4266          * Is DNSSEC validation required for this name?
4267          */
4268         if (fctx->res->view->enablevalidation) {
4269                 result = dns_keytable_issecuredomain(res->view->secroots, name,
4270                                                      &secure_domain);
4271                 if (result != ISC_R_SUCCESS)
4272                         return (result);
4273
4274                 if (!secure_domain && res->view->dlv != NULL) {
4275                         valoptions = DNS_VALIDATOR_DLV;
4276                         secure_domain = ISC_TRUE;
4277                 }
4278         }
4279
4280         if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
4281                 need_validation = ISC_FALSE;
4282         else
4283                 need_validation = secure_domain;
4284
4285         if (secure_domain) {
4286                 /*
4287                  * Mark all rdatasets as pending.
4288                  */
4289                 dns_rdataset_t *trdataset;
4290                 dns_name_t *tname;
4291
4292                 result = dns_message_firstname(fctx->rmessage,
4293                                                DNS_SECTION_AUTHORITY);
4294                 while (result == ISC_R_SUCCESS) {
4295                         tname = NULL;
4296                         dns_message_currentname(fctx->rmessage,
4297                                                 DNS_SECTION_AUTHORITY,
4298                                                 &tname);
4299                         for (trdataset = ISC_LIST_HEAD(tname->list);
4300                              trdataset != NULL;
4301                              trdataset = ISC_LIST_NEXT(trdataset, link))
4302                                 trdataset->trust = dns_trust_pending_answer;
4303                         result = dns_message_nextname(fctx->rmessage,
4304                                                       DNS_SECTION_AUTHORITY);
4305                 }
4306                 if (result != ISC_R_NOMORE)
4307                         return (result);
4308
4309         }
4310
4311         if (need_validation) {
4312                 /*
4313                  * Do negative response validation.
4314                  */
4315                 result = valcreate(fctx, addrinfo, name, fctx->type,
4316                                    NULL, NULL, valoptions,
4317                                    res->buckets[fctx->bucketnum].task);
4318                 /*
4319                  * If validation is necessary, return now.  Otherwise continue
4320                  * to process the message, letting the validation complete
4321                  * in its own good time.
4322                  */
4323                 return (result);
4324         }
4325
4326         LOCK(&res->buckets[fctx->bucketnum].lock);
4327
4328         adbp = NULL;
4329         aname = NULL;
4330         anodep = NULL;
4331         ardataset = NULL;
4332         if (!HAVE_ANSWER(fctx)) {
4333                 event = ISC_LIST_HEAD(fctx->events);
4334                 if (event != NULL) {
4335                         adbp = &event->db;
4336                         aname = dns_fixedname_name(&event->foundname);
4337                         result = dns_name_copy(name, aname, NULL);
4338                         if (result != ISC_R_SUCCESS)
4339                                 goto unlock;
4340                         anodep = &event->node;
4341                         ardataset = event->rdataset;
4342                 }
4343         } else
4344                 event = NULL;
4345
4346         result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
4347         if (result != ISC_R_SUCCESS)
4348                 goto unlock;
4349
4350         /*
4351          * If we are asking for a SOA record set the cache time
4352          * to zero to facilitate locating the containing zone of
4353          * a arbitary zone.
4354          */
4355         ttl = fctx->res->view->maxncachettl;
4356         if (fctx->type == dns_rdatatype_soa &&
4357             covers == dns_rdatatype_any)
4358                 ttl = 0;
4359
4360         result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
4361                                    covers, now, ttl, ardataset, &eresult);
4362         if (result != ISC_R_SUCCESS)
4363                 goto unlock;
4364
4365         if (!HAVE_ANSWER(fctx)) {
4366                 fctx->attributes |= FCTX_ATTR_HAVEANSWER;
4367                 if (event != NULL) {
4368                         event->result = eresult;
4369                         dns_db_attach(fctx->cache, adbp);
4370                         dns_db_transfernode(fctx->cache, &node, anodep);
4371                         clone_results(fctx);
4372                 }
4373         }
4374
4375  unlock:
4376         UNLOCK(&res->buckets[fctx->bucketnum].lock);
4377
4378         if (node != NULL)
4379                 dns_db_detachnode(fctx->cache, &node);
4380
4381         return (result);
4382 }
4383
4384 static inline void
4385 mark_related(dns_name_t *name, dns_rdataset_t *rdataset,
4386              isc_boolean_t external, isc_boolean_t gluing)
4387 {
4388         name->attributes |= DNS_NAMEATTR_CACHE;
4389         if (gluing) {
4390                 rdataset->trust = dns_trust_glue;
4391                 /*
4392                  * Glue with 0 TTL causes problems.  We force the TTL to
4393                  * 1 second to prevent this.
4394                  */
4395                 if (rdataset->ttl == 0)
4396                         rdataset->ttl = 1;
4397         } else
4398                 rdataset->trust = dns_trust_additional;
4399         /*
4400          * Avoid infinite loops by only marking new rdatasets.
4401          */
4402         if (!CACHE(rdataset)) {
4403                 name->attributes |= DNS_NAMEATTR_CHASE;
4404                 rdataset->attributes |= DNS_RDATASETATTR_CHASE;
4405         }
4406         rdataset->attributes |= DNS_RDATASETATTR_CACHE;
4407         if (external)
4408                 rdataset->attributes |= DNS_RDATASETATTR_EXTERNAL;
4409 }
4410
4411 static isc_result_t
4412 check_related(void *arg, dns_name_t *addname, dns_rdatatype_t type) {
4413         fetchctx_t *fctx = arg;
4414         isc_result_t result;
4415         dns_name_t *name;
4416         dns_rdataset_t *rdataset;
4417         isc_boolean_t external;
4418         dns_rdatatype_t rtype;
4419         isc_boolean_t gluing;
4420
4421         REQUIRE(VALID_FCTX(fctx));
4422
4423         if (GLUING(fctx))
4424                 gluing = ISC_TRUE;
4425         else
4426                 gluing = ISC_FALSE;
4427         name = NULL;
4428         rdataset = NULL;
4429         result = dns_message_findname(fctx->rmessage, DNS_SECTION_ADDITIONAL,
4430                                       addname, dns_rdatatype_any, 0, &name,
4431                                       NULL);
4432         if (result == ISC_R_SUCCESS) {
4433                 external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
4434                 if (type == dns_rdatatype_a) {
4435                         for (rdataset = ISC_LIST_HEAD(name->list);
4436                              rdataset != NULL;
4437                              rdataset = ISC_LIST_NEXT(rdataset, link)) {
4438                                 if (rdataset->type == dns_rdatatype_rrsig)
4439                                         rtype = rdataset->covers;
4440                                 else
4441                                         rtype = rdataset->type;
4442                                 if (rtype == dns_rdatatype_a ||
4443                                     rtype == dns_rdatatype_aaaa)
4444                                         mark_related(name, rdataset, external,
4445                                                      gluing);
4446                         }
4447                 } else {
4448                         result = dns_message_findtype(name, type, 0,
4449                                                       &rdataset);
4450                         if (result == ISC_R_SUCCESS) {
4451                                 mark_related(name, rdataset, external, gluing);
4452                                 /*
4453                                  * Do we have its SIG too?
4454                                  */
4455                                 rdataset = NULL;
4456                                 result = dns_message_findtype(name,
4457                                                       dns_rdatatype_rrsig,
4458                                                       type, &rdataset);
4459                                 if (result == ISC_R_SUCCESS)
4460                                         mark_related(name, rdataset, external,
4461                                                      gluing);
4462                         }
4463                 }
4464         }
4465
4466         return (ISC_R_SUCCESS);
4467 }
4468
4469 static void
4470 chase_additional(fetchctx_t *fctx) {
4471         isc_boolean_t rescan;
4472         dns_section_t section = DNS_SECTION_ADDITIONAL;
4473         isc_result_t result;
4474
4475  again:
4476         rescan = ISC_FALSE;
4477
4478         for (result = dns_message_firstname(fctx->rmessage, section);
4479              result == ISC_R_SUCCESS;
4480              result = dns_message_nextname(fctx->rmessage, section)) {
4481                 dns_name_t *name = NULL;
4482                 dns_rdataset_t *rdataset;
4483                 dns_message_currentname(fctx->rmessage, DNS_SECTION_ADDITIONAL,
4484                                         &name);
4485                 if ((name->attributes & DNS_NAMEATTR_CHASE) == 0)
4486                         continue;
4487                 name->attributes &= ~DNS_NAMEATTR_CHASE;
4488                 for (rdataset = ISC_LIST_HEAD(name->list);
4489                      rdataset != NULL;
4490                      rdataset = ISC_LIST_NEXT(rdataset, link)) {
4491                         if (CHASE(rdataset)) {
4492                                 rdataset->attributes &= ~DNS_RDATASETATTR_CHASE;
4493                                 (void)dns_rdataset_additionaldata(rdataset,
4494                                                                   check_related,
4495                                                                   fctx);
4496                                 rescan = ISC_TRUE;
4497                         }
4498                 }
4499         }
4500         if (rescan)
4501                 goto again;
4502 }
4503
4504 static inline isc_result_t
4505 cname_target(dns_rdataset_t *rdataset, dns_name_t *tname) {
4506         isc_result_t result;
4507         dns_rdata_t rdata = DNS_RDATA_INIT;
4508         dns_rdata_cname_t cname;
4509
4510         result = dns_rdataset_first(rdataset);
4511         if (result != ISC_R_SUCCESS)
4512                 return (result);
4513         dns_rdataset_current(rdataset, &rdata);
4514         result = dns_rdata_tostruct(&rdata, &cname, NULL);
4515         if (result != ISC_R_SUCCESS)
4516                 return (result);
4517         dns_name_init(tname, NULL);
4518         dns_name_clone(&cname.cname, tname);
4519         dns_rdata_freestruct(&cname);
4520
4521         return (ISC_R_SUCCESS);
4522 }
4523
4524 static inline isc_result_t
4525 dname_target(dns_rdataset_t *rdataset, dns_name_t *qname, dns_name_t *oname,
4526              dns_fixedname_t *fixeddname)
4527 {
4528         isc_result_t result;
4529         dns_rdata_t rdata = DNS_RDATA_INIT;
4530         unsigned int nlabels;
4531         int order;
4532         dns_namereln_t namereln;
4533         dns_rdata_dname_t dname;
4534         dns_fixedname_t prefix;
4535
4536         /*
4537          * Get the target name of the DNAME.
4538          */
4539
4540         result = dns_rdataset_first(rdataset);
4541         if (result != ISC_R_SUCCESS)
4542                 return (result);
4543         dns_rdataset_current(rdataset, &rdata);
4544         result = dns_rdata_tostruct(&rdata, &dname, NULL);
4545         if (result != ISC_R_SUCCESS)
4546                 return (result);
4547
4548         /*
4549          * Get the prefix of qname.
4550          */
4551         namereln = dns_name_fullcompare(qname, oname, &order, &nlabels);
4552         if (namereln != dns_namereln_subdomain) {
4553                 dns_rdata_freestruct(&dname);
4554                 return (DNS_R_FORMERR);
4555         }
4556         dns_fixedname_init(&prefix);
4557         dns_name_split(qname, nlabels, dns_fixedname_name(&prefix), NULL);
4558         dns_fixedname_init(fixeddname);
4559         result = dns_name_concatenate(dns_fixedname_name(&prefix),
4560                                       &dname.dname,
4561                                       dns_fixedname_name(fixeddname), NULL);
4562         dns_rdata_freestruct(&dname);
4563         return (result);
4564 }
4565
4566 /*
4567  * Handle a no-answer response (NXDOMAIN, NXRRSET, or referral).
4568  * If bind8_ns_resp is ISC_TRUE, this is a suspected BIND 8
4569  * response to an NS query that should be treated as a referral
4570  * even though the NS records occur in the answer section
4571  * rather than the authority section.
4572  */
4573 static isc_result_t
4574 noanswer_response(fetchctx_t *fctx, dns_name_t *oqname,
4575                   isc_boolean_t bind8_ns_resp)
4576 {
4577         isc_result_t result;
4578         dns_message_t *message;
4579         dns_name_t *name, *qname, *ns_name, *soa_name, *ds_name;
4580         dns_rdataset_t *rdataset, *ns_rdataset;
4581         isc_boolean_t aa, negative_response;
4582         dns_rdatatype_t type;
4583         dns_section_t section =
4584                 bind8_ns_resp ? DNS_SECTION_ANSWER : DNS_SECTION_AUTHORITY;
4585
4586         FCTXTRACE("noanswer_response");
4587
4588         message = fctx->rmessage;
4589
4590         /*
4591          * Setup qname.
4592          */
4593         if (oqname == NULL) {
4594                 /*
4595                  * We have a normal, non-chained negative response or
4596                  * referral.
4597                  */
4598                 if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
4599                         aa = ISC_TRUE;
4600                 else
4601                         aa = ISC_FALSE;
4602                 qname = &fctx->name;
4603         } else {
4604                 /*
4605                  * We're being invoked by answer_response() after it has
4606                  * followed a CNAME/DNAME chain.
4607                  */
4608                 qname = oqname;
4609                 aa = ISC_FALSE;
4610                 /*
4611                  * If the current qname is not a subdomain of the query
4612                  * domain, there's no point in looking at the authority
4613                  * section without doing DNSSEC validation.
4614                  *
4615                  * Until we do that validation, we'll just return success
4616                  * in this case.
4617                  */
4618                 if (!dns_name_issubdomain(qname, &fctx->domain))
4619                         return (ISC_R_SUCCESS);
4620         }
4621
4622         /*
4623          * We have to figure out if this is a negative response, or a
4624          * referral.
4625          */
4626
4627         /*
4628          * Sometimes we can tell if its a negative response by looking at
4629          * the message header.
4630          */
4631         negative_response = ISC_FALSE;
4632         if (message->rcode == dns_rcode_nxdomain ||
4633             (message->counts[DNS_SECTION_ANSWER] == 0 &&
4634              message->counts[DNS_SECTION_AUTHORITY] == 0))
4635                 negative_response = ISC_TRUE;
4636
4637         /*
4638          * Process the authority section.
4639          */
4640         ns_name = NULL;
4641         ns_rdataset = NULL;
4642         soa_name = NULL;
4643         ds_name = NULL;
4644         result = dns_message_firstname(message, section);
4645         while (result == ISC_R_SUCCESS) {
4646                 name = NULL;
4647                 dns_message_currentname(message, section, &name);
4648                 if (dns_name_issubdomain(name, &fctx->domain)) {
4649                         /*
4650                          * Look for NS/SOA RRsets first.
4651                          */
4652                         for (rdataset = ISC_LIST_HEAD(name->list);
4653                              rdataset != NULL;
4654                              rdataset = ISC_LIST_NEXT(rdataset, link)) {
4655                                 type = rdataset->type;
4656                                 if (type == dns_rdatatype_rrsig)
4657                                         type = rdataset->covers;
4658                                 if (((type == dns_rdatatype_ns ||
4659                                       type == dns_rdatatype_soa) &&
4660                                      !dns_name_issubdomain(qname, name)))
4661                                         return (DNS_R_FORMERR);
4662                                 if (type == dns_rdatatype_ns) {
4663                                         /*
4664                                          * NS or RRSIG NS.
4665                                          *
4666                                          * Only one set of NS RRs is allowed.
4667                                          */
4668                                         if (rdataset->type ==
4669                                             dns_rdatatype_ns) {
4670                                                 if (ns_name != NULL &&
4671                                                     name != ns_name)
4672                                                         return (DNS_R_FORMERR);
4673                                                 ns_name = name;
4674                                                 ns_rdataset = rdataset;
4675                                         }
4676                                         name->attributes |=
4677                                                 DNS_NAMEATTR_CACHE;
4678                                         rdataset->attributes |=
4679                                                 DNS_RDATASETATTR_CACHE;
4680                                         rdataset->trust = dns_trust_glue;
4681                                 }
4682                                 if (type == dns_rdatatype_soa) {
4683                                         /*
4684                                          * SOA, or RRSIG SOA.
4685                                          *
4686                                          * Only one SOA is allowed.
4687                                          */
4688                                         if (rdataset->type ==
4689                                             dns_rdatatype_soa) {
4690                                                 if (soa_name != NULL &&
4691                                                     name != soa_name)
4692                                                         return (DNS_R_FORMERR);
4693                                                 soa_name = name;
4694                                         }
4695                                         name->attributes |=
4696                                                 DNS_NAMEATTR_NCACHE;
4697                                         rdataset->attributes |=
4698                                                 DNS_RDATASETATTR_NCACHE;
4699                                         if (aa)
4700                                                 rdataset->trust =
4701                                                     dns_trust_authauthority;
4702                                         else
4703                                                 rdataset->trust =
4704                                                         dns_trust_additional;
4705                                 }
4706                         }
4707                 }
4708                 result = dns_message_nextname(message, section);
4709                 if (result == ISC_R_NOMORE)
4710                         break;
4711                 else if (result != ISC_R_SUCCESS)
4712                         return (result);
4713         }
4714
4715         /*
4716          * A negative response has a SOA record (Type 2)
4717          * and a optional NS RRset (Type 1) or it has neither
4718          * a SOA or a NS RRset (Type 3, handled above) or
4719          * rcode is NXDOMAIN (handled above) in which case
4720          * the NS RRset is allowed (Type 4).
4721          */
4722         if (soa_name != NULL)
4723                 negative_response = ISC_TRUE;
4724
4725         result = dns_message_firstname(message, section);
4726         while (result == ISC_R_SUCCESS) {
4727                 name = NULL;
4728                 dns_message_currentname(message, section, &name);
4729                 if (dns_name_issubdomain(name, &fctx->domain)) {
4730                         for (rdataset = ISC_LIST_HEAD(name->list);
4731                              rdataset != NULL;
4732                              rdataset = ISC_LIST_NEXT(rdataset, link)) {
4733                                 type = rdataset->type;
4734                                 if (type == dns_rdatatype_rrsig)
4735                                         type = rdataset->covers;
4736                                 if (type == dns_rdatatype_nsec) {
4737                                         /*
4738                                          * NSEC or RRSIG NSEC.
4739                                          */
4740                                         if (negative_response) {
4741                                                 name->attributes |=
4742                                                         DNS_NAMEATTR_NCACHE;
4743                                                 rdataset->attributes |=
4744                                                         DNS_RDATASETATTR_NCACHE;
4745                                         } else {
4746                                                 name->attributes |=
4747                                                         DNS_NAMEATTR_CACHE;
4748                                                 rdataset->attributes |=
4749                                                         DNS_RDATASETATTR_CACHE;
4750                                         }
4751                                         if (aa)
4752                                                 rdataset->trust =
4753                                                     dns_trust_authauthority;
4754                                         else
4755                                                 rdataset->trust =
4756                                                         dns_trust_additional;
4757                                         /*
4758                                          * No additional data needs to be
4759                                          * marked.
4760                                          */
4761                                 } else if (type == dns_rdatatype_ds) {
4762                                         /*
4763                                          * DS or SIG DS.
4764                                          *
4765                                          * These should only be here if
4766                                          * this is a referral, and there
4767                                          * should only be one DS.
4768                                          */
4769                                         if (ns_name == NULL)
4770                                                 return (DNS_R_FORMERR);
4771                                         if (rdataset->type ==
4772                                             dns_rdatatype_ds) {
4773                                                 if (ds_name != NULL &&
4774                                                     name != ds_name)
4775                                                         return (DNS_R_FORMERR);
4776                                                 ds_name = name;
4777                                         }
4778                                         name->attributes |=
4779                                                 DNS_NAMEATTR_CACHE;
4780                                         rdataset->attributes |=
4781                                                 DNS_RDATASETATTR_CACHE;
4782                                         if (aa)
4783                                                 rdataset->trust =
4784                                                     dns_trust_authauthority;
4785                                         else
4786                                                 rdataset->trust =
4787                                                         dns_trust_additional;
4788                                 }
4789                         }
4790                 }
4791                 result = dns_message_nextname(message, section);
4792                 if (result == ISC_R_NOMORE)
4793                         break;
4794                 else if (result != ISC_R_SUCCESS)
4795                         return (result);
4796         }
4797
4798         /*
4799          * Trigger lookups for DNS nameservers.
4800          */
4801         if (negative_response && message->rcode == dns_rcode_noerror &&
4802             fctx->type == dns_rdatatype_ds && soa_name != NULL &&
4803             dns_name_equal(soa_name, qname) &&
4804             !dns_name_equal(qname, dns_rootname))
4805                 return (DNS_R_CHASEDSSERVERS);
4806
4807         /*
4808          * Did we find anything?
4809          */
4810         if (!negative_response && ns_name == NULL) {
4811                 /*
4812                  * Nope.
4813                  */
4814                 if (oqname != NULL) {
4815                         /*
4816                          * We've already got a partial CNAME/DNAME chain,
4817                          * and haven't found else anything useful here, but
4818                          * no error has occurred since we have an answer.
4819                          */
4820                         return (ISC_R_SUCCESS);
4821                 } else {
4822                         /*
4823                          * The responder is insane.
4824                          */
4825                         return (DNS_R_FORMERR);
4826                 }
4827         }
4828
4829         /*
4830          * If we found both NS and SOA, they should be the same name.
4831          */
4832         if (ns_name != NULL && soa_name != NULL && ns_name != soa_name)
4833                 return (DNS_R_FORMERR);
4834
4835         /*
4836          * Do we have a referral?  (We only want to follow a referral if
4837          * we're not following a chain.)
4838          */
4839         if (!negative_response && ns_name != NULL && oqname == NULL) {
4840                 /*
4841                  * We already know ns_name is a subdomain of fctx->domain.
4842                  * If ns_name is equal to fctx->domain, we're not making
4843                  * progress.  We return DNS_R_FORMERR so that we'll keep
4844                  * trying other servers.
4845                  */
4846                 if (dns_name_equal(ns_name, &fctx->domain))
4847                         return (DNS_R_FORMERR);
4848
4849                 /*
4850                  * If the referral name is not a parent of the query
4851                  * name, consider the responder insane.
4852                  */
4853                 if (! dns_name_issubdomain(&fctx->name, ns_name)) {
4854                         FCTXTRACE("referral to non-parent");
4855                         return (DNS_R_FORMERR);
4856                 }
4857
4858                 /*
4859                  * Mark any additional data related to this rdataset.
4860                  * It's important that we do this before we change the
4861                  * query domain.
4862                  */
4863                 INSIST(ns_rdataset != NULL);
4864                 fctx->attributes |= FCTX_ATTR_GLUING;
4865                 (void)dns_rdataset_additionaldata(ns_rdataset, check_related,
4866                                                   fctx);
4867                 fctx->attributes &= ~FCTX_ATTR_GLUING;
4868                 /*
4869                  * NS rdatasets with 0 TTL cause problems.
4870                  * dns_view_findzonecut() will not find them when we
4871                  * try to follow the referral, and we'll SERVFAIL
4872                  * because the best nameservers are now above QDOMAIN.
4873                  * We force the TTL to 1 second to prevent this.
4874                  */
4875                 if (ns_rdataset->ttl == 0)
4876                         ns_rdataset->ttl = 1;
4877                 /*
4878                  * Set the current query domain to the referral name.
4879                  *
4880                  * XXXRTH  We should check if we're in forward-only mode, and
4881                  *         if so we should bail out.
4882                  */
4883                 INSIST(dns_name_countlabels(&fctx->domain) > 0);
4884                 dns_name_free(&fctx->domain,
4885                               fctx->res->buckets[fctx->bucketnum].mctx);
4886                 if (dns_rdataset_isassociated(&fctx->nameservers))
4887                         dns_rdataset_disassociate(&fctx->nameservers);
4888                 dns_name_init(&fctx->domain, NULL);
4889                 result = dns_name_dup(ns_name,
4890                                       fctx->res->buckets[fctx->bucketnum].mctx,
4891                                       &fctx->domain);
4892                 if (result != ISC_R_SUCCESS)
4893                         return (result);
4894                 fctx->attributes |= FCTX_ATTR_WANTCACHE;
4895                 return (DNS_R_DELEGATION);
4896         }
4897
4898         /*
4899          * Since we're not doing a referral, we don't want to cache any
4900          * NS RRs we may have found.
4901          */
4902         if (ns_name != NULL)
4903                 ns_name->attributes &= ~DNS_NAMEATTR_CACHE;
4904
4905         if (negative_response && oqname == NULL)
4906                 fctx->attributes |= FCTX_ATTR_WANTNCACHE;
4907
4908         return (ISC_R_SUCCESS);
4909 }
4910
4911 static isc_result_t
4912 answer_response(fetchctx_t *fctx) {
4913         isc_result_t result;
4914         dns_message_t *message;
4915         dns_name_t *name, *qname, tname;
4916         dns_rdataset_t *rdataset;
4917         isc_boolean_t done, external, chaining, aa, found, want_chaining;
4918         isc_boolean_t have_answer, found_cname, found_type, wanted_chaining;
4919         unsigned int aflag;
4920         dns_rdatatype_t type;
4921         dns_fixedname_t dname, fqname;
4922
4923         FCTXTRACE("answer_response");
4924
4925         message = fctx->rmessage;
4926
4927         /*
4928          * Examine the answer section, marking those rdatasets which are
4929          * part of the answer and should be cached.
4930          */
4931
4932         done = ISC_FALSE;
4933         found_cname = ISC_FALSE;
4934         found_type = ISC_FALSE;
4935         chaining = ISC_FALSE;
4936         have_answer = ISC_FALSE;
4937         want_chaining = ISC_FALSE;
4938         if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
4939                 aa = ISC_TRUE;
4940         else
4941                 aa = ISC_FALSE;
4942         qname = &fctx->name;
4943         type = fctx->type;
4944         result = dns_message_firstname(message, DNS_SECTION_ANSWER);
4945         while (!done && result == ISC_R_SUCCESS) {
4946                 name = NULL;
4947                 dns_message_currentname(message, DNS_SECTION_ANSWER, &name);
4948                 external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
4949                 if (dns_name_equal(name, qname)) {
4950                         wanted_chaining = ISC_FALSE;
4951                         for (rdataset = ISC_LIST_HEAD(name->list);
4952                              rdataset != NULL;
4953                              rdataset = ISC_LIST_NEXT(rdataset, link)) {
4954                                 found = ISC_FALSE;
4955                                 want_chaining = ISC_FALSE;
4956                                 aflag = 0;
4957                                 if (rdataset->type == type && !found_cname) {
4958                                         /*
4959                                          * We've found an ordinary answer.
4960                                          */
4961                                         found = ISC_TRUE;
4962                                         found_type = ISC_TRUE;
4963                                         done = ISC_TRUE;
4964                                         aflag = DNS_RDATASETATTR_ANSWER;
4965                                 } else if (type == dns_rdatatype_any) {
4966                                         /*
4967                                          * We've found an answer matching
4968                                          * an ANY query.  There may be
4969                                          * more.
4970                                          */
4971                                         found = ISC_TRUE;
4972                                         aflag = DNS_RDATASETATTR_ANSWER;
4973                                 } else if (rdataset->type == dns_rdatatype_rrsig
4974                                            && rdataset->covers == type
4975                                            && !found_cname) {
4976                                         /*
4977                                          * We've found a signature that
4978                                          * covers the type we're looking for.
4979                                          */
4980                                         found = ISC_TRUE;
4981                                         found_type = ISC_TRUE;
4982                                         aflag = DNS_RDATASETATTR_ANSWERSIG;
4983                                 } else if (rdataset->type ==
4984                                            dns_rdatatype_cname
4985                                            && !found_type) {
4986                                         /*
4987                                          * We're looking for something else,
4988                                          * but we found a CNAME.
4989                                          *
4990                                          * Getting a CNAME response for some
4991                                          * query types is an error.
4992                                          */
4993                                         if (type == dns_rdatatype_rrsig ||
4994                                             type == dns_rdatatype_dnskey ||
4995                                             type == dns_rdatatype_nsec)
4996                                                 return (DNS_R_FORMERR);
4997                                         found = ISC_TRUE;
4998                                         found_cname = ISC_TRUE;
4999                                         want_chaining = ISC_TRUE;
5000                                         aflag = DNS_RDATASETATTR_ANSWER;
5001                                         result = cname_target(rdataset,
5002                                                               &tname);
5003                                         if (result != ISC_R_SUCCESS)
5004                                                 return (result);
5005                                 } else if (rdataset->type == dns_rdatatype_rrsig
5006                                            && rdataset->covers ==
5007                                            dns_rdatatype_cname
5008                                            && !found_type) {
5009                                         /*
5010                                          * We're looking for something else,
5011                                          * but we found a SIG CNAME.
5012                                          */
5013                                         found = ISC_TRUE;
5014                                         found_cname = ISC_TRUE;
5015                                         aflag = DNS_RDATASETATTR_ANSWERSIG;
5016                                 }
5017
5018                                 if (found) {
5019                                         /*
5020                                          * We've found an answer to our
5021                                          * question.
5022                                          */
5023                                         name->attributes |=
5024                                                 DNS_NAMEATTR_CACHE;
5025                                         rdataset->attributes |=
5026                                                 DNS_RDATASETATTR_CACHE;
5027                                         rdataset->trust = dns_trust_answer;
5028                                         if (!chaining) {
5029                                                 /*
5030                                                  * This data is "the" answer
5031                                                  * to our question only if
5032                                                  * we're not chaining (i.e.
5033                                                  * if we haven't followed
5034                                                  * a CNAME or DNAME).
5035                                                  */
5036                                                 INSIST(!external);
5037                                                 if (aflag ==
5038                                                     DNS_RDATASETATTR_ANSWER)
5039                                                         have_answer = ISC_TRUE;
5040                                                 name->attributes |=
5041                                                         DNS_NAMEATTR_ANSWER;
5042                                                 rdataset->attributes |= aflag;
5043                                                 if (aa)
5044                                                         rdataset->trust =
5045                                                           dns_trust_authanswer;
5046                                         } else if (external) {
5047                                                 /*
5048                                                  * This data is outside of
5049                                                  * our query domain, and
5050                                                  * may only be cached if it
5051                                                  * comes from a secure zone
5052                                                  * and validates.
5053                                                  */
5054                                                 rdataset->attributes |=
5055                                                     DNS_RDATASETATTR_EXTERNAL;
5056                                         }
5057
5058                                         /*
5059                                          * Mark any additional data related
5060                                          * to this rdataset.
5061                                          */
5062                                         (void)dns_rdataset_additionaldata(
5063                                                         rdataset,
5064                                                         check_related,
5065                                                         fctx);
5066
5067                                         /*
5068                                          * CNAME chaining.
5069                                          */
5070                                         if (want_chaining) {
5071                                                 wanted_chaining = ISC_TRUE;
5072                                                 name->attributes |=
5073                                                         DNS_NAMEATTR_CHAINING;
5074                                                 rdataset->attributes |=
5075                                                     DNS_RDATASETATTR_CHAINING;
5076                                                 qname = &tname;
5077                                         }
5078                                 }
5079                                 /*
5080                                  * We could add an "else" clause here and
5081                                  * log that we're ignoring this rdataset.
5082                                  */
5083                         }
5084                         /*
5085                          * If wanted_chaining is true, we've done
5086                          * some chaining as the result of processing
5087                          * this node, and thus we need to set
5088                          * chaining to true.
5089                          *
5090                          * We don't set chaining inside of the
5091                          * rdataset loop because doing that would
5092                          * cause us to ignore the signatures of
5093                          * CNAMEs.
5094                          */
5095                         if (wanted_chaining)
5096                                 chaining = ISC_TRUE;
5097                 } else {
5098                         /*
5099                          * Look for a DNAME (or its SIG).  Anything else is
5100                          * ignored.
5101                          */
5102                         wanted_chaining = ISC_FALSE;
5103                         for (rdataset = ISC_LIST_HEAD(name->list);
5104                              rdataset != NULL;
5105                              rdataset = ISC_LIST_NEXT(rdataset, link)) {
5106                                 isc_boolean_t found_dname = ISC_FALSE;
5107                                 found = ISC_FALSE;
5108                                 aflag = 0;
5109                                 if (rdataset->type == dns_rdatatype_dname) {
5110                                         /*
5111                                          * We're looking for something else,
5112                                          * but we found a DNAME.
5113                                          *
5114                                          * If we're not chaining, then the
5115                                          * DNAME should not be external.
5116                                          */
5117                                         if (!chaining && external)
5118                                                 return (DNS_R_FORMERR);
5119                                         found = ISC_TRUE;
5120                                         want_chaining = ISC_TRUE;
5121                                         aflag = DNS_RDATASETATTR_ANSWER;
5122                                         result = dname_target(rdataset,
5123                                                               qname, name,
5124                                                               &dname);
5125                                         if (result == ISC_R_NOSPACE) {
5126                                                 /*
5127                                                  * We can't construct the
5128                                                  * DNAME target.  Do not
5129                                                  * try to continue.
5130                                                  */
5131                                                 want_chaining = ISC_FALSE;
5132                                         } else if (result != ISC_R_SUCCESS)
5133                                                 return (result);
5134                                         else
5135                                                 found_dname = ISC_TRUE;
5136                                 } else if (rdataset->type == dns_rdatatype_rrsig
5137                                            && rdataset->covers ==
5138                                            dns_rdatatype_dname) {
5139                                         /*
5140                                          * We've found a signature that
5141                                          * covers the DNAME.
5142                                          */
5143                                         found = ISC_TRUE;
5144                                         aflag = DNS_RDATASETATTR_ANSWERSIG;
5145                                 }
5146
5147                                 if (found) {
5148                                         /*
5149                                          * We've found an answer to our
5150                                          * question.
5151                                          */
5152                                         name->attributes |=
5153                                                 DNS_NAMEATTR_CACHE;
5154                                         rdataset->attributes |=
5155                                                 DNS_RDATASETATTR_CACHE;
5156                                         rdataset->trust = dns_trust_answer;
5157                                         if (!chaining) {
5158                                                 /*
5159                                                  * This data is "the" answer
5160                                                  * to our question only if
5161                                                  * we're not chaining.
5162                                                  */
5163                                                 INSIST(!external);
5164                                                 if (aflag ==
5165                                                     DNS_RDATASETATTR_ANSWER)
5166                                                         have_answer = ISC_TRUE;
5167                                                 name->attributes |=
5168                                                         DNS_NAMEATTR_ANSWER;
5169                                                 rdataset->attributes |= aflag;
5170                                                 if (aa)
5171                                                         rdataset->trust =
5172                                                           dns_trust_authanswer;
5173                                         } else if (external) {
5174                                                 rdataset->attributes |=
5175                                                     DNS_RDATASETATTR_EXTERNAL;
5176                                         }
5177
5178                                         /*
5179                                          * DNAME chaining.
5180                                          */
5181                                         if (found_dname) {
5182                                                 /*
5183                                                  * Copy the dname into the
5184                                                  * qname fixed name.
5185                                                  *
5186                                                  * Although we check for
5187                                                  * failure of the copy
5188                                                  * operation, in practice it
5189                                                  * should never fail since
5190                                                  * we already know that the
5191                                                  * result fits in a fixedname.
5192                                                  */
5193                                                 dns_fixedname_init(&fqname);
5194                                                 result = dns_name_copy(
5195                                                   dns_fixedname_name(&dname),
5196                                                   dns_fixedname_name(&fqname),
5197                                                   NULL);
5198                                                 if (result != ISC_R_SUCCESS)
5199                                                         return (result);
5200                                                 wanted_chaining = ISC_TRUE;
5201                                                 name->attributes |=
5202                                                         DNS_NAMEATTR_CHAINING;
5203                                                 rdataset->attributes |=
5204                                                     DNS_RDATASETATTR_CHAINING;
5205                                                 qname = dns_fixedname_name(
5206                                                                    &fqname);
5207                                         }
5208                                 }
5209                         }
5210                         if (wanted_chaining)
5211                                 chaining = ISC_TRUE;
5212                 }
5213                 result = dns_message_nextname(message, DNS_SECTION_ANSWER);
5214         }
5215         if (result == ISC_R_NOMORE)
5216                 result = ISC_R_SUCCESS;
5217         if (result != ISC_R_SUCCESS)
5218                 return (result);
5219
5220         /*
5221          * We should have found an answer.
5222          */
5223         if (!have_answer)
5224                 return (DNS_R_FORMERR);
5225
5226         /*
5227          * This response is now potentially cacheable.
5228          */
5229         fctx->attributes |= FCTX_ATTR_WANTCACHE;
5230
5231         /*
5232          * Did chaining end before we got the final answer?
5233          */
5234         if (chaining) {
5235                 /*
5236                  * Yes.  This may be a negative reply, so hand off
5237                  * authority section processing to the noanswer code.
5238                  * If it isn't a noanswer response, no harm will be
5239                  * done.
5240                  */
5241                 return (noanswer_response(fctx, qname, ISC_FALSE));
5242         }
5243
5244         /*
5245          * We didn't end with an incomplete chain, so the rcode should be
5246          * "no error".
5247          */
5248         if (message->rcode != dns_rcode_noerror)
5249                 return (DNS_R_FORMERR);
5250
5251         /*
5252          * Examine the authority section (if there is one).
5253          *
5254          * We expect there to be only one owner name for all the rdatasets
5255          * in this section, and we expect that it is not external.
5256          */
5257         done = ISC_FALSE;
5258         result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
5259         while (!done && result == ISC_R_SUCCESS) {
5260                 name = NULL;
5261                 dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
5262                 external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
5263                 if (!external) {
5264                         /*
5265                          * We expect to find NS or SIG NS rdatasets, and
5266                          * nothing else.
5267                          */
5268                         for (rdataset = ISC_LIST_HEAD(name->list);
5269                              rdataset != NULL;
5270                              rdataset = ISC_LIST_NEXT(rdataset, link)) {
5271                                 if (rdataset->type == dns_rdatatype_ns ||
5272                                     (rdataset->type == dns_rdatatype_rrsig &&
5273                                      rdataset->covers == dns_rdatatype_ns)) {
5274                                         name->attributes |=
5275                                                 DNS_NAMEATTR_CACHE;
5276                                         rdataset->attributes |=
5277                                                 DNS_RDATASETATTR_CACHE;
5278                                         if (aa && !chaining)
5279                                                 rdataset->trust =
5280                                                     dns_trust_authauthority;
5281                                         else
5282                                                 rdataset->trust =
5283                                                     dns_trust_additional;
5284
5285                                         /*
5286                                          * Mark any additional data related
5287                                          * to this rdataset.
5288                                          */
5289                                         (void)dns_rdataset_additionaldata(
5290                                                         rdataset,
5291                                                         check_related,
5292                                                         fctx);
5293                                         done = ISC_TRUE;
5294                                 }
5295                         }
5296                 }
5297                 result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
5298         }
5299         if (result == ISC_R_NOMORE)
5300                 result = ISC_R_SUCCESS;
5301
5302         return (result);
5303 }
5304
5305 static void
5306 resume_dslookup(isc_task_t *task, isc_event_t *event) {
5307         dns_fetchevent_t *fevent;
5308         dns_resolver_t *res;
5309         fetchctx_t *fctx;
5310         isc_result_t result;
5311         isc_boolean_t bucket_empty = ISC_FALSE;
5312         isc_boolean_t locked = ISC_FALSE;
5313         unsigned int bucketnum;
5314         dns_rdataset_t nameservers;
5315         dns_fixedname_t fixed;
5316         dns_name_t *domain;
5317
5318         REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
5319         fevent = (dns_fetchevent_t *)event;
5320         fctx = event->ev_arg;
5321         REQUIRE(VALID_FCTX(fctx));
5322         res = fctx->res;
5323
5324         UNUSED(task);
5325         FCTXTRACE("resume_dslookup");
5326
5327         if (fevent->node != NULL)
5328                 dns_db_detachnode(fevent->db, &fevent->node);
5329         if (fevent->db != NULL)
5330                 dns_db_detach(&fevent->db);
5331
5332         dns_rdataset_init(&nameservers);
5333
5334         bucketnum = fctx->bucketnum;
5335         if (fevent->result == ISC_R_CANCELED) {
5336                 dns_resolver_destroyfetch(&fctx->nsfetch);
5337                 fctx_done(fctx, ISC_R_CANCELED);
5338         } else if (fevent->result == ISC_R_SUCCESS) {
5339
5340                 FCTXTRACE("resuming DS lookup");
5341
5342                 dns_resolver_destroyfetch(&fctx->nsfetch);
5343                 if (dns_rdataset_isassociated(&fctx->nameservers))
5344                         dns_rdataset_disassociate(&fctx->nameservers);
5345                 dns_rdataset_clone(fevent->rdataset, &fctx->nameservers);
5346                 dns_name_free(&fctx->domain,
5347                               fctx->res->buckets[bucketnum].mctx);
5348                 dns_name_init(&fctx->domain, NULL);
5349                 result = dns_name_dup(&fctx->nsname,
5350                                       fctx->res->buckets[bucketnum].mctx,
5351                                       &fctx->domain);
5352                 if (result != ISC_R_SUCCESS) {
5353                         fctx_done(fctx, DNS_R_SERVFAIL);
5354                         goto cleanup;
5355                 }
5356                 /*
5357                  * Try again.
5358                  */
5359                 fctx_try(fctx);
5360         } else {
5361                 unsigned int n;
5362                 dns_rdataset_t *nsrdataset = NULL;
5363
5364                 /*
5365                  * Retrieve state from fctx->nsfetch before we destroy it.
5366                  */
5367                 dns_fixedname_init(&fixed);
5368                 domain = dns_fixedname_name(&fixed);
5369                 dns_name_copy(&fctx->nsfetch->private->domain, domain, NULL);
5370                 if (dns_name_equal(&fctx->nsname, domain)) {
5371                         fctx_done(fctx, DNS_R_SERVFAIL);
5372                         dns_resolver_destroyfetch(&fctx->nsfetch);
5373                         goto cleanup;
5374                 }
5375                 if (dns_rdataset_isassociated(
5376                     &fctx->nsfetch->private->nameservers)) {
5377                         dns_rdataset_clone(
5378                             &fctx->nsfetch->private->nameservers,
5379                             &nameservers);
5380                         nsrdataset = &nameservers;
5381                 } else
5382                         domain = NULL;
5383                 dns_resolver_destroyfetch(&fctx->nsfetch);
5384                 n = dns_name_countlabels(&fctx->nsname);
5385                 dns_name_getlabelsequence(&fctx->nsname, 1, n - 1,
5386                                           &fctx->nsname);
5387
5388                 if (dns_rdataset_isassociated(fevent->rdataset))
5389                         dns_rdataset_disassociate(fevent->rdataset);
5390                 FCTXTRACE("continuing to look for parent's NS records");
5391                 result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
5392                                                   dns_rdatatype_ns, domain,
5393                                                   nsrdataset, NULL, 0, task,
5394                                                   resume_dslookup, fctx,
5395                                                   &fctx->nsrrset, NULL,
5396                                                   &fctx->nsfetch);
5397                 if (result != ISC_R_SUCCESS)
5398                         fctx_done(fctx, result);
5399                 else {
5400                         LOCK(&res->buckets[bucketnum].lock);
5401                         locked = ISC_TRUE;
5402                         fctx->references++;
5403                 }
5404         }
5405
5406  cleanup:
5407         if (dns_rdataset_isassociated(&nameservers))
5408                 dns_rdataset_disassociate(&nameservers);
5409         if (dns_rdataset_isassociated(fevent->rdataset))
5410                 dns_rdataset_disassociate(fevent->rdataset);
5411         INSIST(fevent->sigrdataset == NULL);
5412         isc_event_free(&event);
5413         if (!locked)
5414                 LOCK(&res->buckets[bucketnum].lock);
5415         fctx->references--;
5416         if (fctx->references == 0)
5417                 bucket_empty = fctx_destroy(fctx);
5418         UNLOCK(&res->buckets[bucketnum].lock);
5419         if (bucket_empty)
5420                 empty_bucket(res);
5421 }
5422
5423 static inline void
5424 checknamessection(dns_message_t *message, dns_section_t section) {
5425         isc_result_t result;
5426         dns_name_t *name;
5427         dns_rdata_t rdata = DNS_RDATA_INIT;
5428         dns_rdataset_t *rdataset;
5429
5430         for (result = dns_message_firstname(message, section);
5431              result == ISC_R_SUCCESS;
5432              result = dns_message_nextname(message, section))
5433         {
5434                 name = NULL;
5435                 dns_message_currentname(message, section, &name);
5436                 for (rdataset = ISC_LIST_HEAD(name->list);
5437                      rdataset != NULL;
5438                      rdataset = ISC_LIST_NEXT(rdataset, link)) {
5439                         for (result = dns_rdataset_first(rdataset);
5440                              result == ISC_R_SUCCESS;
5441                              result = dns_rdataset_next(rdataset)) {
5442                                 dns_rdataset_current(rdataset, &rdata);
5443                                 if (!dns_rdata_checkowner(name, rdata.rdclass,
5444                                                           rdata.type,
5445                                                           ISC_FALSE) ||
5446                                     !dns_rdata_checknames(&rdata, name, NULL))
5447                                 {
5448                                         rdataset->attributes |=
5449                                                 DNS_RDATASETATTR_CHECKNAMES;
5450                                 }
5451                                 dns_rdata_reset(&rdata);
5452                         }
5453                 }
5454         }
5455 }
5456
5457 static void
5458 checknames(dns_message_t *message) {
5459
5460         checknamessection(message, DNS_SECTION_ANSWER);
5461         checknamessection(message, DNS_SECTION_AUTHORITY);
5462         checknamessection(message, DNS_SECTION_ADDITIONAL);
5463 }
5464
5465 static void
5466 log_packet(dns_message_t *message, int level, isc_mem_t *mctx) {
5467         isc_buffer_t buffer;
5468         char *buf = NULL;
5469         int len = 1024;
5470         isc_result_t result;
5471
5472         if (! isc_log_wouldlog(dns_lctx, level))
5473                 return;
5474
5475         /*
5476          * Note that these are multiline debug messages.  We want a newline
5477          * to appear in the log after each message.
5478          */
5479
5480         do {
5481                 buf = isc_mem_get(mctx, len);
5482                 if (buf == NULL)
5483                         break;
5484                 isc_buffer_init(&buffer, buf, len);
5485                 result = dns_message_totext(message, &dns_master_style_debug,
5486                                             0, &buffer);
5487                 if (result == ISC_R_NOSPACE) {
5488                         isc_mem_put(mctx, buf, len);
5489                         len += 1024;
5490                 } else if (result == ISC_R_SUCCESS)
5491                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5492                                       DNS_LOGMODULE_RESOLVER, level,
5493                                       "received packet:\n%.*s",
5494                                       (int)isc_buffer_usedlength(&buffer),
5495                                       buf);
5496         } while (result == ISC_R_NOSPACE);
5497
5498         if (buf != NULL)
5499                 isc_mem_put(mctx, buf, len);
5500 }
5501
5502 static void
5503 resquery_response(isc_task_t *task, isc_event_t *event) {
5504         isc_result_t result = ISC_R_SUCCESS;
5505         resquery_t *query = event->ev_arg;
5506         dns_dispatchevent_t *devent = (dns_dispatchevent_t *)event;
5507         isc_boolean_t keep_trying, get_nameservers, resend;
5508         isc_boolean_t truncated;
5509         dns_message_t *message;
5510         fetchctx_t *fctx;
5511         dns_name_t *fname;
5512         dns_fixedname_t foundname;
5513         isc_stdtime_t now;
5514         isc_time_t tnow, *finish;
5515         dns_adbaddrinfo_t *addrinfo;
5516         unsigned int options;
5517         unsigned int findoptions;
5518         isc_result_t broken_server;
5519
5520         REQUIRE(VALID_QUERY(query));
5521         fctx = query->fctx;
5522         options = query->options;
5523         REQUIRE(VALID_FCTX(fctx));
5524         REQUIRE(event->ev_type == DNS_EVENT_DISPATCH);
5525
5526         QTRACE("response");
5527
5528         (void)isc_timer_touch(fctx->timer);
5529
5530         keep_trying = ISC_FALSE;
5531         broken_server = ISC_R_SUCCESS;
5532         get_nameservers = ISC_FALSE;
5533         resend = ISC_FALSE;
5534         truncated = ISC_FALSE;
5535         finish = NULL;
5536
5537         if (fctx->res->exiting) {
5538                 result = ISC_R_SHUTTINGDOWN;
5539                 goto done;
5540         }
5541
5542         fctx->timeouts = 0;
5543
5544         /*
5545          * XXXRTH  We should really get the current time just once.  We
5546          *         need a routine to convert from an isc_time_t to an
5547          *         isc_stdtime_t.
5548          */
5549         TIME_NOW(&tnow);
5550         finish = &tnow;
5551         isc_stdtime_get(&now);
5552
5553         /*
5554          * Did the dispatcher have a problem?
5555          */
5556         if (devent->result != ISC_R_SUCCESS) {
5557                 if (devent->result == ISC_R_EOF &&
5558                     (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
5559                         /*
5560                          * The problem might be that they
5561                          * don't understand EDNS0.  Turn it
5562                          * off and try again.
5563                          */
5564                         options |= DNS_FETCHOPT_NOEDNS0;
5565                         resend = ISC_TRUE;
5566                         /*
5567                          * Remember that they don't like EDNS0.
5568                          */
5569                         dns_adb_changeflags(fctx->adb,
5570                                             query->addrinfo,
5571                                             DNS_FETCHOPT_NOEDNS0,
5572                                             DNS_FETCHOPT_NOEDNS0);
5573                 } else {
5574                         /*
5575                          * There's no hope for this query.
5576                          */
5577                         keep_trying = ISC_TRUE;
5578
5579                         /*
5580                          * If this is a network error on an exclusive query
5581                          * socket, mark the server as bad so that we won't try
5582                          * it for this fetch again.
5583                          */
5584                         if (query->exclusivesocket &&
5585                             (devent->result == ISC_R_HOSTUNREACH ||
5586                              devent->result == ISC_R_NETUNREACH ||
5587                              devent->result == ISC_R_CONNREFUSED ||
5588                              devent->result == ISC_R_CANCELED)) {
5589                                     broken_server = devent->result;
5590                         }
5591                 }
5592                 goto done;
5593         }
5594
5595         message = fctx->rmessage;
5596
5597         if (query->tsig != NULL) {
5598                 result = dns_message_setquerytsig(message, query->tsig);
5599                 if (result != ISC_R_SUCCESS)
5600                         goto done;
5601         }
5602
5603         if (query->tsigkey) {
5604                 result = dns_message_settsigkey(message, query->tsigkey);
5605                 if (result != ISC_R_SUCCESS)
5606                         goto done;
5607         }
5608
5609         result = dns_message_parse(message, &devent->buffer, 0);
5610         if (result != ISC_R_SUCCESS) {
5611                 switch (result) {
5612                 case ISC_R_UNEXPECTEDEND:
5613                         if (!message->question_ok ||
5614                             (message->flags & DNS_MESSAGEFLAG_TC) == 0 ||
5615                             (options & DNS_FETCHOPT_TCP) != 0) {
5616                                 /*
5617                                  * Either the message ended prematurely,
5618                                  * and/or wasn't marked as being truncated,
5619                                  * and/or this is a response to a query we
5620                                  * sent over TCP.  In all of these cases,
5621                                  * something is wrong with the remote
5622                                  * server and we don't want to retry using
5623                                  * TCP.
5624                                  */
5625                                 if ((query->options & DNS_FETCHOPT_NOEDNS0)
5626                                     == 0) {
5627                                         /*
5628                                          * The problem might be that they
5629                                          * don't understand EDNS0.  Turn it
5630                                          * off and try again.
5631                                          */
5632                                         options |= DNS_FETCHOPT_NOEDNS0;
5633                                         resend = ISC_TRUE;
5634                                         /*
5635                                          * Remember that they don't like EDNS0.
5636                                          */
5637                                         dns_adb_changeflags(
5638                                                         fctx->adb,
5639                                                         query->addrinfo,
5640                                                         DNS_FETCHOPT_NOEDNS0,
5641                                                         DNS_FETCHOPT_NOEDNS0);
5642                                 } else {
5643                                         broken_server = result;
5644                                         keep_trying = ISC_TRUE;
5645                                 }
5646                                 goto done;
5647                         }
5648                         /*
5649                          * We defer retrying via TCP for a bit so we can
5650                          * check out this message further.
5651                          */
5652                         truncated = ISC_TRUE;
5653                         break;
5654                 case DNS_R_FORMERR:
5655                         if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
5656                                 /*
5657                                  * The problem might be that they
5658                                  * don't understand EDNS0.  Turn it
5659                                  * off and try again.
5660                                  */
5661                                 options |= DNS_FETCHOPT_NOEDNS0;
5662                                 resend = ISC_TRUE;
5663                                 /*
5664                                  * Remember that they don't like EDNS0.
5665                                  */
5666                                 dns_adb_changeflags(fctx->adb,
5667                                                     query->addrinfo,
5668                                                     DNS_FETCHOPT_NOEDNS0,
5669                                                     DNS_FETCHOPT_NOEDNS0);
5670                         } else {
5671                                 broken_server = DNS_R_UNEXPECTEDRCODE;
5672                                 keep_trying = ISC_TRUE;
5673                         }
5674                         goto done;
5675                 default:
5676                         /*
5677                          * Something bad has happened.
5678                          */
5679                         goto done;
5680                 }
5681         }
5682
5683         /*
5684          * Log the incoming packet.
5685          */
5686         log_packet(message, ISC_LOG_DEBUG(10), fctx->res->mctx);
5687
5688         /*
5689          * If the message is signed, check the signature.  If not, this
5690          * returns success anyway.
5691          */
5692         result = dns_message_checksig(message, fctx->res->view);
5693         if (result != ISC_R_SUCCESS)
5694                 goto done;
5695
5696         /*
5697          * The dispatcher should ensure we only get responses with QR set.
5698          */
5699         INSIST((message->flags & DNS_MESSAGEFLAG_QR) != 0);
5700         /*
5701          * INSIST() that the message comes from the place we sent it to,
5702          * since the dispatch code should ensure this.
5703          *
5704          * INSIST() that the message id is correct (this should also be
5705          * ensured by the dispatch code).
5706          */
5707
5708
5709         /*
5710          * Deal with truncated responses by retrying using TCP.
5711          */
5712         if ((message->flags & DNS_MESSAGEFLAG_TC) != 0)
5713                 truncated = ISC_TRUE;
5714
5715         if (truncated) {
5716                 if ((options & DNS_FETCHOPT_TCP) != 0) {
5717                         broken_server = DNS_R_TRUNCATEDTCP;
5718                         keep_trying = ISC_TRUE;
5719                 } else {
5720                         options |= DNS_FETCHOPT_TCP;
5721                         resend = ISC_TRUE;
5722                 }
5723                 goto done;
5724         }
5725
5726         /*
5727          * Is it a query response?
5728          */
5729         if (message->opcode != dns_opcode_query) {
5730                 /* XXXRTH Log */
5731                 broken_server = DNS_R_UNEXPECTEDOPCODE;
5732                 keep_trying = ISC_TRUE;
5733                 goto done;
5734         }
5735
5736         /*
5737          * Is the remote server broken, or does it dislike us?
5738          */
5739         if (message->rcode != dns_rcode_noerror &&
5740             message->rcode != dns_rcode_nxdomain) {
5741                 if (((message->rcode == dns_rcode_formerr ||
5742                      message->rcode == dns_rcode_notimp) ||
5743                     (message->rcode == dns_rcode_servfail &&
5744                      dns_message_getopt(message) == NULL)) &&
5745                     (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
5746                         /*
5747                          * It's very likely they don't like EDNS0.
5748                          * If the response code is SERVFAIL, also check if the
5749                          * response contains an OPT RR and don't cache the
5750                          * failure since it can be returned for various other
5751                          * reasons.
5752                          *
5753                          * XXXRTH  We should check if the question
5754                          *         we're asking requires EDNS0, and
5755                          *         if so, we should bail out.
5756                          */
5757                         options |= DNS_FETCHOPT_NOEDNS0;
5758                         resend = ISC_TRUE;
5759                         /*
5760                          * Remember that they don't like EDNS0.
5761                          */
5762                         if (message->rcode != dns_rcode_servfail)
5763                                 dns_adb_changeflags(fctx->adb, query->addrinfo,
5764                                                     DNS_FETCHOPT_NOEDNS0,
5765                                                     DNS_FETCHOPT_NOEDNS0);
5766                 } else if (message->rcode == dns_rcode_formerr) {
5767                         if (ISFORWARDER(query->addrinfo)) {
5768                                 /*
5769                                  * This forwarder doesn't understand us,
5770                                  * but other forwarders might.  Keep trying.
5771                                  */
5772                                 broken_server = DNS_R_REMOTEFORMERR;
5773                                 keep_trying = ISC_TRUE;
5774                         } else {
5775                                 /*
5776                                  * The server doesn't understand us.  Since
5777                                  * all servers for a zone need similar
5778                                  * capabilities, we assume that we will get
5779                                  * FORMERR from all servers, and thus we
5780                                  * cannot make any more progress with this
5781                                  * fetch.
5782                                  */
5783                                 result = DNS_R_FORMERR;
5784                         }
5785                 } else if (message->rcode == dns_rcode_yxdomain) {
5786                         /*
5787                          * DNAME mapping failed because the new name
5788                          * was too long.  There's no chance of success
5789                          * for this fetch.
5790                          */
5791                         result = DNS_R_YXDOMAIN;
5792                 } else if (message->rcode == dns_rcode_badvers) {
5793                         dns_rdataset_t *opt;
5794                         unsigned int flags, mask;
5795                         unsigned int version;
5796
5797                         resend = ISC_TRUE;
5798                         opt = dns_message_getopt(message);
5799                         version = (opt->ttl >> 16) & 0xff;
5800                         flags = (version << DNS_FETCHOPT_EDNSVERSIONSHIFT) |
5801                                 DNS_FETCHOPT_EDNSVERSIONSET;
5802                         mask = DNS_FETCHOPT_EDNSVERSIONMASK |
5803                                DNS_FETCHOPT_EDNSVERSIONSET;
5804                         switch (version) {
5805                         case 0:
5806                                 dns_adb_changeflags(fctx->adb, query->addrinfo,
5807                                                     flags, mask);
5808                                 break;
5809                         default:
5810                                 broken_server = DNS_R_BADVERS;
5811                                 keep_trying = ISC_TRUE;
5812                                 break;
5813                         }
5814                 } else {
5815                         /*
5816                          * XXXRTH log.
5817                          */
5818                         broken_server = DNS_R_UNEXPECTEDRCODE;
5819                         INSIST(broken_server != ISC_R_SUCCESS);
5820                         keep_trying = ISC_TRUE;
5821                 }
5822                 goto done;
5823         }
5824
5825         /*
5826          * Is the question the same as the one we asked?
5827          */
5828         result = same_question(fctx);
5829         if (result != ISC_R_SUCCESS) {
5830                 /* XXXRTH Log */
5831                 if (result == DNS_R_FORMERR)
5832                         keep_trying = ISC_TRUE;
5833                 goto done;
5834         }
5835
5836         /*
5837          * Is the server lame?
5838          */
5839         if (fctx->res->lame_ttl != 0 && !ISFORWARDER(query->addrinfo) &&
5840             is_lame(fctx)) {
5841                 log_lame(fctx, query->addrinfo);
5842                 result = dns_adb_marklame(fctx->adb, query->addrinfo,
5843                                           &fctx->name, fctx->type,
5844                                           now + fctx->res->lame_ttl);
5845                 if (result != ISC_R_SUCCESS)
5846                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5847                                       DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR,
5848                                       "could not mark server as lame: %s",
5849                                       isc_result_totext(result));
5850                 broken_server = DNS_R_LAME;
5851                 keep_trying = ISC_TRUE;
5852                 goto done;
5853         }
5854
5855         /*
5856          * Enforce delegations only zones like NET and COM.
5857          */
5858         if (!ISFORWARDER(query->addrinfo) &&
5859             dns_view_isdelegationonly(fctx->res->view, &fctx->domain) &&
5860             !dns_name_equal(&fctx->domain, &fctx->name) &&
5861             fix_mustbedelegationornxdomain(message, fctx)) {
5862                 char namebuf[DNS_NAME_FORMATSIZE];
5863                 char domainbuf[DNS_NAME_FORMATSIZE];
5864                 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
5865                 char classbuf[64];
5866                 char typebuf[64];
5867
5868                 dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
5869                 dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
5870                 dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
5871                 dns_rdataclass_format(fctx->res->rdclass, classbuf,
5872                                       sizeof(classbuf));
5873                 isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
5874                                     sizeof(addrbuf));
5875
5876                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DELEGATION_ONLY,
5877                              DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5878                              "enforced delegation-only for '%s' (%s/%s/%s) "
5879                              "from %s",
5880                              domainbuf, namebuf, typebuf, classbuf, addrbuf);
5881         }
5882
5883         if ((fctx->res->options & DNS_RESOLVER_CHECKNAMES) != 0)
5884                 checknames(message);
5885
5886         /*
5887          * Clear cache bits.
5888          */
5889         fctx->attributes &= ~(FCTX_ATTR_WANTNCACHE | FCTX_ATTR_WANTCACHE);
5890
5891         /*
5892          * Did we get any answers?
5893          */
5894         if (message->counts[DNS_SECTION_ANSWER] > 0 &&
5895             (message->rcode == dns_rcode_noerror ||
5896              message->rcode == dns_rcode_nxdomain)) {
5897                 /*
5898                  * We've got answers.  However, if we sent
5899                  * a BIND 8 server an NS query, it may have
5900                  * incorrectly responded with a non-authoritative
5901                  * answer instead of a referral.  Since this
5902                  * answer lacks the SIGs necessary to do DNSSEC
5903                  * validation, we must invoke the following special
5904                  * kludge to treat it as a referral.
5905                  */
5906                 if (fctx->type == dns_rdatatype_ns &&
5907                     (message->flags & DNS_MESSAGEFLAG_AA) == 0 &&
5908                     !ISFORWARDER(query->addrinfo))
5909                 {
5910                         result = noanswer_response(fctx, NULL, ISC_TRUE);
5911                         if (result != DNS_R_DELEGATION) {
5912                                 /*
5913                                  * The answer section must have contained
5914                                  * something other than the NS records
5915                                  * we asked for.  Since AA is not set
5916                                  * and the server is not a forwarder,
5917                                  * it is technically lame and it's easier
5918                                  * to treat it as such than to figure out
5919                                  * some more elaborate course of action.
5920                                  */
5921                                 broken_server = DNS_R_LAME;
5922                                 keep_trying = ISC_TRUE;
5923                                 goto done;
5924                         }
5925                         goto force_referral;
5926                 }
5927                 result = answer_response(fctx);
5928                 if (result != ISC_R_SUCCESS) {
5929                         if (result == DNS_R_FORMERR)
5930                                 keep_trying = ISC_TRUE;
5931                         goto done;
5932                 }
5933         } else if (message->counts[DNS_SECTION_AUTHORITY] > 0 ||
5934                    message->rcode == dns_rcode_noerror ||
5935                    message->rcode == dns_rcode_nxdomain) {
5936                 /*
5937                  * NXDOMAIN, NXRDATASET, or referral.
5938                  */
5939                 result = noanswer_response(fctx, NULL, ISC_FALSE);
5940                 if (result == DNS_R_CHASEDSSERVERS) {
5941                 } else if (result == DNS_R_DELEGATION) {
5942                 force_referral:
5943                         /*
5944                          * We don't have the answer, but we know a better
5945                          * place to look.
5946                          */
5947                         get_nameservers = ISC_TRUE;
5948                         keep_trying = ISC_TRUE;
5949                         /*
5950                          * We have a new set of name servers, and it
5951                          * has not experienced any restarts yet.
5952                          */
5953                         fctx->restarts = 0;
5954                         result = ISC_R_SUCCESS;
5955                 } else if (result != ISC_R_SUCCESS) {
5956                         /*
5957                          * Something has gone wrong.
5958                          */
5959                         if (result == DNS_R_FORMERR)
5960                                 keep_trying = ISC_TRUE;
5961                         goto done;
5962                 }
5963         } else {
5964                 /*
5965                  * The server is insane.
5966                  */
5967                 /* XXXRTH Log */
5968                 broken_server = DNS_R_UNEXPECTEDRCODE;
5969                 keep_trying = ISC_TRUE;
5970                 goto done;
5971         }
5972
5973         /*
5974          * Follow additional section data chains.
5975          */
5976         chase_additional(fctx);
5977
5978         /*
5979          * Cache the cacheable parts of the message.  This may also cause
5980          * work to be queued to the DNSSEC validator.
5981          */
5982         if (WANTCACHE(fctx)) {
5983                 result = cache_message(fctx, query->addrinfo, now);
5984                 if (result != ISC_R_SUCCESS)
5985                         goto done;
5986         }
5987
5988         /*
5989          * Ncache the negatively cacheable parts of the message.  This may
5990          * also cause work to be queued to the DNSSEC validator.
5991          */
5992         if (WANTNCACHE(fctx)) {
5993                 dns_rdatatype_t covers;
5994                 if (message->rcode == dns_rcode_nxdomain)
5995                         covers = dns_rdatatype_any;
5996                 else
5997                         covers = fctx->type;
5998
5999                 /*
6000                  * Cache any negative cache entries in the message.
6001                  */
6002                 result = ncache_message(fctx, query->addrinfo, covers, now);
6003         }
6004
6005  done:
6006         /*
6007          * Remember the query's addrinfo, in case we need to mark the
6008          * server as broken.
6009          */
6010         addrinfo = query->addrinfo;
6011
6012         /*
6013          * Cancel the query.
6014          *
6015          * XXXRTH  Don't cancel the query if waiting for validation?
6016          */
6017         fctx_cancelquery(&query, &devent, finish, ISC_FALSE);
6018
6019         if (keep_trying) {
6020                 if (result == DNS_R_FORMERR)
6021                         broken_server = DNS_R_FORMERR;
6022                 if (broken_server != ISC_R_SUCCESS) {
6023                         /*
6024                          * Add this server to the list of bad servers for
6025                          * this fctx.
6026                          */
6027                         add_bad(fctx, addrinfo, broken_server);
6028                 }
6029
6030                 if (get_nameservers) {
6031                         dns_name_t *name;
6032                         dns_fixedname_init(&foundname);
6033                         fname = dns_fixedname_name(&foundname);
6034                         if (result != ISC_R_SUCCESS) {
6035                                 fctx_done(fctx, DNS_R_SERVFAIL);
6036                                 return;
6037                         }
6038                         findoptions = 0;
6039                         if (dns_rdatatype_atparent(fctx->type))
6040                                 findoptions |= DNS_DBFIND_NOEXACT;
6041                         if ((options & DNS_FETCHOPT_UNSHARED) == 0)
6042                                 name = &fctx->name;
6043                         else
6044                                 name = &fctx->domain;
6045                         result = dns_view_findzonecut(fctx->res->view,
6046                                                       name, fname,
6047                                                       now, findoptions,
6048                                                       ISC_TRUE,
6049                                                       &fctx->nameservers,
6050                                                       NULL);
6051                         if (result != ISC_R_SUCCESS) {
6052                                 FCTXTRACE("couldn't find a zonecut");
6053                                 fctx_done(fctx, DNS_R_SERVFAIL);
6054                                 return;
6055                         }
6056                         if (!dns_name_issubdomain(fname, &fctx->domain)) {
6057                                 /*
6058                                  * The best nameservers are now above our
6059                                  * QDOMAIN.
6060                                  */
6061                                 FCTXTRACE("nameservers now above QDOMAIN");
6062                                 fctx_done(fctx, DNS_R_SERVFAIL);
6063                                 return;
6064                         }
6065                         dns_name_free(&fctx->domain,
6066                                       fctx->res->buckets[fctx->bucketnum].mctx);
6067                         dns_name_init(&fctx->domain, NULL);
6068                         result = dns_name_dup(fname,
6069                                               fctx->res->buckets[fctx->bucketnum].mctx,
6070                                               &fctx->domain);
6071                         if (result != ISC_R_SUCCESS) {
6072                                 fctx_done(fctx, DNS_R_SERVFAIL);
6073                                 return;
6074                         }
6075                         fctx_cancelqueries(fctx, ISC_TRUE);
6076                         fctx_cleanupfinds(fctx);
6077                         fctx_cleanupaltfinds(fctx);
6078                         fctx_cleanupforwaddrs(fctx);
6079                         fctx_cleanupaltaddrs(fctx);
6080                 }
6081                 /*
6082                  * Try again.
6083                  */
6084                 fctx_try(fctx);
6085         } else if (resend) {
6086                 /*
6087                  * Resend (probably with changed options).
6088                  */
6089                 FCTXTRACE("resend");
6090                 result = fctx_query(fctx, addrinfo, options);
6091                 if (result != ISC_R_SUCCESS)
6092                         fctx_done(fctx, result);
6093         } else if (result == ISC_R_SUCCESS && !HAVE_ANSWER(fctx)) {
6094                 /*
6095                  * All has gone well so far, but we are waiting for the
6096                  * DNSSEC validator to validate the answer.
6097                  */
6098                 FCTXTRACE("wait for validator");
6099                 fctx_cancelqueries(fctx, ISC_TRUE);
6100                 /*
6101                  * We must not retransmit while the validator is working;
6102                  * it has references to the current rmessage.
6103                  */
6104                 result = fctx_stopidletimer(fctx);
6105                 if (result != ISC_R_SUCCESS)
6106                         fctx_done(fctx, result);
6107         } else if (result == DNS_R_CHASEDSSERVERS) {
6108                 unsigned int n;
6109                 add_bad(fctx, addrinfo, result);
6110                 fctx_cancelqueries(fctx, ISC_TRUE);
6111                 fctx_cleanupfinds(fctx);
6112                 fctx_cleanupforwaddrs(fctx);
6113
6114                 n = dns_name_countlabels(&fctx->name);
6115                 dns_name_getlabelsequence(&fctx->name, 1, n - 1, &fctx->nsname);
6116
6117                 FCTXTRACE("suspending DS lookup to find parent's NS records");
6118
6119                 result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
6120                                                   dns_rdatatype_ns,
6121                                                   NULL, NULL, NULL, 0, task,
6122                                                   resume_dslookup, fctx,
6123                                                   &fctx->nsrrset, NULL,
6124                                                   &fctx->nsfetch);
6125                 if (result != ISC_R_SUCCESS)
6126                         fctx_done(fctx, result);
6127                 LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
6128                 fctx->references++;
6129                 UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
6130                 result = fctx_stopidletimer(fctx);
6131                 if (result != ISC_R_SUCCESS)
6132                         fctx_done(fctx, result);
6133         } else {
6134                 /*
6135                  * We're done.
6136                  */
6137                 fctx_done(fctx, result);
6138         }
6139 }
6140
6141
6142 /***
6143  *** Resolver Methods
6144  ***/
6145
6146 static void
6147 destroy(dns_resolver_t *res) {
6148         unsigned int i;
6149         alternate_t *a;
6150
6151         REQUIRE(res->references == 0);
6152         REQUIRE(!res->priming);
6153         REQUIRE(res->primefetch == NULL);
6154
6155         RTRACE("destroy");
6156
6157         INSIST(res->nfctx == 0);
6158
6159         DESTROYLOCK(&res->primelock);
6160         DESTROYLOCK(&res->nlock);
6161         DESTROYLOCK(&res->lock);
6162         for (i = 0; i < res->nbuckets; i++) {
6163                 INSIST(ISC_LIST_EMPTY(res->buckets[i].fctxs));
6164                 isc_task_shutdown(res->buckets[i].task);
6165                 isc_task_detach(&res->buckets[i].task);
6166                 DESTROYLOCK(&res->buckets[i].lock);
6167                 isc_mem_detach(&res->buckets[i].mctx);
6168         }
6169         isc_mem_put(res->mctx, res->buckets,
6170                     res->nbuckets * sizeof(fctxbucket_t));
6171         if (res->dispatchv4 != NULL)
6172                 dns_dispatch_detach(&res->dispatchv4);
6173         if (res->dispatchv6 != NULL)
6174                 dns_dispatch_detach(&res->dispatchv6);
6175         while ((a = ISC_LIST_HEAD(res->alternates)) != NULL) {
6176                 ISC_LIST_UNLINK(res->alternates, a, link);
6177                 if (!a->isaddress)
6178                         dns_name_free(&a->_u._n.name, res->mctx);
6179                 isc_mem_put(res->mctx, a, sizeof(*a));
6180         }
6181         dns_resolver_reset_algorithms(res);
6182         dns_resolver_resetmustbesecure(res);
6183 #if USE_ALGLOCK
6184         isc_rwlock_destroy(&res->alglock);
6185 #endif
6186 #if USE_MBSLOCK
6187         isc_rwlock_destroy(&res->mbslock);
6188 #endif
6189         isc_timer_detach(&res->spillattimer);
6190         res->magic = 0;
6191         isc_mem_put(res->mctx, res, sizeof(*res));
6192 }
6193
6194 static void
6195 send_shutdown_events(dns_resolver_t *res) {
6196         isc_event_t *event, *next_event;
6197         isc_task_t *etask;
6198
6199         /*
6200          * Caller must be holding the resolver lock.
6201          */
6202
6203         for (event = ISC_LIST_HEAD(res->whenshutdown);
6204              event != NULL;
6205              event = next_event) {
6206                 next_event = ISC_LIST_NEXT(event, ev_link);
6207                 ISC_LIST_UNLINK(res->whenshutdown, event, ev_link);
6208                 etask = event->ev_sender;
6209                 event->ev_sender = res;
6210                 isc_task_sendanddetach(&etask, &event);
6211         }
6212 }
6213
6214 static void
6215 empty_bucket(dns_resolver_t *res) {
6216         RTRACE("empty_bucket");
6217
6218         LOCK(&res->lock);
6219
6220         INSIST(res->activebuckets > 0);
6221         res->activebuckets--;
6222         if (res->activebuckets == 0)
6223                 send_shutdown_events(res);
6224
6225         UNLOCK(&res->lock);
6226 }
6227
6228 static void
6229 spillattimer_countdown(isc_task_t *task, isc_event_t *event) {
6230         dns_resolver_t *res = event->ev_arg;
6231         isc_result_t result;
6232         unsigned int count;
6233         isc_boolean_t logit = ISC_FALSE;
6234
6235         REQUIRE(VALID_RESOLVER(res));
6236
6237         UNUSED(task);
6238
6239         LOCK(&res->lock);
6240         INSIST(!res->exiting);
6241         if (res->spillat > res->spillatmin) {
6242                 res->spillat--;
6243                 logit = ISC_TRUE;
6244         }
6245         if (res->spillat <= res->spillatmin) {
6246                 result = isc_timer_reset(res->spillattimer,
6247                                          isc_timertype_inactive, NULL,
6248                                          NULL, ISC_TRUE);
6249                 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6250         }
6251         count = res->spillat;
6252         UNLOCK(&res->lock);
6253         if (logit)
6254                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6255                               DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
6256                               "clients-per-query decreased to %u", count);
6257
6258         isc_event_free(&event);
6259 }
6260
6261 isc_result_t
6262 dns_resolver_create(dns_view_t *view,
6263                     isc_taskmgr_t *taskmgr, unsigned int ntasks,
6264                     isc_socketmgr_t *socketmgr,
6265                     isc_timermgr_t *timermgr,
6266                     unsigned int options,
6267                     dns_dispatchmgr_t *dispatchmgr,
6268                     dns_dispatch_t *dispatchv4,
6269                     dns_dispatch_t *dispatchv6,
6270                     dns_resolver_t **resp)
6271 {
6272         dns_resolver_t *res;
6273         isc_result_t result = ISC_R_SUCCESS;
6274         unsigned int i, buckets_created = 0;
6275         isc_task_t *task = NULL;
6276         char name[16];
6277         unsigned dispattr;
6278
6279         /*
6280          * Create a resolver.
6281          */
6282
6283         REQUIRE(DNS_VIEW_VALID(view));
6284         REQUIRE(ntasks > 0);
6285         REQUIRE(resp != NULL && *resp == NULL);
6286         REQUIRE(dispatchmgr != NULL);
6287         REQUIRE(dispatchv4 != NULL || dispatchv6 != NULL);
6288
6289         res = isc_mem_get(view->mctx, sizeof(*res));
6290         if (res == NULL)
6291                 return (ISC_R_NOMEMORY);
6292         RTRACE("create");
6293         res->mctx = view->mctx;
6294         res->rdclass = view->rdclass;
6295         res->socketmgr = socketmgr;
6296         res->timermgr = timermgr;
6297         res->taskmgr = taskmgr;
6298         res->dispatchmgr = dispatchmgr;
6299         res->view = view;
6300         res->options = options;
6301         res->lame_ttl = 0;
6302         ISC_LIST_INIT(res->alternates);
6303         res->udpsize = RECV_BUFFER_SIZE;
6304         res->algorithms = NULL;
6305         res->mustbesecure = NULL;
6306         res->spillatmin = res->spillat = 10;
6307         res->spillatmax = 100;
6308         res->spillattimer = NULL;
6309         res->zero_no_soa_ttl = ISC_FALSE;
6310
6311         res->nbuckets = ntasks;
6312         res->activebuckets = ntasks;
6313         res->buckets = isc_mem_get(view->mctx,
6314                                    ntasks * sizeof(fctxbucket_t));
6315         if (res->buckets == NULL) {
6316                 result = ISC_R_NOMEMORY;
6317                 goto cleanup_res;
6318         }
6319         for (i = 0; i < ntasks; i++) {
6320                 result = isc_mutex_init(&res->buckets[i].lock);
6321                 if (result != ISC_R_SUCCESS)
6322                         goto cleanup_buckets;
6323                 res->buckets[i].task = NULL;
6324                 result = isc_task_create(taskmgr, 0, &res->buckets[i].task);
6325                 if (result != ISC_R_SUCCESS) {
6326                         DESTROYLOCK(&res->buckets[i].lock);
6327                         goto cleanup_buckets;
6328                 }
6329                 res->buckets[i].mctx = NULL;
6330                 result = isc_mem_create(0, 0, &res->buckets[i].mctx);
6331                 if (result != ISC_R_SUCCESS) {
6332                         isc_task_detach(&res->buckets[i].task);
6333                         DESTROYLOCK(&res->buckets[i].lock);
6334                         goto cleanup_buckets;
6335                 }
6336                 snprintf(name, sizeof(name), "res%u", i);
6337                 isc_task_setname(res->buckets[i].task, name, res);
6338                 ISC_LIST_INIT(res->buckets[i].fctxs);
6339                 res->buckets[i].exiting = ISC_FALSE;
6340                 buckets_created++;
6341         }
6342
6343         res->dispatchv4 = NULL;
6344         if (dispatchv4 != NULL) {
6345                 dns_dispatch_attach(dispatchv4, &res->dispatchv4);
6346                 dispattr = dns_dispatch_getattributes(dispatchv4);
6347                 res->exclusivev4 =
6348                         ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
6349         }
6350
6351         res->dispatchv6 = NULL;
6352         if (dispatchv6 != NULL) {
6353                 dns_dispatch_attach(dispatchv6, &res->dispatchv6);
6354                 dispattr = dns_dispatch_getattributes(dispatchv6);
6355                 res->exclusivev6 =
6356                         ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
6357         }
6358
6359         res->references = 1;
6360         res->exiting = ISC_FALSE;
6361         res->frozen = ISC_FALSE;
6362         ISC_LIST_INIT(res->whenshutdown);
6363         res->priming = ISC_FALSE;
6364         res->primefetch = NULL;
6365         res->nfctx = 0;
6366
6367         result = isc_mutex_init(&res->lock);
6368         if (result != ISC_R_SUCCESS)
6369                 goto cleanup_dispatches;
6370
6371         result = isc_mutex_init(&res->nlock);
6372         if (result != ISC_R_SUCCESS)
6373                 goto cleanup_lock;
6374
6375         result = isc_mutex_init(&res->primelock);
6376         if (result != ISC_R_SUCCESS)
6377                 goto cleanup_nlock;
6378
6379         task = NULL;
6380         result = isc_task_create(taskmgr, 0, &task);
6381         if (result != ISC_R_SUCCESS)
6382                  goto cleanup_primelock;
6383
6384         result = isc_timer_create(timermgr, isc_timertype_inactive, NULL, NULL,
6385                                   task, spillattimer_countdown, res,
6386                                   &res->spillattimer);
6387         isc_task_detach(&task);
6388         if (result != ISC_R_SUCCESS)
6389                  goto cleanup_primelock;
6390
6391 #if USE_ALGLOCK
6392         result = isc_rwlock_init(&res->alglock, 0, 0);
6393         if (result != ISC_R_SUCCESS)
6394                 goto cleanup_spillattimer;
6395 #endif
6396 #if USE_MBSLOCK
6397         result = isc_rwlock_init(&res->mbslock, 0, 0);
6398         if (result != ISC_R_SUCCESS)
6399                 goto cleanup_alglock;
6400 #endif
6401
6402         res->magic = RES_MAGIC;
6403
6404         *resp = res;
6405
6406         return (ISC_R_SUCCESS);
6407
6408 #if USE_MBSLOCK
6409  cleanup_alglock:
6410 #if USE_ALGLOCK
6411         isc_rwlock_destroy(&res->alglock);
6412 #endif
6413 #endif
6414 #if USE_ALGLOCK || USE_MBSLOCK
6415  cleanup_spillattimer:
6416         isc_timer_detach(&res->spillattimer);
6417 #endif
6418
6419  cleanup_primelock:
6420         DESTROYLOCK(&res->primelock);
6421
6422  cleanup_nlock:
6423         DESTROYLOCK(&res->nlock);
6424
6425  cleanup_lock:
6426         DESTROYLOCK(&res->lock);
6427
6428  cleanup_dispatches:
6429         if (res->dispatchv6 != NULL)
6430                 dns_dispatch_detach(&res->dispatchv6);
6431         if (res->dispatchv4 != NULL)
6432                 dns_dispatch_detach(&res->dispatchv4);
6433
6434  cleanup_buckets:
6435         for (i = 0; i < buckets_created; i++) {
6436                 isc_mem_detach(&res->buckets[i].mctx);
6437                 DESTROYLOCK(&res->buckets[i].lock);
6438                 isc_task_shutdown(res->buckets[i].task);
6439                 isc_task_detach(&res->buckets[i].task);
6440         }
6441         isc_mem_put(view->mctx, res->buckets,
6442                     res->nbuckets * sizeof(fctxbucket_t));
6443
6444  cleanup_res:
6445         isc_mem_put(view->mctx, res, sizeof(*res));
6446
6447         return (result);
6448 }
6449
6450 static void
6451 prime_done(isc_task_t *task, isc_event_t *event) {
6452         dns_resolver_t *res;
6453         dns_fetchevent_t *fevent;
6454         dns_fetch_t *fetch;
6455         dns_db_t *db = NULL;
6456
6457         REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
6458         fevent = (dns_fetchevent_t *)event;
6459         res = event->ev_arg;
6460         REQUIRE(VALID_RESOLVER(res));
6461
6462         UNUSED(task);
6463
6464         LOCK(&res->lock);
6465
6466         INSIST(res->priming);
6467         res->priming = ISC_FALSE;
6468         LOCK(&res->primelock);
6469         fetch = res->primefetch;
6470         res->primefetch = NULL;
6471         UNLOCK(&res->primelock);
6472
6473         UNLOCK(&res->lock);
6474
6475         if (fevent->result == ISC_R_SUCCESS &&
6476             res->view->cache != NULL && res->view->hints != NULL) {
6477                 dns_cache_attachdb(res->view->cache, &db);
6478                 dns_root_checkhints(res->view, res->view->hints, db);
6479                 dns_db_detach(&db);
6480         }
6481
6482         if (fevent->node != NULL)
6483                 dns_db_detachnode(fevent->db, &fevent->node);
6484         if (fevent->db != NULL)
6485                 dns_db_detach(&fevent->db);
6486         if (dns_rdataset_isassociated(fevent->rdataset))
6487                 dns_rdataset_disassociate(fevent->rdataset);
6488         INSIST(fevent->sigrdataset == NULL);
6489
6490         isc_mem_put(res->mctx, fevent->rdataset, sizeof(*fevent->rdataset));
6491
6492         isc_event_free(&event);
6493         dns_resolver_destroyfetch(&fetch);
6494 }
6495
6496 void
6497 dns_resolver_prime(dns_resolver_t *res) {
6498         isc_boolean_t want_priming = ISC_FALSE;
6499         dns_rdataset_t *rdataset;
6500         isc_result_t result;
6501
6502         REQUIRE(VALID_RESOLVER(res));
6503         REQUIRE(res->frozen);
6504
6505         RTRACE("dns_resolver_prime");
6506
6507         LOCK(&res->lock);
6508
6509         if (!res->exiting && !res->priming) {
6510                 INSIST(res->primefetch == NULL);
6511                 res->priming = ISC_TRUE;
6512                 want_priming = ISC_TRUE;
6513         }
6514
6515         UNLOCK(&res->lock);
6516
6517         if (want_priming) {
6518                 /*
6519                  * To avoid any possible recursive locking problems, we
6520                  * start the priming fetch like any other fetch, and holding
6521                  * no resolver locks.  No one else will try to start it
6522                  * because we're the ones who set res->priming to true.
6523                  * Any other callers of dns_resolver_prime() while we're
6524                  * running will see that res->priming is already true and
6525                  * do nothing.
6526                  */
6527                 RTRACE("priming");
6528                 rdataset = isc_mem_get(res->mctx, sizeof(*rdataset));
6529                 if (rdataset == NULL) {
6530                         LOCK(&res->lock);
6531                         INSIST(res->priming);
6532                         INSIST(res->primefetch == NULL);
6533                         res->priming = ISC_FALSE;
6534                         UNLOCK(&res->lock);
6535                         return;
6536                 }
6537                 dns_rdataset_init(rdataset);
6538                 LOCK(&res->primelock);
6539                 result = dns_resolver_createfetch(res, dns_rootname,
6540                                                   dns_rdatatype_ns,
6541                                                   NULL, NULL, NULL, 0,
6542                                                   res->buckets[0].task,
6543                                                   prime_done,
6544                                                   res, rdataset, NULL,
6545                                                   &res->primefetch);
6546                 UNLOCK(&res->primelock);
6547                 if (result != ISC_R_SUCCESS) {
6548                         LOCK(&res->lock);
6549                         INSIST(res->priming);
6550                         res->priming = ISC_FALSE;
6551                         UNLOCK(&res->lock);
6552                 }
6553         }
6554 }
6555
6556 void
6557 dns_resolver_freeze(dns_resolver_t *res) {
6558
6559         /*
6560          * Freeze resolver.
6561          */
6562
6563         REQUIRE(VALID_RESOLVER(res));
6564         REQUIRE(!res->frozen);
6565
6566         res->frozen = ISC_TRUE;
6567 }
6568
6569 void
6570 dns_resolver_attach(dns_resolver_t *source, dns_resolver_t **targetp) {
6571         REQUIRE(VALID_RESOLVER(source));
6572         REQUIRE(targetp != NULL && *targetp == NULL);
6573
6574         RRTRACE(source, "attach");
6575         LOCK(&source->lock);
6576         REQUIRE(!source->exiting);
6577
6578         INSIST(source->references > 0);
6579         source->references++;
6580         INSIST(source->references != 0);
6581         UNLOCK(&source->lock);
6582
6583         *targetp = source;
6584 }
6585
6586 void
6587 dns_resolver_whenshutdown(dns_resolver_t *res, isc_task_t *task,
6588                           isc_event_t **eventp)
6589 {
6590         isc_task_t *clone;
6591         isc_event_t *event;
6592
6593         REQUIRE(VALID_RESOLVER(res));
6594         REQUIRE(eventp != NULL);
6595
6596         event = *eventp;
6597         *eventp = NULL;
6598
6599         LOCK(&res->lock);
6600
6601         if (res->exiting && res->activebuckets == 0) {
6602                 /*
6603                  * We're already shutdown.  Send the event.
6604                  */
6605                 event->ev_sender = res;
6606                 isc_task_send(task, &event);
6607         } else {
6608                 clone = NULL;
6609                 isc_task_attach(task, &clone);
6610                 event->ev_sender = clone;
6611                 ISC_LIST_APPEND(res->whenshutdown, event, ev_link);
6612         }
6613
6614         UNLOCK(&res->lock);
6615 }
6616
6617 void
6618 dns_resolver_shutdown(dns_resolver_t *res) {
6619         unsigned int i;
6620         fetchctx_t *fctx;
6621         isc_socket_t *sock;
6622         isc_result_t result;
6623
6624         REQUIRE(VALID_RESOLVER(res));
6625
6626         RTRACE("shutdown");
6627
6628         LOCK(&res->lock);
6629
6630         if (!res->exiting) {
6631                 RTRACE("exiting");
6632                 res->exiting = ISC_TRUE;
6633
6634                 for (i = 0; i < res->nbuckets; i++) {
6635                         LOCK(&res->buckets[i].lock);
6636                         for (fctx = ISC_LIST_HEAD(res->buckets[i].fctxs);
6637                              fctx != NULL;
6638                              fctx = ISC_LIST_NEXT(fctx, link))
6639                                 fctx_shutdown(fctx);
6640                         if (res->dispatchv4 != NULL && !res->exclusivev4) {
6641                                 sock = dns_dispatch_getsocket(res->dispatchv4);
6642                                 isc_socket_cancel(sock, res->buckets[i].task,
6643                                                   ISC_SOCKCANCEL_ALL);
6644                         }
6645                         if (res->dispatchv6 != NULL && !res->exclusivev6) {
6646                                 sock = dns_dispatch_getsocket(res->dispatchv6);
6647                                 isc_socket_cancel(sock, res->buckets[i].task,
6648                                                   ISC_SOCKCANCEL_ALL);
6649                         }
6650                         res->buckets[i].exiting = ISC_TRUE;
6651                         if (ISC_LIST_EMPTY(res->buckets[i].fctxs)) {
6652                                 INSIST(res->activebuckets > 0);
6653                                 res->activebuckets--;
6654                         }
6655                         UNLOCK(&res->buckets[i].lock);
6656                 }
6657                 if (res->activebuckets == 0)
6658                         send_shutdown_events(res);
6659                 result = isc_timer_reset(res->spillattimer,
6660                                          isc_timertype_inactive, NULL,
6661                                          NULL, ISC_TRUE);
6662                 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6663         }
6664
6665         UNLOCK(&res->lock);
6666 }
6667
6668 void
6669 dns_resolver_detach(dns_resolver_t **resp) {
6670         dns_resolver_t *res;
6671         isc_boolean_t need_destroy = ISC_FALSE;
6672
6673         REQUIRE(resp != NULL);
6674         res = *resp;
6675         REQUIRE(VALID_RESOLVER(res));
6676
6677         RTRACE("detach");
6678
6679         LOCK(&res->lock);
6680
6681         INSIST(res->references > 0);
6682         res->references--;
6683         if (res->references == 0) {
6684                 INSIST(res->exiting && res->activebuckets == 0);
6685                 need_destroy = ISC_TRUE;
6686         }
6687
6688         UNLOCK(&res->lock);
6689
6690         if (need_destroy)
6691                 destroy(res);
6692
6693         *resp = NULL;
6694 }
6695
6696 static inline isc_boolean_t
6697 fctx_match(fetchctx_t *fctx, dns_name_t *name, dns_rdatatype_t type,
6698            unsigned int options)
6699 {
6700         if (fctx->type != type || fctx->options != options)
6701                 return (ISC_FALSE);
6702         return (dns_name_equal(&fctx->name, name));
6703 }
6704
6705 static inline void
6706 log_fetch(dns_name_t *name, dns_rdatatype_t type) {
6707         char namebuf[DNS_NAME_FORMATSIZE];
6708         char typebuf[DNS_RDATATYPE_FORMATSIZE];
6709         int level = ISC_LOG_DEBUG(1);
6710
6711         if (! isc_log_wouldlog(dns_lctx, level))
6712                 return;
6713
6714         dns_name_format(name, namebuf, sizeof(namebuf));
6715         dns_rdatatype_format(type, typebuf, sizeof(typebuf));
6716
6717         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6718                       DNS_LOGMODULE_RESOLVER, level,
6719                       "createfetch: %s %s", namebuf, typebuf);
6720 }
6721
6722 isc_result_t
6723 dns_resolver_createfetch(dns_resolver_t *res, dns_name_t *name,
6724                          dns_rdatatype_t type,
6725                          dns_name_t *domain, dns_rdataset_t *nameservers,
6726                          dns_forwarders_t *forwarders,
6727                          unsigned int options, isc_task_t *task,
6728                          isc_taskaction_t action, void *arg,
6729                          dns_rdataset_t *rdataset,
6730                          dns_rdataset_t *sigrdataset,
6731                          dns_fetch_t **fetchp)
6732 {
6733         return (dns_resolver_createfetch2(res, name, type, domain,
6734                                           nameservers, forwarders, NULL, 0,
6735                                           options, task, action, arg,
6736                                           rdataset, sigrdataset, fetchp));
6737 }
6738
6739 isc_result_t
6740 dns_resolver_createfetch2(dns_resolver_t *res, dns_name_t *name,
6741                           dns_rdatatype_t type,
6742                           dns_name_t *domain, dns_rdataset_t *nameservers,
6743                           dns_forwarders_t *forwarders,
6744                           isc_sockaddr_t *client, dns_messageid_t id,
6745                           unsigned int options, isc_task_t *task,
6746                           isc_taskaction_t action, void *arg,
6747                           dns_rdataset_t *rdataset,
6748                           dns_rdataset_t *sigrdataset,
6749                           dns_fetch_t **fetchp)
6750 {
6751         dns_fetch_t *fetch;
6752         fetchctx_t *fctx = NULL;
6753         isc_result_t result = ISC_R_SUCCESS;
6754         unsigned int bucketnum;
6755         isc_boolean_t new_fctx = ISC_FALSE;
6756         isc_event_t *event;
6757         unsigned int count = 0;
6758         unsigned int spillat;
6759         unsigned int spillatmin;
6760
6761         UNUSED(forwarders);
6762
6763         REQUIRE(VALID_RESOLVER(res));
6764         REQUIRE(res->frozen);
6765         /* XXXRTH  Check for meta type */
6766         if (domain != NULL) {
6767                 REQUIRE(DNS_RDATASET_VALID(nameservers));
6768                 REQUIRE(nameservers->type == dns_rdatatype_ns);
6769         } else
6770                 REQUIRE(nameservers == NULL);
6771         REQUIRE(forwarders == NULL);
6772         REQUIRE(!dns_rdataset_isassociated(rdataset));
6773         REQUIRE(sigrdataset == NULL ||
6774                 !dns_rdataset_isassociated(sigrdataset));
6775         REQUIRE(fetchp != NULL && *fetchp == NULL);
6776
6777         log_fetch(name, type);
6778
6779         /*
6780          * XXXRTH  use a mempool?
6781          */
6782         fetch = isc_mem_get(res->mctx, sizeof(*fetch));
6783         if (fetch == NULL)
6784                 return (ISC_R_NOMEMORY);
6785
6786         bucketnum = dns_name_fullhash(name, ISC_FALSE) % res->nbuckets;
6787
6788         LOCK(&res->lock);
6789         spillat = res->spillat;
6790         spillatmin = res->spillatmin;
6791         UNLOCK(&res->lock);
6792         LOCK(&res->buckets[bucketnum].lock);
6793
6794         if (res->buckets[bucketnum].exiting) {
6795                 result = ISC_R_SHUTTINGDOWN;
6796                 goto unlock;
6797         }
6798
6799         if ((options & DNS_FETCHOPT_UNSHARED) == 0) {
6800                 for (fctx = ISC_LIST_HEAD(res->buckets[bucketnum].fctxs);
6801                      fctx != NULL;
6802                      fctx = ISC_LIST_NEXT(fctx, link)) {
6803                         if (fctx_match(fctx, name, type, options))
6804                                 break;
6805                 }
6806         }
6807
6808         /*
6809          * Is this a duplicate?
6810          */
6811         if (fctx != NULL && client != NULL) {
6812                 dns_fetchevent_t *fevent;
6813                 for (fevent = ISC_LIST_HEAD(fctx->events);
6814                      fevent != NULL;
6815                      fevent = ISC_LIST_NEXT(fevent, ev_link)) {
6816                         if (fevent->client != NULL && fevent->id == id &&
6817                             isc_sockaddr_equal(fevent->client, client)) {
6818                                 result = DNS_R_DUPLICATE;
6819                                 goto unlock;
6820                         }
6821                         count++;
6822                 }
6823         }
6824         if (count >= spillatmin && spillatmin != 0) {
6825                 INSIST(fctx != NULL);
6826                 if (count >= spillat)
6827                         fctx->spilled = ISC_TRUE;
6828                 if (fctx->spilled) {
6829                         result = DNS_R_DROP;
6830                         goto unlock;
6831                 }
6832         }
6833
6834         /*
6835          * If we didn't have a fetch, would attach to a done fetch, this
6836          * fetch has already cloned its results, or if the fetch has gone
6837          * "idle" (no one was interested in it), we need to start a new
6838          * fetch instead of joining with the existing one.
6839          */
6840         if (fctx == NULL ||
6841             fctx->state == fetchstate_done ||
6842             fctx->cloned ||
6843             ISC_LIST_EMPTY(fctx->events)) {
6844                 fctx = NULL;
6845                 result = fctx_create(res, name, type, domain, nameservers,
6846                                      options, bucketnum, &fctx);
6847                 if (result != ISC_R_SUCCESS)
6848                         goto unlock;
6849                 new_fctx = ISC_TRUE;
6850         }
6851
6852         result = fctx_join(fctx, task, client, id, action, arg,
6853                            rdataset, sigrdataset, fetch);
6854         if (new_fctx) {
6855                 if (result == ISC_R_SUCCESS) {
6856                         /*
6857                          * Launch this fctx.
6858                          */
6859                         event = &fctx->control_event;
6860                         ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
6861                                        DNS_EVENT_FETCHCONTROL,
6862                                        fctx_start, fctx, NULL,
6863                                        NULL, NULL);
6864                         isc_task_send(res->buckets[bucketnum].task, &event);
6865                 } else {
6866                         /*
6867                          * We don't care about the result of fctx_destroy()
6868                          * since we know we're not exiting.
6869                          */
6870                         (void)fctx_destroy(fctx);
6871                 }
6872         }
6873
6874  unlock:
6875         UNLOCK(&res->buckets[bucketnum].lock);
6876
6877         if (result == ISC_R_SUCCESS) {
6878                 FTRACE("created");
6879                 *fetchp = fetch;
6880         } else
6881                 isc_mem_put(res->mctx, fetch, sizeof(*fetch));
6882
6883         return (result);
6884 }
6885
6886 void
6887 dns_resolver_cancelfetch(dns_fetch_t *fetch) {
6888         fetchctx_t *fctx;
6889         dns_resolver_t *res;
6890         dns_fetchevent_t *event, *next_event;
6891         isc_task_t *etask;
6892
6893         REQUIRE(DNS_FETCH_VALID(fetch));
6894         fctx = fetch->private;
6895         REQUIRE(VALID_FCTX(fctx));
6896         res = fctx->res;
6897
6898         FTRACE("cancelfetch");
6899
6900         LOCK(&res->buckets[fctx->bucketnum].lock);
6901
6902         /*
6903          * Find the completion event for this fetch (as opposed
6904          * to those for other fetches that have joined the same
6905          * fctx) and send it with result = ISC_R_CANCELED.
6906          */
6907         event = NULL;
6908         if (fctx->state != fetchstate_done) {
6909                 for (event = ISC_LIST_HEAD(fctx->events);
6910                      event != NULL;
6911                      event = next_event) {
6912                         next_event = ISC_LIST_NEXT(event, ev_link);
6913                         if (event->fetch == fetch) {
6914                                 ISC_LIST_UNLINK(fctx->events, event, ev_link);
6915                                 break;
6916                         }
6917                 }
6918         }
6919         if (event != NULL) {
6920                 etask = event->ev_sender;
6921                 event->ev_sender = fctx;
6922                 event->result = ISC_R_CANCELED;
6923                 isc_task_sendanddetach(&etask, ISC_EVENT_PTR(&event));
6924         }
6925         /*
6926          * The fctx continues running even if no fetches remain;
6927          * the answer is still cached.
6928          */
6929
6930         UNLOCK(&res->buckets[fctx->bucketnum].lock);
6931 }
6932
6933 void
6934 dns_resolver_destroyfetch(dns_fetch_t **fetchp) {
6935         dns_fetch_t *fetch;
6936         dns_resolver_t *res;
6937         dns_fetchevent_t *event, *next_event;
6938         fetchctx_t *fctx;
6939         unsigned int bucketnum;
6940         isc_boolean_t bucket_empty = ISC_FALSE;
6941
6942         REQUIRE(fetchp != NULL);
6943         fetch = *fetchp;
6944         REQUIRE(DNS_FETCH_VALID(fetch));
6945         fctx = fetch->private;
6946         REQUIRE(VALID_FCTX(fctx));
6947         res = fctx->res;
6948
6949         FTRACE("destroyfetch");
6950
6951         bucketnum = fctx->bucketnum;
6952         LOCK(&res->buckets[bucketnum].lock);
6953
6954         /*
6955          * Sanity check: the caller should have gotten its event before
6956          * trying to destroy the fetch.
6957          */
6958         event = NULL;
6959         if (fctx->state != fetchstate_done) {
6960                 for (event = ISC_LIST_HEAD(fctx->events);
6961                      event != NULL;
6962                      event = next_event) {
6963                         next_event = ISC_LIST_NEXT(event, ev_link);
6964                         RUNTIME_CHECK(event->fetch != fetch);
6965                 }
6966         }
6967
6968         INSIST(fctx->references > 0);
6969         fctx->references--;
6970         if (fctx->references == 0) {
6971                 /*
6972                  * No one cares about the result of this fetch anymore.
6973                  */
6974                 if (fctx->pending == 0 && fctx->nqueries == 0 &&
6975                     ISC_LIST_EMPTY(fctx->validators) &&
6976                     SHUTTINGDOWN(fctx)) {
6977                         /*
6978                          * This fctx is already shutdown; we were just
6979                          * waiting for the last reference to go away.
6980                          */
6981                         bucket_empty = fctx_destroy(fctx);
6982                 } else {
6983                         /*
6984                          * Initiate shutdown.
6985                          */
6986                         fctx_shutdown(fctx);
6987                 }
6988         }
6989
6990         UNLOCK(&res->buckets[bucketnum].lock);
6991
6992         isc_mem_put(res->mctx, fetch, sizeof(*fetch));
6993         *fetchp = NULL;
6994
6995         if (bucket_empty)
6996                 empty_bucket(res);
6997 }
6998
6999 dns_dispatchmgr_t *
7000 dns_resolver_dispatchmgr(dns_resolver_t *resolver) {
7001         REQUIRE(VALID_RESOLVER(resolver));
7002         return (resolver->dispatchmgr);
7003 }
7004
7005 dns_dispatch_t *
7006 dns_resolver_dispatchv4(dns_resolver_t *resolver) {
7007         REQUIRE(VALID_RESOLVER(resolver));
7008         return (resolver->dispatchv4);
7009 }
7010
7011 dns_dispatch_t *
7012 dns_resolver_dispatchv6(dns_resolver_t *resolver) {
7013         REQUIRE(VALID_RESOLVER(resolver));
7014         return (resolver->dispatchv6);
7015 }
7016
7017 isc_socketmgr_t *
7018 dns_resolver_socketmgr(dns_resolver_t *resolver) {
7019         REQUIRE(VALID_RESOLVER(resolver));
7020         return (resolver->socketmgr);
7021 }
7022
7023 isc_taskmgr_t *
7024 dns_resolver_taskmgr(dns_resolver_t *resolver) {
7025         REQUIRE(VALID_RESOLVER(resolver));
7026         return (resolver->taskmgr);
7027 }
7028
7029 isc_uint32_t
7030 dns_resolver_getlamettl(dns_resolver_t *resolver) {
7031         REQUIRE(VALID_RESOLVER(resolver));
7032         return (resolver->lame_ttl);
7033 }
7034
7035 void
7036 dns_resolver_setlamettl(dns_resolver_t *resolver, isc_uint32_t lame_ttl) {
7037         REQUIRE(VALID_RESOLVER(resolver));
7038         resolver->lame_ttl = lame_ttl;
7039 }
7040
7041 unsigned int
7042 dns_resolver_nrunning(dns_resolver_t *resolver) {
7043         unsigned int n;
7044         LOCK(&resolver->nlock);
7045         n = resolver->nfctx;
7046         UNLOCK(&resolver->nlock);
7047         return (n);
7048 }
7049
7050 isc_result_t
7051 dns_resolver_addalternate(dns_resolver_t *resolver, isc_sockaddr_t *alt,
7052                           dns_name_t *name, in_port_t port) {
7053         alternate_t *a;
7054         isc_result_t result;
7055
7056         REQUIRE(VALID_RESOLVER(resolver));
7057         REQUIRE(!resolver->frozen);
7058         REQUIRE((alt == NULL) ^ (name == NULL));
7059
7060         a = isc_mem_get(resolver->mctx, sizeof(*a));
7061         if (a == NULL)
7062                 return (ISC_R_NOMEMORY);
7063         if (alt != NULL) {
7064                 a->isaddress = ISC_TRUE;
7065                 a->_u.addr = *alt;
7066         } else {
7067                 a->isaddress = ISC_FALSE;
7068                 a->_u._n.port = port;
7069                 dns_name_init(&a->_u._n.name, NULL);
7070                 result = dns_name_dup(name, resolver->mctx, &a->_u._n.name);
7071                 if (result != ISC_R_SUCCESS) {
7072                         isc_mem_put(resolver->mctx, a, sizeof(*a));
7073                         return (result);
7074                 }
7075         }
7076         ISC_LINK_INIT(a, link);
7077         ISC_LIST_APPEND(resolver->alternates, a, link);
7078
7079         return (ISC_R_SUCCESS);
7080 }
7081
7082 void
7083 dns_resolver_setudpsize(dns_resolver_t *resolver, isc_uint16_t udpsize) {
7084         REQUIRE(VALID_RESOLVER(resolver));
7085         resolver->udpsize = udpsize;
7086 }
7087
7088 isc_uint16_t
7089 dns_resolver_getudpsize(dns_resolver_t *resolver) {
7090         REQUIRE(VALID_RESOLVER(resolver));
7091         return (resolver->udpsize);
7092 }
7093
7094 static void
7095 free_algorithm(void *node, void *arg) {
7096         unsigned char *algorithms = node;
7097         isc_mem_t *mctx = arg;
7098
7099         isc_mem_put(mctx, algorithms, *algorithms);
7100 }
7101
7102 void
7103 dns_resolver_reset_algorithms(dns_resolver_t *resolver) {
7104
7105         REQUIRE(VALID_RESOLVER(resolver));
7106
7107 #if USE_ALGLOCK
7108         RWLOCK(&resolver->alglock, isc_rwlocktype_write);
7109 #endif
7110         if (resolver->algorithms != NULL)
7111                 dns_rbt_destroy(&resolver->algorithms);
7112 #if USE_ALGLOCK
7113         RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
7114 #endif
7115 }
7116
7117 isc_result_t
7118 dns_resolver_disable_algorithm(dns_resolver_t *resolver, dns_name_t *name,
7119                                unsigned int alg)
7120 {
7121         unsigned int len, mask;
7122         unsigned char *new;
7123         unsigned char *algorithms;
7124         isc_result_t result;
7125         dns_rbtnode_t *node = NULL;
7126
7127         REQUIRE(VALID_RESOLVER(resolver));
7128         if (alg > 255)
7129                 return (ISC_R_RANGE);
7130
7131 #if USE_ALGLOCK
7132         RWLOCK(&resolver->alglock, isc_rwlocktype_write);
7133 #endif
7134         if (resolver->algorithms == NULL) {
7135                 result = dns_rbt_create(resolver->mctx, free_algorithm,
7136                                         resolver->mctx, &resolver->algorithms);
7137                 if (result != ISC_R_SUCCESS)
7138                         goto cleanup;
7139         }
7140
7141         len = alg/8 + 2;
7142         mask = 1 << (alg%8);
7143
7144         result = dns_rbt_addnode(resolver->algorithms, name, &node);
7145
7146         if (result == ISC_R_SUCCESS || result == ISC_R_EXISTS) {
7147                 algorithms = node->data;
7148                 if (algorithms == NULL || len > *algorithms) {
7149                         new = isc_mem_get(resolver->mctx, len);
7150                         if (new == NULL) {
7151                                 result = ISC_R_NOMEMORY;
7152                                 goto cleanup;
7153                         }
7154                         memset(new, 0, len);
7155                         if (algorithms != NULL)
7156                                 memcpy(new, algorithms, *algorithms);
7157                         new[len-1] |= mask;
7158                         *new = len;
7159                         node->data = new;
7160                         if (algorithms != NULL)
7161                                 isc_mem_put(resolver->mctx, algorithms,
7162                                             *algorithms);
7163                 } else
7164                         algorithms[len-1] |= mask;
7165         }
7166         result = ISC_R_SUCCESS;
7167  cleanup:
7168 #if USE_ALGLOCK
7169         RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
7170 #endif
7171         return (result);
7172 }
7173
7174 isc_boolean_t
7175 dns_resolver_algorithm_supported(dns_resolver_t *resolver, dns_name_t *name,
7176                                  unsigned int alg)
7177 {
7178         unsigned int len, mask;
7179         unsigned char *algorithms;
7180         void *data = NULL;
7181         isc_result_t result;
7182         isc_boolean_t found = ISC_FALSE;
7183
7184         REQUIRE(VALID_RESOLVER(resolver));
7185
7186 #if USE_ALGLOCK
7187         RWLOCK(&resolver->alglock, isc_rwlocktype_read);
7188 #endif
7189         if (resolver->algorithms == NULL)
7190                 goto unlock;
7191         result = dns_rbt_findname(resolver->algorithms, name, 0, NULL, &data);
7192         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7193                 len = alg/8 + 2;
7194                 mask = 1 << (alg%8);
7195                 algorithms = data;
7196                 if (len <= *algorithms && (algorithms[len-1] & mask) != 0)
7197                         found = ISC_TRUE;
7198         }
7199  unlock:
7200 #if USE_ALGLOCK
7201         RWUNLOCK(&resolver->alglock, isc_rwlocktype_read);
7202 #endif
7203         if (found)
7204                 return (ISC_FALSE);
7205         return (dst_algorithm_supported(alg));
7206 }
7207
7208 isc_boolean_t
7209 dns_resolver_digest_supported(dns_resolver_t *resolver, unsigned int digest) {
7210
7211         UNUSED(resolver);
7212         return (dns_ds_digest_supported(digest));
7213 }
7214
7215 void
7216 dns_resolver_resetmustbesecure(dns_resolver_t *resolver) {
7217
7218         REQUIRE(VALID_RESOLVER(resolver));
7219
7220 #if USE_MBSLOCK
7221         RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
7222 #endif
7223         if (resolver->mustbesecure != NULL)
7224                 dns_rbt_destroy(&resolver->mustbesecure);
7225 #if USE_MBSLOCK
7226         RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
7227 #endif
7228 }
7229
7230 static isc_boolean_t yes = ISC_TRUE, no = ISC_FALSE;
7231
7232 isc_result_t
7233 dns_resolver_setmustbesecure(dns_resolver_t *resolver, dns_name_t *name,
7234                              isc_boolean_t value)
7235 {
7236         isc_result_t result;
7237
7238         REQUIRE(VALID_RESOLVER(resolver));
7239
7240 #if USE_MBSLOCK
7241         RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
7242 #endif
7243         if (resolver->mustbesecure == NULL) {
7244                 result = dns_rbt_create(resolver->mctx, NULL, NULL,
7245                                         &resolver->mustbesecure);
7246                 if (result != ISC_R_SUCCESS)
7247                         goto cleanup;
7248         }
7249         result = dns_rbt_addname(resolver->mustbesecure, name,
7250                                  value ? &yes : &no);
7251  cleanup:
7252 #if USE_MBSLOCK
7253         RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
7254 #endif
7255         return (result);
7256 }
7257
7258 isc_boolean_t
7259 dns_resolver_getmustbesecure(dns_resolver_t *resolver, dns_name_t *name) {
7260         void *data = NULL;
7261         isc_boolean_t value = ISC_FALSE;
7262         isc_result_t result;
7263
7264         REQUIRE(VALID_RESOLVER(resolver));
7265
7266 #if USE_MBSLOCK
7267         RWLOCK(&resolver->mbslock, isc_rwlocktype_read);
7268 #endif
7269         if (resolver->mustbesecure == NULL)
7270                 goto unlock;
7271         result = dns_rbt_findname(resolver->mustbesecure, name, 0, NULL, &data);
7272         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
7273                 value = *(isc_boolean_t*)data;
7274  unlock:
7275 #if USE_MBSLOCK
7276         RWUNLOCK(&resolver->mbslock, isc_rwlocktype_read);
7277 #endif
7278         return (value);
7279 }
7280
7281 void
7282 dns_resolver_getclientsperquery(dns_resolver_t *resolver, isc_uint32_t *cur,
7283                                 isc_uint32_t *min, isc_uint32_t *max)
7284 {
7285         REQUIRE(VALID_RESOLVER(resolver));
7286
7287         LOCK(&resolver->lock);
7288         if (cur != NULL)
7289                 *cur = resolver->spillat;
7290         if (min != NULL)
7291                 *min = resolver->spillatmin;
7292         if (max != NULL)
7293                 *max = resolver->spillatmax;
7294         UNLOCK(&resolver->lock);
7295 }
7296
7297 void
7298 dns_resolver_setclientsperquery(dns_resolver_t *resolver, isc_uint32_t min,
7299                                 isc_uint32_t max)
7300 {
7301         REQUIRE(VALID_RESOLVER(resolver));
7302
7303         LOCK(&resolver->lock);
7304         resolver->spillatmin = resolver->spillat = min;
7305         resolver->spillatmax = max;
7306         UNLOCK(&resolver->lock);
7307 }
7308
7309 isc_boolean_t
7310 dns_resolver_getzeronosoattl(dns_resolver_t *resolver) {
7311         REQUIRE(VALID_RESOLVER(resolver));
7312
7313         return (resolver->zero_no_soa_ttl);
7314 }
7315
7316 void
7317 dns_resolver_setzeronosoattl(dns_resolver_t *resolver, isc_boolean_t state) {
7318         REQUIRE(VALID_RESOLVER(resolver));
7319
7320         resolver->zero_no_soa_ttl = state;
7321 }