]> CyberLeo.Net >> Repos - FreeBSD/releng/9.0.git/blob - contrib/bind9/lib/dns/rbtdb.c
Upgrade to BIND 9.8.1-P1 to address the following DDOS bug:
[FreeBSD/releng/9.0.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2011  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.310.8.5.4.1 2011-11-16 09:32:08 marka Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rpz.h>
57 #include <dns/rdata.h>
58 #include <dns/rdataset.h>
59 #include <dns/rdatasetiter.h>
60 #include <dns/rdataslab.h>
61 #include <dns/rdatastruct.h>
62 #include <dns/result.h>
63 #include <dns/stats.h>
64 #include <dns/view.h>
65 #include <dns/zone.h>
66 #include <dns/zonekey.h>
67
68 #ifdef DNS_RBTDB_VERSION64
69 #include "rbtdb64.h"
70 #else
71 #include "rbtdb.h"
72 #endif
73
74 #ifdef DNS_RBTDB_VERSION64
75 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
76 #else
77 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
78 #endif
79
80 /*%
81  * Note that "impmagic" is not the first four bytes of the struct, so
82  * ISC_MAGIC_VALID cannot be used.
83  */
84 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
85                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
86
87 #ifdef DNS_RBTDB_VERSION64
88 typedef isc_uint64_t                    rbtdb_serial_t;
89 /*%
90  * Make casting easier in symbolic debuggers by using different names
91  * for the 64 bit version.
92  */
93 #define dns_rbtdb_t dns_rbtdb64_t
94 #define rdatasetheader_t rdatasetheader64_t
95 #define rbtdb_version_t rbtdb_version64_t
96 #else
97 typedef isc_uint32_t                    rbtdb_serial_t;
98 #endif
99
100 typedef isc_uint32_t                    rbtdb_rdatatype_t;
101
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into one 32-bit value:
 * a "base" type in the low half and an "extension" type in the high half
 * (for example, RBTDB_RDATATYPE_SIGNSEC stores dns_rdatatype_rrsig as the
 * base and dns_rdatatype_nsec as the extension).
 *
 * RBTDB_RDATATYPE_VALUE converts each operand to rbtdb_rdatatype_t *before*
 * shifting or OR-ing.  Shifting the promoted (signed int) operand instead
 * would overflow for any extension type of 0x8000 or above.  The base is
 * masked so that it can never spill into the extension half.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((((rbtdb_rdatatype_t)(e)) << 16) | \
         (((rbtdb_rdatatype_t)(b)) & 0xFFFF))
105
106 #define RBTDB_RDATATYPE_SIGNSEC \
107                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
108 #define RBTDB_RDATATYPE_SIGNSEC3 \
109                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
110 #define RBTDB_RDATATYPE_SIGNS \
111                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
112 #define RBTDB_RDATATYPE_SIGCNAME \
113                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
114 #define RBTDB_RDATATYPE_SIGDNAME \
115                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
116 #define RBTDB_RDATATYPE_NCACHEANY \
117                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
118
119 /*
120  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
121  * Using rwlock is effective with regard to lookup performance only when
122  * it is implemented in an efficient way.
123  * Otherwise, it is generally wise to stick to the simple locking since rwlock
124  * would require more memory or can even make lookups slower due to its own
125  * overhead (when it internally calls mutex locks).
126  */
127 #ifdef ISC_RWLOCK_USEATOMIC
128 #define DNS_RBTDB_USERWLOCK 1
129 #else
130 #define DNS_RBTDB_USERWLOCK 0
131 #endif
132
133 #if DNS_RBTDB_USERWLOCK
134 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
135 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
136 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
137 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
138 #else
139 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
140 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
141 #define RBTDB_LOCK(l, t)        LOCK(l)
142 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
143 #endif
144
145 /*
146  * Since node locking is sensitive to both performance and memory footprint,
147  * we need some trick here.  If we have both high-performance rwlock and
148  * high performance and small-memory reference counters, we use rwlock for
149  * node lock and isc_refcount for node references.  In this case, we don't have
150  * to protect the access to the counters by locks.
151  * Otherwise, we simply use ordinary mutex lock for node locking, and use
152  * simple integers as reference counters which is protected by the lock.
153  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
154  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
155  * counters first and then protect other parts of a node as read-only data.
156  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
157  * provided for these special cases.  When we can use the efficient backend
158  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
159  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
160  * section including the access to the reference counter.
161  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
162  * section is also protected by NODE_STRONGLOCK().
163  */
164 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
165 typedef isc_rwlock_t nodelock_t;
166
167 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
168 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
169 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
170 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
171 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
172
173 #define NODE_STRONGLOCK(l)      ((void)0)
174 #define NODE_STRONGUNLOCK(l)    ((void)0)
175 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
176 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
177 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
178 #else
179 typedef isc_mutex_t nodelock_t;
180
181 #define NODE_INITLOCK(l)        isc_mutex_init(l)
182 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
183 #define NODE_LOCK(l, t)         LOCK(l)
184 #define NODE_UNLOCK(l, t)       UNLOCK(l)
185 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
186
187 #define NODE_STRONGLOCK(l)      LOCK(l)
188 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
189 #define NODE_WEAKLOCK(l, t)     ((void)0)
190 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
191 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
192 #endif
193
194 /*%
195  * Whether to rate-limit updating the LRU to avoid possible thread contention.
196  * Our performance measurement has shown the cost is marginal, so it's defined
197  * to be 0 by default either with or without threads.
198  */
199 #ifndef DNS_RBTDB_LIMITLRUUPDATE
200 #define DNS_RBTDB_LIMITLRUUPDATE 0
201 #endif
202
203 /*
204  * Allow clients with a virtual time of up to 5 minutes in the past to see
205  * records that would otherwise have expired.
206  */
207 #define RBTDB_VIRTUAL 300
208
209 struct noqname {    /* Type of rdatasetheader_t's 'noqname' and 'closest' members. */
210         dns_name_t      name;
211         void *          neg;    /* NOTE(review): untyped here; presumably pairs with the 'neg' output of rdataset_getnoqname()/rdataset_getclosest() -- confirm */
212         void *          negsig; /* NOTE(review): presumably pairs with the 'negsig' output of the same routines -- confirm */
213         dns_rdatatype_t type;
214 };
215
216 typedef struct acachectl acachectl_t;
217
218 typedef struct rdatasetheader {
219         /*%
220          * Locked by the owning node's lock.
221          */
222         rbtdb_serial_t                  serial;
223         dns_ttl_t                       rdh_ttl;        /* changed through set_ttl(); compared by ttl_sooner() */
224         rbtdb_rdatatype_t               type;           /* packed pair; unpack with RBTDB_RDATATYPE_BASE()/_EXT() */
225         isc_uint16_t                    attributes;     /* RDATASET_ATTR_* bits */
226         dns_trust_t                     trust;
227         struct noqname                  *noqname;
228         struct noqname                  *closest;
229         /*%<
230          * We don't use the LIST macros, because the LIST structure has
231          * both head and tail pointers, and is doubly linked.
232          */
233
234         struct rdatasetheader           *next;
235         /*%<
236          * If this is the top header for an rdataset, 'next' points
237          * to the top header for the next rdataset (i.e., the next type).
238          * Otherwise, it points up to the header whose down pointer points
239          * at this header.
240          */
241
242         struct rdatasetheader           *down;
243         /*%<
244          * Points to the header for the next older version of
245          * this rdataset.
246          */
247
248         isc_uint32_t                    count;
249         /*%<
250          * Monotonically increased every time this rdataset is bound so that
251          * it is used as the base of the starting point in DNS responses
252          * when the "cyclic" rrset-order is required.  Since the ordering
253          * should not be so crucial, no lock is set for the counter for
254          * performance reasons.
255          */
256
257         acachectl_t                     *additional_auth;
258         acachectl_t                     *additional_glue;
259
260         dns_rbtnode_t                   *node;          /* set_ttl() uses node->locknum to select the heap */
261         isc_stdtime_t                   last_used;
262         ISC_LINK(struct rdatasetheader) link;
263
264         unsigned int                    heap_index;     /* written by set_index(); set_ttl() skips heap work when this is 0 */
265         /*%<
266          * Used for TTL-based cache cleaning.
267          */
268         isc_stdtime_t                   resign;         /* compared by resign_sooner() */
269 } rdatasetheader_t;
270
271 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
272 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
273
274 #define RDATASET_ATTR_NONEXISTENT       0x0001
275 #define RDATASET_ATTR_STALE             0x0002
276 #define RDATASET_ATTR_IGNORE            0x0004
277 #define RDATASET_ATTR_RETAIN            0x0008
278 #define RDATASET_ATTR_NXDOMAIN          0x0010
279 #define RDATASET_ATTR_RESIGN            0x0020
280 #define RDATASET_ATTR_STATCOUNT         0x0040
281 #define RDATASET_ATTR_OPTOUT            0x0080
282 #define RDATASET_ATTR_NEGATIVE          0x0100
283
284 typedef struct acache_cbarg {
285         dns_rdatasetadditional_t        type;
286         unsigned int                    count;
287         dns_db_t                        *db;
288         dns_dbnode_t                    *node;
289         rdatasetheader_t                *header;
290 } acache_cbarg_t;
291
292 struct acachectl {
293         dns_acacheentry_t               *entry;
294         acache_cbarg_t                  *cbarg;
295 };
296
297 /*
298  * XXX
299  * When the cache will pre-expire data (due to memory low or other
300  * situations) before the rdataset's TTL has expired, it MUST
301  * respect the RETAIN bit and not expire the data until its TTL is
302  * expired.
303  */
304
305 #undef IGNORE                   /* WIN32 winbase.h defines this. */
306
307 #define EXISTS(header) \
308         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
309 #define NONEXISTENT(header) \
310         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
311 #define IGNORE(header) \
312         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
313 #define RETAIN(header) \
314         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
315 #define NXDOMAIN(header) \
316         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
317 #define RESIGN(header) \
318         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
319 #define OPTOUT(header) \
320         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
321 #define NEGATIVE(header) \
322         (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
323
324 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
325
326 /*%
327  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
328  * There is a tradeoff issue about configuring this value: if this is too
329  * small, it may cause heavier contention between threads; if this is too large,
330  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
331  * The default value should work well for most environments, but this can
332  * also be configurable at compilation time via the
333  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
334  * 1 due to the assumption of overmem_purge().
335  */
336 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
338 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
339 #else
340 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
341 #endif
342 #else
343 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
344 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
345
346 typedef struct {    /* Element type of dns_rbtdb_t.node_locks. */
347         nodelock_t                      lock;   /* isc_rwlock_t or isc_mutex_t, depending on which nodelock_t typedef was selected */
348         /* Protected in the refcount routines. */
349         isc_refcount_t                  references;
350         /* Locked by lock. */
351         isc_boolean_t                   exiting;
352 } rbtdb_nodelock_t;
353
354 typedef struct rbtdb_changed {
355         dns_rbtnode_t *                 node;
356         isc_boolean_t                   dirty;
357         ISC_LINK(struct rbtdb_changed)  link;
358 } rbtdb_changed_t;
359
360 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
361
362 typedef enum {
363         dns_db_insecure,
364         dns_db_partial,
365         dns_db_secure
366 } dns_db_secure_t;
367
368 typedef struct rbtdb_version {      /* Type of dns_rbtdb_t's current_version / future_version and of the open_versions list elements. */
369         /* Not locked */
370         rbtdb_serial_t                  serial;
371         /*
372          * Protected in the refcount routines.
373          * XXXJT: should we change the lock policy based on the refcount
374          * performance?
375          */
376         isc_refcount_t                  references;
377         /* Locked by database lock. */
378         isc_boolean_t                   writer;
379         isc_boolean_t                   commit_ok;
380         rbtdb_changedlist_t             changed_list;   /* list of rbtdb_changed_t */
381         rdatasetheaderlist_t            resigned_list;  /* list of rdatasetheader_t */
382         ISC_LINK(struct rbtdb_version)  link;
383         dns_db_secure_t                 secure;         /* dns_db_insecure, dns_db_partial or dns_db_secure */
384         isc_boolean_t                   havensec3;
385         /* NSEC3 parameters */
386         dns_hash_t                      hash;
387         isc_uint8_t                     flags;
388         isc_uint16_t                    iterations;
389         isc_uint8_t                     salt_length;    /* NOTE(review): presumably the number of valid bytes in salt[] -- confirm */
390         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
391 } rbtdb_version_t;
392
393 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
394
395 typedef struct {
396         /* Unlocked. */
397         dns_db_t                        common;         /* common.impmagic is what VALID_RBTDB() checks */
398         /* Locks the data in this struct */
399 #if DNS_RBTDB_USERWLOCK
400         isc_rwlock_t                    lock;
401 #else
402         isc_mutex_t                     lock;
403 #endif
404         /* Locks the tree structure (prevents nodes appearing/disappearing) */
405         isc_rwlock_t                    tree_lock;
406         /* Locks for individual tree nodes */
407         unsigned int                    node_lock_count;
408         rbtdb_nodelock_t *              node_locks;
409         dns_rbtnode_t *                 origin_node;
410         dns_stats_t *                   rrsetstats; /* cache DB only */
411         /* Locked by lock. */
412         unsigned int                    active;
413         isc_refcount_t                  references;     /* incremented by attach() */
414         unsigned int                    attributes;     /* RBTDB_ATTR_* bits */
415         rbtdb_serial_t                  current_serial;
416         rbtdb_serial_t                  least_serial;
417         rbtdb_serial_t                  next_serial;
418         rbtdb_version_t *               current_version;
419         rbtdb_version_t *               future_version;
420         rbtdb_versionlist_t             open_versions;
421         isc_task_t *                    task;
422         dns_dbnode_t                    *soanode;
423         dns_dbnode_t                    *nsnode;
424
425         /*
426          * This is a linked list used to implement the LRU cache.  There will
427          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
428          * placed on the linked list rdatasets[1].
429          */
430         rdatasetheaderlist_t            *rdatasets;
431
432         /*%
433          * Temporary storage for stale cache nodes and dynamically deleted
434          * nodes that await being cleaned up.
435          */
436         rbtnodelist_t                   *deadnodes;
437
438         /*
439          * Heaps.  These are used for TTL based expiry in a cache,
440          * or for zone resigning in a zone DB.  hmctx is the memory
441          * context to use for the heap (which differs from the main
442          * database memory context in the case of a cache).
443          */
444         isc_mem_t *                     hmctx;
445         isc_heap_t                      **heaps;        /* indexed by node->locknum in set_ttl(); the array and its entries may be NULL */
446
447         /* Locked by tree_lock. */
448         dns_rbt_t *                     tree;
449         dns_rbt_t *                     nsec;
450         dns_rbt_t *                     nsec3;
451         dns_rpz_cidr_t *                rpz_cidr;
452
453         /* Unlocked */
454         unsigned int                    quantum;        /* NOTE(review): presumably the estimate maintained via adjust_quantum() -- confirm */
455 } dns_rbtdb_t;
456
457 #define RBTDB_ATTR_LOADED               0x01
458 #define RBTDB_ATTR_LOADING              0x02
459
460 /*%
461  * Search Context
462  */
463 typedef struct {
464         dns_rbtdb_t *           rbtdb;
465         rbtdb_version_t *       rbtversion;
466         rbtdb_serial_t          serial;
467         unsigned int            options;
468         dns_rbtnodechain_t      chain;
469         isc_boolean_t           copy_name;
470         isc_boolean_t           need_cleanup;
471         isc_boolean_t           wild;
472         dns_rbtnode_t *         zonecut;
473         rdatasetheader_t *      zonecut_rdataset;
474         rdatasetheader_t *      zonecut_sigrdataset;
475         dns_fixedname_t         zonecut_name;
476         isc_stdtime_t           now;
477 } rbtdb_search_t;
478
479 /*%
480  * Load Context
481  */
482 typedef struct {
483         dns_rbtdb_t *           rbtdb;
484         isc_stdtime_t           now;
485 } rbtdb_load_t;
486
487 static void rdataset_disassociate(dns_rdataset_t *rdataset);
488 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
489 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
490 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
491 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
492 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
493 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
494                                         dns_name_t *name,
495                                         dns_rdataset_t *neg,
496                                         dns_rdataset_t *negsig);
497 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
498                                         dns_name_t *name,
499                                         dns_rdataset_t *neg,
500                                         dns_rdataset_t *negsig);
501 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
502                                            dns_rdatasetadditional_t type,
503                                            dns_rdatatype_t qtype,
504                                            dns_acache_t *acache,
505                                            dns_zone_t **zonep,
506                                            dns_db_t **dbp,
507                                            dns_dbversion_t **versionp,
508                                            dns_dbnode_t **nodep,
509                                            dns_name_t *fname,
510                                            dns_message_t *msg,
511                                            isc_stdtime_t now);
512 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
513                                            dns_rdatasetadditional_t type,
514                                            dns_rdatatype_t qtype,
515                                            dns_acache_t *acache,
516                                            dns_zone_t *zone,
517                                            dns_db_t *db,
518                                            dns_dbversion_t *version,
519                                            dns_dbnode_t *node,
520                                            dns_name_t *fname);
521 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
522                                            dns_rdataset_t *rdataset,
523                                            dns_rdatasetadditional_t type,
524                                            dns_rdatatype_t qtype);
525 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
526                                               isc_stdtime_t now);
527 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
528                           isc_stdtime_t now);
529 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
530                           isc_boolean_t tree_locked);
531 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
532                           isc_stdtime_t now, isc_boolean_t tree_locked);
533 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
534                                   rdatasetheader_t *newheader);
535 static void prune_tree(isc_task_t *task, isc_event_t *event);
536 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
537 static void rdataset_expire(dns_rdataset_t *rdataset);
538
539 static dns_rdatasetmethods_t rdataset_methods = {
540         rdataset_disassociate,
541         rdataset_first,
542         rdataset_next,
543         rdataset_current,
544         rdataset_clone,
545         rdataset_count,
546         NULL,
547         rdataset_getnoqname,
548         NULL,
549         rdataset_getclosest,
550         rdataset_getadditional,
551         rdataset_setadditional,
552         rdataset_putadditional,
553         rdataset_settrust,
554         rdataset_expire
555 };
556
557 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
558 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
559 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
560 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
561                                  dns_rdataset_t *rdataset);
562
563 static dns_rdatasetitermethods_t rdatasetiter_methods = {
564         rdatasetiter_destroy,
565         rdatasetiter_first,
566         rdatasetiter_next,
567         rdatasetiter_current
568 };
569
570 typedef struct rbtdb_rdatasetiter {
571         dns_rdatasetiter_t              common;
572         rdatasetheader_t *              current;
573 } rbtdb_rdatasetiter_t;
574
575 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
576 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
577 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
578 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
579                                         dns_name_t *name);
580 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
581 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
582 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
583                                            dns_dbnode_t **nodep,
584                                            dns_name_t *name);
585 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
586 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
587                                           dns_name_t *name);
588
589 static dns_dbiteratormethods_t dbiterator_methods = {
590         dbiterator_destroy,
591         dbiterator_first,
592         dbiterator_last,
593         dbiterator_seek,
594         dbiterator_prev,
595         dbiterator_next,
596         dbiterator_current,
597         dbiterator_pause,
598         dbiterator_origin
599 };
600
601 #define DELETION_BATCH_MAX 64
602
603 /*
604  * Iterator state for a red-black-tree database.
605  * If 'paused' is ISC_TRUE, then the tree lock is not being held. */
606 typedef struct rbtdb_dbiterator {
607         dns_dbiterator_t                common;
608         isc_boolean_t                   paused;
609         isc_boolean_t                   new_origin;
610         isc_rwlocktype_t                tree_locked;
611         isc_result_t                    result;
612         dns_fixedname_t                 name;
613         dns_fixedname_t                 origin;
614         dns_rbtnodechain_t              chain;
615         dns_rbtnodechain_t              nsec3chain;
616         dns_rbtnodechain_t              *current;       /* NOTE(review): presumably points at 'chain' or 'nsec3chain' -- confirm */
617         dns_rbtnode_t                   *node;
618         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX]; /* fixed capacity of DELETION_BATCH_MAX entries */
619         int                             delete;         /* NOTE(review): presumably the number of slots in use in deletions[] -- confirm */
620         isc_boolean_t                   nsec3only;
621         isc_boolean_t                   nonsec3;
622 } rbtdb_dbiterator_t;
623
624
625 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
626 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
627
628 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
629                        isc_event_t *event);
630 static void overmem(dns_db_t *db, isc_boolean_t overmem);
631 #ifdef BIND9
632 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
633 #endif
634
635 /*%
636  * 'init_count' is used to initialize 'newheader->count' which in turn
637  * is used to determine where in the cycle rrset-order cyclic starts.
638  * We don't lock this as we don't care about simultaneous updates.
639  *
640  * Note:
641  *      Both init_count and header->count can be ISC_UINT32_MAX.
642  *      The count on the returned rdataset however can't be as
643  *      that indicates that the database does not implement cyclic
644  *      processing.
645  */
646 static unsigned int init_count;
647
648 /*
649  * Locking
650  *
651  * If a routine is going to lock more than one lock in this module, then
652  * the locking must be done in the following order:
653  *
654  *      Tree Lock
655  *
656  *      Node Lock       (Only one from the set may be locked at one time by
657  *                       any caller)
658  *
659  *      Database Lock
660  *
661  * Failure to follow this hierarchy can result in deadlock.
662  */
663
664 /*
665  * Deleting Nodes
666  *
667  * For zone databases the node for the origin of the zone MUST NOT be deleted.
668  */
669
670
671 /*
672  * DB Routines
673  */
674
675 static void
676 attach(dns_db_t *source, dns_db_t **targetp) {
677         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
678
679         REQUIRE(VALID_RBTDB(rbtdb));
680
681         isc_refcount_increment(&rbtdb->references, NULL);
682
683         *targetp = source;
684 }
685
686 static void
687 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
688         dns_rbtdb_t *rbtdb = event->ev_arg;
689
690         UNUSED(task);
691
692         free_rbtdb(rbtdb, ISC_TRUE, event);
693 }
694
695 static void
696 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
697                   isc_boolean_t increment)
698 {
699         dns_rdatastatstype_t statattributes = 0;
700         dns_rdatastatstype_t base = 0;
701         dns_rdatastatstype_t type;
702
703         /* At the moment we count statistics only for cache DB */
704         INSIST(IS_CACHE(rbtdb));
705
706         if (NEGATIVE(header)) {
707                 if (NXDOMAIN(header))
708                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
709                 else {
710                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
711                         base = RBTDB_RDATATYPE_EXT(header->type);
712                 }
713         } else
714                 base = RBTDB_RDATATYPE_BASE(header->type);
715
716         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
717         if (increment)
718                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
719         else
720                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
721 }
722
723 static void
724 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
725         int idx;
726         isc_heap_t *heap;
727         dns_ttl_t oldttl;
728
729         oldttl = header->rdh_ttl;
730         header->rdh_ttl = newttl;
731
732         if (!IS_CACHE(rbtdb))
733                 return;
734
735         /*
736          * It's possible the rbtdb is not a cache.  If this is the case,
737          * we will not have a heap, and we move on.  If we do, though,
738          * we might need to adjust things.
739          */
740         if (header->heap_index == 0 || newttl == oldttl)
741                 return;
742         idx = header->node->locknum;
743         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
744             return;
745         heap = rbtdb->heaps[idx];
746
747         if (newttl < oldttl)
748                 isc_heap_increased(heap, header->heap_index);
749         else
750                 isc_heap_decreased(heap, header->heap_index);
751 }
752
753 /*%
754  * These functions allow the heap code to rank the priority of each
755  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
756  */
757 static isc_boolean_t
758 ttl_sooner(void *v1, void *v2) {
759         rdatasetheader_t *h1 = v1;
760         rdatasetheader_t *h2 = v2;
761
762         if (h1->rdh_ttl < h2->rdh_ttl)
763                 return (ISC_TRUE);
764         return (ISC_FALSE);
765 }
766
767 static isc_boolean_t
768 resign_sooner(void *v1, void *v2) {
769         rdatasetheader_t *h1 = v1;
770         rdatasetheader_t *h2 = v2;
771
772         if (h1->resign < h2->resign)
773                 return (ISC_TRUE);
774         return (ISC_FALSE);
775 }
776
777 /*%
778  * This function sets the heap index into the header.
779  */
780 static void
781 set_index(void *what, unsigned int index) {
782         rdatasetheader_t *h = what;
783
784         h->heap_index = index;
785 }
786
787 /*%
788  * Work out how many nodes can be deleted in the time between two
789  * requests to the nameserver.  Smooth the resulting number and use it
790  * as a estimate for the number of nodes to be deleted in the next
791  * iteration.
792  */
793 static unsigned int
794 adjust_quantum(unsigned int old, isc_time_t *start) {
795         unsigned int pps = dns_pps;     /* packets per second */
796         unsigned int interval;
797         isc_uint64_t usecs;
798         isc_time_t end;
799         unsigned int new;
800
801         if (pps < 100)
802                 pps = 100;
803         isc_time_now(&end);
804
805         interval = 1000000 / pps;       /* interval in usec */
806         if (interval == 0)
807                 interval = 1;
808         usecs = isc_time_microdiff(&end, start);
809         if (usecs == 0) {
810                 /*
811                  * We were unable to measure the amount of time taken.
812                  * Double the nodes deleted next time.
813                  */
814                 old *= 2;
815                 if (old > 1000)
816                         old = 1000;
817                 return (old);
818         }
819         new = old * interval;
820         new /= (unsigned int)usecs;
821         if (new == 0)
822                 new = 1;
823         else if (new > 1000)
824                 new = 1000;
825
826         /* Smooth */
827         new = (new + old * 3) / 4;
828
829         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
830                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
831
832         return (new);
833 }
834
/*
 * Tear down 'rbtdb' and release everything it owns: the current version,
 * the dead-node lists, the three trees (tree, nsec, nsec3), the per-bucket
 * arrays and locks, the statistics, the task and finally the structure
 * itself.
 *
 * 'log'   - when true, a debug message "done free_rbtdb(<origin>)" is
 *           written once the trees have been destroyed.
 * 'event' - NULL on a fresh call.  When tree destruction runs out of
 *           quantum (ISC_R_QUOTA) an event whose action is
 *           free_rbtdb_callback is sent to rbtdb->task and this function
 *           returns early; a non-NULL 'event' is reused for that purpose
 *           and freed here once all trees are gone.
 *           NOTE(review): presumably free_rbtdb_callback re-enters this
 *           function with that event -- confirm at its definition.
 */
static void
free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
        unsigned int i;
        isc_ondestroy_t ondest;
        isc_result_t result;
        char buf[DNS_NAME_FORMATSIZE];
        dns_rbt_t **treep;
        isc_time_t start;

        /*
         * NOTE(review): the meaning of the special (isc_boolean_t)-1
         * argument is defined by overmem(), which is not visible here.
         */
        if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
                overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);

        REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
        REQUIRE(rbtdb->future_version == NULL);

        /*
         * Drop what must be the last reference to the current version,
         * take it off the open_versions list and free it.
         */
        if (rbtdb->current_version != NULL) {
                unsigned int refs;

                isc_refcount_decrement(&rbtdb->current_version->references,
                                       &refs);
                INSIST(refs == 0);
                UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
                isc_refcount_destroy(&rbtdb->current_version->references);
                isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
                            sizeof(rbtdb_version_t));
        }

        /*
         * We assume the number of remaining dead nodes is reasonably small;
         * the overhead of unlinking all nodes here should be negligible.
         */
        for (i = 0; i < rbtdb->node_lock_count; i++) {
                dns_rbtnode_t *node;

                node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
                while (node != NULL) {
                        ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
                        node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
                }
        }

        /*
         * Only on a fresh call (no event) is the quantum reset: 100 when
         * a task is available, otherwise 0.
         * NOTE(review): presumably a quantum of 0 tells dns_rbt_destroy2()
         * not to limit its work -- confirm against that function.
         */
        if (event == NULL)
                rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;

        for (;;) {
                /*
                 * Pick the next tree to (start to) destroy: the first
                 * non-NULL one of tree, nsec and nsec3, in that order.
                 */
                treep = &rbtdb->tree;
                if (*treep == NULL) {
                        treep = &rbtdb->nsec;
                        if (*treep == NULL) {
                                treep = &rbtdb->nsec3;
                                /*
                                 * we're finished after clear cutting
                                 */
                                if (*treep == NULL)
                                        break;
                        }
                }

                isc_time_now(&start);
                result = dns_rbt_destroy2(treep, rbtdb->quantum);
                if (result == ISC_R_QUOTA) {
                        /*
                         * The tree was not finished within the quantum.
                         * Retune the quantum from the time this pass took
                         * and hand the rest of the work to the task.
                         */
                        INSIST(rbtdb->task != NULL);
                        if (rbtdb->quantum != 0)
                                rbtdb->quantum = adjust_quantum(rbtdb->quantum,
                                                                &start);
                        if (event == NULL)
                                event = isc_event_allocate(rbtdb->common.mctx,
                                                           NULL,
                                                         DNS_EVENT_FREESTORAGE,
                                                           free_rbtdb_callback,
                                                           rbtdb,
                                                           sizeof(isc_event_t));
                        /*
                         * If no event could be allocated, keep destroying
                         * in this call instead of deferring.
                         */
                        if (event == NULL)
                                continue;
                        isc_task_send(rbtdb->task, &event);
                        return;
                }
                INSIST(result == ISC_R_SUCCESS && *treep == NULL);
        }

        /* All trees are gone; an event passed in is no longer needed. */
        if (event != NULL)
                isc_event_free(&event);
        if (log) {
                if (dns_name_dynamic(&rbtdb->common.origin))
                        dns_name_format(&rbtdb->common.origin, buf,
                                        sizeof(buf));
                else
                        strcpy(buf, "<UNKNOWN>");
                isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
                              DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
                              "done free_rbtdb(%s)", buf);
        }
        if (dns_name_dynamic(&rbtdb->common.origin))
                dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
        for (i = 0; i < rbtdb->node_lock_count; i++) {
                isc_refcount_destroy(&rbtdb->node_locks[i].references);
                NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
        }

        /*
         * Clean up LRU / re-signing order lists.  Every list must already
         * be empty.
         */
        if (rbtdb->rdatasets != NULL) {
                for (i = 0; i < rbtdb->node_lock_count; i++)
                        INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
                isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
                            rbtdb->node_lock_count *
                            sizeof(rdatasetheaderlist_t));
        }
        /*
         * Clean up dead node buckets.
         */
        if (rbtdb->deadnodes != NULL) {
                for (i = 0; i < rbtdb->node_lock_count; i++)
                        INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
                isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
                    rbtdb->node_lock_count * sizeof(rbtnodelist_t));
        }
        /*
         * Clean up heap objects.  Note that the heap array is returned to
         * rbtdb->hmctx, not to common.mctx.
         */
        if (rbtdb->heaps != NULL) {
                for (i = 0; i < rbtdb->node_lock_count; i++)
                        isc_heap_destroy(&rbtdb->heaps[i]);
                isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
                            rbtdb->node_lock_count * sizeof(isc_heap_t *));
        }

        if (rbtdb->rrsetstats != NULL)
                dns_stats_detach(&rbtdb->rrsetstats);

#ifdef BIND9
        if (rbtdb->rpz_cidr != NULL)
                dns_rpz_cidr_free(&rbtdb->rpz_cidr);
#endif

        isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
                    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
        isc_rwlock_destroy(&rbtdb->tree_lock);
        isc_refcount_destroy(&rbtdb->references);
        if (rbtdb->task != NULL)
                isc_task_detach(&rbtdb->task);

        RBTDB_DESTROYLOCK(&rbtdb->lock);
        rbtdb->common.magic = 0;
        rbtdb->common.impmagic = 0;
        /*
         * Copy the ondestroy state out first: the notification is issued
         * after the rbtdb structure has been handed back to the memory
         * context, and passes 'rbtdb' only as a pointer value.
         */
        ondest = rbtdb->common.ondest;
        isc_mem_detach(&rbtdb->hmctx);
        isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
        isc_ondestroy_notify(&ondest, rbtdb);
}
989
990 static inline void
991 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
992         isc_boolean_t want_free = ISC_FALSE;
993         unsigned int i;
994         unsigned int inactive = 0;
995
996         /* XXX check for open versions here */
997
998         if (rbtdb->soanode != NULL)
999                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1000         if (rbtdb->nsnode != NULL)
1001                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1002
1003         /*
1004          * Even though there are no external direct references, there still
1005          * may be nodes in use.
1006          */
1007         for (i = 0; i < rbtdb->node_lock_count; i++) {
1008                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1009                 rbtdb->node_locks[i].exiting = ISC_TRUE;
1010                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1011                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1012                     == 0) {
1013                         inactive++;
1014                 }
1015         }
1016
1017         if (inactive != 0) {
1018                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1019                 rbtdb->active -= inactive;
1020                 if (rbtdb->active == 0)
1021                         want_free = ISC_TRUE;
1022                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1023                 if (want_free) {
1024                         char buf[DNS_NAME_FORMATSIZE];
1025                         if (dns_name_dynamic(&rbtdb->common.origin))
1026                                 dns_name_format(&rbtdb->common.origin, buf,
1027                                                 sizeof(buf));
1028                         else
1029                                 strcpy(buf, "<UNKNOWN>");
1030                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1031                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1032                                       "calling free_rbtdb(%s)", buf);
1033                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1034                 }
1035         }
1036 }
1037
1038 static void
1039 detach(dns_db_t **dbp) {
1040         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1041         unsigned int refs;
1042
1043         REQUIRE(VALID_RBTDB(rbtdb));
1044
1045         isc_refcount_decrement(&rbtdb->references, &refs);
1046
1047         if (refs == 0)
1048                 maybe_free_rbtdb(rbtdb);
1049
1050         *dbp = NULL;
1051 }
1052
1053 static void
1054 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1055         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1056         rbtdb_version_t *version;
1057         unsigned int refs;
1058
1059         REQUIRE(VALID_RBTDB(rbtdb));
1060
1061         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1062         version = rbtdb->current_version;
1063         isc_refcount_increment(&version->references, &refs);
1064         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1065
1066         *versionp = (dns_dbversion_t *)version;
1067 }
1068
1069 static inline rbtdb_version_t *
1070 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1071                  unsigned int references, isc_boolean_t writer)
1072 {
1073         isc_result_t result;
1074         rbtdb_version_t *version;
1075
1076         version = isc_mem_get(mctx, sizeof(*version));
1077         if (version == NULL)
1078                 return (NULL);
1079         version->serial = serial;
1080         result = isc_refcount_init(&version->references, references);
1081         if (result != ISC_R_SUCCESS) {
1082                 isc_mem_put(mctx, version, sizeof(*version));
1083                 return (NULL);
1084         }
1085         version->writer = writer;
1086         version->commit_ok = ISC_FALSE;
1087         ISC_LIST_INIT(version->changed_list);
1088         ISC_LIST_INIT(version->resigned_list);
1089         ISC_LINK_INIT(version, link);
1090
1091         return (version);
1092 }
1093
1094 static isc_result_t
1095 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1096         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1097         rbtdb_version_t *version;
1098
1099         REQUIRE(VALID_RBTDB(rbtdb));
1100         REQUIRE(versionp != NULL && *versionp == NULL);
1101         REQUIRE(rbtdb->future_version == NULL);
1102
1103         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1104         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1105         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1106                                    ISC_TRUE);
1107         if (version != NULL) {
1108                 version->commit_ok = ISC_TRUE;
1109                 version->secure = rbtdb->current_version->secure;
1110                 version->havensec3 = rbtdb->current_version->havensec3;
1111                 if (version->havensec3) {
1112                         version->flags = rbtdb->current_version->flags;
1113                         version->iterations =
1114                                 rbtdb->current_version->iterations;
1115                         version->hash = rbtdb->current_version->hash;
1116                         version->salt_length =
1117                                 rbtdb->current_version->salt_length;
1118                         memcpy(version->salt, rbtdb->current_version->salt,
1119                                version->salt_length);
1120                 } else {
1121                         version->flags = 0;
1122                         version->iterations = 0;
1123                         version->hash = 0;
1124                         version->salt_length = 0;
1125                         memset(version->salt, 0, sizeof(version->salt));
1126                 }
1127                 rbtdb->next_serial++;
1128                 rbtdb->future_version = version;
1129         }
1130         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1131
1132         if (version == NULL)
1133                 return (ISC_R_NOMEMORY);
1134
1135         *versionp = version;
1136
1137         return (ISC_R_SUCCESS);
1138 }
1139
1140 static void
1141 attachversion(dns_db_t *db, dns_dbversion_t *source,
1142               dns_dbversion_t **targetp)
1143 {
1144         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1145         rbtdb_version_t *rbtversion = source;
1146         unsigned int refs;
1147
1148         REQUIRE(VALID_RBTDB(rbtdb));
1149
1150         isc_refcount_increment(&rbtversion->references, &refs);
1151         INSIST(refs > 1);
1152
1153         *targetp = rbtversion;
1154 }
1155
1156 static rbtdb_changed_t *
1157 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1158             dns_rbtnode_t *node)
1159 {
1160         rbtdb_changed_t *changed;
1161         unsigned int refs;
1162
1163         /*
1164          * Caller must be holding the node lock if its reference must be
1165          * protected by the lock.
1166          */
1167
1168         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1169
1170         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1171
1172         REQUIRE(version->writer);
1173
1174         if (changed != NULL) {
1175                 dns_rbtnode_refincrement(node, &refs);
1176                 INSIST(refs != 0);
1177                 changed->node = node;
1178                 changed->dirty = ISC_FALSE;
1179                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1180         } else
1181                 version->commit_ok = ISC_FALSE;
1182
1183         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1184
1185         return (changed);
1186 }
1187
1188 static void
1189 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1190                  acachectl_t *array)
1191 {
1192         unsigned int count;
1193         unsigned int i;
1194         unsigned char *raw;     /* RDATASLAB */
1195
1196         /*
1197          * The caller must be holding the corresponding node lock.
1198          */
1199
1200         if (array == NULL)
1201                 return;
1202
1203         raw = (unsigned char *)header + sizeof(*header);
1204         count = raw[0] * 256 + raw[1];
1205
1206         /*
1207          * Sanity check: since an additional cache entry has a reference to
1208          * the original DB node (in the callback arg), there should be no
1209          * acache entries when the node can be freed.
1210          */
1211         for (i = 0; i < count; i++)
1212                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1213
1214         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1215 }
1216
1217 static inline void
1218 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1219
1220         if (dns_name_dynamic(&(*noqname)->name))
1221                 dns_name_free(&(*noqname)->name, mctx);
1222         if ((*noqname)->neg != NULL)
1223                 isc_mem_put(mctx, (*noqname)->neg,
1224                             dns_rdataslab_size((*noqname)->neg, 0));
1225         if ((*noqname)->negsig != NULL)
1226                 isc_mem_put(mctx, (*noqname)->negsig,
1227                             dns_rdataslab_size((*noqname)->negsig, 0));
1228         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1229         *noqname = NULL;
1230 }
1231
1232 static inline void
1233 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1234 {
1235         ISC_LINK_INIT(h, link);
1236         h->heap_index = 0;
1237
1238 #if TRACE_HEADER
1239         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1240                 fprintf(stderr, "initialized header: %p\n", h);
1241 #else
1242         UNUSED(rbtdb);
1243 #endif
1244 }
1245
1246 static inline rdatasetheader_t *
1247 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1248 {
1249         rdatasetheader_t *h;
1250
1251         h = isc_mem_get(mctx, sizeof(*h));
1252         if (h == NULL)
1253                 return (NULL);
1254
1255 #if TRACE_HEADER
1256         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1257                 fprintf(stderr, "allocated header: %p\n", h);
1258 #endif
1259         init_rdataset(rbtdb, h);
1260         return (h);
1261 }
1262
1263 static inline void
1264 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1265 {
1266         unsigned int size;
1267         int idx;
1268
1269         if (EXISTS(rdataset) &&
1270             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1271                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1272         }
1273
1274         idx = rdataset->node->locknum;
1275         if (ISC_LINK_LINKED(rdataset, link)) {
1276                 INSIST(IS_CACHE(rbtdb));
1277                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1278         }
1279         if (rdataset->heap_index != 0)
1280                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1281         rdataset->heap_index = 0;
1282
1283         if (rdataset->noqname != NULL)
1284                 free_noqname(mctx, &rdataset->noqname);
1285         if (rdataset->closest != NULL)
1286                 free_noqname(mctx, &rdataset->closest);
1287
1288         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1289         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1290
1291         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1292                 size = sizeof(*rdataset);
1293         else
1294                 size = dns_rdataslab_size((unsigned char *)rdataset,
1295                                           sizeof(*rdataset));
1296         isc_mem_put(mctx, rdataset, size);
1297 }
1298
1299 static inline void
1300 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1301         rdatasetheader_t *header, *dcurrent;
1302         isc_boolean_t make_dirty = ISC_FALSE;
1303
1304         /*
1305          * Caller must hold the node lock.
1306          */
1307
1308         /*
1309          * We set the IGNORE attribute on rdatasets with serial number
1310          * 'serial'.  When the reference count goes to zero, these rdatasets
1311          * will be cleaned up; until that time, they will be ignored.
1312          */
1313         for (header = node->data; header != NULL; header = header->next) {
1314                 if (header->serial == serial) {
1315                         header->attributes |= RDATASET_ATTR_IGNORE;
1316                         make_dirty = ISC_TRUE;
1317                 }
1318                 for (dcurrent = header->down;
1319                      dcurrent != NULL;
1320                      dcurrent = dcurrent->down) {
1321                         if (dcurrent->serial == serial) {
1322                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1323                                 make_dirty = ISC_TRUE;
1324                         }
1325                 }
1326         }
1327         if (make_dirty)
1328                 node->dirty = 1;
1329 }
1330
1331 static inline void
1332 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1333 {
1334         rdatasetheader_t *d, *down_next;
1335
1336         for (d = top->down; d != NULL; d = down_next) {
1337                 down_next = d->down;
1338                 free_rdataset(rbtdb, mctx, d);
1339         }
1340         top->down = NULL;
1341 }
1342
1343 static inline void
1344 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1345         rdatasetheader_t *current, *top_prev, *top_next;
1346         isc_mem_t *mctx = rbtdb->common.mctx;
1347
1348         /*
1349          * Caller must be holding the node lock.
1350          */
1351
1352         top_prev = NULL;
1353         for (current = node->data; current != NULL; current = top_next) {
1354                 top_next = current->next;
1355                 clean_stale_headers(rbtdb, mctx, current);
1356                 /*
1357                  * If current is nonexistent or stale, we can clean it up.
1358                  */
1359                 if ((current->attributes &
1360                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1361                         if (top_prev != NULL)
1362                                 top_prev->next = current->next;
1363                         else
1364                                 node->data = current->next;
1365                         free_rdataset(rbtdb, mctx, current);
1366                 } else
1367                         top_prev = current;
1368         }
1369         node->dirty = 0;
1370 }
1371
/*
 * Prune the rdataset headers stored at a zone node.
 *
 * node->data is a list of top-level headers linked through 'next'; each
 * top-level header heads a chain linked through 'down'.  For every
 * top-level header this function:
 *   - frees 'down' entries that repeat their parent's serial or carry
 *     the IGNORE attribute;
 *   - removes the top-level header itself if it is IGNOREd, promoting
 *     its first 'down' entry (if any) into its place;
 *   - frees every 'down' entry from the first one whose serial is below
 *     'least_serial' onwards;
 *   - removes a top-level header that is NONEXISTENT and has no 'down'
 *     chain left.
 * node->dirty is cleared unless some top-level header still has a 'down'
 * chain when the pass ends.
 *
 * 'least_serial' must be non-zero.
 */
static inline void
clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
                rbtdb_serial_t least_serial)
{
        rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
        rdatasetheader_t *top_prev, *top_next;
        isc_mem_t *mctx = rbtdb->common.mctx;
        isc_boolean_t still_dirty = ISC_FALSE;

        /*
         * Caller must be holding the node lock.
         */
        REQUIRE(least_serial != 0);

        /* top_prev is the last top-level header kept so far, if any. */
        top_prev = NULL;
        for (current = node->data; current != NULL; current = top_next) {
                /* Saved up front because 'current' may be freed below. */
                top_next = current->next;

                /*
                 * First, we clean up any instances of multiple rdatasets
                 * with the same serial number, or that have the IGNORE
                 * attribute.
                 */
                dparent = current;
                for (dcurrent = current->down;
                     dcurrent != NULL;
                     dcurrent = down_next) {
                        down_next = dcurrent->down;
                        INSIST(dcurrent->serial <= dparent->serial);
                        if (dcurrent->serial == dparent->serial ||
                            IGNORE(dcurrent)) {
                                /*
                                 * Splice dcurrent out of the chain: its
                                 * successor's 'next' is pointed at dparent
                                 * and dparent's 'down' at the successor.
                                 */
                                if (down_next != NULL)
                                        down_next->next = dparent;
                                dparent->down = down_next;
                                free_rdataset(rbtdb, mctx, dcurrent);
                        } else
                                dparent = dcurrent;
                }

                /*
                 * We've now eliminated all IGNORE datasets with the possible
                 * exception of current, which we now check.
                 */
                if (IGNORE(current)) {
                        down_next = current->down;
                        if (down_next == NULL) {
                                if (top_prev != NULL)
                                        top_prev->next = current->next;
                                else
                                        node->data = current->next;
                                free_rdataset(rbtdb, mctx, current);
                                /*
                                 * current no longer exists, so we can
                                 * just continue with the loop.
                                 */
                                continue;
                        } else {
                                /*
                                 * Pull up current->down, making it the new
                                 * current.
                                 */
                                if (top_prev != NULL)
                                        top_prev->next = down_next;
                                else
                                        node->data = down_next;
                                down_next->next = top_next;
                                free_rdataset(rbtdb, mctx, current);
                                current = down_next;
                        }
                }

                /*
                 * We now try to find the first down node less than the
                 * least serial.  dparent trails one step behind and ends
                 * up as the last entry that is kept.
                 */
                dparent = current;
                for (dcurrent = current->down;
                     dcurrent != NULL;
                     dcurrent = down_next) {
                        down_next = dcurrent->down;
                        if (dcurrent->serial < least_serial)
                                break;
                        dparent = dcurrent;
                }

                /*
                 * If there is such an rdataset, delete it and any older
                 * versions.
                 */
                if (dcurrent != NULL) {
                        do {
                                down_next = dcurrent->down;
                                INSIST(dcurrent->serial <= least_serial);
                                free_rdataset(rbtdb, mctx, dcurrent);
                                dcurrent = down_next;
                        } while (dcurrent != NULL);
                        dparent->down = NULL;
                }

                /*
                 * Note.  The serial number of 'current' might be less than
                 * least_serial too, but we cannot delete it because it is
                 * the most recent version, unless it is a NONEXISTENT
                 * rdataset.
                 */
                if (current->down != NULL) {
                        /* Older versions remain, so the node stays dirty. */
                        still_dirty = ISC_TRUE;
                        top_prev = current;
                } else {
                        /*
                         * If this is a NONEXISTENT rdataset, we can delete it.
                         */
                        if (NONEXISTENT(current)) {
                                if (top_prev != NULL)
                                        top_prev->next = current->next;
                                else
                                        node->data = current->next;
                                free_rdataset(rbtdb, mctx, current);
                        } else
                                top_prev = current;
                }
        }
        if (!still_dirty)
                node->dirty = 0;
}
1497
1498 static void
1499 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1500 {
1501         dns_rbtnode_t *nsecnode;
1502         dns_fixedname_t fname;
1503         dns_name_t *name;
1504         isc_result_t result = ISC_R_UNEXPECTED;
1505
1506         INSIST(!ISC_LINK_LINKED(node, deadlink));
1507
1508         switch (node->nsec) {
1509         case DNS_RBT_NSEC_NORMAL:
1510 #ifdef BIND9
1511                 if (rbtdb->rpz_cidr != NULL) {
1512                         dns_fixedname_init(&fname);
1513                         name = dns_fixedname_name(&fname);
1514                         dns_rbt_fullnamefromnode(node, name);
1515                         dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1516                 }
1517 #endif
1518                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1519                 break;
1520         case DNS_RBT_NSEC_HAS_NSEC:
1521                 dns_fixedname_init(&fname);
1522                 name = dns_fixedname_name(&fname);
1523                 dns_rbt_fullnamefromnode(node, name);
1524                 /*
1525                  * Delete the corresponding node from the auxiliary NSEC
1526                  * tree before deleting from the main tree.
1527                  */
1528                 nsecnode = NULL;
1529                 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1530                                           NULL, DNS_RBTFIND_EMPTYDATA,
1531                                           NULL, NULL);
1532                 if (result != ISC_R_SUCCESS) {
1533                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1534                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1535                                       "delete_node: "
1536                                       "dns_rbt_findnode(nsec): %s",
1537                                       isc_result_totext(result));
1538                 } else {
1539                         result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1540                                                     ISC_FALSE);
1541                         if (result != ISC_R_SUCCESS) {
1542                                 isc_log_write(dns_lctx,
1543                                               DNS_LOGCATEGORY_DATABASE,
1544                                               DNS_LOGMODULE_CACHE,
1545                                               ISC_LOG_WARNING,
1546                                               "delete_nsecnode(): "
1547                                               "dns_rbt_deletenode(nsecnode): %s",
1548                                               isc_result_totext(result));
1549                         }
1550                 }
1551                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1552 #ifdef BIND9
1553                 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1554 #endif
1555                 break;
1556         case DNS_RBT_NSEC_NSEC:
1557                 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1558                 break;
1559         case DNS_RBT_NSEC_NSEC3:
1560                 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1561                 break;
1562         }
1563         if (result != ISC_R_SUCCESS) {
1564                 isc_log_write(dns_lctx,
1565                               DNS_LOGCATEGORY_DATABASE,
1566                               DNS_LOGMODULE_CACHE,
1567                               ISC_LOG_WARNING,
1568                               "delete_nsecnode(): "
1569                               "dns_rbt_deletenode: %s",
1570                               isc_result_totext(result));
1571         }
1572 }
1573
1574 /*%
1575  * Clean up dead nodes.  These are nodes which have no references, and
1576  * have no data.  They are dead but we could not or chose not to delete
1577  * them when we deleted all the data at that node because we did not want
1578  * to wait for the tree write lock.
1579  *
1580  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1581  */
1582 static void
1583 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1584         dns_rbtnode_t *node;
1585         int count = 10;         /* XXXJT: should be adjustable */
1586
1587         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1588         while (node != NULL && count > 0) {
1589                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1590
1591                 /*
1592                  * Since we're holding a tree write lock, it should be
1593                  * impossible for this node to be referenced by others.
1594                  */
1595                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1596                        node->data == NULL);
1597
1598                 delete_node(rbtdb, node);
1599
1600                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1601                 count--;
1602         }
1603 }
1604
1605 /*
1606  * Caller must be holding the node lock if its reference must be protected
1607  * by the lock.
1608  */
1609 static inline void
1610 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1611         unsigned int lockrefs, noderefs;
1612         isc_refcount_t *lockref;
1613
1614         dns_rbtnode_refincrement0(node, &noderefs);
1615         if (noderefs == 1) {    /* this is the first reference to the node */
1616                 lockref = &rbtdb->node_locks[node->locknum].references;
1617                 isc_refcount_increment0(lockref, &lockrefs);
1618                 INSIST(lockrefs != 0);
1619         }
1620         INSIST(noderefs != 0);
1621 }
1622
1623 /*
1624  * This function is assumed to be called when a node is newly referenced
1625  * and can be in the deadnode list.  In that case the node must be retrieved
1626  * from the list because it is going to be used.  In addition, if the caller
1627  * happens to hold a write lock on the tree, it's a good chance to purge dead
1628  * nodes.
1629  * Note: while a new reference is gained in multiple places, there are only very
1630  * few cases where the node can be in the deadnode list (only empty nodes can
1631  * have been added to the list).
1632  */
1633 static inline void
1634 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1635                 isc_rwlocktype_t treelocktype)
1636 {
1637         isc_boolean_t need_relock = ISC_FALSE;
1638
1639         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1640         new_reference(rbtdb, node);
1641
1642         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1643                       isc_rwlocktype_read);
1644         if (ISC_LINK_LINKED(node, deadlink))
1645                 need_relock = ISC_TRUE;
1646         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1647                  treelocktype == isc_rwlocktype_write)
1648                 need_relock = ISC_TRUE;
1649         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1650                         isc_rwlocktype_read);
1651         if (need_relock) {
1652                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1653                               isc_rwlocktype_write);
1654                 if (ISC_LINK_LINKED(node, deadlink))
1655                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1656                                         node, deadlink);
1657                 if (treelocktype == isc_rwlocktype_write)
1658                         cleanup_dead_nodes(rbtdb, node->locknum);
1659                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1660                                 isc_rwlocktype_write);
1661         }
1662
1663         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1664 }
1665
1666 /*
1667  * Caller must be holding the node lock; either the "strong", read or write
1668  * lock.  Note that the lock must be held even when node references are
1669  * atomically modified; in that case the decrement operation itself does not
1670  * have to be protected, but we must avoid a race condition where multiple
1671  * threads are decreasing the reference to zero simultaneously and at least
1672  * one of them is going to free the node.
1673  * This function returns ISC_TRUE if and only if the node reference decreases
1674  * to zero.
      *
      * Arguments, as used by the code below:
      *   least_serial  Zone databases only: handed to clean_zone_node().  When
      *                 it is 0, the value is read from rbtdb->least_serial
      *                 under the database read lock.
      *   nlock         Type of node lock the caller holds.  If it is a read
      *                 lock, it is exchanged for a write lock for the slow
      *                 path and downgraded again before returning.
      *   tlock         Type of tree lock the caller holds (none, read or
      *                 write).  Unless it is already a write lock, a write
      *                 lock is attempted without blocking; on return the
      *                 caller's original tree lock state is restored.
      *   pruning       ISC_TRUE when called from prune_tree(); this stops the
      *                 function from dispatching yet another pruning event.
      *
      * Return value detail: on the slow path ISC_FALSE is also returned when a
      * pruning event was dispatched for the node (the event holds a new
      * reference), or when the node's reference count is found to be non-zero
      * again after the tree lock attempt.
1675  */
1676 static isc_boolean_t
1677 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1678                     rbtdb_serial_t least_serial,
1679                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1680                     isc_boolean_t pruning)
1681 {
1682         isc_result_t result;
1683         isc_boolean_t write_locked;
1684         rbtdb_nodelock_t *nodelock;
1685         unsigned int refs, nrefs;
1686         int bucket = node->locknum;
1687         isc_boolean_t no_reference;
1688
1689         nodelock = &rbtdb->node_locks[bucket];
1690
1691         /* Handle easy and typical case first. */
             /*
              * A clean node that still has data or children is never removed
              * here, so only the counters need updating and the caller's
              * locks are left untouched.
              */
1692         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1693                 dns_rbtnode_refdecrement(node, &nrefs);
1694                 INSIST((int)nrefs >= 0);
1695                 if (nrefs == 0) {
1696                         isc_refcount_decrement(&nodelock->references, &refs);
1697                         INSIST((int)refs >= 0);
1698                 }
1699                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1700         }
1701
1702         /* Upgrade the lock? */
             /*
              * As written, the read lock is released before the write lock is
              * acquired, i.e. this is not a single atomic upgrade step.
              */
1703         if (nlock == isc_rwlocktype_read) {
1704                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1705                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1706         }
1707         dns_rbtnode_refdecrement(node, &nrefs);
1708         INSIST((int)nrefs >= 0);
1709         if (nrefs > 0) {
1710                 /* Restore the lock? */
1711                 if (nlock == isc_rwlocktype_read)
1712                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1713                 return (ISC_FALSE);
1714         }
1715
             /*
              * Last reference gone.  A dirty node is cleaned first, using the
              * cache or the zone cleaner depending on the database type.
              */
1716         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1717                 if (IS_CACHE(rbtdb))
1718                         clean_cache_node(rbtdb, node);
1719                 else {
1720                         if (least_serial == 0) {
1721                                 /*
1722                                  * Caller doesn't know the least serial.
1723                                  * Get it.
1724                                  */
1725                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1726                                 least_serial = rbtdb->least_serial;
1727                                 RBTDB_UNLOCK(&rbtdb->lock,
1728                                              isc_rwlocktype_read);
1729                         }
1730                         clean_zone_node(rbtdb, node, least_serial);
1731                 }
1732         }
1733
             /* The node no longer counts as referenced in its lock bucket. */
1734         isc_refcount_decrement(&nodelock->references, &refs);
1735         INSIST((int)refs >= 0);
1736
1737         /*
1738          * XXXDCL should this only be done for cache zones?
1739          */
             /*
              * A node that still has data or children after cleaning stays in
              * the tree; return before the tree lock is touched.
              */
1740         if (node->data != NULL || node->down != NULL) {
1741                 /* Restore the lock? */
1742                 if (nlock == isc_rwlocktype_read)
1743                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1744                 return (ISC_TRUE);
1745         }
1746
1747         /*
1748          * Attempt to switch to a write lock on the tree.  If this fails,
1749          * we will add this node to a linked list of nodes in this locking
1750          * bucket which we will free later.
1751          */
1752         if (tlock != isc_rwlocktype_write) {
1753                 /*
1754                  * Locking hierarchy notwithstanding, we don't need to free
1755                  * the node lock before acquiring the tree write lock because
1756                  * we only do a trylock.
1757                  */
1758                 if (tlock == isc_rwlocktype_read)
1759                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1760                 else
1761                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1762                                                     isc_rwlocktype_write);
1763                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1764                               result == ISC_R_LOCKBUSY);
1765
1766                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1767         } else
1768                 write_locked = ISC_TRUE;
1769
             /*
              * no_reference is the return value; it is cleared below whenever
              * the node ends up referenced again.
              */
1770         no_reference = ISC_TRUE;
1771         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1772                 /*
1773                  * We can now delete the node if the reference counter is
1774                  * zero.  This should be typically the case, but a different
1775                  * thread may still gain a (new) reference just before the
1776                  * current thread locks the tree (e.g., in findnode()).
1777                  */
1778
1779                 /*
1780                  * If this node is the only one in the level it's in, deleting
1781                  * this node may recursively make its parent the only node in
1782                  * the parent level; if so, and if no one is currently using
1783                  * the parent node, this is almost the only opportunity to
1784                  * clean it up.  But the recursive cleanup is not that trivial
1785                  * since the child and parent may be in different lock buckets,
1786                  * which would cause a lock order reversal problem.  To avoid
1787                  * the trouble, we'll dispatch a separate event for batch
1788                  * cleaning.  We need to check whether we're deleting the node
1789                  * as a result of pruning to avoid infinite dispatching.
1790                  * Note: pruning happens only when a task has been set for the
1791                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1792                  * it's their responsibility to purge stale leaves (e.g. by
1793                  * periodic walk-through).
1794                  */
1795                 if (!pruning && node->parent != NULL &&
1796                     node->parent->down == node && node->left == NULL &&
1797                     node->right == NULL && rbtdb->task != NULL) {
1798                         isc_event_t *ev;
1799                         dns_db_t *db;
1800
1801                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1802                                                 DNS_EVENT_RBTPRUNE,
1803                                                 prune_tree, node,
1804                                                 sizeof(isc_event_t));
1805                         if (ev != NULL) {
                                     /*
                                      * The event carries a node reference and
                                      * a database reference (as ev_sender);
                                      * prune_tree() releases both.
                                      */
1806                                 new_reference(rbtdb, node);
1807                                 db = NULL;
1808                                 attach((dns_db_t *)rbtdb, &db);
1809                                 ev->ev_sender = db;
1810                                 isc_task_send(rbtdb->task, &ev);
1811                                 no_reference = ISC_FALSE;
1812                         } else {
1813                                 /*
1814                                  * XXX: this is a weird situation.  We could
1815                                  * ignore this error case, but then the stale
1816                                  * node will unlikely be purged except via a
1817                                  * rare condition such as manual cleanup.  So
1818                                  * we queue it in the deadnodes list, hoping
1819                                  * the memory shortage is temporary and the node
1820                                  * will be deleted later.
1821                                  */
1822                                 isc_log_write(dns_lctx,
1823                                               DNS_LOGCATEGORY_DATABASE,
1824                                               DNS_LOGMODULE_CACHE,
1825                                               ISC_LOG_INFO,
1826                                               "decrement_reference: failed to "
1827                                               "allocate pruning event");
1828                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1829                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1830                                                 deadlink);
1831                         }
1832                 } else {
1833                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1834                                 char printname[DNS_NAME_FORMATSIZE];
1835
1836                                 isc_log_write(dns_lctx,
1837                                               DNS_LOGCATEGORY_DATABASE,
1838                                               DNS_LOGMODULE_CACHE,
1839                                               ISC_LOG_DEBUG(1),
1840                                               "decrement_reference: "
1841                                               "delete from rbt: %p %s",
1842                                               node,
1843                                               dns_rbt_formatnodename(node,
1844                                                         printname,
1845                                                         sizeof(printname)));
1846                         }
1847
1848                         delete_node(rbtdb, node);
1849                 }
1850         } else if (dns_rbtnode_refcurrent(node) == 0) {
                     /*
                      * No tree write lock available: queue the node on this
                      * bucket's dead-node list instead of deleting it now.
                      */
1851                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1852                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1853         } else
1854                 no_reference = ISC_FALSE;
1855
1856         /* Restore the lock? */
1857         if (nlock == isc_rwlocktype_read)
1858                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1859
1860         /*
1861          * Relock a read lock, or unlock the write lock if no lock was held.
1862          */
1863         if (tlock == isc_rwlocktype_none)
1864                 if (write_locked)
1865                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1866
1867         if (tlock == isc_rwlocktype_read)
1868                 if (write_locked)
1869                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1870
1871         return (no_reference);
1872 }
1873
1874 /*
1875  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1876  * case, the number of iteration is the number of tree levels, which is at
1877  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1878  * should be much smaller (only a few times), and even the worst case would be
1879  * acceptable for a single event.
1880  */
1881 static void
1882 prune_tree(isc_task_t *task, isc_event_t *event) {
1883         dns_rbtdb_t *rbtdb = event->ev_sender;
1884         dns_rbtnode_t *node = event->ev_arg;
1885         dns_rbtnode_t *parent;
1886         unsigned int locknum;
1887
1888         UNUSED(task);
1889
1890         isc_event_free(&event);
1891
1892         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1893         locknum = node->locknum;
1894         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1895         do {
1896                 parent = node->parent;
1897                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1898                                     isc_rwlocktype_write, ISC_TRUE);
1899
1900                 if (parent != NULL && parent->down == NULL) {
1901                         /*
1902                          * node was the only down child of the parent and has
1903                          * just been removed.  We'll then need to examine the
1904                          * parent.  Keep the lock if possible; otherwise,
1905                          * release the old lock and acquire one for the parent.
1906                          */
1907                         if (parent->locknum != locknum) {
1908                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1909                                             isc_rwlocktype_write);
1910                                 locknum = parent->locknum;
1911                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1912                                           isc_rwlocktype_write);
1913                         }
1914
1915                         /*
1916                          * We need to gain a reference to the node before
1917                          * decrementing it in the next iteration.  In addition,
1918                          * if the node is in the dead-nodes list, extract it
1919                          * from the list beforehand as we do in
1920                          * reactivate_node().
1921                          */
1922                         new_reference(rbtdb, parent);
1923                         if (ISC_LINK_LINKED(parent, deadlink)) {
1924                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1925                                                 parent, deadlink);
1926                         }
1927                 } else
1928                         parent = NULL;
1929
1930                 node = parent;
1931         } while (node != NULL);
1932         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1933         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1934
1935         detach((dns_db_t **)&rbtdb);
1936 }
1937
1938 static inline void
1939 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1940                    rbtdb_changedlist_t *cleanup_list)
1941 {
1942         /*
1943          * Caller must be holding the database lock.
1944          */
1945
1946         rbtdb->least_serial = version->serial;
1947         *cleanup_list = version->changed_list;
1948         ISC_LIST_INIT(version->changed_list);
1949 }
1950
1951 static inline void
1952 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1953         rbtdb_changed_t *changed, *next_changed;
1954
1955         /*
1956          * If the changed record is dirty, then
1957          * an update created multiple versions of
1958          * a given rdataset.  We keep this list
1959          * until we're the least open version, at
1960          * which point it's safe to get rid of any
1961          * older versions.
1962          *
1963          * If the changed record isn't dirty, then
1964          * we don't need it anymore since we're
1965          * committing and not rolling back.
1966          *
1967          * The caller must be holding the database lock.
1968          */
1969         for (changed = HEAD(version->changed_list);
1970              changed != NULL;
1971              changed = next_changed) {
1972                 next_changed = NEXT(changed, link);
1973                 if (!changed->dirty) {
1974                         UNLINK(version->changed_list,
1975                                changed, link);
1976                         APPEND(*cleanup_list,
1977                                changed, link);
1978                 }
1979         }
1980 }
1981
1982 static void
1983 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1984 #ifndef BIND9
1985         UNUSED(db);
1986         UNUSED(version);
1987         UNUSED(origin);
1988
1989         return;
1990 #else
1991         dns_rdataset_t keyset;
1992         dns_rdataset_t nsecset, signsecset;
1993         isc_boolean_t haszonekey = ISC_FALSE;
1994         isc_boolean_t hasnsec = ISC_FALSE;
1995         isc_result_t result;
1996
1997         dns_rdataset_init(&keyset);
1998         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1999                                      0, 0, &keyset, NULL);
2000         if (result == ISC_R_SUCCESS) {
2001                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
2002                 result = dns_rdataset_first(&keyset);
2003                 while (result == ISC_R_SUCCESS) {
2004                         dns_rdataset_current(&keyset, &keyrdata);
2005                         if (dns_zonekey_iszonekey(&keyrdata)) {
2006                                 haszonekey = ISC_TRUE;
2007                                 break;
2008                         }
2009                         result = dns_rdataset_next(&keyset);
2010                 }
2011                 dns_rdataset_disassociate(&keyset);
2012         }
2013         if (!haszonekey) {
2014                 version->secure = dns_db_insecure;
2015                 version->havensec3 = ISC_FALSE;
2016                 return;
2017         }
2018
2019         dns_rdataset_init(&nsecset);
2020         dns_rdataset_init(&signsecset);
2021         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2022                                      0, 0, &nsecset, &signsecset);
2023         if (result == ISC_R_SUCCESS) {
2024                 if (dns_rdataset_isassociated(&signsecset)) {
2025                         hasnsec = ISC_TRUE;
2026                         dns_rdataset_disassociate(&signsecset);
2027                 }
2028                 dns_rdataset_disassociate(&nsecset);
2029         }
2030
2031         setnsec3parameters(db, version);
2032
2033         /*
2034          * Do we have a valid NSEC/NSEC3 chain?
2035          */
2036         if (version->havensec3 || hasnsec)
2037                 version->secure = dns_db_secure;
2038         else
2039                 version->secure = dns_db_insecure;
2040 #endif
2041 }
2042
/*%
 * Walk the rdatasets at the origin node looking for NSEC3PARAM records
 * that are visible in 'version', and cache the NSEC3 parameters (hash,
 * salt, iterations, flags) of a usable one in 'version'.
 * version->havensec3 tells the caller whether any were found.
 */
#ifdef BIND9
static void
setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
        dns_rbtnode_t *node;
        dns_rdata_nsec3param_t nsec3param;
        dns_rdata_t rdata = DNS_RDATA_INIT;
        isc_region_t region;
        isc_result_t result;
        rdatasetheader_t *top, *header;
        unsigned char *raw;             /* RDATASLAB */
        unsigned int count, length;

        RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
        version->havensec3 = ISC_FALSE;
        node = rbtdb->origin_node;
        NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
                  isc_rwlocktype_read);
        for (top = node->data; top != NULL; top = top->next) {
                /*
                 * Follow the 'down' chain to the first header that is
                 * visible in this version and not ignored; a header marked
                 * nonexistent means there is nothing to look at here.
                 */
                for (header = top; header != NULL; header = header->down) {
                        if (header->serial <= version->serial &&
                            !IGNORE(header)) {
                                if (NONEXISTENT(header))
                                        header = NULL;
                                break;
                        }
                }

                if (header == NULL ||
                    header->type != dns_rdatatype_nsec3param)
                        continue;

                /*
                 * Find A NSEC3PARAM with a supported algorithm.
                 *
                 * The slab following the header starts with a two-octet
                 * record count; every record is preceded by a two-octet
                 * length (plus extra per-record octets when
                 * DNS_RDATASET_FIXED is set, which are skipped).
                 */
                raw = (unsigned char *)header + sizeof(*header);
                count = raw[0] * 256 + raw[1]; /* count */
#if DNS_RDATASET_FIXED
                raw += count * 4 + 2;
#else
                raw += 2;
#endif
                for (; count > 0U; count--) {
                        length = raw[0] * 256 + raw[1];
#if DNS_RDATASET_FIXED
                        raw += 4;
#else
                        raw += 2;
#endif
                        region.base = raw;
                        region.length = length;
                        raw += length;
                        dns_rdata_fromregion(&rdata,
                                             rbtdb->common.rdclass,
                                             dns_rdatatype_nsec3param,
                                             &region);
                        result = dns_rdata_tostruct(&rdata, &nsec3param,
                                                    NULL);
                        INSIST(result == ISC_R_SUCCESS);
                        dns_rdata_reset(&rdata);

                        /* Skip hashes we can't handle ... */
                        if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
                            !dns_nsec3_supportedhash(nsec3param.hash))
                                continue;

                        /* ... and records with any flag set. */
                        if (nsec3param.flags != 0)
                                continue;

                        memcpy(version->salt, nsec3param.salt,
                               nsec3param.salt_length);
                        version->hash = nsec3param.hash;
                        version->salt_length = nsec3param.salt_length;
                        version->iterations = nsec3param.iterations;
                        version->flags = nsec3param.flags;
                        version->havensec3 = ISC_TRUE;

                        /*
                         * Look for a better algorithm than the
                         * unknown test algorithm.
                         */
                        if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
                                goto unlock;
                }
        }
 unlock:
        NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
                    isc_rwlocktype_read);
        RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
}
#endif
2140
2141 static void
2142 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2143         dns_rbtdb_t *rbtdb = event->ev_arg;
2144         isc_boolean_t again = ISC_FALSE;
2145         unsigned int locknum;
2146         unsigned int refs;
2147
2148         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2149         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2150                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2151                           isc_rwlocktype_write);
2152                 cleanup_dead_nodes(rbtdb, locknum);
2153                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2154                         again = ISC_TRUE;
2155                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2156                             isc_rwlocktype_write);
2157         }
2158         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2159         if (again)
2160                 isc_task_send(task, &event);
2161         else {
2162                 isc_event_free(&event);
2163                 isc_refcount_decrement(&rbtdb->references, &refs);
2164                 if (refs == 0)
2165                         maybe_free_rbtdb(rbtdb);
2166         }
2167 }
2168
2169 static void
2170 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2171         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2172         rbtdb_version_t *version, *cleanup_version, *least_greater;
2173         isc_boolean_t rollback = ISC_FALSE;
2174         rbtdb_changedlist_t cleanup_list;
2175         rdatasetheaderlist_t resigned_list;
2176         rbtdb_changed_t *changed, *next_changed;
2177         rbtdb_serial_t serial, least_serial;
2178         dns_rbtnode_t *rbtnode;
2179         unsigned int refs;
2180         rdatasetheader_t *header;
2181         isc_boolean_t writer;
2182
2183         REQUIRE(VALID_RBTDB(rbtdb));
2184         version = (rbtdb_version_t *)*versionp;
2185
2186         cleanup_version = NULL;
2187         ISC_LIST_INIT(cleanup_list);
2188         ISC_LIST_INIT(resigned_list);
2189
2190         isc_refcount_decrement(&version->references, &refs);
2191         if (refs > 0) {         /* typical and easy case first */
2192                 if (commit) {
2193                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2194                         INSIST(!version->writer);
2195                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2196                 }
2197                 goto end;
2198         }
2199
2200         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2201         serial = version->serial;
2202         writer = version->writer;
2203         if (version->writer) {
2204                 if (commit) {
2205                         unsigned cur_ref;
2206                         rbtdb_version_t *cur_version;
2207
2208                         INSIST(version->commit_ok);
2209                         INSIST(version == rbtdb->future_version);
2210                         /*
2211                          * The current version is going to be replaced.
2212                          * Release the (likely last) reference to it from the
2213                          * DB itself and unlink it from the open list.
2214                          */
2215                         cur_version = rbtdb->current_version;
2216                         isc_refcount_decrement(&cur_version->references,
2217                                                &cur_ref);
2218                         if (cur_ref == 0) {
2219                                 if (cur_version->serial == rbtdb->least_serial)
2220                                         INSIST(EMPTY(cur_version->changed_list));
2221                                 UNLINK(rbtdb->open_versions,
2222                                        cur_version, link);
2223                         }
2224                         if (EMPTY(rbtdb->open_versions)) {
2225                                 /*
2226                                  * We're going to become the least open
2227                                  * version.
2228                                  */
2229                                 make_least_version(rbtdb, version,
2230                                                    &cleanup_list);
2231                         } else {
2232                                 /*
2233                                  * Some other open version is the
2234                                  * least version.  We can't cleanup
2235                                  * records that were changed in this
2236                                  * version because the older versions
2237                                  * may still be in use by an open
2238                                  * version.
2239                                  *
2240                                  * We can, however, discard the
2241                                  * changed records for things that
2242                                  * we've added that didn't exist in
2243                                  * prior versions.
2244                                  */
2245                                 cleanup_nondirty(version, &cleanup_list);
2246                         }
2247                         /*
2248                          * If the (soon to be former) current version
2249                          * isn't being used by anyone, we can clean
2250                          * it up.
2251                          */
2252                         if (cur_ref == 0) {
2253                                 cleanup_version = cur_version;
2254                                 APPENDLIST(version->changed_list,
2255                                            cleanup_version->changed_list,
2256                                            link);
2257                         }
2258                         /*
2259                          * Become the current version.
2260                          */
2261                         version->writer = ISC_FALSE;
2262                         rbtdb->current_version = version;
2263                         rbtdb->current_serial = version->serial;
2264                         rbtdb->future_version = NULL;
2265
2266                         /*
2267                          * Keep the current version in the open list, and
2268                          * gain a reference for the DB itself (see the DB
2269                          * creation function below).  This must be the only
2270                          * case where we need to increment the counter from
2271                          * zero and need to use isc_refcount_increment0().
2272                          */
2273                         isc_refcount_increment0(&version->references,
2274                                                 &cur_ref);
2275                         INSIST(cur_ref == 1);
2276                         PREPEND(rbtdb->open_versions,
2277                                 rbtdb->current_version, link);
2278                         resigned_list = version->resigned_list;
2279                         ISC_LIST_INIT(version->resigned_list);
2280                 } else {
2281                         /*
2282                          * We're rolling back this transaction.
2283                          */
2284                         cleanup_list = version->changed_list;
2285                         ISC_LIST_INIT(version->changed_list);
2286                         resigned_list = version->resigned_list;
2287                         ISC_LIST_INIT(version->resigned_list);
2288                         rollback = ISC_TRUE;
2289                         cleanup_version = version;
2290                         rbtdb->future_version = NULL;
2291                 }
2292         } else {
2293                 if (version != rbtdb->current_version) {
2294                         /*
2295                          * There are no external or internal references
2296                          * to this version and it can be cleaned up.
2297                          */
2298                         cleanup_version = version;
2299
2300                         /*
2301                          * Find the version with the least serial
2302                          * number greater than ours.
2303                          */
2304                         least_greater = PREV(version, link);
2305                         if (least_greater == NULL)
2306                                 least_greater = rbtdb->current_version;
2307
2308                         INSIST(version->serial < least_greater->serial);
2309                         /*
2310                          * Is this the least open version?
2311                          */
2312                         if (version->serial == rbtdb->least_serial) {
2313                                 /*
2314                                  * Yes.  Install the new least open
2315                                  * version.
2316                                  */
2317                                 make_least_version(rbtdb,
2318                                                    least_greater,
2319                                                    &cleanup_list);
2320                         } else {
2321                                 /*
2322                                  * Add any unexecuted cleanups to
2323                                  * those of the least greater version.
2324                                  */
2325                                 APPENDLIST(least_greater->changed_list,
2326                                            version->changed_list,
2327                                            link);
2328                         }
2329                 } else if (version->serial == rbtdb->least_serial)
2330                         INSIST(EMPTY(version->changed_list));
2331                 UNLINK(rbtdb->open_versions, version, link);
2332         }
2333         least_serial = rbtdb->least_serial;
2334         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2335
2336         /*
2337          * Update the zone's secure status.
2338          */
2339         if (writer && commit && !IS_CACHE(rbtdb))
2340                 iszonesecure(db, version, rbtdb->origin_node);
2341
2342         if (cleanup_version != NULL) {
2343                 INSIST(EMPTY(cleanup_version->changed_list));
2344                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2345                             sizeof(*cleanup_version));
2346         }
2347
2348         /*
2349          * Commit/rollback re-signed headers.
2350          */
2351         for (header = HEAD(resigned_list);
2352              header != NULL;
2353              header = HEAD(resigned_list)) {
2354                 nodelock_t *lock;
2355
2356                 ISC_LIST_UNLINK(resigned_list, header, link);
2357
2358                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2359                 NODE_LOCK(lock, isc_rwlocktype_write);
2360                 if (rollback)
2361                         resign_insert(rbtdb, header->node->locknum, header);
2362                 decrement_reference(rbtdb, header->node, least_serial,
2363                                     isc_rwlocktype_write, isc_rwlocktype_none,
2364                                     ISC_FALSE);
2365                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2366         }
2367
2368         if (!EMPTY(cleanup_list)) {
2369                 isc_event_t *event = NULL;
2370                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2371
2372                 if (rbtdb->task != NULL)
2373                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
2374                                                    DNS_EVENT_RBTDEADNODES,
2375                                                    cleanup_dead_nodes_callback,
2376                                                    rbtdb, sizeof(isc_event_t));
2377                 if (event == NULL) {
2378                         /*
2379                          * We acquire a tree write lock here in order to make
2380                          * sure that stale nodes will be removed in
2381                          * decrement_reference().  If we didn't have the lock,
2382                          * those nodes could miss the chance to be removed
2383                          * until the server stops.  The write lock is
2384                          * expensive, but this event should be rare enough
2385                          * to justify the cost.
2386                          */
2387                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2388                         tlock = isc_rwlocktype_write;
2389                 }
2390
2391                 for (changed = HEAD(cleanup_list);
2392                      changed != NULL;
2393                      changed = next_changed) {
2394                         nodelock_t *lock;
2395
2396                         next_changed = NEXT(changed, link);
2397                         rbtnode = changed->node;
2398                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2399
2400                         NODE_LOCK(lock, isc_rwlocktype_write);
2401                         /*
2402                          * This is a good opportunity to purge any dead nodes,
2403                          * so use it.
2404                          */
2405                         if (event == NULL)
2406                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2407
2408                         if (rollback)
2409                                 rollback_node(rbtnode, serial);
2410                         decrement_reference(rbtdb, rbtnode, least_serial,
2411                                             isc_rwlocktype_write, tlock,
2412                                             ISC_FALSE);
2413
2414                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2415
2416                         isc_mem_put(rbtdb->common.mctx, changed,
2417                                     sizeof(*changed));
2418                 }
2419                 if (event != NULL) {
2420                         isc_refcount_increment(&rbtdb->references, NULL);
2421                         isc_task_send(rbtdb->task, &event);
2422                 } else
2423                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2424         }
2425
2426  end:
2427         *versionp = NULL;
2428 }
2429
2430 /*
2431  * Add the necessary magic for the wildcard name 'name'
2432  * to be found in 'rbtdb'.
2433  *
2434  * In order for wildcard matching to work correctly in
2435  * zone_find(), we must ensure that a node for the wildcarding
2436  * level exists in the database, and has its 'find_callback'
2437  * and 'wild' bits set.
2438  *
2439  * E.g. if the wildcard name is "*.sub.example." then we
2440  * must ensure that "sub.example." exists and is marked as
2441  * a wildcard level.
2442  */
2443 static isc_result_t
2444 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2445         isc_result_t result;
2446         dns_name_t foundname;
2447         dns_offsets_t offsets;
2448         unsigned int n;
2449         dns_rbtnode_t *node = NULL;
2450
2451         dns_name_init(&foundname, offsets);
2452         n = dns_name_countlabels(name);
2453         INSIST(n >= 2);
2454         n--;
2455         dns_name_getlabelsequence(name, 1, n, &foundname);
2456         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2457         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2458                 return (result);
2459         if (result == ISC_R_SUCCESS)
2460                 node->nsec = DNS_RBT_NSEC_NORMAL;
2461         node->find_callback = 1;
2462         node->wild = 1;
2463         return (ISC_R_SUCCESS);
2464 }
2465
2466 static isc_result_t
2467 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2468         isc_result_t result;
2469         dns_name_t foundname;
2470         dns_offsets_t offsets;
2471         unsigned int n, l, i;
2472
2473         dns_name_init(&foundname, offsets);
2474         n = dns_name_countlabels(name);
2475         l = dns_name_countlabels(&rbtdb->common.origin);
2476         i = l + 1;
2477         while (i < n) {
2478                 dns_rbtnode_t *node = NULL;     /* dummy */
2479                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2480                 if (dns_name_iswildcard(&foundname)) {
2481                         result = add_wildcard_magic(rbtdb, &foundname);
2482                         if (result != ISC_R_SUCCESS)
2483                                 return (result);
2484                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2485                                                  &node);
2486                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2487                                 return (result);
2488                         if (result == ISC_R_SUCCESS)
2489                                 node->nsec = DNS_RBT_NSEC_NORMAL;
2490                 }
2491                 i++;
2492         }
2493         return (ISC_R_SUCCESS);
2494 }
2495
2496 static isc_result_t
2497 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2498          dns_dbnode_t **nodep)
2499 {
2500         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2501         dns_rbtnode_t *node = NULL;
2502         dns_name_t nodename;
2503         isc_result_t result;
2504         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2505
2506         REQUIRE(VALID_RBTDB(rbtdb));
2507
2508         dns_name_init(&nodename, NULL);
2509         RWLOCK(&rbtdb->tree_lock, locktype);
2510         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2511                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2512         if (result != ISC_R_SUCCESS) {
2513                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2514                 if (!create) {
2515                         if (result == DNS_R_PARTIALMATCH)
2516                                 result = ISC_R_NOTFOUND;
2517                         return (result);
2518                 }
2519                 /*
2520                  * It would be nice to try to upgrade the lock instead of
2521                  * unlocking then relocking.
2522                  */
2523                 locktype = isc_rwlocktype_write;
2524                 RWLOCK(&rbtdb->tree_lock, locktype);
2525                 node = NULL;
2526                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2527                 if (result == ISC_R_SUCCESS) {
2528 #ifdef BIND9
2529                         if (rbtdb->rpz_cidr != NULL) {
2530                                 dns_fixedname_t fnamef;
2531                                 dns_name_t *fname;
2532
2533                                 dns_fixedname_init(&fnamef);
2534                                 fname = dns_fixedname_name(&fnamef);
2535                                 dns_rbt_fullnamefromnode(node, fname);
2536                                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, fname);
2537                         }
2538 #endif
2539                         dns_rbt_namefromnode(node, &nodename);
2540 #ifdef DNS_RBT_USEHASH
2541                         node->locknum = node->hashval % rbtdb->node_lock_count;
2542 #else
2543                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2544                                 rbtdb->node_lock_count;
2545 #endif
2546                         add_empty_wildcards(rbtdb, name);
2547
2548                         if (dns_name_iswildcard(name)) {
2549                                 result = add_wildcard_magic(rbtdb, name);
2550                                 if (result != ISC_R_SUCCESS) {
2551                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2552                                         return (result);
2553                                 }
2554                         }
2555                 } else if (result != ISC_R_EXISTS) {
2556                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2557                         return (result);
2558                 }
2559         }
2560         reactivate_node(rbtdb, node, locktype);
2561         RWUNLOCK(&rbtdb->tree_lock, locktype);
2562
2563         *nodep = (dns_dbnode_t *)node;
2564
2565         return (ISC_R_SUCCESS);
2566 }
2567
2568 static isc_result_t
2569 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2570               dns_dbnode_t **nodep)
2571 {
2572         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2573         dns_rbtnode_t *node = NULL;
2574         dns_name_t nodename;
2575         isc_result_t result;
2576         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2577
2578         REQUIRE(VALID_RBTDB(rbtdb));
2579
2580         dns_name_init(&nodename, NULL);
2581         RWLOCK(&rbtdb->tree_lock, locktype);
2582         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2583                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2584         if (result != ISC_R_SUCCESS) {
2585                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2586                 if (!create) {
2587                         if (result == DNS_R_PARTIALMATCH)
2588                                 result = ISC_R_NOTFOUND;
2589                         return (result);
2590                 }
2591                 /*
2592                  * It would be nice to try to upgrade the lock instead of
2593                  * unlocking then relocking.
2594                  */
2595                 locktype = isc_rwlocktype_write;
2596                 RWLOCK(&rbtdb->tree_lock, locktype);
2597                 node = NULL;
2598                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2599                 if (result == ISC_R_SUCCESS) {
2600                         dns_rbt_namefromnode(node, &nodename);
2601 #ifdef DNS_RBT_USEHASH
2602                         node->locknum = node->hashval % rbtdb->node_lock_count;
2603 #else
2604                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2605                                 rbtdb->node_lock_count;
2606 #endif
2607                         node->nsec = DNS_RBT_NSEC_NSEC3;
2608                 } else if (result != ISC_R_EXISTS) {
2609                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2610                         return (result);
2611                 }
2612         } else {
2613                 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2614         }
2615         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2616         new_reference(rbtdb, node);
2617         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2618         RWUNLOCK(&rbtdb->tree_lock, locktype);
2619
2620         *nodep = (dns_dbnode_t *)node;
2621
2622         return (ISC_R_SUCCESS);
2623 }
2624
2625 static isc_result_t
2626 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2627         rbtdb_search_t *search = arg;
2628         rdatasetheader_t *header, *header_next;
2629         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2630         rdatasetheader_t *found;
2631         isc_result_t result;
2632         dns_rbtnode_t *onode;
2633
2634         /*
2635          * We only want to remember the topmost zone cut, since it's the one
2636          * that counts, so we'll just continue if we've already found a
2637          * zonecut.
2638          */
2639         if (search->zonecut != NULL)
2640                 return (DNS_R_CONTINUE);
2641
2642         found = NULL;
2643         result = DNS_R_CONTINUE;
2644         onode = search->rbtdb->origin_node;
2645
2646         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2647                   isc_rwlocktype_read);
2648
2649         /*
2650          * Look for an NS or DNAME rdataset active in our version.
2651          */
2652         ns_header = NULL;
2653         dname_header = NULL;
2654         sigdname_header = NULL;
2655         for (header = node->data; header != NULL; header = header_next) {
2656                 header_next = header->next;
2657                 if (header->type == dns_rdatatype_ns ||
2658                     header->type == dns_rdatatype_dname ||
2659                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2660                         do {
2661                                 if (header->serial <= search->serial &&
2662                                     !IGNORE(header)) {
2663                                         /*
2664                                          * Is this a "this rdataset doesn't
2665                                          * exist" record?
2666                                          */
2667                                         if (NONEXISTENT(header))
2668                                                 header = NULL;
2669                                         break;
2670                                 } else
2671                                         header = header->down;
2672                         } while (header != NULL);
2673                         if (header != NULL) {
2674                                 if (header->type == dns_rdatatype_dname)
2675                                         dname_header = header;
2676                                 else if (header->type ==
2677                                            RBTDB_RDATATYPE_SIGDNAME)
2678                                         sigdname_header = header;
2679                                 else if (node != onode ||
2680                                          IS_STUB(search->rbtdb)) {
2681                                         /*
2682                                          * We've found an NS rdataset that
2683                                          * isn't at the origin node.  We check
2684                                          * that they're not at the origin node,
2685                                          * because otherwise we'd erroneously
2686                                          * treat the zone top as if it were
2687                                          * a delegation.
2688                                          */
2689                                         ns_header = header;
2690                                 }
2691                         }
2692                 }
2693         }
2694
2695         /*
2696          * Did we find anything?
2697          */
2698         if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2699             ns_header != NULL) {
2700                 /*
2701                  * Note that NS has precedence over DNAME if both exist
2702                  * in a zone.  Otherwise DNAME take precedence over NS.
2703                  */
2704                 found = ns_header;
2705                 search->zonecut_sigrdataset = NULL;
2706         } else if (dname_header != NULL) {
2707                 found = dname_header;
2708                 search->zonecut_sigrdataset = sigdname_header;
2709         } else if (ns_header != NULL) {
2710                 found = ns_header;
2711                 search->zonecut_sigrdataset = NULL;
2712         }
2713
2714         if (found != NULL) {
2715                 /*
2716                  * We increment the reference count on node to ensure that
2717                  * search->zonecut_rdataset will still be valid later.
2718                  */
2719                 new_reference(search->rbtdb, node);
2720                 search->zonecut = node;
2721                 search->zonecut_rdataset = found;
2722                 search->need_cleanup = ISC_TRUE;
2723                 /*
2724                  * Since we've found a zonecut, anything beneath it is
2725                  * glue and is not subject to wildcard matching, so we
2726                  * may clear search->wild.
2727                  */
2728                 search->wild = ISC_FALSE;
2729                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2730                         /*
2731                          * If the caller does not want to find glue, then
2732                          * this is the best answer and the search should
2733                          * stop now.
2734                          */
2735                         result = DNS_R_PARTIALMATCH;
2736                 } else {
2737                         dns_name_t *zcname;
2738
2739                         /*
2740                          * The search will continue beneath the zone cut.
2741                          * This may or may not be the best match.  In case it
2742                          * is, we need to remember the node name.
2743                          */
2744                         zcname = dns_fixedname_name(&search->zonecut_name);
2745                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2746                                       ISC_R_SUCCESS);
2747                         search->copy_name = ISC_TRUE;
2748                 }
2749         } else {
2750                 /*
2751                  * There is no zonecut at this node which is active in this
2752                  * version.
2753                  *
2754                  * If this is a "wild" node and the caller hasn't disabled
2755                  * wildcard matching, remember that we've seen a wild node
2756                  * in case we need to go searching for wildcard matches
2757                  * later on.
2758                  */
2759                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2760                         search->wild = ISC_TRUE;
2761         }
2762
2763         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2764                     isc_rwlocktype_read);
2765
2766         return (result);
2767 }
2768
2769 static inline void
2770 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2771               rdatasetheader_t *header, isc_stdtime_t now,
2772               dns_rdataset_t *rdataset)
2773 {
2774         unsigned char *raw;     /* RDATASLAB */
2775
2776         /*
2777          * Caller must be holding the node reader lock.
2778          * XXXJT: technically, we need a writer lock, since we'll increment
2779          * the header count below.  However, since the actual counter value
2780          * doesn't matter, we prioritize performance here.  (We may want to
2781          * use atomic increment when available).
2782          */
2783
2784         if (rdataset == NULL)
2785                 return;
2786
2787         new_reference(rbtdb, node);
2788
2789         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2790
2791         rdataset->methods = &rdataset_methods;
2792         rdataset->rdclass = rbtdb->common.rdclass;
2793         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2794         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2795         rdataset->ttl = header->rdh_ttl - now;
2796         rdataset->trust = header->trust;
2797         if (NEGATIVE(header))
2798                 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2799         if (NXDOMAIN(header))
2800                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2801         if (OPTOUT(header))
2802                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2803         rdataset->private1 = rbtdb;
2804         rdataset->private2 = node;
2805         raw = (unsigned char *)header + sizeof(*header);
2806         rdataset->private3 = raw;
2807         rdataset->count = header->count++;
2808         if (rdataset->count == ISC_UINT32_MAX)
2809                 rdataset->count = 0;
2810
2811         /*
2812          * Reset iterator state.
2813          */
2814         rdataset->privateuint4 = 0;
2815         rdataset->private5 = NULL;
2816
2817         /*
2818          * Add noqname proof.
2819          */
2820         rdataset->private6 = header->noqname;
2821         if (rdataset->private6 != NULL)
2822                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2823         rdataset->private7 = header->closest;
2824         if (rdataset->private7 != NULL)
2825                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2826
2827         /*
2828          * Copy out re-signing information.
2829          */
2830         if (RESIGN(header)) {
2831                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2832                 rdataset->resign = header->resign;
2833         } else
2834                 rdataset->resign = 0;
2835 }
2836
2837 static inline isc_result_t
2838 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2839                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2840                  dns_rdataset_t *sigrdataset)
2841 {
2842         isc_result_t result;
2843         dns_name_t *zcname;
2844         rbtdb_rdatatype_t type;
2845         dns_rbtnode_t *node;
2846
2847         /*
2848          * The caller MUST NOT be holding any node locks.
2849          */
2850
2851         node = search->zonecut;
2852         type = search->zonecut_rdataset->type;
2853
2854         /*
2855          * If we have to set foundname, we do it before anything else.
2856          * If we were to set foundname after we had set nodep or bound the
2857          * rdataset, then we'd have to undo that work if dns_name_copy()
2858          * failed.  By setting foundname first, there's nothing to undo if
2859          * we have trouble.
2860          */
2861         if (foundname != NULL && search->copy_name) {
2862                 zcname = dns_fixedname_name(&search->zonecut_name);
2863                 result = dns_name_copy(zcname, foundname, NULL);
2864                 if (result != ISC_R_SUCCESS)
2865                         return (result);
2866         }
2867         if (nodep != NULL) {
2868                 /*
2869                  * Note that we don't have to increment the node's reference
2870                  * count here because we're going to use the reference we
2871                  * already have in the search block.
2872                  */
2873                 *nodep = node;
2874                 search->need_cleanup = ISC_FALSE;
2875         }
2876         if (rdataset != NULL) {
2877                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2878                           isc_rwlocktype_read);
2879                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2880                               search->now, rdataset);
2881                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2882                         bind_rdataset(search->rbtdb, node,
2883                                       search->zonecut_sigrdataset,
2884                                       search->now, sigrdataset);
2885                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2886                             isc_rwlocktype_read);
2887         }
2888
2889         if (type == dns_rdatatype_dname)
2890                 return (DNS_R_DNAME);
2891         return (DNS_R_DELEGATION);
2892 }
2893
2894 static inline isc_boolean_t
2895 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2896            dns_rbtnode_t *node)
2897 {
2898         unsigned char *raw;     /* RDATASLAB */
2899         unsigned int count, size;
2900         dns_name_t ns_name;
2901         isc_boolean_t valid = ISC_FALSE;
2902         dns_offsets_t offsets;
2903         isc_region_t region;
2904         rdatasetheader_t *header;
2905
2906         /*
2907          * No additional locking is required.
2908          */
2909
2910         /*
2911          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2912          * if it occurs at a zone cut, but is not valid below it.
2913          */
2914         if (type == dns_rdatatype_ns) {
2915                 if (node != search->zonecut) {
2916                         return (ISC_FALSE);
2917                 }
2918         } else if (type != dns_rdatatype_a &&
2919                    type != dns_rdatatype_aaaa &&
2920                    type != dns_rdatatype_a6) {
2921                 return (ISC_FALSE);
2922         }
2923
2924         header = search->zonecut_rdataset;
2925         raw = (unsigned char *)header + sizeof(*header);
2926         count = raw[0] * 256 + raw[1];
2927 #if DNS_RDATASET_FIXED
2928         raw += 2 + (4 * count);
2929 #else
2930         raw += 2;
2931 #endif
2932
2933         while (count > 0) {
2934                 count--;
2935                 size = raw[0] * 256 + raw[1];
2936 #if DNS_RDATASET_FIXED
2937                 raw += 4;
2938 #else
2939                 raw += 2;
2940 #endif
2941                 region.base = raw;
2942                 region.length = size;
2943                 raw += size;
2944                 /*
2945                  * XXX Until we have rdata structures, we have no choice but
2946                  * to directly access the rdata format.
2947                  */
2948                 dns_name_init(&ns_name, offsets);
2949                 dns_name_fromregion(&ns_name, &region);
2950                 if (dns_name_compare(&ns_name, name) == 0) {
2951                         valid = ISC_TRUE;
2952                         break;
2953                 }
2954         }
2955
2956         return (valid);
2957 }
2958
2959 static inline isc_boolean_t
2960 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2961             dns_name_t *name)
2962 {
2963         dns_fixedname_t fnext;
2964         dns_fixedname_t forigin;
2965         dns_name_t *next;
2966         dns_name_t *origin;
2967         dns_name_t prefix;
2968         dns_rbtdb_t *rbtdb;
2969         dns_rbtnode_t *node;
2970         isc_result_t result;
2971         isc_boolean_t answer = ISC_FALSE;
2972         rdatasetheader_t *header;
2973
2974         rbtdb = search->rbtdb;
2975
2976         dns_name_init(&prefix, NULL);
2977         dns_fixedname_init(&fnext);
2978         next = dns_fixedname_name(&fnext);
2979         dns_fixedname_init(&forigin);
2980         origin = dns_fixedname_name(&forigin);
2981
2982         result = dns_rbtnodechain_next(chain, NULL, NULL);
2983         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2984                 node = NULL;
2985                 result = dns_rbtnodechain_current(chain, &prefix,
2986                                                   origin, &node);
2987                 if (result != ISC_R_SUCCESS)
2988                         break;
2989                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2990                           isc_rwlocktype_read);
2991                 for (header = node->data;
2992                      header != NULL;
2993                      header = header->next) {
2994                         if (header->serial <= search->serial &&
2995                             !IGNORE(header) && EXISTS(header))
2996                                 break;
2997                 }
2998                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2999                             isc_rwlocktype_read);
3000                 if (header != NULL)
3001                         break;
3002                 result = dns_rbtnodechain_next(chain, NULL, NULL);
3003         }
3004         if (result == ISC_R_SUCCESS)
3005                 result = dns_name_concatenate(&prefix, origin, next, NULL);
3006         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
3007                 answer = ISC_TRUE;
3008         return (answer);
3009 }
3010
3011 static inline isc_boolean_t
3012 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
3013         dns_fixedname_t fnext;
3014         dns_fixedname_t forigin;
3015         dns_fixedname_t fprev;
3016         dns_name_t *next;
3017         dns_name_t *origin;
3018         dns_name_t *prev;
3019         dns_name_t name;
3020         dns_name_t rname;
3021         dns_name_t tname;
3022         dns_rbtdb_t *rbtdb;
3023         dns_rbtnode_t *node;
3024         dns_rbtnodechain_t chain;
3025         isc_boolean_t check_next = ISC_TRUE;
3026         isc_boolean_t check_prev = ISC_TRUE;
3027         isc_boolean_t answer = ISC_FALSE;
3028         isc_result_t result;
3029         rdatasetheader_t *header;
3030         unsigned int n;
3031
3032         rbtdb = search->rbtdb;
3033
3034         dns_name_init(&name, NULL);
3035         dns_name_init(&tname, NULL);
3036         dns_name_init(&rname, NULL);
3037         dns_fixedname_init(&fnext);
3038         next = dns_fixedname_name(&fnext);
3039         dns_fixedname_init(&fprev);
3040         prev = dns_fixedname_name(&fprev);
3041         dns_fixedname_init(&forigin);
3042         origin = dns_fixedname_name(&forigin);
3043
3044         /*
3045          * Find if qname is at or below a empty node.
3046          * Use our own copy of the chain.
3047          */
3048
3049         chain = search->chain;
3050         do {
3051                 node = NULL;
3052                 result = dns_rbtnodechain_current(&chain, &name,
3053                                                   origin, &node);
3054                 if (result != ISC_R_SUCCESS)
3055                         break;
3056                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3057                           isc_rwlocktype_read);
3058                 for (header = node->data;
3059                      header != NULL;
3060                      header = header->next) {
3061                         if (header->serial <= search->serial &&
3062                             !IGNORE(header) && EXISTS(header))
3063                                 break;
3064                 }
3065                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3066                             isc_rwlocktype_read);
3067                 if (header != NULL)
3068                         break;
3069                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3070         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3071         if (result == ISC_R_SUCCESS)
3072                 result = dns_name_concatenate(&name, origin, prev, NULL);
3073         if (result != ISC_R_SUCCESS)
3074                 check_prev = ISC_FALSE;
3075
3076         result = dns_rbtnodechain_next(&chain, NULL, NULL);
3077         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3078                 node = NULL;
3079                 result = dns_rbtnodechain_current(&chain, &name,
3080                                                   origin, &node);
3081                 if (result != ISC_R_SUCCESS)
3082                         break;
3083                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3084                           isc_rwlocktype_read);
3085                 for (header = node->data;
3086                      header != NULL;
3087                      header = header->next) {
3088                         if (header->serial <= search->serial &&
3089                             !IGNORE(header) && EXISTS(header))
3090                                 break;
3091                 }
3092                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3093                             isc_rwlocktype_read);
3094                 if (header != NULL)
3095                         break;
3096                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3097         }
3098         if (result == ISC_R_SUCCESS)
3099                 result = dns_name_concatenate(&name, origin, next, NULL);
3100         if (result != ISC_R_SUCCESS)
3101                 check_next = ISC_FALSE;
3102
3103         dns_name_clone(qname, &rname);
3104
3105         /*
3106          * Remove the wildcard label to find the terminal name.
3107          */
3108         n = dns_name_countlabels(wname);
3109         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3110
3111         do {
3112                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3113                     (check_next && dns_name_issubdomain(next, &rname))) {
3114                         answer = ISC_TRUE;
3115                         break;
3116                 }
3117                 /*
3118                  * Remove the left hand label.
3119                  */
3120                 n = dns_name_countlabels(&rname);
3121                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3122         } while (!dns_name_equal(&rname, &tname));
3123         return (answer);
3124 }
3125
3126 static inline isc_result_t
3127 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3128               dns_name_t *qname)
3129 {
3130         unsigned int i, j;
3131         dns_rbtnode_t *node, *level_node, *wnode;
3132         rdatasetheader_t *header;
3133         isc_result_t result = ISC_R_NOTFOUND;
3134         dns_name_t name;
3135         dns_name_t *wname;
3136         dns_fixedname_t fwname;
3137         dns_rbtdb_t *rbtdb;
3138         isc_boolean_t done, wild, active;
3139         dns_rbtnodechain_t wchain;
3140
3141         /*
3142          * Caller must be holding the tree lock and MUST NOT be holding
3143          * any node locks.
3144          */
3145
3146         /*
3147          * Examine each ancestor level.  If the level's wild bit
3148          * is set, then construct the corresponding wildcard name and
3149          * search for it.  If the wildcard node exists, and is active in
3150          * this version, we're done.  If not, then we next check to see
3151          * if the ancestor is active in this version.  If so, then there
3152          * can be no possible wildcard match and again we're done.  If not,
3153          * continue the search.
3154          */
3155
3156         rbtdb = search->rbtdb;
3157         i = search->chain.level_matches;
3158         done = ISC_FALSE;
3159         node = *nodep;
3160         do {
3161                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3162                           isc_rwlocktype_read);
3163
3164                 /*
3165                  * First we try to figure out if this node is active in
3166                  * the search's version.  We do this now, even though we
3167                  * may not need the information, because it simplifies the
3168                  * locking and code flow.
3169                  */
3170                 for (header = node->data;
3171                      header != NULL;
3172                      header = header->next) {
3173                         if (header->serial <= search->serial &&
3174                             !IGNORE(header) && EXISTS(header))
3175                                 break;
3176                 }
3177                 if (header != NULL)
3178                         active = ISC_TRUE;
3179                 else
3180                         active = ISC_FALSE;
3181
3182                 if (node->wild)
3183                         wild = ISC_TRUE;
3184                 else
3185                         wild = ISC_FALSE;
3186
3187                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3188                             isc_rwlocktype_read);
3189
3190                 if (wild) {
3191                         /*
3192                          * Construct the wildcard name for this level.
3193                          */
3194                         dns_name_init(&name, NULL);
3195                         dns_rbt_namefromnode(node, &name);
3196                         dns_fixedname_init(&fwname);
3197                         wname = dns_fixedname_name(&fwname);
3198                         result = dns_name_concatenate(dns_wildcardname, &name,
3199                                                       wname, NULL);
3200                         j = i;
3201                         while (result == ISC_R_SUCCESS && j != 0) {
3202                                 j--;
3203                                 level_node = search->chain.levels[j];
3204                                 dns_name_init(&name, NULL);
3205                                 dns_rbt_namefromnode(level_node, &name);
3206                                 result = dns_name_concatenate(wname,
3207                                                               &name,
3208                                                               wname,
3209                                                               NULL);
3210                         }
3211                         if (result != ISC_R_SUCCESS)
3212                                 break;
3213
3214                         wnode = NULL;
3215                         dns_rbtnodechain_init(&wchain, NULL);
3216                         result = dns_rbt_findnode(rbtdb->tree, wname,
3217                                                   NULL, &wnode, &wchain,
3218                                                   DNS_RBTFIND_EMPTYDATA,
3219                                                   NULL, NULL);
3220                         if (result == ISC_R_SUCCESS) {
3221                                 nodelock_t *lock;
3222
3223                                 /*
3224                                  * We have found the wildcard node.  If it
3225                                  * is active in the search's version, we're
3226                                  * done.
3227                                  */
3228                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3229                                 NODE_LOCK(lock, isc_rwlocktype_read);
3230                                 for (header = wnode->data;
3231                                      header != NULL;
3232                                      header = header->next) {
3233                                         if (header->serial <= search->serial &&
3234                                             !IGNORE(header) && EXISTS(header))
3235                                                 break;
3236                                 }
3237                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3238                                 if (header != NULL ||
3239                                     activeempty(search, &wchain, wname)) {
3240                                         if (activeemtpynode(search, qname,
3241                                                             wname)) {
3242                                                 return (ISC_R_NOTFOUND);
3243                                         }
3244                                         /*
3245                                          * The wildcard node is active!
3246                                          *
3247                                          * Note: result is still ISC_R_SUCCESS
3248                                          * so we don't have to set it.
3249                                          */
3250                                         *nodep = wnode;
3251                                         break;
3252                                 }
3253                         } else if (result != ISC_R_NOTFOUND &&
3254                                    result != DNS_R_PARTIALMATCH) {
3255                                 /*
3256                                  * An error has occurred.  Bail out.
3257                                  */
3258                                 break;
3259                         }
3260                 }
3261
3262                 if (active) {
3263                         /*
3264                          * The level node is active.  Any wildcarding
3265                          * present at higher levels has no
3266                          * effect and we're done.
3267                          */
3268                         result = ISC_R_NOTFOUND;
3269                         break;
3270                 }
3271
3272                 if (i > 0) {
3273                         i--;
3274                         node = search->chain.levels[i];
3275                 } else
3276                         done = ISC_TRUE;
3277         } while (!done);
3278
3279         return (result);
3280 }
3281
3282 static isc_boolean_t
3283 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3284 {
3285         dns_rdata_t rdata = DNS_RDATA_INIT;
3286         dns_rdata_nsec3_t nsec3;
3287         unsigned char *raw;                     /* RDATASLAB */
3288         unsigned int rdlen, count;
3289         isc_region_t region;
3290         isc_result_t result;
3291
3292         REQUIRE(header->type == dns_rdatatype_nsec3);
3293
3294         raw = (unsigned char *)header + sizeof(*header);
3295         count = raw[0] * 256 + raw[1]; /* count */
3296 #if DNS_RDATASET_FIXED
3297         raw += count * 4 + 2;
3298 #else
3299         raw += 2;
3300 #endif
3301         while (count-- > 0) {
3302                 rdlen = raw[0] * 256 + raw[1];
3303 #if DNS_RDATASET_FIXED
3304                 raw += 4;
3305 #else
3306                 raw += 2;
3307 #endif
3308                 region.base = raw;
3309                 region.length = rdlen;
3310                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3311                                      dns_rdatatype_nsec3, &region);
3312                 raw += rdlen;
3313                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3314                 INSIST(result == ISC_R_SUCCESS);
3315                 if (nsec3.hash == search->rbtversion->hash &&
3316                     nsec3.iterations == search->rbtversion->iterations &&
3317                     nsec3.salt_length == search->rbtversion->salt_length &&
3318                     memcmp(nsec3.salt, search->rbtversion->salt,
3319                            nsec3.salt_length) == 0)
3320                         return (ISC_TRUE);
3321                 dns_rdata_reset(&rdata);
3322         }
3323         return (ISC_FALSE);
3324 }
3325
3326 /*
3327  * Find node of the NSEC/NSEC3 record that is 'name'.
3328  */
3329 static inline isc_result_t
3330 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3331                     dns_name_t *name, dns_name_t *origin,
3332                     dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3333                     isc_boolean_t *firstp)
3334 {
3335         dns_fixedname_t ftarget;
3336         dns_name_t *target;
3337         dns_rbtnode_t *nsecnode;
3338         isc_result_t result;
3339
3340         REQUIRE(nodep != NULL && *nodep == NULL);
3341
3342         if (type == dns_rdatatype_nsec3) {
3343                 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3344                 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3345                         return (result);
3346                 result = dns_rbtnodechain_current(&search->chain, name, origin,
3347                                                   nodep);
3348                 return (result);
3349         }
3350
3351         dns_fixedname_init(&ftarget);
3352         target = dns_fixedname_name(&ftarget);
3353
3354         for (;;) {
3355                 if (*firstp) {
3356                         /*
3357                          * Construct the name of the second node to check.
3358                          * It is the first node sought in the NSEC tree.
3359                          */
3360                         *firstp = ISC_FALSE;
3361                         dns_rbtnodechain_init(nsecchain, NULL);
3362                         result = dns_name_concatenate(name, origin,
3363                                                       target, NULL);
3364                         if (result != ISC_R_SUCCESS)
3365                                 return (result);
3366                         nsecnode = NULL;
3367                         result = dns_rbt_findnode(search->rbtdb->nsec,
3368                                                   target, NULL,
3369                                                   &nsecnode, nsecchain,
3370                                                   DNS_RBTFIND_NOOPTIONS,
3371                                                   NULL, NULL);
3372                         if (result == ISC_R_SUCCESS) {
3373                                 /*
3374                                  * Since this was the first loop, finding the
3375                                  * name in the NSEC tree implies that the first
3376                                  * node checked in the main tree had an
3377                                  * unacceptable NSEC record.
3378                                  * Try the previous node in the NSEC tree.
3379                                  */
3380                                 result = dns_rbtnodechain_prev(nsecchain,
3381                                                                name, origin);
3382                                 if (result == DNS_R_NEWORIGIN)
3383                                         result = ISC_R_SUCCESS;
3384                         } else if (result == ISC_R_NOTFOUND ||
3385                                    result == DNS_R_PARTIALMATCH) {
3386                                 result = dns_rbtnodechain_current(nsecchain,
3387                                                         name, origin, NULL);
3388                                 if (result == ISC_R_NOTFOUND)
3389                                         result = ISC_R_NOMORE;
3390                         }
3391                 } else {
3392                         /*
3393                          * This is a second or later trip through the auxiliary
3394                          * tree for the name of a third or earlier NSEC node in
3395                          * the main tree.  Previous trips through the NSEC tree
3396                          * must have found nodes in the main tree with NSEC
3397                          * records.  Perhaps they lacked signature records.
3398                          */
3399                         result = dns_rbtnodechain_prev(nsecchain, name, origin);
3400                         if (result == DNS_R_NEWORIGIN)
3401                                 result = ISC_R_SUCCESS;
3402                 }
3403                 if (result != ISC_R_SUCCESS)
3404                         return (result);
3405
3406                 /*
3407                  * Construct the name to seek in the main tree.
3408                  */
3409                 result = dns_name_concatenate(name, origin, target, NULL);
3410                 if (result != ISC_R_SUCCESS)
3411                         return (result);
3412
3413                 *nodep = NULL;
3414                 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3415                                           nodep, &search->chain,
3416                                           DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3417                 if (result == ISC_R_SUCCESS)
3418                         return (result);
3419
3420                 /*
3421                  * There should always be a node in the main tree with the
3422                  * same name as the node in the auxiliary NSEC tree, except for
3423                  * nodes in the auxiliary tree that are awaiting deletion.
3424                  */
3425                 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3426                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3427                                       DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3428                                       "previous_closest_nsec(): %s",
3429                                       isc_result_totext(result));
3430                         return (DNS_R_BADDB);
3431                 }
3432         }
3433 }
3434
3435 /*
3436  * Find the NSEC/NSEC3 which is or before the current point on the
3437  * search chain.  For NSEC3 records only NSEC3 records that match the
3438  * current NSEC3PARAM record are considered.
3439  */
3440 static inline isc_result_t
3441 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3442                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3443                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3444                   dns_db_secure_t secure)
3445 {
3446         dns_rbtnode_t *node, *prevnode;
3447         rdatasetheader_t *header, *header_next, *found, *foundsig;
3448         dns_rbtnodechain_t nsecchain;
3449         isc_boolean_t empty_node;
3450         isc_result_t result;
3451         dns_fixedname_t fname, forigin;
3452         dns_name_t *name, *origin;
3453         dns_rdatatype_t type;
3454         rbtdb_rdatatype_t sigtype;
3455         isc_boolean_t wraps;
3456         isc_boolean_t first = ISC_TRUE;
3457         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3458
3459         if (tree == search->rbtdb->nsec3) {
3460                 type = dns_rdatatype_nsec3;
3461                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3462                 wraps = ISC_TRUE;
3463         } else {
3464                 type = dns_rdatatype_nsec;
3465                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3466                 wraps = ISC_FALSE;
3467         }
3468
3469         /*
3470          * Use the auxiliary tree only starting with the second node in the
3471          * hope that the original node will be right much of the time.
3472          */
3473         dns_fixedname_init(&fname);
3474         name = dns_fixedname_name(&fname);
3475         dns_fixedname_init(&forigin);
3476         origin = dns_fixedname_name(&forigin);
3477  again:
3478         node = NULL;
3479         prevnode = NULL;
3480         result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3481         if (result != ISC_R_SUCCESS)
3482                 return (result);
3483         do {
3484                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3485                           isc_rwlocktype_read);
3486                 found = NULL;
3487                 foundsig = NULL;
3488                 empty_node = ISC_TRUE;
3489                 for (header = node->data;
3490                      header != NULL;
3491                      header = header_next) {
3492                         header_next = header->next;
3493                         /*
3494                          * Look for an active, extant NSEC or RRSIG NSEC.
3495                          */
3496                         do {
3497                                 if (header->serial <= search->serial &&
3498                                     !IGNORE(header)) {
3499                                         /*
3500                                          * Is this a "this rdataset doesn't
3501                                          * exist" record?
3502                                          */
3503                                         if (NONEXISTENT(header))
3504                                                 header = NULL;
3505                                         break;
3506                                 } else
3507                                         header = header->down;
3508                         } while (header != NULL);
3509                         if (header != NULL) {
3510                                 /*
3511                                  * We now know that there is at least one
3512                                  * active rdataset at this node.
3513                                  */
3514                                 empty_node = ISC_FALSE;
3515                                 if (header->type == type) {
3516                                         found = header;
3517                                         if (foundsig != NULL)
3518                                                 break;
3519                                 } else if (header->type == sigtype) {
3520                                         foundsig = header;
3521                                         if (found != NULL)
3522                                                 break;
3523                                 }
3524                         }
3525                 }
3526                 if (!empty_node) {
3527                         if (found != NULL && search->rbtversion->havensec3 &&
3528                             found->type == dns_rdatatype_nsec3 &&
3529                             !matchparams(found, search)) {
3530                                 empty_node = ISC_TRUE;
3531                                 found = NULL;
3532                                 foundsig = NULL;
3533                                 result = previous_closest_nsec(type, search,
3534                                                                name, origin,
3535                                                                &prevnode, NULL,
3536                                                                NULL);
3537                         } else if (found != NULL &&
3538                                    (foundsig != NULL || !need_sig)) {
3539                                 /*
3540                                  * We've found the right NSEC/NSEC3 record.
3541                                  *
3542                                  * Note: for this to really be the right
3543                                  * NSEC record, it's essential that the NSEC
3544                                  * records of any nodes obscured by a zone
3545                                  * cut have been removed; we assume this is
3546                                  * the case.
3547                                  */
3548                                 result = dns_name_concatenate(name, origin,
3549                                                               foundname, NULL);
3550                                 if (result == ISC_R_SUCCESS) {
3551                                         if (nodep != NULL) {
3552                                                 new_reference(search->rbtdb,
3553                                                               node);
3554                                                 *nodep = node;
3555                                         }
3556                                         bind_rdataset(search->rbtdb, node,
3557                                                       found, search->now,
3558                                                       rdataset);
3559                                         if (foundsig != NULL)
3560                                                 bind_rdataset(search->rbtdb,
3561                                                               node,
3562                                                               foundsig,
3563                                                               search->now,
3564                                                               sigrdataset);
3565                                 }
3566                         } else if (found == NULL && foundsig == NULL) {
3567                                 /*
3568                                  * This node is active, but has no NSEC or
3569                                  * RRSIG NSEC.  That means it's glue or
3570                                  * other obscured zone data that isn't
3571                                  * relevant for our search.  Treat the
3572                                  * node as if it were empty and keep looking.
3573                                  */
3574                                 empty_node = ISC_TRUE;
3575                                 result = previous_closest_nsec(type, search,
3576                                                                name, origin,
3577                                                                &prevnode,
3578                                                                &nsecchain,
3579                                                                &first);
3580                         } else {
3581                                 /*
3582                                  * We found an active node, but either the
3583                                  * NSEC or the RRSIG NSEC is missing.  This
3584                                  * shouldn't happen.
3585                                  */
3586                                 result = DNS_R_BADDB;
3587                         }
3588                 } else {
3589                         /*
3590                          * This node isn't active.  We've got to keep
3591                          * looking.
3592                          */
3593                         result = previous_closest_nsec(type, search,
3594                                                        name, origin, &prevnode,
3595                                                        &nsecchain, &first);
3596                 }
3597                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3598                             isc_rwlocktype_read);
3599                 node = prevnode;
3600                 prevnode = NULL;
3601         } while (empty_node && result == ISC_R_SUCCESS);
3602
3603         if (!first)
3604                 dns_rbtnodechain_invalidate(&nsecchain);
3605
3606         if (result == ISC_R_NOMORE && wraps) {
3607                 result = dns_rbtnodechain_last(&search->chain, tree,
3608                                                NULL, NULL);
3609                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3610                         wraps = ISC_FALSE;
3611                         goto again;
3612                 }
3613         }
3614
3615         /*
3616          * If the result is ISC_R_NOMORE, then we got to the beginning of
3617          * the database and didn't find a NSEC record.  This shouldn't
3618          * happen.
3619          */
3620         if (result == ISC_R_NOMORE)
3621                 result = DNS_R_BADDB;
3622
3623         return (result);
3624 }
3625
/*
 * Look up 'type' at 'name' in one version of a zone database.
 *
 * db          - must be a valid rbtdb (checked with REQUIRE below).
 * name        - the name to search for.
 * version     - version to search; if NULL the current version is attached
 *               for the duration of the call and closed before returning.
 * type        - the rdata type wanted; dns_rdatatype_any matches any
 *               active rdataset at the node.
 * options     - DNS_DBFIND_* flags.  The ones tested in this function are
 *               FORCENSEC3 (search the nsec3 tree instead of the main
 *               tree), FORCENSEC, GLUEOK and VALIDATEGLUE.
 * now         - ignored; search.now is set to 0.
 * nodep       - if not NULL, receives a referenced node on the paths that
 *               store to it here; it is also handed on to
 *               setup_delegation() and find_closest_nsec().
 * foundname   - passed to dns_rbt_findnode(); on a wildcard match 'name'
 *               is copied into it, and DNS_NAMEATTR_WILDCARD is set on it
 *               when the answer came from a wildcard.
 * rdataset,
 * sigrdataset - bound to the answer and its RRSIG when a non-ANY answer
 *               is found, or to the node's NSEC and RRSIG NSEC on an
 *               NXRRSET answer from a secure non-NSEC3 version (or when
 *               FORCENSEC is set).
 *
 * Result codes assigned directly in this function: ISC_R_SUCCESS,
 * DNS_R_CNAME, DNS_R_GLUE, DNS_R_ZONECUT, DNS_R_NXRRSET, DNS_R_NXDOMAIN,
 * DNS_R_EMPTYNAME, DNS_R_EMPTYWILD and DNS_R_BADDB.  Other results from
 * dns_rbt_findnode(), find_wildcard(), dns_name_copy(),
 * find_closest_nsec() and setup_delegation() are returned unchanged.
 *
 * Locking: the tree lock is held for reading from the search until
 * 'tree_exit'; the node lock of the matched node is held for reading while
 * its rdataset headers are examined and is released before every jump to
 * 'partial_match' or 'tree_exit'.
 */
3626 static isc_result_t
3627 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3628           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3629           dns_dbnode_t **nodep, dns_name_t *foundname,
3630           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3631 {
3632         dns_rbtnode_t *node = NULL;
3633         isc_result_t result;
3634         rbtdb_search_t search;
3635         isc_boolean_t cname_ok = ISC_TRUE;
3636         isc_boolean_t close_version = ISC_FALSE;
3637         isc_boolean_t maybe_zonecut = ISC_FALSE;
3638         isc_boolean_t at_zonecut = ISC_FALSE;
3639         isc_boolean_t wild;
3640         isc_boolean_t empty_node;
3641         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3642         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3643         rbtdb_rdatatype_t sigtype;
3644         isc_boolean_t active;
3645         dns_rbtnodechain_t chain;
3646         nodelock_t *lock;
3647         dns_rbt_t *tree;
3648
3649         search.rbtdb = (dns_rbtdb_t *)db;
3650
3651         REQUIRE(VALID_RBTDB(search.rbtdb));
3652
3653         /*
3654          * We don't care about 'now'.
3655          */
3656         UNUSED(now);
3657
3658         /*
3659          * If the caller didn't supply a version, attach to the current
3660          * version.
3661          */
3662         if (version == NULL) {
3663                 currentversion(db, &version);
3664                 close_version = ISC_TRUE;
3665         }
3666
3667         search.rbtversion = version;
3668         search.serial = search.rbtversion->serial;
3669         search.options = options;
3670         search.copy_name = ISC_FALSE;
3671         search.need_cleanup = ISC_FALSE;
3672         search.wild = ISC_FALSE;
3673         search.zonecut = NULL;
3674         dns_fixedname_init(&search.zonecut_name);
3675         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3676         search.now = 0;
3677
3678         /*
3679          * 'wild' will be true iff. we've matched a wildcard.
3680          */
3681         wild = ISC_FALSE;
3682
3683         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3684
3685         /*
3686          * Search down from the root of the tree.  If, while going down, we
3687          * encounter a callback node, zone_zonecut_callback() will search the
3688          * rdatasets at the zone cut for active DNAME or NS rdatasets.
              *
              * DNS_DBFIND_FORCENSEC3 selects the nsec3 tree; otherwise the
              * main tree is searched.
3689          */
3690         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3691                                                          search.rbtdb->tree;
3692         result = dns_rbt_findnode(tree, name, foundname, &node,
3693                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3694                                   zone_zonecut_callback, &search);
3695
3696         if (result == DNS_R_PARTIALMATCH) {
             /*
              * This label is also entered by goto from the header scan
              * below (with the node lock already released) when an exact
              * match turns out to have no active rdatasets in this version
              * or carries an NSEC3 that fails matchparams().
              */
3697         partial_match:
3698                 if (search.zonecut != NULL) {
3699                     result = setup_delegation(&search, nodep, foundname,
3700                                               rdataset, sigrdataset);
3701                     goto tree_exit;
3702                 }
3703
3704                 if (search.wild) {
3705                         /*
3706                          * At least one of the levels in the search chain
3707                          * potentially has a wildcard.  For each such level,
3708                          * we must see if there's a matching wildcard active
3709                          * in the current version.
3710                          */
3711                         result = find_wildcard(&search, &node, name);
3712                         if (result == ISC_R_SUCCESS) {
3713                                 result = dns_name_copy(name, foundname, NULL);
3714                                 if (result != ISC_R_SUCCESS)
3715                                         goto tree_exit;
3716                                 wild = ISC_TRUE;
3717                                 goto found;
3718                         }
3719                         else if (result != ISC_R_NOTFOUND)
3720                                 goto tree_exit;
3721                 }
3722
                     /*
                      * activeempty() is given a copy of the search chain
                      * rather than search.chain itself.  Its result decides
                      * between DNS_R_EMPTYNAME and DNS_R_NXDOMAIN below.
                      * NOTE(review): presumably the copy is taken so that
                      * search.chain stays positioned for find_closest_nsec()
                      * -- confirm against activeempty().
                      */
3723                 chain = search.chain;
3724                 active = activeempty(&search, &chain, name);
3725
3726                 /*
3727                  * If we're here, then the name does not exist, is not
3728                  * beneath a zonecut, and there's no matching wildcard.
3729                  */
3730                 if ((search.rbtversion->secure == dns_db_secure &&
3731                      !search.rbtversion->havensec3) ||
3732                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3733                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3734                 {
3735                         result = find_closest_nsec(&search, nodep, foundname,
3736                                                    rdataset, sigrdataset, tree,
3737                                                    search.rbtversion->secure);
                             /*
                              * Only a successful NSEC lookup is turned into
                              * the negative answer; any other result is
                              * returned as is.
                              */
3738                         if (result == ISC_R_SUCCESS)
3739                                 result = active ? DNS_R_EMPTYNAME :
3740                                                   DNS_R_NXDOMAIN;
3741                 } else
3742                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3743                 goto tree_exit;
3744         } else if (result != ISC_R_SUCCESS)
3745                 goto tree_exit;
3746
3747  found:
3748         /*
3749          * We have found a node whose name is the desired name, or we
3750          * have matched a wildcard.
3751          */
3752
3753         if (search.zonecut != NULL) {
3754                 /*
3755                  * If we're beneath a zone cut, we don't want to look for
3756                  * CNAMEs because they're not legitimate zone glue.
3757                  */
3758                 cname_ok = ISC_FALSE;
3759         } else {
3760                 /*
3761                  * The node may be a zone cut itself.  If it might be one,
3762                  * make sure we check for it later.
3763                  *
3764                  * DS records live above the zone cut in ordinary zones so
3765                  * we want to ignore any referral.
3766                  *
3767                  * Stub zones don't have anything "above" the delegation so
3768                  * we always return a referral.
3769                  */
3770                 if (node->find_callback &&
3771                     ((node != search.rbtdb->origin_node &&
3772                       !dns_rdatatype_atparent(type)) ||
3773                      IS_STUB(search.rbtdb)))
3774                         maybe_zonecut = ISC_TRUE;
3775         }
3776
3777         /*
3778          * Certain DNSSEC types are not subject to CNAME matching
3779          * (RFC4035, section 2.5 and RFC3007).
3780          *
3781          * We don't check for RRSIG, because we don't store RRSIG records
3782          * directly.
3783          */
3784         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3785                 cname_ok = ISC_FALSE;
3786
3787         /*
3788          * We now go looking for rdata...
3789          */
3790
3791         lock = &search.rbtdb->node_locks[node->locknum].lock;
3792         NODE_LOCK(lock, isc_rwlocktype_read);
3793
3794         found = NULL;
3795         foundsig = NULL;
3796         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3797         nsecheader = NULL;
3798         nsecsig = NULL;
3799         cnamesig = NULL;
3800         empty_node = ISC_TRUE;
3801         for (header = node->data; header != NULL; header = header_next) {
3802                 header_next = header->next;
3803                 /*
3804                  * Look for an active, extant rdataset.
                      *
                      * Follow the 'down' links to the first header whose
                      * serial is <= search.serial and which is not marked
                      * IGNORE.  'header' ends up NULL if there is no such
                      * header or if the one found is a NONEXISTENT marker.
3805                  */
3806                 do {
3807                         if (header->serial <= search.serial &&
3808                             !IGNORE(header)) {
3809                                 /*
3810                                  * Is this a "this rdataset doesn't
3811                                  * exist" record?
3812                                  */
3813                                 if (NONEXISTENT(header))
3814                                         header = NULL;
3815                                 break;
3816                         } else
3817                                 header = header->down;
3818                 } while (header != NULL);
3819                 if (header != NULL) {
3820                         /*
3821                          * We now know that there is at least one active
3822                          * rdataset at this node.
3823                          */
3824                         empty_node = ISC_FALSE;
3825
3826                         /*
3827                          * Do special zone cut handling, if requested.
3828                          */
3829                         if (maybe_zonecut &&
3830                             header->type == dns_rdatatype_ns) {
3831                                 /*
3832                                  * We increment the reference count on node to
3833                                  * ensure that search->zonecut_rdataset will
3834                                  * still be valid later.
3835                                  */
3836                                 new_reference(search.rbtdb, node);
3837                                 search.zonecut = node;
3838                                 search.zonecut_rdataset = header;
3839                                 search.zonecut_sigrdataset = NULL;
3840                                 search.need_cleanup = ISC_TRUE;
3841                                 maybe_zonecut = ISC_FALSE;
3842                                 at_zonecut = ISC_TRUE;
3843                                 /*
3844                                  * It is not clear if KEY should still be
3845                                  * allowed at the parent side of the zone
3846                                  * cut or not.  It is needed for RFC3007
3847                                  * validated updates.
3848                                  */
3849                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3850                                     && type != dns_rdatatype_nsec
3851                                     && type != dns_rdatatype_key) {
3852                                         /*
3853                                          * Glue is not OK, but any answer we
3854                                          * could return would be glue.  Return
3855                                          * the delegation.
3856                                          */
3857                                         found = NULL;
3858                                         break;
3859                                 }
                                     /*
                                      * The answer and its signature were
                                      * both seen earlier in the list, so
                                      * there is nothing left to collect.
                                      */
3860                                 if (found != NULL && foundsig != NULL)
3861                                         break;
3862                         }
3863
3864
3865                         /*
3866                          * If the NSEC3 record doesn't match the chain
3867                          * we are using behave as if it isn't here.
3868                          */
3869                         if (header->type == dns_rdatatype_nsec3 &&
3870                            !matchparams(header, &search)) {
3871                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3872                                 goto partial_match;
3873                         }
3874                         /*
3875                          * If we found a type we were looking for,
3876                          * remember it.
3877                          */
3878                         if (header->type == type ||
3879                             type == dns_rdatatype_any ||
3880                             (header->type == dns_rdatatype_cname &&
3881                              cname_ok)) {
3882                                 /*
3883                                  * We've found the answer!
3884                                  */
3885                                 found = header;
3886                                 if (header->type == dns_rdatatype_cname &&
3887                                     cname_ok) {
3888                                         /*
3889                                          * We may be finding a CNAME instead
3890                                          * of the desired type.
3891                                          *
3892                                          * If we've already got the CNAME RRSIG,
3893                                          * use it, otherwise change sigtype
3894                                          * so that we find it.
3895                                          */
3896                                         if (cnamesig != NULL)
3897                                                 foundsig = cnamesig;
3898                                         else
3899                                                 sigtype =
3900                                                     RBTDB_RDATATYPE_SIGCNAME;
3901                                 }
3902                                 /*
3903                                  * If we've got all we need, end the search.
3904                                  */
3905                                 if (!maybe_zonecut && foundsig != NULL)
3906                                         break;
3907                         } else if (header->type == sigtype) {
3908                                 /*
3909                                  * We've found the RRSIG rdataset for our
3910                                  * target type.  Remember it.
3911                                  */
3912                                 foundsig = header;
3913                                 /*
3914                                  * If we've got all we need, end the search.
3915                                  */
3916                                 if (!maybe_zonecut && found != NULL)
3917                                         break;
3918                         } else if (header->type == dns_rdatatype_nsec &&
3919                                    !search.rbtversion->havensec3) {
3920                                 /*
3921                                  * Remember a NSEC rdataset even if we're
3922                                  * not specifically looking for it, because
3923                                  * we might need it later.
3924                                  */
3925                                 nsecheader = header;
3926                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3927                                    !search.rbtversion->havensec3) {
3928                                 /*
3929                                  * If we need the NSEC rdataset, we'll also
3930                                  * need its signature.
3931                                  */
3932                                 nsecsig = header;
3933                         } else if (cname_ok &&
3934                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3935                                 /*
3936                                  * If we get a CNAME match, we'll also need
3937                                  * its signature.
3938                                  */
3939                                 cnamesig = header;
3940                         }
3941                 }
3942         }
3943
3944         if (empty_node) {
3945                 /*
3946                  * We have an exact match for the name, but there are no
3947                  * active rdatasets in the desired version.  That means that
3948                  * this node doesn't exist in the desired version, and that
3949                  * we really have a partial match.
                      *
                      * An empty node reached through a wildcard match is not
                      * retried as a partial match; it falls through to the
                      * 'found == NULL' handling below.
3950                  */
3951                 if (!wild) {
3952                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3953                         goto partial_match;
3954                 }
3955         }
3956
3957         /*
3958          * If we didn't find what we were looking for...
3959          */
3960         if (found == NULL) {
3961                 if (search.zonecut != NULL) {
3962                         /*
3963                          * We were trying to find glue at a node beneath a
3964                          * zone cut, but didn't.
3965                          *
3966                          * Return the delegation.
3967                          */
3968                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3969                         result = setup_delegation(&search, nodep, foundname,
3970                                                   rdataset, sigrdataset);
3971                         goto tree_exit;
3972                 }
3973                 /*
3974                  * The desired type doesn't exist.
3975                  */
3976                 result = DNS_R_NXRRSET;
3977                 if (search.rbtversion->secure == dns_db_secure &&
3978                     !search.rbtversion->havensec3 &&
3979                     (nsecheader == NULL || nsecsig == NULL)) {
3980                         /*
3981                          * The zone is secure but there's no NSEC,
3982                          * or the NSEC has no signature!
                              *
                              * For an ordinary node that is reported as a bad
                              * database.  For a wildcard match the NSEC is
                              * instead looked up with find_closest_nsec() in
                              * the main tree and a successful lookup is
                              * reported as DNS_R_EMPTYWILD.
3983                          */
3984                         if (!wild) {
3985                                 result = DNS_R_BADDB;
3986                                 goto node_exit;
3987                         }
3988
3989                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3990                         result = find_closest_nsec(&search, nodep, foundname,
3991                                                    rdataset, sigrdataset,
3992                                                    search.rbtdb->tree,
3993                                                    search.rbtversion->secure);
3994                         if (result == ISC_R_SUCCESS)
3995                                 result = DNS_R_EMPTYWILD;
3996                         goto tree_exit;
3997                 }
3998                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3999                     nsecheader == NULL)
4000                 {
4001                         /*
4002                          * There's no NSEC record, and we were told
4003                          * to find one.
4004                          */
4005                         result = DNS_R_BADDB;
4006                         goto node_exit;
4007                 }
4008                 if (nodep != NULL) {
4009                         new_reference(search.rbtdb, node);
4010                         *nodep = node;
4011                 }
                     /*
                      * Return the node's NSEC (and its RRSIG, if present)
                      * along with DNS_R_NXRRSET.  The two checks above
                      * guarantee that nsecheader is not NULL whenever this
                      * condition holds.
                      */
4012                 if ((search.rbtversion->secure == dns_db_secure &&
4013                      !search.rbtversion->havensec3) ||
4014                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
4015                 {
4016                         bind_rdataset(search.rbtdb, node, nsecheader,
4017                                       0, rdataset);
4018                         if (nsecsig != NULL)
4019                                 bind_rdataset(search.rbtdb, node,
4020                                               nsecsig, 0, sigrdataset);
4021                 }
4022                 if (wild)
4023                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4024                 goto node_exit;
4025         }
4026
4027         /*
4028          * We found what we were looking for, or we found a CNAME.
4029          */
4030
4031         if (type != found->type &&
4032             type != dns_rdatatype_any &&
4033             found->type == dns_rdatatype_cname) {
4034                 /*
4035                  * We weren't doing an ANY query and we found a CNAME instead
4036                  * of the type we were looking for, so we need to indicate
4037                  * that result to the caller.
4038                  */
4039                 result = DNS_R_CNAME;
4040         } else if (search.zonecut != NULL) {
4041                 /*
4042                  * If we're beneath a zone cut, we must indicate that the
4043                  * result is glue, unless we're actually at the zone cut
4044                  * and the type is NSEC or KEY.
4045                  */
4046                 if (search.zonecut == node) {
4047                         /*
4048                          * It is not clear if KEY should still be
4049                          * allowed at the parent side of the zone
4050                          * cut or not.  It is needed for RFC3007
4051                          * validated updates.
4052                          */
4053                         if (type == dns_rdatatype_nsec ||
4054                             type == dns_rdatatype_nsec3 ||
4055                             type == dns_rdatatype_key)
4056                                 result = ISC_R_SUCCESS;
4057                         else if (type == dns_rdatatype_any)
4058                                 result = DNS_R_ZONECUT;
4059                         else
4060                                 result = DNS_R_GLUE;
4061                 } else
4062                         result = DNS_R_GLUE;
4063                 /*
4064                  * We might have found data that isn't glue, but was occluded
4065                  * by a dynamic update.  If the caller cares about this, they
4066                  * will have told us to validate glue.
4067                  *
4068                  * XXX We should cache the glue validity state!
4069                  */
4070                 if (result == DNS_R_GLUE &&
4071                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4072                     !valid_glue(&search, foundname, type, node)) {
4073                         NODE_UNLOCK(lock, isc_rwlocktype_read);
4074                         result = setup_delegation(&search, nodep, foundname,
4075                                                   rdataset, sigrdataset);
4076                     goto tree_exit;
4077                 }
4078         } else {
4079                 /*
4080                  * An ordinary successful query!
4081                  */
4082                 result = ISC_R_SUCCESS;
4083         }
4084
             /*
              * Hand the node to the caller.  When the node is itself the
              * zone cut (at_zonecut), a reference was already taken on it
              * when the cut was recorded during the header scan; that
              * reference is given to the caller, so need_cleanup is cleared
              * to stop it being released at the end of the function.
              */
4085         if (nodep != NULL) {
4086                 if (!at_zonecut)
4087                         new_reference(search.rbtdb, node);
4088                 else
4089                         search.need_cleanup = ISC_FALSE;
4090                 *nodep = node;
4091         }
4092
             /*
              * For an ANY query no rdataset is bound here; only the result
              * code and the node are returned.
              */
4093         if (type != dns_rdatatype_any) {
4094                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4095                 if (foundsig != NULL)
4096                         bind_rdataset(search.rbtdb, node, foundsig, 0,
4097                                       sigrdataset);
4098         }
4099
4100         if (wild)
4101                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4102
             /*
              * node_exit: reached with the node lock still held.
              * tree_exit: reached with only the tree lock held.
              */
4103  node_exit:
4104         NODE_UNLOCK(lock, isc_rwlocktype_read);
4105
4106  tree_exit:
4107         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4108
4109         /*
4110          * If we found a zonecut but aren't going to use it, we have to
4111          * let go of it.
4112          */
4113         if (search.need_cleanup) {
4114                 node = search.zonecut;
4115                 INSIST(node != NULL);
4116                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4117
4118                 NODE_LOCK(lock, isc_rwlocktype_read);
4119                 decrement_reference(search.rbtdb, node, 0,
4120                                     isc_rwlocktype_read, isc_rwlocktype_none,
4121                                     ISC_FALSE);
4122                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4123         }
4124
4125         if (close_version)
4126                 closeversion(db, &version, ISC_FALSE);
4127
4128         dns_rbtnodechain_reset(&search.chain);
4129
4130         return (result);
4131 }
4132
/*
 * Stub that must never be called.
 *
 * Every argument is ignored.  Calling this function is reported through
 * FATAL_ERROR() with the message "zone_findzonecut() called!".  The
 * return statement after it is marked NOTREACHED.
 *
 * NOTE(review): presumably this exists only so that a function with the
 * findzonecut signature is available for zone databases -- confirm
 * against the place where it is referenced.
 */
4133 static isc_result_t
4134 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4135                  isc_stdtime_t now, dns_dbnode_t **nodep,
4136                  dns_name_t *foundname,
4137                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4138 {
4139         UNUSED(db);
4140         UNUSED(name);
4141         UNUSED(options);
4142         UNUSED(now);
4143         UNUSED(nodep);
4144         UNUSED(foundname);
4145         UNUSED(rdataset);
4146         UNUSED(sigrdataset);
4147
4148         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4149
4150         /* NOTREACHED */
4151         return (ISC_R_NOTIMPLEMENTED);
4152 }
4153
/*
 * Node callback: check whether 'node' holds a usable DNAME rdataset and,
 * if so, record the node as the zone cut of the search in progress.
 *
 * node - the node being visited.
 * name - unused.
 * arg  - the rbtdb_search_t of the search; search->zonecut must be NULL
 *        on entry.
 *
 * As a side effect, rdataset headers whose TTL has passed are either
 * freed or marked stale while the list is scanned (details in the loop).
 *
 * Returns DNS_R_PARTIALMATCH when a DNAME was recorded as the zone cut
 * (a reference is taken on 'node' and search->need_cleanup is set), and
 * DNS_R_CONTINUE otherwise.
 *
 * NOTE(review): presumably this is handed to dns_rbt_findnode() by the
 * cache lookup code -- the call site is not visible here; confirm.
 */
4154 static isc_result_t
4155 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4156         rbtdb_search_t *search = arg;
4157         rdatasetheader_t *header, *header_prev, *header_next;
4158         rdatasetheader_t *dname_header, *sigdname_header;
4159         isc_result_t result;
4160         nodelock_t *lock;
4161         isc_rwlocktype_t locktype;
4162
4163         /*
              * Only one zone cut is recorded per search, so none may have
              * been recorded yet when this callback runs.
              */
4164
4165         REQUIRE(search->zonecut == NULL);
4166
4167         /*
4168          * Keep compiler silent.
4169          */
4170         UNUSED(name);
4171
             /*
              * Start with a read lock on the node.  'locktype' tracks the
              * lock mode actually held, because the loop below may upgrade
              * it to a write lock; the final NODE_UNLOCK uses it.
              */
4172         lock = &(search->rbtdb->node_locks[node->locknum].lock);
4173         locktype = isc_rwlocktype_read;
4174         NODE_LOCK(lock, locktype);
4175
4176         /*
4177          * Look for a DNAME or RRSIG DNAME rdataset.
              *
              * 'header_prev' trails the last header that is still linked
              * into node->data, so that a freed header can be unlinked.
              * 'header_next' is saved up front because 'header' may be
              * freed inside the loop body.
4178          */
4179         dname_header = NULL;
4180         sigdname_header = NULL;
4181         header_prev = NULL;
4182         for (header = node->data; header != NULL; header = header_next) {
4183                 header_next = header->next;
                     /*
                      * A header whose TTL is <= search->now takes this
                      * branch and therefore never reaches the DNAME checks
                      * in the else-if arms below.
                      */
4184                 if (header->rdh_ttl <= search->now) {
4185                         /*
4186                          * This rdataset is stale.  If no one else is
4187                          * using the node, we can clean it up right
4188                          * now, otherwise we mark it as stale, and
4189                          * the node as dirty, so it will get cleaned
4190                          * up later.
                              *
                              * Cleanup is attempted only if the TTL passed at
                              * least RBTDB_VIRTUAL before 'now' and a write
                              * lock is already held or the upgrade attempt
                              * succeeds.
4191                          */
4192                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4193                             (locktype == isc_rwlocktype_write ||
4194                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4195                                 /*
4196                                  * We update the node's status only when we
4197                                  * can get write access; otherwise, we leave
4198                                  * others to this work.  Periodical cleaning
4199                                  * will eventually take the job as the last
4200                                  * resort.
4201                                  * We won't downgrade the lock, since other
4202                                  * rdatasets are probably stale, too.
4203                                  */
4204                                 locktype = isc_rwlocktype_write;
4205
4206                                 if (dns_rbtnode_refcurrent(node) == 0) {
4207                                         isc_mem_t *mctx;
4208
4209                                         /*
4210                                          * header->down can be non-NULL if the
4211                                          * refcount has just decremented to 0
4212                                          * but decrement_reference() has not
4213                                          * performed clean_cache_node(), in
4214                                          * which case we need to purge the
4215                                          * stale headers first.
4216                                          */
4217                                         mctx = search->rbtdb->common.mctx;
4218                                         clean_stale_headers(search->rbtdb,
4219                                                             mctx,
4220                                                             header);
                                             /*
                                              * Unlink 'header' from the list
                                              * before freeing it; header_prev
                                              * is deliberately left unchanged.
                                              */
4221                                         if (header_prev != NULL)
4222                                                 header_prev->next =
4223                                                         header->next;
4224                                         else
4225                                                 node->data = header->next;
4226                                         free_rdataset(search->rbtdb, mctx,
4227                                                       header);
4228                                 } else {
4229                                         header->attributes |=
4230                                                 RDATASET_ATTR_STALE;
4231                                         node->dirty = 1;
4232                                         header_prev = header;
4233                                 }
4234                         } else
4235                                 header_prev = header;
4236                 } else if (header->type == dns_rdatatype_dname &&
4237                            EXISTS(header)) {
4238                         dname_header = header;
4239                         header_prev = header;
4240                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4241                          EXISTS(header)) {
4242                         sigdname_header = header;
4243                         header_prev = header;
4244                 } else
4245                         header_prev = header;
4246         }
4247
             /*
              * A DNAME whose trust level is still pending is used only when
              * the caller passed DNS_DBFIND_PENDINGOK.
              */
4248         if (dname_header != NULL &&
4249             (!DNS_TRUST_PENDING(dname_header->trust) ||
4250              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4251                 /*
4252                  * We increment the reference count on node to ensure that
4253                  * search->zonecut_rdataset will still be valid later.
4254                  */
4255                 new_reference(search->rbtdb, node);
4256                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4257                 search->zonecut = node;
4258                 search->zonecut_rdataset = dname_header;
4259                 search->zonecut_sigrdataset = sigdname_header;
4260                 search->need_cleanup = ISC_TRUE;
4261                 result = DNS_R_PARTIALMATCH;
4262         } else
4263                 result = DNS_R_CONTINUE;
4264
4265         NODE_UNLOCK(lock, locktype);
4266
4267         return (result);
4268 }
4269
4270 static inline isc_result_t
4271 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4272                      dns_dbnode_t **nodep, dns_name_t *foundname,
4273                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4274 {
4275         unsigned int i;
4276         dns_rbtnode_t *level_node;
4277         rdatasetheader_t *header, *header_prev, *header_next;
4278         rdatasetheader_t *found, *foundsig;
4279         isc_result_t result = ISC_R_NOTFOUND;
4280         dns_name_t name;
4281         dns_rbtdb_t *rbtdb;
4282         isc_boolean_t done;
4283         nodelock_t *lock;
4284         isc_rwlocktype_t locktype;
4285
4286         /*
4287          * Caller must be holding the tree lock.
4288          */
4289
4290         rbtdb = search->rbtdb;
4291         i = search->chain.level_matches;
4292         done = ISC_FALSE;
4293         do {
4294                 locktype = isc_rwlocktype_read;
4295                 lock = &rbtdb->node_locks[node->locknum].lock;
4296                 NODE_LOCK(lock, locktype);
4297
4298                 /*
4299                  * Look for NS and RRSIG NS rdatasets.
4300                  */
4301                 found = NULL;
4302                 foundsig = NULL;
4303                 header_prev = NULL;
4304                 for (header = node->data;
4305                      header != NULL;
4306                      header = header_next) {
4307                         header_next = header->next;
4308                         if (header->rdh_ttl <= search->now) {
4309                                 /*
4310                                  * This rdataset is stale.  If no one else is
4311                                  * using the node, we can clean it up right
4312                                  * now, otherwise we mark it as stale, and
4313                                  * the node as dirty, so it will get cleaned
4314                                  * up later.
4315                                  */
4316                                 if ((header->rdh_ttl <= search->now -
4317                                                     RBTDB_VIRTUAL) &&
4318                                     (locktype == isc_rwlocktype_write ||
4319                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4320                                         /*
4321                                          * We update the node's status only
4322                                          * when we can get write access.
4323                                          */
4324                                         locktype = isc_rwlocktype_write;
4325
4326                                         if (dns_rbtnode_refcurrent(node)
4327                                             == 0) {
4328                                                 isc_mem_t *m;
4329
4330                                                 m = search->rbtdb->common.mctx;
4331                                                 clean_stale_headers(
4332                                                         search->rbtdb,
4333                                                         m, header);
4334                                                 if (header_prev != NULL)
4335                                                         header_prev->next =
4336                                                                 header->next;
4337                                                 else
4338                                                         node->data =
4339                                                                 header->next;
4340                                                 free_rdataset(rbtdb, m,
4341                                                               header);
4342                                         } else {
4343                                                 header->attributes |=
4344                                                         RDATASET_ATTR_STALE;
4345                                                 node->dirty = 1;
4346                                                 header_prev = header;
4347                                         }
4348                                 } else
4349                                         header_prev = header;
4350                         } else if (EXISTS(header)) {
4351                                 /*
4352                                  * We've found an extant rdataset.  See if
4353                                  * we're interested in it.
4354                                  */
4355                                 if (header->type == dns_rdatatype_ns) {
4356                                         found = header;
4357                                         if (foundsig != NULL)
4358                                                 break;
4359                                 } else if (header->type ==
4360                                            RBTDB_RDATATYPE_SIGNS) {
4361                                         foundsig = header;
4362                                         if (found != NULL)
4363                                                 break;
4364                                 }
4365                                 header_prev = header;
4366                         } else
4367                                 header_prev = header;
4368                 }
4369
4370                 if (found != NULL) {
4371                         /*
4372                          * If we have to set foundname, we do it before
4373                          * anything else.  If we were to set foundname after
4374                          * we had set nodep or bound the rdataset, then we'd
4375                          * have to undo that work if dns_name_concatenate()
4376                          * failed.  By setting foundname first, there's
4377                          * nothing to undo if we have trouble.
4378                          */
4379                         if (foundname != NULL) {
4380                                 dns_name_init(&name, NULL);
4381                                 dns_rbt_namefromnode(node, &name);
4382                                 result = dns_name_copy(&name, foundname, NULL);
4383                                 while (result == ISC_R_SUCCESS && i > 0) {
4384                                         i--;
4385                                         level_node = search->chain.levels[i];
4386                                         dns_name_init(&name, NULL);
4387                                         dns_rbt_namefromnode(level_node,
4388                                                              &name);
4389                                         result =
4390                                                 dns_name_concatenate(foundname,
4391                                                                      &name,
4392                                                                      foundname,
4393                                                                      NULL);
4394                                 }
4395                                 if (result != ISC_R_SUCCESS) {
4396                                         *nodep = NULL;
4397                                         goto node_exit;
4398                                 }
4399                         }
4400                         result = DNS_R_DELEGATION;
4401                         if (nodep != NULL) {
4402                                 new_reference(search->rbtdb, node);
4403                                 *nodep = node;
4404                         }
4405                         bind_rdataset(search->rbtdb, node, found, search->now,
4406                                       rdataset);
4407                         if (foundsig != NULL)
4408                                 bind_rdataset(search->rbtdb, node, foundsig,
4409                                               search->now, sigrdataset);
4410                         if (need_headerupdate(found, search->now) ||
4411                             (foundsig != NULL &&
4412                              need_headerupdate(foundsig, search->now))) {
4413                                 if (locktype != isc_rwlocktype_write) {
4414                                         NODE_UNLOCK(lock, locktype);
4415                                         NODE_LOCK(lock, isc_rwlocktype_write);
4416                                         locktype = isc_rwlocktype_write;
4417                                         POST(locktype);
4418                                 }
4419                                 if (need_headerupdate(found, search->now))
4420                                         update_header(search->rbtdb, found,
4421                                                       search->now);
4422                                 if (foundsig != NULL &&
4423                                     need_headerupdate(foundsig, search->now)) {
4424                                         update_header(search->rbtdb, foundsig,
4425                                                       search->now);
4426                                 }
4427                         }
4428                 }
4429
4430         node_exit:
4431                 NODE_UNLOCK(lock, locktype);
4432
4433                 if (found == NULL && i > 0) {
4434                         i--;
4435                         node = search->chain.levels[i];
4436                 } else
4437                         done = ISC_TRUE;
4438
4439         } while (!done);
4440
4441         return (result);
4442 }
4443
4444 static isc_result_t
4445 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4446                   isc_stdtime_t now, dns_name_t *foundname,
4447                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4448 {
4449         dns_rbtnode_t *node;
4450         rdatasetheader_t *header, *header_next, *header_prev;
4451         rdatasetheader_t *found, *foundsig;
4452         isc_boolean_t empty_node;
4453         isc_result_t result;
4454         dns_fixedname_t fname, forigin;
4455         dns_name_t *name, *origin;
4456         rbtdb_rdatatype_t matchtype, sigmatchtype;
4457         nodelock_t *lock;
4458         isc_rwlocktype_t locktype;
4459
4460         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4461         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4462                                              dns_rdatatype_nsec);
4463
4464         do {
4465                 node = NULL;
4466                 dns_fixedname_init(&fname);
4467                 name = dns_fixedname_name(&fname);
4468                 dns_fixedname_init(&forigin);
4469                 origin = dns_fixedname_name(&forigin);
4470                 result = dns_rbtnodechain_current(&search->chain, name,
4471                                                   origin, &node);
4472                 if (result != ISC_R_SUCCESS)
4473                         return (result);
4474                 locktype = isc_rwlocktype_read;
4475                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4476                 NODE_LOCK(lock, locktype);
4477                 found = NULL;
4478                 foundsig = NULL;
4479                 empty_node = ISC_TRUE;
4480                 header_prev = NULL;
4481                 for (header = node->data;
4482                      header != NULL;
4483                      header = header_next) {
4484                         header_next = header->next;
4485                         if (header->rdh_ttl <= now) {
4486                                 /*
4487                                  * This rdataset is stale.  If no one else is
4488                                  * using the node, we can clean it up right
4489                                  * now, otherwise we mark it as stale, and the
4490                                  * node as dirty, so it will get cleaned up
4491                                  * later.
4492                                  */
4493                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4494                                     (locktype == isc_rwlocktype_write ||
4495                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4496                                         /*
4497                                          * We update the node's status only
4498                                          * when we can get write access.
4499                                          */
4500                                         locktype = isc_rwlocktype_write;
4501
4502                                         if (dns_rbtnode_refcurrent(node)
4503                                             == 0) {
4504                                                 isc_mem_t *m;
4505
4506                                                 m = search->rbtdb->common.mctx;
4507                                                 clean_stale_headers(
4508                                                         search->rbtdb,
4509                                                         m, header);
4510                                                 if (header_prev != NULL)
4511                                                         header_prev->next =
4512                                                                 header->next;
4513                                                 else
4514                                                         node->data = header->next;
4515                                                 free_rdataset(search->rbtdb, m,
4516                                                               header);
4517                                         } else {
4518                                                 header->attributes |=
4519                                                         RDATASET_ATTR_STALE;
4520                                                 node->dirty = 1;
4521                                                 header_prev = header;
4522                                         }
4523                                 } else
4524                                         header_prev = header;
4525                                 continue;
4526                         }
4527                         if (NONEXISTENT(header) ||
4528                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4529                                 header_prev = header;
4530                                 continue;
4531                         }
4532                         empty_node = ISC_FALSE;
4533                         if (header->type == matchtype)
4534                                 found = header;
4535                         else if (header->type == sigmatchtype)
4536                                 foundsig = header;
4537                         header_prev = header;
4538                 }
4539                 if (found != NULL) {
4540                         result = dns_name_concatenate(name, origin,
4541                                                       foundname, NULL);
4542                         if (result != ISC_R_SUCCESS)
4543                                 goto unlock_node;
4544                         bind_rdataset(search->rbtdb, node, found,
4545                                       now, rdataset);
4546                         if (foundsig != NULL)
4547                                 bind_rdataset(search->rbtdb, node, foundsig,
4548                                               now, sigrdataset);
4549                         new_reference(search->rbtdb, node);
4550                         *nodep = node;
4551                         result = DNS_R_COVERINGNSEC;
4552                 } else if (!empty_node) {
4553                         result = ISC_R_NOTFOUND;
4554                 } else
4555                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4556                                                        NULL);
4557  unlock_node:
4558                 NODE_UNLOCK(lock, locktype);
4559         } while (empty_node && result == ISC_R_SUCCESS);
4560         return (result);
4561 }
4562
4563 /*
4564  * Mark a database for response policy rewriting.
4565  */
4566 #ifdef BIND9
4567 static void
4568 get_rpz_enabled(dns_db_t *db, dns_rpz_st_t *st)
4569 {
4570         dns_rbtdb_t *rbtdb;
4571
4572         rbtdb = (dns_rbtdb_t *)db;
4573         REQUIRE(VALID_RBTDB(rbtdb));
4574         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4575         dns_rpz_enabled(rbtdb->rpz_cidr, st);
4576         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4577 }
4578
4579 /*
4580  * Search the CDIR block tree of a response policy tree of trees for all of
4581  * the IP addresses in an A or AAAA rdataset.
4582  * Among the policies for all IPv4 and IPv6 addresses for a name, choose
4583  * the longest prefix.  Among those with the longest prefix, the first
4584  * configured policy.  Among answers for with the longest prefixes for
4585  * two or more IP addresses in the A and AAAA rdatasets the lexically
4586  * smallest address.
4587  */
4588 static isc_result_t
4589 rpz_findips(dns_rpz_zone_t *rpz, dns_rpz_type_t rpz_type,
4590             dns_zone_t *zone, dns_db_t *db, dns_dbversion_t *version,
4591             dns_rdataset_t *ardataset, dns_rpz_st_t *st)
4592 {
4593         dns_rbtdb_t *rbtdb;
4594         struct in_addr ina;
4595         struct in6_addr in6a;
4596         isc_netaddr_t netaddr;
4597         dns_fixedname_t selfnamef, qnamef;
4598         dns_name_t *selfname, *qname;
4599         dns_rbtnode_t *node;
4600         dns_rdataset_t zrdataset;
4601         dns_rpz_cidr_bits_t prefix;
4602         isc_result_t result;
4603         dns_rpz_policy_t rpz_policy;
4604         dns_ttl_t ttl;
4605
4606         rbtdb = (dns_rbtdb_t *)db;
4607         REQUIRE(VALID_RBTDB(rbtdb));
4608         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4609
4610         if (rbtdb->rpz_cidr == NULL) {
4611                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4612                 dns_db_detach(&db);
4613                 dns_zone_detach(&zone);
4614                 return (ISC_R_UNEXPECTED);
4615         }
4616
4617         dns_fixedname_init(&selfnamef);
4618         dns_fixedname_init(&qnamef);
4619         selfname = dns_fixedname_name(&selfnamef);
4620         qname = dns_fixedname_name(&qnamef);
4621
4622         for (result = dns_rdataset_first(ardataset);
4623              result == ISC_R_SUCCESS;
4624              result = dns_rdataset_next(ardataset)) {
4625                 dns_rdata_t rdata = DNS_RDATA_INIT;
4626                 dns_rdataset_current(ardataset, &rdata);
4627                 switch (rdata.type) {
4628                 case dns_rdatatype_a:
4629                         INSIST(rdata.length == 4);
4630                         memcpy(&ina.s_addr, rdata.data, 4);
4631                         isc_netaddr_fromin(&netaddr, &ina);
4632                         break;
4633                 case dns_rdatatype_aaaa:
4634                         INSIST(rdata.length == 16);
4635                         memcpy(in6a.s6_addr, rdata.data, 16);
4636                         isc_netaddr_fromin6(&netaddr, &in6a);
4637                         break;
4638                 default:
4639                         continue;
4640                 }
4641
4642                 result = dns_rpz_cidr_find(rbtdb->rpz_cidr, &netaddr, rpz_type,
4643                                            selfname, qname, &prefix);
4644                 if (result != ISC_R_SUCCESS)
4645                         continue;
4646
4647                 /*
4648                  * Choose the policy with the longest matching prefix.
4649                  * Between policies with the same prefix, choose the first
4650                  * configured.
4651                  */
4652                 if (st->m.policy != DNS_RPZ_POLICY_MISS) {
4653                         if (prefix < st->m.prefix)
4654                                 continue;
4655                         if (prefix == st->m.prefix &&
4656                             rpz->num > st->m.rpz->num)
4657                                 continue;
4658                 }
4659
4660                 /*
4661                  * We have rpz_st an entry with a prefix at least as long as
4662                  * the prefix of the entry we had before.  Find the node
4663                  * corresponding to CDIR tree entry.
4664                  */
4665                 node = NULL;
4666                 result = dns_rbt_findnode(rbtdb->tree, qname, NULL,
4667                                           &node, NULL, 0, NULL, NULL);
4668                 if (result != ISC_R_SUCCESS) {
4669                         char namebuf[DNS_NAME_FORMATSIZE];
4670
4671                         dns_name_format(qname, namebuf, sizeof(namebuf));
4672                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4673                                       DNS_LOGMODULE_CACHE, DNS_RPZ_ERROR_LEVEL,
4674                                       "rpz_findips findnode(%s): %s",
4675                                       namebuf, isc_result_totext(result));
4676                         continue;
4677                 }
4678                 /*
4679                  * First look for a simple rewrite of the IP address.
4680                  * If that fails, look for a CNAME.  If we cannot find
4681                  * a CNAME or the CNAME is neither of the special forms
4682                  * "*" or ".", treat it like a real CNAME.
4683                  */
4684                 dns_rdataset_init(&zrdataset);
4685                 result = dns_db_findrdataset(db, node, version, ardataset->type,
4686                                              0, 0, &zrdataset, NULL);
4687                 if (result != ISC_R_SUCCESS)
4688                         result = dns_db_findrdataset(db, node, version,
4689                                                      dns_rdatatype_cname,
4690                                                      0, 0, &zrdataset, NULL);
4691                 if (result == ISC_R_SUCCESS) {
4692                         if (zrdataset.type != dns_rdatatype_cname) {
4693                                 rpz_policy = DNS_RPZ_POLICY_RECORD;
4694                         } else {
4695                                 rpz_policy = dns_rpz_decode_cname(&zrdataset,
4696                                                                   selfname);
4697                                 if (rpz_policy == DNS_RPZ_POLICY_RECORD)
4698                                         result = DNS_R_CNAME;
4699                         }
4700                         ttl = zrdataset.ttl;
4701                 } else {
4702                         rpz_policy = DNS_RPZ_POLICY_RECORD;
4703                         result = DNS_R_NXRRSET;
4704                         ttl = DNS_RPZ_TTL_DEFAULT;
4705                 }
4706
4707                 /*
4708                  * Use an overriding action specified in the configuration file
4709                  */
4710                 if (rpz->policy != DNS_RPZ_POLICY_GIVEN &&
4711                     rpz_policy != DNS_RPZ_POLICY_NO_OP)
4712                         rpz_policy = rpz->policy;
4713
4714                 /*
4715                  * We know the new prefix is at least as long as the current.
4716                  * Prefer the new answer if the new prefix is longer.
4717                  * Prefer the zone configured first if the prefixes are equal.
4718                  * With two actions from the same zone, prefer the action
4719                  * on the "smallest" name.
4720                  */
4721                 if (st->m.policy == DNS_RPZ_POLICY_MISS ||
4722                     prefix > st->m.prefix ||
4723                     rpz->num <= st->m.rpz->num ||
4724                     0 > dns_name_compare(qname, st->qname)) {
4725                         if (dns_rdataset_isassociated(st->m.rdataset))
4726                                 dns_rdataset_disassociate(st->m.rdataset);
4727                         if (st->m.node != NULL)
4728                                 dns_db_detachnode(st->m.db, &st->m.node);
4729                         if (st->m.db != NULL)
4730                                 dns_db_detach(&st->m.db);
4731                         if (st->m.zone != NULL)
4732                                 dns_zone_detach(&st->m.zone);
4733                         st->m.rpz = rpz;
4734                         st->m.type = rpz_type;
4735                         st->m.prefix = prefix;
4736                         st->m.policy = rpz_policy;
4737                         st->m.ttl = ttl;
4738                         st->m.result = result;
4739                         dns_name_copy(qname, st->qname, NULL);
4740                         if (rpz_policy == DNS_RPZ_POLICY_RECORD &&
4741                             result != DNS_R_NXRRSET) {
4742                                 dns_rdataset_clone(&zrdataset,st->m.rdataset);
4743                                 dns_db_attachnode(db, node, &st->m.node);
4744                         }
4745                         dns_db_attach(db, &st->m.db);
4746                         dns_zone_attach(zone, &st->m.zone);
4747                 }
4748                 if (dns_rdataset_isassociated(&zrdataset))
4749                         dns_rdataset_disassociate(&zrdataset);
4750         }
4751
4752         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4753         return (ISC_R_SUCCESS);
4754 }
4755 #endif
4756
4757 static isc_result_t
4758 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4759            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4760            dns_dbnode_t **nodep, dns_name_t *foundname,
4761            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4762 {
4763         dns_rbtnode_t *node = NULL;
4764         isc_result_t result;
4765         rbtdb_search_t search;
4766         isc_boolean_t cname_ok = ISC_TRUE;
4767         isc_boolean_t empty_node;
4768         nodelock_t *lock;
4769         isc_rwlocktype_t locktype;
4770         rdatasetheader_t *header, *header_prev, *header_next;
4771         rdatasetheader_t *found, *nsheader;
4772         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4773         rdatasetheader_t *update, *updatesig;
4774         rbtdb_rdatatype_t sigtype, negtype;
4775
4776         UNUSED(version);
4777
4778         search.rbtdb = (dns_rbtdb_t *)db;
4779
4780         REQUIRE(VALID_RBTDB(search.rbtdb));
4781         REQUIRE(version == NULL);
4782
4783         if (now == 0)
4784                 isc_stdtime_get(&now);
4785
4786         search.rbtversion = NULL;
4787         search.serial = 1;
4788         search.options = options;
4789         search.copy_name = ISC_FALSE;
4790         search.need_cleanup = ISC_FALSE;
4791         search.wild = ISC_FALSE;
4792         search.zonecut = NULL;
4793         dns_fixedname_init(&search.zonecut_name);
4794         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4795         search.now = now;
4796         update = NULL;
4797         updatesig = NULL;
4798
4799         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4800
4801         /*
4802          * Search down from the root of the tree.  If, while going down, we
4803          * encounter a callback node, cache_zonecut_callback() will search the
4804          * rdatasets at the zone cut for a DNAME rdataset.
4805          */
4806         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4807                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4808                                   cache_zonecut_callback, &search);
4809
4810         if (result == DNS_R_PARTIALMATCH) {
4811                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4812                         result = find_coveringnsec(&search, nodep, now,
4813                                                    foundname, rdataset,
4814                                                    sigrdataset);
4815                         if (result == DNS_R_COVERINGNSEC)
4816                                 goto tree_exit;
4817                 }
4818                 if (search.zonecut != NULL) {
4819                     result = setup_delegation(&search, nodep, foundname,
4820                                               rdataset, sigrdataset);
4821                     goto tree_exit;
4822                 } else {
4823                 find_ns:
4824                         result = find_deepest_zonecut(&search, node, nodep,
4825                                                       foundname, rdataset,
4826                                                       sigrdataset);
4827                         goto tree_exit;
4828                 }
4829         } else if (result != ISC_R_SUCCESS)
4830                 goto tree_exit;
4831
4832         /*
4833          * Certain DNSSEC types are not subject to CNAME matching
4834          * (RFC4035, section 2.5 and RFC3007).
4835          *
4836          * We don't check for RRSIG, because we don't store RRSIG records
4837          * directly.
4838          */
4839         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4840                 cname_ok = ISC_FALSE;
4841
4842         /*
4843          * We now go looking for rdata...
4844          */
4845
4846         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4847         locktype = isc_rwlocktype_read;
4848         NODE_LOCK(lock, locktype);
4849
4850         found = NULL;
4851         foundsig = NULL;
4852         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4853         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4854         nsheader = NULL;
4855         nssig = NULL;
4856         cnamesig = NULL;
4857         empty_node = ISC_TRUE;
4858         header_prev = NULL;
4859         for (header = node->data; header != NULL; header = header_next) {
4860                 header_next = header->next;
4861                 if (header->rdh_ttl <= now) {
4862                         /*
4863                          * This rdataset is stale.  If no one else is using the
4864                          * node, we can clean it up right now, otherwise we
4865                          * mark it as stale, and the node as dirty, so it will
4866                          * get cleaned up later.
4867                          */
4868                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4869                             (locktype == isc_rwlocktype_write ||
4870                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4871                                 /*
4872                                  * We update the node's status only when we
4873                                  * can get write access.
4874                                  */
4875                                 locktype = isc_rwlocktype_write;
4876
4877                                 if (dns_rbtnode_refcurrent(node) == 0) {
4878                                         isc_mem_t *mctx;
4879
4880                                         mctx = search.rbtdb->common.mctx;
4881                                         clean_stale_headers(search.rbtdb, mctx,
4882                                                             header);
4883                                         if (header_prev != NULL)
4884                                                 header_prev->next =
4885                                                         header->next;
4886                                         else
4887                                                 node->data = header->next;
4888                                         free_rdataset(search.rbtdb, mctx,
4889                                                       header);
4890                                 } else {
4891                                         header->attributes |=
4892                                                 RDATASET_ATTR_STALE;
4893                                         node->dirty = 1;
4894                                         header_prev = header;
4895                                 }
4896                         } else
4897                                 header_prev = header;
4898                 } else if (EXISTS(header)) {
4899                         /*
4900                          * We now know that there is at least one active
4901                          * non-stale rdataset at this node.
4902                          */
4903                         empty_node = ISC_FALSE;
4904
4905                         /*
4906                          * If we found a type we were looking for, remember
4907                          * it.
4908                          */
4909                         if (header->type == type ||
4910                             (type == dns_rdatatype_any &&
4911                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4912                             (cname_ok && header->type ==
4913                              dns_rdatatype_cname)) {
4914                                 /*
4915                                  * We've found the answer.
4916                                  */
4917                                 found = header;
4918                                 if (header->type == dns_rdatatype_cname &&
4919                                     cname_ok &&
4920                                     cnamesig != NULL) {
4921                                         /*
4922                                          * If we've already got the
4923                                          * CNAME RRSIG, use it.
4924                                          */
4925                                         foundsig = cnamesig;
4926                                 }
4927                         } else if (header->type == sigtype) {
4928                                 /*
4929                                  * We've found the RRSIG rdataset for our
4930                                  * target type.  Remember it.
4931                                  */
4932                                 foundsig = header;
4933                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4934                                    header->type == negtype) {
4935                                 /*
4936                                  * We've found a negative cache entry.
4937                                  */
4938                                 found = header;
4939                         } else if (header->type == dns_rdatatype_ns) {
4940                                 /*
4941                                  * Remember a NS rdataset even if we're
4942                                  * not specifically looking for it, because
4943                                  * we might need it later.
4944                                  */
4945                                 nsheader = header;
4946                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4947                                 /*
4948                                  * If we need the NS rdataset, we'll also
4949                                  * need its signature.
4950                                  */
4951                                 nssig = header;
4952                         } else if (cname_ok &&
4953                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4954                                 /*
4955                                  * If we get a CNAME match, we'll also need
4956                                  * its signature.
4957                                  */
4958                                 cnamesig = header;
4959                         }
4960                         header_prev = header;
4961                 } else
4962                         header_prev = header;
4963         }
4964
4965         if (empty_node) {
4966                 /*
4967                  * We have an exact match for the name, but there are no
4968                  * extant rdatasets.  That means that this node doesn't
4969                  * meaningfully exist, and that we really have a partial match.
4970                  */
4971                 NODE_UNLOCK(lock, locktype);
4972                 goto find_ns;
4973         }
4974
4975         /*
4976          * If we didn't find what we were looking for...
4977          */
4978         if (found == NULL ||
4979             (DNS_TRUST_ADDITIONAL(found->trust) &&
4980              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4981             (found->trust == dns_trust_glue &&
4982              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4983             (DNS_TRUST_PENDING(found->trust) &&
4984              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4985                 /*
4986                  * If there is an NS rdataset at this node, then this is the
4987                  * deepest zone cut.
4988                  */
4989                 if (nsheader != NULL) {
4990                         if (nodep != NULL) {
4991                                 new_reference(search.rbtdb, node);
4992                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4993                                 *nodep = node;
4994                         }
4995                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4996                                       rdataset);
4997                         if (need_headerupdate(nsheader, search.now))
4998                                 update = nsheader;
4999                         if (nssig != NULL) {
5000                                 bind_rdataset(search.rbtdb, node, nssig,
5001                                               search.now, sigrdataset);
5002                                 if (need_headerupdate(nssig, search.now))
5003                                         updatesig = nssig;
5004                         }
5005                         result = DNS_R_DELEGATION;
5006                         goto node_exit;
5007                 }
5008
5009                 /*
5010                  * Go find the deepest zone cut.
5011                  */
5012                 NODE_UNLOCK(lock, locktype);
5013                 goto find_ns;
5014         }
5015
5016         /*
5017          * We found what we were looking for, or we found a CNAME.
5018          */
5019
5020         if (nodep != NULL) {
5021                 new_reference(search.rbtdb, node);
5022                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5023                 *nodep = node;
5024         }
5025
5026         if (NEGATIVE(found)) {
5027                 /*
5028                  * We found a negative cache entry.
5029                  */
5030                 if (NXDOMAIN(found))
5031                         result = DNS_R_NCACHENXDOMAIN;
5032                 else
5033                         result = DNS_R_NCACHENXRRSET;
5034         } else if (type != found->type &&
5035                    type != dns_rdatatype_any &&
5036                    found->type == dns_rdatatype_cname) {
5037                 /*
5038                  * We weren't doing an ANY query and we found a CNAME instead
5039                  * of the type we were looking for, so we need to indicate
5040                  * that result to the caller.
5041                  */
5042                 result = DNS_R_CNAME;
5043         } else {
5044                 /*
5045                  * An ordinary successful query!
5046                  */
5047                 result = ISC_R_SUCCESS;
5048         }
5049
5050         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5051             result == DNS_R_NCACHENXRRSET) {
5052                 bind_rdataset(search.rbtdb, node, found, search.now,
5053                               rdataset);
5054                 if (need_headerupdate(found, search.now))
5055                         update = found;
5056                 if (!NEGATIVE(found) && foundsig != NULL) {
5057                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
5058                                       sigrdataset);
5059                         if (need_headerupdate(foundsig, search.now))
5060                                 updatesig = foundsig;
5061                 }
5062         }
5063
5064  node_exit:
5065         if ((update != NULL || updatesig != NULL) &&
5066             locktype != isc_rwlocktype_write) {
5067                 NODE_UNLOCK(lock, locktype);
5068                 NODE_LOCK(lock, isc_rwlocktype_write);
5069                 locktype = isc_rwlocktype_write;
5070                 POST(locktype);
5071         }
5072         if (update != NULL && need_headerupdate(update, search.now))
5073                 update_header(search.rbtdb, update, search.now);
5074         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5075                 update_header(search.rbtdb, updatesig, search.now);
5076
5077         NODE_UNLOCK(lock, locktype);
5078
5079  tree_exit:
5080         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5081
5082         /*
5083          * If we found a zonecut but aren't going to use it, we have to
5084          * let go of it.
5085          */
5086         if (search.need_cleanup) {
5087                 node = search.zonecut;
5088                 INSIST(node != NULL);
5089                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5090
5091                 NODE_LOCK(lock, isc_rwlocktype_read);
5092                 decrement_reference(search.rbtdb, node, 0,
5093                                     isc_rwlocktype_read, isc_rwlocktype_none,
5094                                     ISC_FALSE);
5095                 NODE_UNLOCK(lock, isc_rwlocktype_read);
5096         }
5097
5098         dns_rbtnodechain_reset(&search.chain);
5099
5100         return (result);
5101 }
5102
5103 static isc_result_t
5104 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
5105                   isc_stdtime_t now, dns_dbnode_t **nodep,
5106                   dns_name_t *foundname,
5107                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
5108 {
5109         dns_rbtnode_t *node = NULL;
5110         nodelock_t *lock;
5111         isc_result_t result;
5112         rbtdb_search_t search;
5113         rdatasetheader_t *header, *header_prev, *header_next;
5114         rdatasetheader_t *found, *foundsig;
5115         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
5116         isc_rwlocktype_t locktype;
5117
5118         search.rbtdb = (dns_rbtdb_t *)db;
5119
5120         REQUIRE(VALID_RBTDB(search.rbtdb));
5121
5122         if (now == 0)
5123                 isc_stdtime_get(&now);
5124
5125         search.rbtversion = NULL;
5126         search.serial = 1;
5127         search.options = options;
5128         search.copy_name = ISC_FALSE;
5129         search.need_cleanup = ISC_FALSE;
5130         search.wild = ISC_FALSE;
5131         search.zonecut = NULL;
5132         dns_fixedname_init(&search.zonecut_name);
5133         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
5134         search.now = now;
5135
5136         if ((options & DNS_DBFIND_NOEXACT) != 0)
5137                 rbtoptions |= DNS_RBTFIND_NOEXACT;
5138
5139         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5140
5141         /*
5142          * Search down from the root of the tree.
5143          */
5144         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5145                                   &search.chain, rbtoptions, NULL, &search);
5146
5147         if (result == DNS_R_PARTIALMATCH) {
5148         find_ns:
5149                 result = find_deepest_zonecut(&search, node, nodep, foundname,
5150                                               rdataset, sigrdataset);
5151                 goto tree_exit;
5152         } else if (result != ISC_R_SUCCESS)
5153                 goto tree_exit;
5154
5155         /*
5156          * We now go looking for an NS rdataset at the node.
5157          */
5158
5159         lock = &(search.rbtdb->node_locks[node->locknum].lock);
5160         locktype = isc_rwlocktype_read;
5161         NODE_LOCK(lock, locktype);
5162
5163         found = NULL;
5164         foundsig = NULL;
5165         header_prev = NULL;
5166         for (header = node->data; header != NULL; header = header_next) {
5167                 header_next = header->next;
5168                 if (header->rdh_ttl <= now) {
5169                         /*
5170                          * This rdataset is stale.  If no one else is using the
5171                          * node, we can clean it up right now, otherwise we
5172                          * mark it as stale, and the node as dirty, so it will
5173                          * get cleaned up later.
5174                          */
5175                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5176                             (locktype == isc_rwlocktype_write ||
5177                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5178                                 /*
5179                                  * We update the node's status only when we
5180                                  * can get write access.
5181                                  */
5182                                 locktype = isc_rwlocktype_write;
5183
5184                                 if (dns_rbtnode_refcurrent(node) == 0) {
5185                                         isc_mem_t *mctx;
5186
5187                                         mctx = search.rbtdb->common.mctx;
5188                                         clean_stale_headers(search.rbtdb, mctx,
5189                                                             header);
5190                                         if (header_prev != NULL)
5191                                                 header_prev->next =
5192                                                         header->next;
5193                                         else
5194                                                 node->data = header->next;
5195                                         free_rdataset(search.rbtdb, mctx,
5196                                                       header);
5197                                 } else {
5198                                         header->attributes |=
5199                                                 RDATASET_ATTR_STALE;
5200                                         node->dirty = 1;
5201                                         header_prev = header;
5202                                 }
5203                         } else
5204                                 header_prev = header;
5205                 } else if (EXISTS(header)) {
5206                         /*
5207                          * If we found a type we were looking for, remember
5208                          * it.
5209                          */
5210                         if (header->type == dns_rdatatype_ns) {
5211                                 /*
5212                                  * Remember a NS rdataset even if we're
5213                                  * not specifically looking for it, because
5214                                  * we might need it later.
5215                                  */
5216                                 found = header;
5217                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5218                                 /*
5219                                  * If we need the NS rdataset, we'll also
5220                                  * need its signature.
5221                                  */
5222                                 foundsig = header;
5223                         }
5224                         header_prev = header;
5225                 } else
5226                         header_prev = header;
5227         }
5228
5229         if (found == NULL) {
5230                 /*
5231                  * No NS records here.
5232                  */
5233                 NODE_UNLOCK(lock, locktype);
5234                 goto find_ns;
5235         }
5236
5237         if (nodep != NULL) {
5238                 new_reference(search.rbtdb, node);
5239                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5240                 *nodep = node;
5241         }
5242
5243         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
5244         if (foundsig != NULL)
5245                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5246                               sigrdataset);
5247
5248         if (need_headerupdate(found, search.now) ||
5249             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
5250                 if (locktype != isc_rwlocktype_write) {
5251                         NODE_UNLOCK(lock, locktype);
5252                         NODE_LOCK(lock, isc_rwlocktype_write);
5253                         locktype = isc_rwlocktype_write;
5254                         POST(locktype);
5255                 }
5256                 if (need_headerupdate(found, search.now))
5257                         update_header(search.rbtdb, found, search.now);
5258                 if (foundsig != NULL &&
5259                     need_headerupdate(foundsig, search.now)) {
5260                         update_header(search.rbtdb, foundsig, search.now);
5261                 }
5262         }
5263
5264         NODE_UNLOCK(lock, locktype);
5265
5266  tree_exit:
5267         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5268
5269         INSIST(!search.need_cleanup);
5270
5271         dns_rbtnodechain_reset(&search.chain);
5272
5273         if (result == DNS_R_DELEGATION)
5274                 result = ISC_R_SUCCESS;
5275
5276         return (result);
5277 }
5278
5279 static void
5280 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5281         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5282         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5283         unsigned int refs;
5284
5285         REQUIRE(VALID_RBTDB(rbtdb));
5286         REQUIRE(targetp != NULL && *targetp == NULL);
5287
5288         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5289         dns_rbtnode_refincrement(node, &refs);
5290         INSIST(refs != 0);
5291         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
5292
5293         *targetp = source;
5294 }
5295
5296 static void
5297 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5298         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5299         dns_rbtnode_t *node;
5300         isc_boolean_t want_free = ISC_FALSE;
5301         isc_boolean_t inactive = ISC_FALSE;
5302         rbtdb_nodelock_t *nodelock;
5303
5304         REQUIRE(VALID_RBTDB(rbtdb));
5305         REQUIRE(targetp != NULL && *targetp != NULL);
5306
5307         node = (dns_rbtnode_t *)(*targetp);
5308         nodelock = &rbtdb->node_locks[node->locknum];
5309
5310         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5311
5312         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5313                                 isc_rwlocktype_none, ISC_FALSE)) {
5314                 if (isc_refcount_current(&nodelock->references) == 0 &&
5315                     nodelock->exiting) {
5316                         inactive = ISC_TRUE;
5317                 }
5318         }
5319
5320         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5321
5322         *targetp = NULL;
5323
5324         if (inactive) {
5325                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5326                 rbtdb->active--;
5327                 if (rbtdb->active == 0)
5328                         want_free = ISC_TRUE;
5329                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5330                 if (want_free) {
5331                         char buf[DNS_NAME_FORMATSIZE];
5332                         if (dns_name_dynamic(&rbtdb->common.origin))
5333                                 dns_name_format(&rbtdb->common.origin, buf,
5334                                                 sizeof(buf));
5335                         else
5336                                 strcpy(buf, "<UNKNOWN>");
5337                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5338                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5339                                       "calling free_rbtdb(%s)", buf);
5340                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
5341                 }
5342         }
5343 }
5344
5345 static isc_result_t
5346 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5347         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5348         dns_rbtnode_t *rbtnode = node;
5349         rdatasetheader_t *header;
5350         isc_boolean_t force_expire = ISC_FALSE;
5351         /*
5352          * These are the category and module used by the cache cleaner.
5353          */
5354         isc_boolean_t log = ISC_FALSE;
5355         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5356         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5357         int level = ISC_LOG_DEBUG(2);
5358         char printname[DNS_NAME_FORMATSIZE];
5359
5360         REQUIRE(VALID_RBTDB(rbtdb));
5361
5362         /*
5363          * Caller must hold a tree lock.
5364          */
5365
5366         if (now == 0)
5367                 isc_stdtime_get(&now);
5368
5369         if (isc_mem_isovermem(rbtdb->common.mctx)) {
5370                 isc_uint32_t val;
5371
5372                 isc_random_get(&val);
5373                 /*
5374                  * XXXDCL Could stand to have a better policy, like LRU.
5375                  */
5376                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5377
5378                 /*
5379                  * Note that 'log' can be true IFF overmem is also true.
5380                  * overmem can currently only be true for cache
5381                  * databases -- hence all of the "overmem cache" log strings.
5382                  */
5383                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5384                 if (log)
5385                         isc_log_write(dns_lctx, category, module, level,
5386                                       "overmem cache: %s %s",
5387                                       force_expire ? "FORCE" : "check",
5388                                       dns_rbt_formatnodename(rbtnode,
5389                                                            printname,
5390                                                            sizeof(printname)));
5391         }
5392
5393         /*
5394          * We may not need write access, but this code path is not performance
5395          * sensitive, so it should be okay to always lock as a writer.
5396          */
5397         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5398                   isc_rwlocktype_write);
5399
5400         for (header = rbtnode->data; header != NULL; header = header->next)
5401                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5402                         /*
5403                          * We don't check if refcurrent(rbtnode) == 0 and try
5404                          * to free like we do in cache_find(), because
5405                          * refcurrent(rbtnode) must be non-zero.  This is so
5406                          * because 'node' is an argument to the function.
5407                          */
5408                         header->attributes |= RDATASET_ATTR_STALE;
5409                         rbtnode->dirty = 1;
5410                         if (log)
5411                                 isc_log_write(dns_lctx, category, module,
5412                                               level, "overmem cache: stale %s",
5413                                               printname);
5414                 } else if (force_expire) {
5415                         if (! RETAIN(header)) {
5416                                 set_ttl(rbtdb, header, 0);
5417                                 header->attributes |= RDATASET_ATTR_STALE;
5418                                 rbtnode->dirty = 1;
5419                         } else if (log) {
5420                                 isc_log_write(dns_lctx, category, module,
5421                                               level, "overmem cache: "
5422                                               "reprieve by RETAIN() %s",
5423                                               printname);
5424                         }
5425                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5426                         isc_log_write(dns_lctx, category, module, level,
5427                                       "overmem cache: saved %s", printname);
5428
5429         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5430                     isc_rwlocktype_write);
5431
5432         return (ISC_R_SUCCESS);
5433 }
5434
5435 static void
5436 overmem(dns_db_t *db, isc_boolean_t overmem) {
5437         /* This is an empty callback.  See adb.c:water() */
5438
5439         UNUSED(db);
5440         UNUSED(overmem);
5441
5442         return;
5443 }
5444
5445 static void
5446 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5447         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5448         dns_rbtnode_t *rbtnode = node;
5449         isc_boolean_t first;
5450
5451         REQUIRE(VALID_RBTDB(rbtdb));
5452
5453         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5454                   isc_rwlocktype_read);
5455
5456         fprintf(out, "node %p, %u references, locknum = %u\n",
5457                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5458                 rbtnode->locknum);
5459         if (rbtnode->data != NULL) {
5460                 rdatasetheader_t *current, *top_next;
5461
5462                 for (current = rbtnode->data; current != NULL;
5463                      current = top_next) {
5464                         top_next = current->next;
5465                         first = ISC_TRUE;
5466                         fprintf(out, "\ttype %u", current->type);
5467                         do {
5468                                 if (!first)
5469                                         fprintf(out, "\t");
5470                                 first = ISC_FALSE;
5471                                 fprintf(out,
5472                                         "\tserial = %lu, ttl = %u, "
5473                                         "trust = %u, attributes = %u, "
5474                                         "resign = %u\n",
5475                                         (unsigned long)current->serial,
5476                                         current->rdh_ttl,
5477                                         current->trust,
5478                                         current->attributes,
5479                                         current->resign);
5480                                 current = current->down;
5481                         } while (current != NULL);
5482                 }
5483         } else
5484                 fprintf(out, "(empty)\n");
5485
5486         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5487                     isc_rwlocktype_read);
5488 }
5489
5490 static isc_result_t
5491 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5492 {
5493         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5494         rbtdb_dbiterator_t *rbtdbiter;
5495
5496         REQUIRE(VALID_RBTDB(rbtdb));
5497
5498         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5499         if (rbtdbiter == NULL)
5500                 return (ISC_R_NOMEMORY);
5501
5502         rbtdbiter->common.methods = &dbiterator_methods;
5503         rbtdbiter->common.db = NULL;
5504         dns_db_attach(db, &rbtdbiter->common.db);
5505         rbtdbiter->common.relative_names =
5506                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5507         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5508         rbtdbiter->common.cleaning = ISC_FALSE;
5509         rbtdbiter->paused = ISC_TRUE;
5510         rbtdbiter->tree_locked = isc_rwlocktype_none;
5511         rbtdbiter->result = ISC_R_SUCCESS;
5512         dns_fixedname_init(&rbtdbiter->name);
5513         dns_fixedname_init(&rbtdbiter->origin);
5514         rbtdbiter->node = NULL;
5515         rbtdbiter->delete = 0;
5516         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5517         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5518         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5519         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5520         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5521         if (rbtdbiter->nsec3only)
5522                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5523         else
5524                 rbtdbiter->current = &rbtdbiter->chain;
5525
5526         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5527
5528         return (ISC_R_SUCCESS);
5529 }
5530
5531 static isc_result_t
5532 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5533                   dns_rdatatype_t type, dns_rdatatype_t covers,
5534                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5535                   dns_rdataset_t *sigrdataset)
5536 {
5537         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5538         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5539         rdatasetheader_t *header, *header_next, *found, *foundsig;
5540         rbtdb_serial_t serial;
5541         rbtdb_version_t *rbtversion = version;
5542         isc_boolean_t close_version = ISC_FALSE;
5543         rbtdb_rdatatype_t matchtype, sigmatchtype;
5544
5545         REQUIRE(VALID_RBTDB(rbtdb));
5546         REQUIRE(type != dns_rdatatype_any);
5547
5548         if (rbtversion == NULL) {
5549                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5550                 close_version = ISC_TRUE;
5551         }
5552         serial = rbtversion->serial;
5553         now = 0;
5554
5555         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5556                   isc_rwlocktype_read);
5557
5558         found = NULL;
5559         foundsig = NULL;
5560         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5561         if (covers == 0)
5562                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5563         else
5564                 sigmatchtype = 0;
5565
5566         for (header = rbtnode->data; header != NULL; header = header_next) {
5567                 header_next = header->next;
5568                 do {
5569                         if (header->serial <= serial &&
5570                             !IGNORE(header)) {
5571                                 /*
5572                                  * Is this a "this rdataset doesn't
5573                                  * exist" record?
5574                                  */
5575                                 if (NONEXISTENT(header))
5576                                         header = NULL;
5577                                 break;
5578                         } else
5579                                 header = header->down;
5580                 } while (header != NULL);
5581                 if (header != NULL) {
5582                         /*
5583                          * We have an active, extant rdataset.  If it's a
5584                          * type we're looking for, remember it.
5585                          */
5586                         if (header->type == matchtype) {
5587                                 found = header;
5588                                 if (foundsig != NULL)
5589                                         break;
5590                         } else if (header->type == sigmatchtype) {
5591                                 foundsig = header;
5592                                 if (found != NULL)
5593                                         break;
5594                         }
5595                 }
5596         }
5597         if (found != NULL) {
5598                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5599                 if (foundsig != NULL)
5600                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5601                                       sigrdataset);
5602         }
5603
5604         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5605                     isc_rwlocktype_read);
5606
5607         if (close_version)
5608                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5609                              ISC_FALSE);
5610
5611         if (found == NULL)
5612                 return (ISC_R_NOTFOUND);
5613
5614         return (ISC_R_SUCCESS);
5615 }
5616
5617 static isc_result_t
5618 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5619                    dns_rdatatype_t type, dns_rdatatype_t covers,
5620                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5621                    dns_rdataset_t *sigrdataset)
5622 {
5623         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5624         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5625         rdatasetheader_t *header, *header_next, *found, *foundsig;
5626         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5627         isc_result_t result;
5628         nodelock_t *lock;
5629         isc_rwlocktype_t locktype;
5630
5631         REQUIRE(VALID_RBTDB(rbtdb));
5632         REQUIRE(type != dns_rdatatype_any);
5633
5634         UNUSED(version);
5635
5636         result = ISC_R_SUCCESS;
5637
5638         if (now == 0)
5639                 isc_stdtime_get(&now);
5640
5641         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5642         locktype = isc_rwlocktype_read;
5643         NODE_LOCK(lock, locktype);
5644
5645         found = NULL;
5646         foundsig = NULL;
5647         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5648         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5649         if (covers == 0)
5650                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5651         else
5652                 sigmatchtype = 0;
5653
5654         for (header = rbtnode->data; header != NULL; header = header_next) {
5655                 header_next = header->next;
5656                 if (header->rdh_ttl <= now) {
5657                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5658                             (locktype == isc_rwlocktype_write ||
5659                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5660                                 /*
5661                                  * We update the node's status only when we
5662                                  * can get write access.
5663                                  */
5664                                 locktype = isc_rwlocktype_write;
5665
5666                                 /*
5667                                  * We don't check if refcurrent(rbtnode) == 0
5668                                  * and try to free like we do in cache_find(),
5669                                  * because refcurrent(rbtnode) must be
5670                                  * non-zero.  This is so because 'node' is an
5671                                  * argument to the function.
5672                                  */
5673                                 header->attributes |= RDATASET_ATTR_STALE;
5674                                 rbtnode->dirty = 1;
5675                         }
5676                 } else if (EXISTS(header)) {
5677                         if (header->type == matchtype)
5678                                 found = header;
5679                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5680                                  header->type == negtype)
5681                                 found = header;
5682                         else if (header->type == sigmatchtype)
5683                                 foundsig = header;
5684                 }
5685         }
5686         if (found != NULL) {
5687                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5688                 if (!NEGATIVE(found) && foundsig != NULL)
5689                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5690                                       sigrdataset);
5691         }
5692
5693         NODE_UNLOCK(lock, locktype);
5694
5695         if (found == NULL)
5696                 return (ISC_R_NOTFOUND);
5697
5698         if (NEGATIVE(found)) {
5699                 /*
5700                  * We found a negative cache entry.
5701                  */
5702                 if (NXDOMAIN(found))
5703                         result = DNS_R_NCACHENXDOMAIN;
5704                 else
5705                         result = DNS_R_NCACHENXRRSET;
5706         }
5707
5708         return (result);
5709 }
5710
5711 static isc_result_t
5712 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5713              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5714 {
5715         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5716         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5717         rbtdb_version_t *rbtversion = version;
5718         rbtdb_rdatasetiter_t *iterator;
5719         unsigned int refs;
5720
5721         REQUIRE(VALID_RBTDB(rbtdb));
5722
5723         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5724         if (iterator == NULL)
5725                 return (ISC_R_NOMEMORY);
5726
5727         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5728                 now = 0;
5729                 if (rbtversion == NULL)
5730                         currentversion(db,
5731                                  (dns_dbversion_t **) (void *)(&rbtversion));
5732                 else {
5733                         unsigned int refs;
5734
5735                         isc_refcount_increment(&rbtversion->references,
5736                                                &refs);
5737                         INSIST(refs > 1);
5738                 }
5739         } else {
5740                 if (now == 0)
5741                         isc_stdtime_get(&now);
5742                 rbtversion = NULL;
5743         }
5744
5745         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5746         iterator->common.methods = &rdatasetiter_methods;
5747         iterator->common.db = db;
5748         iterator->common.node = node;
5749         iterator->common.version = (dns_dbversion_t *)rbtversion;
5750         iterator->common.now = now;
5751
5752         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5753
5754         dns_rbtnode_refincrement(rbtnode, &refs);
5755         INSIST(refs != 0);
5756
5757         iterator->current = NULL;
5758
5759         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5760
5761         *iteratorp = (dns_rdatasetiter_t *)iterator;
5762
5763         return (ISC_R_SUCCESS);
5764 }
5765
5766 static isc_boolean_t
5767 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5768         rdatasetheader_t *header, *header_next;
5769         isc_boolean_t cname, other_data;
5770         dns_rdatatype_t rdtype;
5771
5772         /*
5773          * The caller must hold the node lock.
5774          */
5775
5776         /*
5777          * Look for CNAME and "other data" rdatasets active in our version.
5778          */
5779         cname = ISC_FALSE;
5780         other_data = ISC_FALSE;
5781         for (header = node->data; header != NULL; header = header_next) {
5782                 header_next = header->next;
5783                 if (header->type == dns_rdatatype_cname) {
5784                         /*
5785                          * Look for an active extant CNAME.
5786                          */
5787                         do {
5788                                 if (header->serial <= serial &&
5789                                     !IGNORE(header)) {
5790                                         /*
5791                                          * Is this a "this rdataset doesn't
5792                                          * exist" record?
5793                                          */
5794                                         if (NONEXISTENT(header))
5795                                                 header = NULL;
5796                                         break;
5797                                 } else
5798                                         header = header->down;
5799                         } while (header != NULL);
5800                         if (header != NULL)
5801                                 cname = ISC_TRUE;
5802                 } else {
5803                         /*
5804                          * Look for active extant "other data".
5805                          *
5806                          * "Other data" is any rdataset whose type is not
5807                          * KEY, NSEC, SIG or RRSIG.
5808                          */
5809                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5810                         if (rdtype != dns_rdatatype_key &&
5811                             rdtype != dns_rdatatype_sig &&
5812                             rdtype != dns_rdatatype_nsec &&
5813                             rdtype != dns_rdatatype_rrsig) {
5814                                 /*
5815                                  * Is it active and extant?
5816                                  */
5817                                 do {
5818                                         if (header->serial <= serial &&
5819                                             !IGNORE(header)) {
5820                                                 /*
5821                                                  * Is this a "this rdataset
5822                                                  * doesn't exist" record?
5823                                                  */
5824                                                 if (NONEXISTENT(header))
5825                                                         header = NULL;
5826                                                 break;
5827                                         } else
5828                                                 header = header->down;
5829                                 } while (header != NULL);
5830                                 if (header != NULL)
5831                                         other_data = ISC_TRUE;
5832                         }
5833                 }
5834         }
5835
5836         if (cname && other_data)
5837                 return (ISC_TRUE);
5838
5839         return (ISC_FALSE);
5840 }
5841
5842 static isc_result_t
5843 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5844         isc_result_t result;
5845
5846         INSIST(!IS_CACHE(rbtdb));
5847         INSIST(newheader->heap_index == 0);
5848         INSIST(!ISC_LINK_LINKED(newheader, link));
5849
5850         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5851         return (result);
5852 }
5853
/*
 * add(): link 'newheader' into the rdataset list of 'rbtnode'.
 *
 * The caller must hold the node lock.
 *
 * Arguments, as used by the code below:
 *
 *   rbtversion     NULL selects the path the comments below call the
 *                  "cache DB" path (trust/TTL comparisons, staling of
 *                  superseded data).  When non-NULL and 'loading' is
 *                  false a changed record is created for the node.
 *   newheader      The header to add.  It is either linked into the node
 *                  or passed to free_rdataset() before this function
 *                  returns; after a successful merge the original is
 *                  freed and the merged slab is linked in its place.
 *   options        DNS_DBADD_MERGE (requires a version) requests the
 *                  union of the new and existing rdataset;
 *                  DNS_DBADD_FORCE makes the new data compare as
 *                  dns_trust_ultimate; DNS_DBADD_EXACT and
 *                  DNS_DBADD_EXACTTTL tighten the merge.
 *   loading        When true no changed record is made and a replaced
 *                  header is freed immediately instead of being kept on
 *                  the 'down' chain.
 *   addedrdataset  If non-NULL, bound to the header that ends up
 *                  representing the data (the new one, or the existing
 *                  one when it is kept) on the paths that call
 *                  bind_rdataset() below.
 *
 * Returns:
 *   ISC_R_SUCCESS        the header was linked, or an equal NS/A/AAAA
 *                        rdataset was already cached and was kept.
 *   DNS_R_UNCHANGED      nothing was added (already nonexistent, or the
 *                        existing cache data is more trusted).
 *   ISC_R_NOMEMORY       add_changed() failed.
 *   DNS_R_NOTEXACT       DNS_DBADD_EXACTTTL was set and the TTLs differ.
 *   DNS_R_CNAMEANDOTHER  cname_and_other_data() is true after linking.
 *   Any failure result of dns_rdataslab_merge().
 */
5854 static isc_result_t
5855 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5856     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5857     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5858 {
5859         rbtdb_changed_t *changed = NULL;
5860         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5861         unsigned char *merged;
5862         isc_result_t result;
5863         isc_boolean_t header_nx;
5864         isc_boolean_t newheader_nx;
5865         isc_boolean_t merge;
5866         dns_rdatatype_t rdtype, covers;
5867         rbtdb_rdatatype_t negtype, sigtype;
5868         dns_trust_t trust;
5869         int idx;
5870
5871         /*
5872          * Add an rdatasetheader_t to a node.
5873          */
5874
5875         /*
5876          * Caller must be holding the node lock.
5877          */
5878
5879         if ((options & DNS_DBADD_MERGE) != 0) {
5880                 REQUIRE(rbtversion != NULL);
5881                 merge = ISC_TRUE;
5882         } else
5883                 merge = ISC_FALSE;
5884
             /*
              * 'trust' is the trust level used for comparisons against
              * existing data; newheader->trust itself is not modified here.
              */
5885         if ((options & DNS_DBADD_FORCE) != 0)
5886                 trust = dns_trust_ultimate;
5887         else
5888                 trust = newheader->trust;
5889
5890         if (rbtversion != NULL && !loading) {
5891                 /*
5892                  * We always add a changed record, even if no changes end up
5893                  * being made to this node, because it's harmless and
5894                  * simplifies the code.
5895                  */
5896                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5897                 if (changed == NULL) {
5898                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5899                         return (ISC_R_NOMEMORY);
5900                 }
5901         }
5902
5903         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5904         topheader_prev = NULL;
5905         sigheader = NULL;
5906         negtype = 0;
             /*
              * Version-less (cache) pre-scan, skipped when 'newheader' is a
              * "nonexistent" marker.  It either jumps straight to
              * find_header or sets 'negtype' so that the type search further
              * down also matches the corresponding negative/positive entry.
              */
5907         if (rbtversion == NULL && !newheader_nx) {
5908                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5909                 if (NEGATIVE(newheader)) {
5910                         /*
5911                          * We're adding a negative cache entry.
5912                          */
5913                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5914                         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5915                                                         covers);
5916                         for (topheader = rbtnode->data;
5917                              topheader != NULL;
5918                              topheader = topheader->next) {
5919                                 /*
5920                                  * If we're adding a negative cache entry
5921                                  * which covers all types (NXDOMAIN,
5922                                  * NODATA(QTYPE=ANY)).
5923                                  *
5924                                  * We make all other data stale so that the
5925                                  * only rdataset that can be found at this
5926                                  * node is the negative cache entry.
5927                                  *
5928                                  * Otherwise look for any RRSIGs of the
5929                                  * given type so they can be marked stale
5930                                  * later.
5931                                  */
5932                                 if (covers == dns_rdatatype_any) {
5933                                         set_ttl(rbtdb, topheader, 0);
5934                                         topheader->attributes |=
5935                                                 RDATASET_ATTR_STALE;
5936                                         rbtnode->dirty = 1;
5937                                 } else if (topheader->type == sigtype)
5938                                         sigheader = topheader;
5939                         }
                             /*
                              * The loop above has no break, so 'topheader'
                              * is NULL here and find_header will take the
                              * "nothing of this type exists" branch.
                              */
5940                         if (covers == dns_rdatatype_any)
5941                                 goto find_header;
5942                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5943                 } else {
5944                         /*
5945                          * We're adding something that isn't a
5946                          * negative cache entry.  Look for an extant
5947                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5948                          * cache entry.
5949                          */
5950                         for (topheader = rbtnode->data;
5951                              topheader != NULL;
5952                              topheader = topheader->next) {
5953                                 if (topheader->type ==
5954                                     RBTDB_RDATATYPE_NCACHEANY)
5955                                         break;
5956                         }
5957                         if (topheader != NULL && EXISTS(topheader) &&
5958                             topheader->rdh_ttl > now) {
5959                                 /*
5960                                  * Found one.
5961                                  */
5962                                 if (trust < topheader->trust) {
5963                                         /*
5964                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5965                                          * is more trusted.
5966                                          */
5967                                         free_rdataset(rbtdb,
5968                                                       rbtdb->common.mctx,
5969                                                       newheader);
5970                                         if (addedrdataset != NULL)
5971                                                 bind_rdataset(rbtdb, rbtnode,
5972                                                               topheader, now,
5973                                                               addedrdataset);
5974                                         return (DNS_R_UNCHANGED);
5975                                 }
5976                                 /*
5977                                  * The new rdataset is better.  Expire the
5978                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5979                                  */
5980                                 set_ttl(rbtdb, topheader, 0);
5981                                 topheader->attributes |= RDATASET_ATTR_STALE;
5982                                 rbtnode->dirty = 1;
5983                                 topheader = NULL;
5984                                 goto find_header;
5985                         }
5986                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5987                 }
5988         }
5989
             /*
              * Find the top-of-chain header whose type equals the new type
              * (or 'negtype'), remembering its predecessor in
              * 'topheader_prev' so the new header can be spliced in.
              */
5990         for (topheader = rbtnode->data;
5991              topheader != NULL;
5992              topheader = topheader->next) {
5993                 if (topheader->type == newheader->type ||
5994                     topheader->type == negtype)
5995                         break;
5996                 topheader_prev = topheader;
5997         }
5998
5999  find_header:
6000         /*
6001          * If header isn't NULL, we've found the right type.  There may be
6002          * IGNORE rdatasets between the top of the chain and the first real
6003          * data.  We skip over them.
6004          */
6005         header = topheader;
6006         while (header != NULL && IGNORE(header))
6007                 header = header->down;
6008         if (header != NULL) {
6009                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6010
6011                 /*
6012                  * Deleting an already non-existent rdataset has no effect.
6013                  */
6014                 if (header_nx && newheader_nx) {
6015                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6016                         return (DNS_R_UNCHANGED);
6017                 }
6018
6019                 /*
6020                  * Trying to add an rdataset with lower trust to a cache DB
6021                  * has no effect, provided that the cache data isn't stale.
6022                  */
6023                 if (rbtversion == NULL && trust < header->trust &&
6024                     (header->rdh_ttl > now || header_nx)) {
6025                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6026                         if (addedrdataset != NULL)
6027                                 bind_rdataset(rbtdb, rbtnode, header, now,
6028                                               addedrdataset);
6029                         return (DNS_R_UNCHANGED);
6030                 }
6031
6032                 /*
6033                  * Don't merge if a nonexistent rdataset is involved.
6034                  */
6035                 if (merge && (header_nx || newheader_nx))
6036                         merge = ISC_FALSE;
6037
6038                 /*
6039                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6040                  * that is the union of 'newheader' and 'header'.
6041                  */
6042                 if (merge) {
6043                         unsigned int flags = 0;
6044                         INSIST(rbtversion->serial >= header->serial);
6045                         merged = NULL;
6046                         result = ISC_R_SUCCESS;
6047
                             /*
                              * A TTL mismatch is an error under
                              * DNS_DBADD_EXACTTTL; otherwise it only sets
                              * DNS_RDATASLAB_FORCE for the merge.
                              */
6048                         if ((options & DNS_DBADD_EXACT) != 0)
6049                                 flags |= DNS_RDATASLAB_EXACT;
6050                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6051                              newheader->rdh_ttl != header->rdh_ttl)
6052                                         result = DNS_R_NOTEXACT;
6053                         else if (newheader->rdh_ttl != header->rdh_ttl)
6054                                 flags |= DNS_RDATASLAB_FORCE;
6055                         if (result == ISC_R_SUCCESS)
6056                                 result = dns_rdataslab_merge(
6057                                              (unsigned char *)header,
6058                                              (unsigned char *)newheader,
6059                                              (unsigned int)(sizeof(*newheader)),
6060                                              rbtdb->common.mctx,
6061                                              rbtdb->common.rdclass,
6062                                              (dns_rdatatype_t)header->type,
6063                                              flags, &merged);
6064                         if (result == ISC_R_SUCCESS) {
6065                                 /*
6066                                  * If 'header' has the same serial number as
6067                                  * we do, we could clean it up now if we knew
6068                                  * that our caller had no references to it.
6069                                  * We don't know this, however, so we leave it
6070                                  * alone.  It will get cleaned up when
6071                                  * clean_zone_node() runs.
6072                                  */
6073                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6074                                               newheader);
6075                                 newheader = (rdatasetheader_t *)merged;
6076                                 init_rdataset(rbtdb, newheader);
6077                                 if (loading && RESIGN(newheader) &&
6078                                     RESIGN(header) &&
6079                                     header->resign < newheader->resign)
6080                                         newheader->resign = header->resign;
6081                         } else {
6082                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6083                                               newheader);
6084                                 return (result);
6085                         }
6086                 }
6087                 /*
6088                  * Don't replace existing NS, A and AAAA RRsets
6089                  * in the cache if they already exist.  This
6090                  * prevents named being locked to old servers.
6091                  * Don't lower trust of existing record if the
6092                  * update is forced.
6093                  */
6094                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6095                     header->type == dns_rdatatype_ns &&
6096                     !header_nx && !newheader_nx &&
6097                     header->trust >= newheader->trust &&
6098                     dns_rdataslab_equalx((unsigned char *)header,
6099                                          (unsigned char *)newheader,
6100                                          (unsigned int)(sizeof(*newheader)),
6101                                          rbtdb->common.rdclass,
6102                                          (dns_rdatatype_t)header->type)) {
6103                         /*
6104                          * Honour the new ttl if it is less than the
6105                          * older one.
6106                          */
6107                         if (header->rdh_ttl > newheader->rdh_ttl)
6108                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
                             /*
                              * Move noqname/closest from the new header to
                              * the kept one when the kept one has none;
                              * clearing the source pointer keeps
                              * free_rdataset() below from seeing it.
                              */
6109                         if (header->noqname == NULL &&
6110                             newheader->noqname != NULL) {
6111                                 header->noqname = newheader->noqname;
6112                                 newheader->noqname = NULL;
6113                         }
6114                         if (header->closest == NULL &&
6115                             newheader->closest != NULL) {
6116                                 header->closest = newheader->closest;
6117                                 newheader->closest = NULL;
6118                         }
6119                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6120                         if (addedrdataset != NULL)
6121                                 bind_rdataset(rbtdb, rbtnode, header, now,
6122                                               addedrdataset);
6123                         return (ISC_R_SUCCESS);
6124                 }
                     /*
                      * Same treatment for A and AAAA, compared with
                      * dns_rdataslab_equal() instead of
                      * dns_rdataslab_equalx().
                      */
6125                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6126                     (header->type == dns_rdatatype_a ||
6127                      header->type == dns_rdatatype_aaaa) &&
6128                     !header_nx && !newheader_nx &&
6129                     header->trust >= newheader->trust &&
6130                     dns_rdataslab_equal((unsigned char *)header,
6131                                         (unsigned char *)newheader,
6132                                         (unsigned int)(sizeof(*newheader)))) {
6133                         /*
6134                          * Honour the new ttl if it is less than the
6135                          * older one.
6136                          */
6137                         if (header->rdh_ttl > newheader->rdh_ttl)
6138                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
6139                         if (header->noqname == NULL &&
6140                             newheader->noqname != NULL) {
6141                                 header->noqname = newheader->noqname;
6142                                 newheader->noqname = NULL;
6143                         }
6144                         if (header->closest == NULL &&
6145                             newheader->closest != NULL) {
6146                                 header->closest = newheader->closest;
6147                                 newheader->closest = NULL;
6148                         }
6149                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6150                         if (addedrdataset != NULL)
6151                                 bind_rdataset(rbtdb, rbtnode, header, now,
6152                                               addedrdataset);
6153                         return (ISC_R_SUCCESS);
6154                 }
                     /*
                      * Splice 'newheader' into the top-level list in the
                      * position 'topheader' occupied.
                      */
6155                 INSIST(rbtversion == NULL ||
6156                        rbtversion->serial >= topheader->serial);
6157                 if (topheader_prev != NULL)
6158                         topheader_prev->next = newheader;
6159                 else
6160                         rbtnode->data = newheader;
6161                 newheader->next = topheader->next;
6162                 if (loading) {
6163                         /*
6164                          * There are no other references to 'header' when
6165                          * loading, so we MAY clean up 'header' now.
6166                          * Since we don't generate changed records when
6167                          * loading, we MUST clean up 'header' now.
6168                          */
6169                         newheader->down = NULL;
6170                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
6171                 } else {
                             /*
                              * Keep the old chain below the new header.
                              * NOTE(review): the superseded top header's
                              * 'next' is pointed at 'newheader' rather than
                              * left alone -- presumably so that code still
                              * holding the old header can reach the list;
                              * confirm against the iterator/cleanup code.
                              */
6172                         newheader->down = topheader;
6173                         topheader->next = newheader;
6174                         rbtnode->dirty = 1;
6175                         if (changed != NULL)
6176                                 changed->dirty = ISC_TRUE;
6177                         if (rbtversion == NULL) {
6178                                 set_ttl(rbtdb, header, 0);
6179                                 header->attributes |= RDATASET_ATTR_STALE;
6180                                 if (sigheader != NULL) {
6181                                         set_ttl(rbtdb, sigheader, 0);
6182                                         sigheader->attributes |=
6183                                                  RDATASET_ATTR_STALE;
6184                                 }
6185                         }
6186                         idx = newheader->node->locknum;
6187                         if (IS_CACHE(rbtdb)) {
6188                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6189                                                  newheader, link);
6190                                 /*
6191                                  * XXXMLG We don't check the return value
6192                                  * here.  If it fails, we will not do TTL
6193                                  * based expiry on this node.  However, we
6194                                  * will do it on the LRU side, so memory
6195                                  * will not leak... for long.
6196                                  */
6197                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
6198                         } else if (RESIGN(newheader))
                                     /*
                                      * The result of resign_insert() is
                                      * discarded here as well.
                                      */
6199                                 resign_insert(rbtdb, idx, newheader);
6200                 }
6201         } else {
6202                 /*
6203                  * No non-IGNORED rdatasets of the given type exist at
6204                  * this node.
6205                  */
6206
6207                 /*
6208                  * If we're trying to delete the type, don't bother.
6209                  */
6210                 if (newheader_nx) {
6211                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6212                         return (DNS_R_UNCHANGED);
6213                 }
6214
6215                 if (topheader != NULL) {
6216                         /*
6217                          * We have a list of rdatasets of the given type,
6218                          * but they're all marked IGNORE.  We simply insert
6219                          * the new rdataset at the head of the list.
6220                          *
6221                          * Ignored rdatasets cannot occur during loading, so
6222                          * we INSIST on it.
6223                          */
6224                         INSIST(!loading);
6225                         INSIST(rbtversion == NULL ||
6226                                rbtversion->serial >= topheader->serial);
6227                         if (topheader_prev != NULL)
6228                                 topheader_prev->next = newheader;
6229                         else
6230                                 rbtnode->data = newheader;
6231                         newheader->next = topheader->next;
6232                         newheader->down = topheader;
6233                         topheader->next = newheader;
6234                         rbtnode->dirty = 1;
6235                         if (changed != NULL)
6236                                 changed->dirty = ISC_TRUE;
6237                 } else {
6238                         /*
6239                          * No rdatasets of the given type exist at the node.
6240                          */
6241                         newheader->next = rbtnode->data;
6242                         newheader->down = NULL;
6243                         rbtnode->data = newheader;
6244                 }
                     /*
                      * As in the replace path above, the results of
                      * isc_heap_insert() and resign_insert() are not
                      * checked.
                      */
6245                 idx = newheader->node->locknum;
6246                 if (IS_CACHE(rbtdb)) {
6247                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6248                                          newheader, link);
6249                         isc_heap_insert(rbtdb->heaps[idx], newheader);
6250                 } else if (RESIGN(newheader)) {
6251                         resign_insert(rbtdb, idx, newheader);
6252                 }
6253         }
6254
6255         /*
6256          * Check if the node now contains CNAME and other data.
6257          *
6258          * Note that 'newheader' has already been linked into the node at
6259          * this point, and 'addedrdataset' is not bound on this return.
6260          */
6258         if (rbtversion != NULL &&
6259             cname_and_other_data(rbtnode, rbtversion->serial))
6260                 return (DNS_R_CNAMEANDOTHER);
6261
6262         if (addedrdataset != NULL)
6263                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6264
6265         return (ISC_R_SUCCESS);
6266 }
6267
6268 static inline isc_boolean_t
6269 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6270                 rbtdb_rdatatype_t type)
6271 {
6272         if (IS_CACHE(rbtdb)) {
6273                 if (type == dns_rdatatype_dname)
6274                         return (ISC_TRUE);
6275                 else
6276                         return (ISC_FALSE);
6277         } else if (type == dns_rdatatype_dname ||
6278                    (type == dns_rdatatype_ns &&
6279                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
6280                 return (ISC_TRUE);
6281         return (ISC_FALSE);
6282 }
6283
6284 static inline isc_result_t
6285 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6286            dns_rdataset_t *rdataset)
6287 {
6288         struct noqname *noqname;
6289         isc_mem_t *mctx = rbtdb->common.mctx;
6290         dns_name_t name;
6291         dns_rdataset_t neg, negsig;
6292         isc_result_t result;
6293         isc_region_t r;
6294
6295         dns_name_init(&name, NULL);
6296         dns_rdataset_init(&neg);
6297         dns_rdataset_init(&negsig);
6298
6299         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6300         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6301
6302         noqname = isc_mem_get(mctx, sizeof(*noqname));
6303         if (noqname == NULL) {
6304                 result = ISC_R_NOMEMORY;
6305                 goto cleanup;
6306         }
6307         dns_name_init(&noqname->name, NULL);
6308         noqname->neg = NULL;
6309         noqname->negsig = NULL;
6310         noqname->type = neg.type;
6311         result = dns_name_dup(&name, mctx, &noqname->name);
6312         if (result != ISC_R_SUCCESS)
6313                 goto cleanup;
6314         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6315         if (result != ISC_R_SUCCESS)
6316                 goto cleanup;
6317         noqname->neg = r.base;
6318         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6319         if (result != ISC_R_SUCCESS)
6320                 goto cleanup;
6321         noqname->negsig = r.base;
6322         dns_rdataset_disassociate(&neg);
6323         dns_rdataset_disassociate(&negsig);
6324         newheader->noqname = noqname;
6325         return (ISC_R_SUCCESS);
6326
6327 cleanup:
6328         dns_rdataset_disassociate(&neg);
6329         dns_rdataset_disassociate(&negsig);
6330         free_noqname(mctx, &noqname);
6331         return(result);
6332 }
6333
6334 static inline isc_result_t
6335 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6336            dns_rdataset_t *rdataset)
6337 {
6338         struct noqname *closest;
6339         isc_mem_t *mctx = rbtdb->common.mctx;
6340         dns_name_t name;
6341         dns_rdataset_t neg, negsig;
6342         isc_result_t result;
6343         isc_region_t r;
6344
6345         dns_name_init(&name, NULL);
6346         dns_rdataset_init(&neg);
6347         dns_rdataset_init(&negsig);
6348
6349         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6350         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6351
6352         closest = isc_mem_get(mctx, sizeof(*closest));
6353         if (closest == NULL) {
6354                 result = ISC_R_NOMEMORY;
6355                 goto cleanup;
6356         }
6357         dns_name_init(&closest->name, NULL);
6358         closest->neg = NULL;
6359         closest->negsig = NULL;
6360         closest->type = neg.type;
6361         result = dns_name_dup(&name, mctx, &closest->name);
6362         if (result != ISC_R_SUCCESS)
6363                 goto cleanup;
6364         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6365         if (result != ISC_R_SUCCESS)
6366                 goto cleanup;
6367         closest->neg = r.base;
6368         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6369         if (result != ISC_R_SUCCESS)
6370                 goto cleanup;
6371         closest->negsig = r.base;
6372         dns_rdataset_disassociate(&neg);
6373         dns_rdataset_disassociate(&negsig);
6374         newheader->closest = closest;
6375         return (ISC_R_SUCCESS);
6376
6377  cleanup:
6378         dns_rdataset_disassociate(&neg);
6379         dns_rdataset_disassociate(&negsig);
6380         free_noqname(mctx, &closest);
6381         return(result);
6382 }
6383
6384 static dns_dbmethods_t zone_methods;
6385
6386 static isc_result_t
6387 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6388             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6389             dns_rdataset_t *addedrdataset)
6390 {
6391         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6392         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6393         rbtdb_version_t *rbtversion = version;
6394         isc_region_t region;
6395         rdatasetheader_t *newheader;
6396         rdatasetheader_t *header;
6397         isc_result_t result;
6398         isc_boolean_t delegating;
6399         isc_boolean_t newnsec;
6400         isc_boolean_t tree_locked = ISC_FALSE;
6401         isc_boolean_t cache_is_overmem = ISC_FALSE;
6402
6403         REQUIRE(VALID_RBTDB(rbtdb));
6404
6405         if (rbtdb->common.methods == &zone_methods)
6406                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6407                           (rdataset->type == dns_rdatatype_nsec3 ||
6408                            rdataset->covers == dns_rdatatype_nsec3)) ||
6409                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6410                            rdataset->type != dns_rdatatype_nsec3 &&
6411                            rdataset->covers != dns_rdatatype_nsec3)));
6412
6413         if (rbtversion == NULL) {
6414                 if (now == 0)
6415                         isc_stdtime_get(&now);
6416         } else
6417                 now = 0;
6418
6419         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6420                                             &region,
6421                                             sizeof(rdatasetheader_t));
6422         if (result != ISC_R_SUCCESS)
6423                 return (result);
6424
6425         newheader = (rdatasetheader_t *)region.base;
6426         init_rdataset(rbtdb, newheader);
6427         set_ttl(rbtdb, newheader, rdataset->ttl + now);
6428         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6429                                                 rdataset->covers);
6430         newheader->attributes = 0;
6431         newheader->noqname = NULL;
6432         newheader->closest = NULL;
6433         newheader->count = init_count++;
6434         newheader->trust = rdataset->trust;
6435         newheader->additional_auth = NULL;
6436         newheader->additional_glue = NULL;
6437         newheader->last_used = now;
6438         newheader->node = rbtnode;
6439         if (rbtversion != NULL) {
6440                 newheader->serial = rbtversion->serial;
6441                 now = 0;
6442
6443                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6444                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6445                         newheader->resign = rdataset->resign;
6446                 } else
6447                         newheader->resign = 0;
6448         } else {
6449                 newheader->serial = 1;
6450                 newheader->resign = 0;
6451                 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6452                         newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6453                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6454                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6455                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6456                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6457                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6458                         result = addnoqname(rbtdb, newheader, rdataset);
6459                         if (result != ISC_R_SUCCESS) {
6460                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6461                                               newheader);
6462                                 return (result);
6463                         }
6464                 }
6465                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6466                         result = addclosest(rbtdb, newheader, rdataset);
6467                         if (result != ISC_R_SUCCESS) {
6468                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6469                                               newheader);
6470                                 return (result);
6471                         }
6472                 }
6473         }
6474
6475         /*
6476          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6477          * just DNAME for the cache), then we need to set the callback bit
6478          * on the node.
6479          */
6480         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6481                 delegating = ISC_TRUE;
6482         else
6483                 delegating = ISC_FALSE;
6484
6485         /*
6486          * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6487          */
6488         if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6489             rdataset->type == dns_rdatatype_nsec)
6490                 newnsec = ISC_TRUE;
6491         else
6492                 newnsec = ISC_FALSE;
6493
6494         /*
6495          * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6496          * or the DB is a cache in an overmem state, hold an exclusive lock on
6497          * the tree.  In the latter case the lock does not necessarily have to
6498          * be acquired but it will help purge stale entries more effectively.
6499          */
6500         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6501                 cache_is_overmem = ISC_TRUE;
6502         if (delegating || newnsec || cache_is_overmem) {
6503                 tree_locked = ISC_TRUE;
6504                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6505         }
6506
6507         if (cache_is_overmem)
6508                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6509
6510         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6511                   isc_rwlocktype_write);
6512
6513         if (rbtdb->rrsetstats != NULL) {
6514                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6515                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6516         }
6517
6518         if (IS_CACHE(rbtdb)) {
6519                 if (tree_locked)
6520                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6521
6522                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6523                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6524                         expire_header(rbtdb, header, tree_locked);
6525
6526                 /*
6527                  * If we've been holding a write lock on the tree just for
6528                  * cleaning, we can release it now.  However, we still need the
6529                  * node lock.
6530                  */
6531                 if (tree_locked && !delegating && !newnsec) {
6532                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6533                         tree_locked = ISC_FALSE;
6534                 }
6535         }
6536
6537         result = ISC_R_SUCCESS;
6538         if (newnsec) {
6539                 dns_fixedname_t fname;
6540                 dns_name_t *name;
6541                 dns_rbtnode_t *nsecnode;
6542
6543                 dns_fixedname_init(&fname);
6544                 name = dns_fixedname_name(&fname);
6545                 dns_rbt_fullnamefromnode(rbtnode, name);
6546                 nsecnode = NULL;
6547                 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6548                 if (result == ISC_R_SUCCESS) {
6549                         nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6550                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6551                 } else if (result == ISC_R_EXISTS) {
6552                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6553                         result = ISC_R_SUCCESS;
6554                 }
6555         }
6556
6557         if (result == ISC_R_SUCCESS)
6558                 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6559                              ISC_FALSE, addedrdataset, now);
6560         if (result == ISC_R_SUCCESS && delegating)
6561                 rbtnode->find_callback = 1;
6562
6563         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6564                     isc_rwlocktype_write);
6565
6566         if (tree_locked)
6567                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6568
6569         /*
6570          * Update the zone's secure status.  If version is non-NULL
6571          * this is deferred until closeversion() is called.
6572          */
6573         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6574                 iszonesecure(db, version, rbtdb->origin_node);
6575
6576         return (result);
6577 }
6578
6579 static isc_result_t
6580 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6581                  dns_rdataset_t *rdataset, unsigned int options,
6582                  dns_rdataset_t *newrdataset)
6583 {
6584         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6585         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6586         rbtdb_version_t *rbtversion = version;
6587         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6588         unsigned char *subresult;
6589         isc_region_t region;
6590         isc_result_t result;
6591         rbtdb_changed_t *changed;
6592
6593         REQUIRE(VALID_RBTDB(rbtdb));
6594
6595         if (rbtdb->common.methods == &zone_methods)
6596                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6597                           (rdataset->type == dns_rdatatype_nsec3 ||
6598                            rdataset->covers == dns_rdatatype_nsec3)) ||
6599                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6600                            rdataset->type != dns_rdatatype_nsec3 &&
6601                            rdataset->covers != dns_rdatatype_nsec3)));
6602
6603         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6604                                             &region,
6605                                             sizeof(rdatasetheader_t));
6606         if (result != ISC_R_SUCCESS)
6607                 return (result);
6608         newheader = (rdatasetheader_t *)region.base;
6609         init_rdataset(rbtdb, newheader);
6610         set_ttl(rbtdb, newheader, rdataset->ttl);
6611         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6612                                                 rdataset->covers);
6613         newheader->attributes = 0;
6614         newheader->serial = rbtversion->serial;
6615         newheader->trust = 0;
6616         newheader->noqname = NULL;
6617         newheader->closest = NULL;
6618         newheader->count = init_count++;
6619         newheader->additional_auth = NULL;
6620         newheader->additional_glue = NULL;
6621         newheader->last_used = 0;
6622         newheader->node = rbtnode;
6623         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6624                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6625                 newheader->resign = rdataset->resign;
6626         } else
6627                 newheader->resign = 0;
6628
6629         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6630                   isc_rwlocktype_write);
6631
6632         changed = add_changed(rbtdb, rbtversion, rbtnode);
6633         if (changed == NULL) {
6634                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6635                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6636                             isc_rwlocktype_write);
6637                 return (ISC_R_NOMEMORY);
6638         }
6639
6640         topheader_prev = NULL;
6641         for (topheader = rbtnode->data;
6642              topheader != NULL;
6643              topheader = topheader->next) {
6644                 if (topheader->type == newheader->type)
6645                         break;
6646                 topheader_prev = topheader;
6647         }
6648         /*
6649          * If header isn't NULL, we've found the right type.  There may be
6650          * IGNORE rdatasets between the top of the chain and the first real
6651          * data.  We skip over them.
6652          */
6653         header = topheader;
6654         while (header != NULL && IGNORE(header))
6655                 header = header->down;
6656         if (header != NULL && EXISTS(header)) {
6657                 unsigned int flags = 0;
6658                 subresult = NULL;
6659                 result = ISC_R_SUCCESS;
6660                 if ((options & DNS_DBSUB_EXACT) != 0) {
6661                         flags |= DNS_RDATASLAB_EXACT;
6662                         if (newheader->rdh_ttl != header->rdh_ttl)
6663                                 result = DNS_R_NOTEXACT;
6664                 }
6665                 if (result == ISC_R_SUCCESS)
6666                         result = dns_rdataslab_subtract(
6667                                         (unsigned char *)header,
6668                                         (unsigned char *)newheader,
6669                                         (unsigned int)(sizeof(*newheader)),
6670                                         rbtdb->common.mctx,
6671                                         rbtdb->common.rdclass,
6672                                         (dns_rdatatype_t)header->type,
6673                                         flags, &subresult);
6674                 if (result == ISC_R_SUCCESS) {
6675                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6676                         newheader = (rdatasetheader_t *)subresult;
6677                         init_rdataset(rbtdb, newheader);
6678                         /*
6679                          * We have to set the serial since the rdataslab
6680                          * subtraction routine copies the reserved portion of
6681                          * header, not newheader.
6682                          */
6683                         newheader->serial = rbtversion->serial;
6684                         /*
6685                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6686                          * to additional info.  We need to clear these fields
6687                          * to avoid having duplicated references.
6688                          */
6689                         newheader->additional_auth = NULL;
6690                         newheader->additional_glue = NULL;
6691                 } else if (result == DNS_R_NXRRSET) {
6692                         /*
6693                          * This subtraction would remove all of the rdata;
6694                          * add a nonexistent header instead.
6695                          */
6696                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6697                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6698                         if (newheader == NULL) {
6699                                 result = ISC_R_NOMEMORY;
6700                                 goto unlock;
6701                         }
6702                         set_ttl(rbtdb, newheader, 0);
6703                         newheader->type = topheader->type;
6704                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6705                         newheader->trust = 0;
6706                         newheader->serial = rbtversion->serial;
6707                         newheader->noqname = NULL;
6708                         newheader->closest = NULL;
6709                         newheader->count = 0;
6710                         newheader->additional_auth = NULL;
6711                         newheader->additional_glue = NULL;
6712                         newheader->node = rbtnode;
6713                         newheader->resign = 0;
6714                         newheader->last_used = 0;
6715                 } else {
6716                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6717                         goto unlock;
6718                 }
6719
6720                 /*
6721                  * If we're here, we want to link newheader in front of
6722                  * topheader.
6723                  */
6724                 INSIST(rbtversion->serial >= topheader->serial);
6725                 if (topheader_prev != NULL)
6726                         topheader_prev->next = newheader;
6727                 else
6728                         rbtnode->data = newheader;
6729                 newheader->next = topheader->next;
6730                 newheader->down = topheader;
6731                 topheader->next = newheader;
6732                 rbtnode->dirty = 1;
6733                 changed->dirty = ISC_TRUE;
6734         } else {
6735                 /*
6736                  * The rdataset doesn't exist, so we don't need to do anything
6737                  * to satisfy the deletion request.
6738                  */
6739                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6740                 if ((options & DNS_DBSUB_EXACT) != 0)
6741                         result = DNS_R_NOTEXACT;
6742                 else
6743                         result = DNS_R_UNCHANGED;
6744         }
6745
6746         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6747                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6748
6749  unlock:
6750         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6751                     isc_rwlocktype_write);
6752
6753         /*
6754          * Update the zone's secure status.  If version is non-NULL
6755          * this is deferred until closeversion() is called.
6756          */
6757         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6758                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6759
6760         return (result);
6761 }
6762
6763 static isc_result_t
6764 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6765                dns_rdatatype_t type, dns_rdatatype_t covers)
6766 {
6767         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6768         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6769         rbtdb_version_t *rbtversion = version;
6770         isc_result_t result;
6771         rdatasetheader_t *newheader;
6772
6773         REQUIRE(VALID_RBTDB(rbtdb));
6774
6775         if (type == dns_rdatatype_any)
6776                 return (ISC_R_NOTIMPLEMENTED);
6777         if (type == dns_rdatatype_rrsig && covers == 0)
6778                 return (ISC_R_NOTIMPLEMENTED);
6779
6780         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6781         if (newheader == NULL)
6782                 return (ISC_R_NOMEMORY);
6783         set_ttl(rbtdb, newheader, 0);
6784         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6785         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6786         newheader->trust = 0;
6787         newheader->noqname = NULL;
6788         newheader->closest = NULL;
6789         newheader->additional_auth = NULL;
6790         newheader->additional_glue = NULL;
6791         if (rbtversion != NULL)
6792                 newheader->serial = rbtversion->serial;
6793         else
6794                 newheader->serial = 0;
6795         newheader->count = 0;
6796         newheader->last_used = 0;
6797         newheader->node = rbtnode;
6798
6799         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6800                   isc_rwlocktype_write);
6801
6802         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6803                      ISC_FALSE, NULL, 0);
6804
6805         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6806                     isc_rwlocktype_write);
6807
6808         /*
6809          * Update the zone's secure status.  If version is non-NULL
6810          * this is deferred until closeversion() is called.
6811          */
6812         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6813                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6814
6815         return (result);
6816 }
6817
6818 /*
6819  * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
6820  */
6821 static isc_result_t
6822 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6823          isc_boolean_t hasnsec)
6824 {
6825         isc_result_t noderesult, nsecresult;
6826         dns_rbtnode_t *nsecnode;
6827
6828         noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
6829
6830 #ifdef BIND9
6831         if (noderesult == ISC_R_SUCCESS)
6832                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, name);
6833 #endif
6834
6835         if (!hasnsec)
6836                 return (noderesult);
6837         if (noderesult == ISC_R_EXISTS) {
6838                 /*
6839                  * Add a node to the auxiliary NSEC tree for an old node
6840                  * just now getting an NSEC record.
6841                  */
6842                 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6843                         return (noderesult);
6844         } else if (noderesult != ISC_R_SUCCESS) {
6845                 return (noderesult);
6846         }
6847
6848         /*
6849          * Build the auxiliary tree for NSECs as we go.
6850          * This tree speeds searches for closest NSECs that would otherwise
6851          * need to examine many irrelevant nodes in large TLDs.
6852          *
6853          * Add nodes to the auxiliary tree after corresponding nodes have
6854          * been added to the main tree.
6855          */
6856         nsecnode = NULL;
6857         nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6858         if (nsecresult == ISC_R_SUCCESS) {
6859                 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6860                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6861                 return (noderesult);
6862         }
6863
6864         if (nsecresult == ISC_R_EXISTS) {
6865 #if 1 /* 0 */
6866                 isc_log_write(dns_lctx,
6867                               DNS_LOGCATEGORY_DATABASE,
6868                               DNS_LOGMODULE_CACHE,
6869                               ISC_LOG_WARNING,
6870                               "addnode: NSEC node already exists");
6871 #endif
6872                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6873                 return (noderesult);
6874         }
6875
6876         nsecresult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6877         if (nsecresult != ISC_R_SUCCESS)
6878                 isc_log_write(dns_lctx,
6879                               DNS_LOGCATEGORY_DATABASE,
6880                               DNS_LOGMODULE_CACHE,
6881                               ISC_LOG_WARNING,
6882                               "loading_addrdataset: "
6883                               "dns_rbt_deletenode: %s after "
6884                               "dns_rbt_addnode(NSEC): %s",
6885                               isc_result_totext(nsecresult),
6886                               isc_result_totext(noderesult));
6887         return (noderesult);
6888 }
6889
6890 static isc_result_t
6891 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6892         rbtdb_load_t *loadctx = arg;
6893         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6894         dns_rbtnode_t *node;
6895         isc_result_t result;
6896         isc_region_t region;
6897         rdatasetheader_t *newheader;
6898
6899         /*
6900          * This routine does no node locking.  See comments in
6901          * 'load' below for more information on loading and
6902          * locking.
6903          */
6904
6905
6906         /*
6907          * SOA records are only allowed at top of zone.
6908          */
6909         if (rdataset->type == dns_rdatatype_soa &&
6910             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6911                 return (DNS_R_NOTZONETOP);
6912
6913         if (rdataset->type != dns_rdatatype_nsec3 &&
6914             rdataset->covers != dns_rdatatype_nsec3)
6915                 add_empty_wildcards(rbtdb, name);
6916
6917         if (dns_name_iswildcard(name)) {
6918                 /*
6919                  * NS record owners cannot legally be wild cards.
6920                  */
6921                 if (rdataset->type == dns_rdatatype_ns)
6922                         return (DNS_R_INVALIDNS);
6923                 /*
6924                  * NSEC3 record owners cannot legally be wild cards.
6925                  */
6926                 if (rdataset->type == dns_rdatatype_nsec3)
6927                         return (DNS_R_INVALIDNSEC3);
6928                 result = add_wildcard_magic(rbtdb, name);
6929                 if (result != ISC_R_SUCCESS)
6930                         return (result);
6931         }
6932
6933         node = NULL;
6934         if (rdataset->type == dns_rdatatype_nsec3 ||
6935             rdataset->covers == dns_rdatatype_nsec3) {
6936                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6937                 if (result == ISC_R_SUCCESS)
6938                         node->nsec = DNS_RBT_NSEC_NSEC3;
6939         } else if (rdataset->type == dns_rdatatype_nsec) {
6940                 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6941         } else {
6942                 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6943         }
6944         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6945                 return (result);
6946         if (result == ISC_R_SUCCESS) {
6947                 dns_name_t foundname;
6948                 dns_name_init(&foundname, NULL);
6949                 dns_rbt_namefromnode(node, &foundname);
6950 #ifdef DNS_RBT_USEHASH
6951                 node->locknum = node->hashval % rbtdb->node_lock_count;
6952 #else
6953                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6954                         rbtdb->node_lock_count;
6955 #endif
6956         }
6957
6958         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6959                                             &region,
6960                                             sizeof(rdatasetheader_t));
6961         if (result != ISC_R_SUCCESS)
6962                 return (result);
6963         newheader = (rdatasetheader_t *)region.base;
6964         init_rdataset(rbtdb, newheader);
6965         set_ttl(rbtdb, newheader,
6966                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6967         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6968                                                 rdataset->covers);
6969         newheader->attributes = 0;
6970         newheader->trust = rdataset->trust;
6971         newheader->serial = 1;
6972         newheader->noqname = NULL;
6973         newheader->closest = NULL;
6974         newheader->count = init_count++;
6975         newheader->additional_auth = NULL;
6976         newheader->additional_glue = NULL;
6977         newheader->last_used = 0;
6978         newheader->node = node;
6979         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6980                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6981                 newheader->resign = rdataset->resign;
6982         } else
6983                 newheader->resign = 0;
6984
6985         result = add(rbtdb, node, rbtdb->current_version, newheader,
6986                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6987         if (result == ISC_R_SUCCESS &&
6988             delegating_type(rbtdb, node, rdataset->type))
6989                 node->find_callback = 1;
6990         else if (result == DNS_R_UNCHANGED)
6991                 result = ISC_R_SUCCESS;
6992
6993         return (result);
6994 }
6995
6996 static isc_result_t
6997 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6998         rbtdb_load_t *loadctx;
6999         dns_rbtdb_t *rbtdb;
7000
7001         rbtdb = (dns_rbtdb_t *)db;
7002
7003         REQUIRE(VALID_RBTDB(rbtdb));
7004
7005         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
7006         if (loadctx == NULL)
7007                 return (ISC_R_NOMEMORY);
7008
7009         loadctx->rbtdb = rbtdb;
7010         if (IS_CACHE(rbtdb))
7011                 isc_stdtime_get(&loadctx->now);
7012         else
7013                 loadctx->now = 0;
7014
7015         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7016
7017         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7018                 == 0);
7019         rbtdb->attributes |= RBTDB_ATTR_LOADING;
7020
7021         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7022
7023         *addp = loading_addrdataset;
7024         *dbloadp = loadctx;
7025
7026         return (ISC_R_SUCCESS);
7027 }
7028
7029 static isc_result_t
7030 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
7031         rbtdb_load_t *loadctx;
7032         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7033
7034         REQUIRE(VALID_RBTDB(rbtdb));
7035         REQUIRE(dbloadp != NULL);
7036         loadctx = *dbloadp;
7037         REQUIRE(loadctx->rbtdb == rbtdb);
7038
7039         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7040
7041         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7042         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7043
7044         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7045         rbtdb->attributes |= RBTDB_ATTR_LOADED;
7046
7047         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7048
7049         /*
7050          * If there's a KEY rdataset at the zone origin containing a
7051          * zone key, we consider the zone secure.
7052          */
7053         if (! IS_CACHE(rbtdb))
7054                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7055
7056         *dbloadp = NULL;
7057
7058         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7059
7060         return (ISC_R_SUCCESS);
7061 }
7062
7063 static isc_result_t
7064 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7065      dns_masterformat_t masterformat) {
7066         dns_rbtdb_t *rbtdb;
7067
7068         rbtdb = (dns_rbtdb_t *)db;
7069
7070         REQUIRE(VALID_RBTDB(rbtdb));
7071
7072 #ifdef BIND9
7073         return (dns_master_dump2(rbtdb->common.mctx, db, version,
7074                                  &dns_master_style_default,
7075                                  filename, masterformat));
7076 #else
7077         UNUSED(version);
7078         UNUSED(filename);
7079         UNUSED(masterformat);
7080
7081         return (ISC_R_NOTIMPLEMENTED);
7082 #endif /* BIND9 */
7083 }
7084
7085 static void
7086 delete_callback(void *data, void *arg) {
7087         dns_rbtdb_t *rbtdb = arg;
7088         rdatasetheader_t *current, *next;
7089         unsigned int locknum;
7090
7091         current = data;
7092         locknum = current->node->locknum;
7093         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7094         while (current != NULL) {
7095                 next = current->next;
7096                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
7097                 current = next;
7098         }
7099         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7100 }
7101
7102 static isc_boolean_t
7103 issecure(dns_db_t *db) {
7104         dns_rbtdb_t *rbtdb;
7105         isc_boolean_t secure;
7106
7107         rbtdb = (dns_rbtdb_t *)db;
7108
7109         REQUIRE(VALID_RBTDB(rbtdb));
7110
7111         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7112         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7113         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7114
7115         return (secure);
7116 }
7117
7118 static isc_boolean_t
7119 isdnssec(dns_db_t *db) {
7120         dns_rbtdb_t *rbtdb;
7121         isc_boolean_t dnssec;
7122
7123         rbtdb = (dns_rbtdb_t *)db;
7124
7125         REQUIRE(VALID_RBTDB(rbtdb));
7126
7127         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7128         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7129         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7130
7131         return (dnssec);
7132 }
7133
7134 static unsigned int
7135 nodecount(dns_db_t *db) {
7136         dns_rbtdb_t *rbtdb;
7137         unsigned int count;
7138
7139         rbtdb = (dns_rbtdb_t *)db;
7140
7141         REQUIRE(VALID_RBTDB(rbtdb));
7142
7143         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7144         count = dns_rbt_nodecount(rbtdb->tree);
7145         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7146
7147         return (count);
7148 }
7149
7150 static void
7151 settask(dns_db_t *db, isc_task_t *task) {
7152         dns_rbtdb_t *rbtdb;
7153
7154         rbtdb = (dns_rbtdb_t *)db;
7155
7156         REQUIRE(VALID_RBTDB(rbtdb));
7157
7158         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7159         if (rbtdb->task != NULL)
7160                 isc_task_detach(&rbtdb->task);
7161         if (task != NULL)
7162                 isc_task_attach(task, &rbtdb->task);
7163         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7164 }
7165
7166 static isc_boolean_t
7167 ispersistent(dns_db_t *db) {
7168         UNUSED(db);
7169         return (ISC_FALSE);
7170 }
7171
7172 static isc_result_t
7173 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7174         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7175         dns_rbtnode_t *onode;
7176         isc_result_t result = ISC_R_SUCCESS;
7177
7178         REQUIRE(VALID_RBTDB(rbtdb));
7179         REQUIRE(nodep != NULL && *nodep == NULL);
7180
7181         /* Note that the access to origin_node doesn't require a DB lock */
7182         onode = (dns_rbtnode_t *)rbtdb->origin_node;
7183         if (onode != NULL) {
7184                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7185                 new_reference(rbtdb, onode);
7186                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7187
7188                 *nodep = rbtdb->origin_node;
7189         } else {
7190                 INSIST(IS_CACHE(rbtdb));
7191                 result = ISC_R_NOTFOUND;
7192         }
7193
7194         return (result);
7195 }
7196
7197 static isc_result_t
7198 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7199                    isc_uint8_t *flags, isc_uint16_t *iterations,
7200                    unsigned char *salt, size_t *salt_length)
7201 {
7202         dns_rbtdb_t *rbtdb;
7203         isc_result_t result = ISC_R_NOTFOUND;
7204         rbtdb_version_t *rbtversion = version;
7205
7206         rbtdb = (dns_rbtdb_t *)db;
7207
7208         REQUIRE(VALID_RBTDB(rbtdb));
7209
7210         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7211
7212         if (rbtversion == NULL)
7213                 rbtversion = rbtdb->current_version;
7214
7215         if (rbtversion->havensec3) {
7216                 if (hash != NULL)
7217                         *hash = rbtversion->hash;
7218                 if (salt != NULL && salt_length != NULL) {
7219                         REQUIRE(*salt_length >= rbtversion->salt_length);
7220                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
7221                 }
7222                 if (salt_length != NULL)
7223                         *salt_length = rbtversion->salt_length;
7224                 if (iterations != NULL)
7225                         *iterations = rbtversion->iterations;
7226                 if (flags != NULL)
7227                         *flags = rbtversion->flags;
7228                 result = ISC_R_SUCCESS;
7229         }
7230         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7231
7232         return (result);
7233 }
7234
7235 static isc_result_t
7236 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7237         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7238         isc_stdtime_t oldresign;
7239         isc_result_t result = ISC_R_SUCCESS;
7240         rdatasetheader_t *header;
7241
7242         REQUIRE(VALID_RBTDB(rbtdb));
7243         REQUIRE(!IS_CACHE(rbtdb));
7244         REQUIRE(rdataset != NULL);
7245
7246         header = rdataset->private3;
7247         header--;
7248
7249         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7250                   isc_rwlocktype_write);
7251
7252         oldresign = header->resign;
7253         header->resign = resign;
7254         if (header->heap_index != 0) {
7255                 INSIST(RESIGN(header));
7256                 if (resign == 0) {
7257                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
7258                                         header->heap_index);
7259                         header->heap_index = 0;
7260                 } else if (resign < oldresign)
7261                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
7262                                            header->heap_index);
7263                 else if (resign > oldresign)
7264                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7265                                            header->heap_index);
7266         } else if (resign && header->heap_index == 0) {
7267                 header->attributes |= RDATASET_ATTR_RESIGN;
7268                 result = resign_insert(rbtdb, header->node->locknum, header);
7269         }
7270         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7271                     isc_rwlocktype_write);
7272         return (result);
7273 }
7274
7275 static isc_result_t
7276 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7277                dns_name_t *foundname)
7278 {
7279         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7280         rdatasetheader_t *header = NULL, *this;
7281         unsigned int i;
7282         isc_result_t result = ISC_R_NOTFOUND;
7283         unsigned int locknum;
7284
7285         REQUIRE(VALID_RBTDB(rbtdb));
7286
7287         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7288
7289         for (i = 0; i < rbtdb->node_lock_count; i++) {
7290                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7291                 this = isc_heap_element(rbtdb->heaps[i], 1);
7292                 if (this == NULL) {
7293                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7294                                     isc_rwlocktype_read);
7295                         continue;
7296                 }
7297                 if (header == NULL)
7298                         header = this;
7299                 else if (isc_serial_lt(this->resign, header->resign)) {
7300                         locknum = header->node->locknum;
7301                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7302                                     isc_rwlocktype_read);
7303                         header = this;
7304                 } else
7305                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7306                                     isc_rwlocktype_read);
7307         }
7308
7309         if (header == NULL)
7310                 goto unlock;
7311
7312         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7313
7314         if (foundname != NULL)
7315                 dns_rbt_fullnamefromnode(header->node, foundname);
7316
7317         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7318                     isc_rwlocktype_read);
7319
7320         result = ISC_R_SUCCESS;
7321
7322  unlock:
7323         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7324
7325         return (result);
7326 }
7327
7328 static void
7329 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7330 {
7331         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7332         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7333         dns_rbtnode_t *node;
7334         rdatasetheader_t *header;
7335
7336         REQUIRE(VALID_RBTDB(rbtdb));
7337         REQUIRE(rdataset != NULL);
7338         REQUIRE(rbtdb->future_version == rbtversion);
7339         REQUIRE(rbtversion->writer);
7340
7341         node = rdataset->private2;
7342         header = rdataset->private3;
7343         header--;
7344
7345         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7346         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7347                   isc_rwlocktype_write);
7348         /*
7349          * Delete from heap and save to re-signed list so that it can
7350          * be restored if we backout of this change.
7351          */
7352         new_reference(rbtdb, node);
7353         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7354         header->heap_index = 0;
7355         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7356
7357         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7358                     isc_rwlocktype_write);
7359         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7360 }
7361
7362 static dns_stats_t *
7363 getrrsetstats(dns_db_t *db) {
7364         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7365
7366         REQUIRE(VALID_RBTDB(rbtdb));
7367         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7368
7369         return (rbtdb->rrsetstats);
7370 }
7371
7372 static dns_dbmethods_t zone_methods = {
7373         attach,
7374         detach,
7375         beginload,
7376         endload,
7377         dump,
7378         currentversion,
7379         newversion,
7380         attachversion,
7381         closeversion,
7382         findnode,
7383         zone_find,
7384         zone_findzonecut,
7385         attachnode,
7386         detachnode,
7387         expirenode,
7388         printnode,
7389         createiterator,
7390         zone_findrdataset,
7391         allrdatasets,
7392         addrdataset,
7393         subtractrdataset,
7394         deleterdataset,
7395         issecure,
7396         nodecount,
7397         ispersistent,
7398         overmem,
7399         settask,
7400         getoriginnode,
7401         NULL,
7402         getnsec3parameters,
7403         findnsec3node,
7404         setsigningtime,
7405         getsigningtime,
7406         resigned,
7407         isdnssec,
7408         NULL,
7409 #ifdef BIND9
7410         get_rpz_enabled,
7411         rpz_findips
7412 #else
7413         NULL,
7414         NULL
7415 #endif
7416 };
7417
7418 static dns_dbmethods_t cache_methods = {
7419         attach,
7420         detach,
7421         beginload,
7422         endload,
7423         dump,
7424         currentversion,
7425         newversion,
7426         attachversion,
7427         closeversion,
7428         findnode,
7429         cache_find,
7430         cache_findzonecut,
7431         attachnode,
7432         detachnode,
7433         expirenode,
7434         printnode,
7435         createiterator,
7436         cache_findrdataset,
7437         allrdatasets,
7438         addrdataset,
7439         subtractrdataset,
7440         deleterdataset,
7441         issecure,
7442         nodecount,
7443         ispersistent,
7444         overmem,
7445         settask,
7446         getoriginnode,
7447         NULL,
7448         NULL,
7449         NULL,
7450         NULL,
7451         NULL,
7452         NULL,
7453         isdnssec,
7454         getrrsetstats,
7455         NULL,
7456         NULL
7457 };
7458
7459 isc_result_t
7460 #ifdef DNS_RBTDB_VERSION64
7461 dns_rbtdb64_create
7462 #else
7463 dns_rbtdb_create
7464 #endif
7465                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7466                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7467                  void *driverarg, dns_db_t **dbp)
7468 {
7469         dns_rbtdb_t *rbtdb;
7470         isc_result_t result;
7471         int i;
7472         dns_name_t name;
7473         isc_boolean_t (*sooner)(void *, void *);
7474         isc_mem_t *hmctx = mctx;
7475
7476         /* Keep the compiler happy. */
7477         UNUSED(driverarg);
7478
7479         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7480         if (rbtdb == NULL)
7481                 return (ISC_R_NOMEMORY);
7482
7483         /*
7484          * If argv[0] exists, it points to a memory context to use for heap
7485          */
7486         if (argc != 0)
7487                 hmctx = (isc_mem_t *) argv[0];
7488
7489         memset(rbtdb, '\0', sizeof(*rbtdb));
7490         dns_name_init(&rbtdb->common.origin, NULL);
7491         rbtdb->common.attributes = 0;
7492         if (type == dns_dbtype_cache) {
7493                 rbtdb->common.methods = &cache_methods;
7494                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7495         } else if (type == dns_dbtype_stub) {
7496                 rbtdb->common.methods = &zone_methods;
7497                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7498         } else
7499                 rbtdb->common.methods = &zone_methods;
7500         rbtdb->common.rdclass = rdclass;
7501         rbtdb->common.mctx = NULL;
7502
7503         result = RBTDB_INITLOCK(&rbtdb->lock);
7504         if (result != ISC_R_SUCCESS)
7505                 goto cleanup_rbtdb;
7506
7507         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7508         if (result != ISC_R_SUCCESS)
7509                 goto cleanup_lock;
7510
7511         /*
7512          * Initialize node_lock_count in a generic way to support future
7513          * extension which allows the user to specify this value on creation.
7514          * Note that when specified for a cache DB it must be larger than 1
7515          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7516          */
7517         if (rbtdb->node_lock_count == 0) {
7518                 if (IS_CACHE(rbtdb))
7519                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7520                 else
7521                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7522         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7523                 result = ISC_R_RANGE;
7524                 goto cleanup_tree_lock;
7525         }
7526         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7527         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7528                                         sizeof(rbtdb_nodelock_t));
7529         if (rbtdb->node_locks == NULL) {
7530                 result = ISC_R_NOMEMORY;
7531                 goto cleanup_tree_lock;
7532         }
7533
7534         rbtdb->rrsetstats = NULL;
7535         if (IS_CACHE(rbtdb)) {
7536                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7537                 if (result != ISC_R_SUCCESS)
7538                         goto cleanup_node_locks;
7539                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7540                                                sizeof(rdatasetheaderlist_t));
7541                 if (rbtdb->rdatasets == NULL) {
7542                         result = ISC_R_NOMEMORY;
7543                         goto cleanup_rrsetstats;
7544                 }
7545                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7546                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
7547         } else
7548                 rbtdb->rdatasets = NULL;
7549
7550         /*
7551          * Create the heaps.
7552          */
7553         rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7554                                    sizeof(isc_heap_t *));
7555         if (rbtdb->heaps == NULL) {
7556                 result = ISC_R_NOMEMORY;
7557                 goto cleanup_rdatasets;
7558         }
7559         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7560                 rbtdb->heaps[i] = NULL;
7561         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7562         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7563                 result = isc_heap_create(hmctx, sooner, set_index, 0,
7564                                          &rbtdb->heaps[i]);
7565                 if (result != ISC_R_SUCCESS)
7566                         goto cleanup_heaps;
7567         }
7568
7569         /*
7570          * Create deadnode lists.
7571          */
7572         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7573                                        sizeof(rbtnodelist_t));
7574         if (rbtdb->deadnodes == NULL) {
7575                 result = ISC_R_NOMEMORY;
7576                 goto cleanup_heaps;
7577         }
7578         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7579                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7580
7581         rbtdb->active = rbtdb->node_lock_count;
7582
7583         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7584                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7585                 if (result == ISC_R_SUCCESS) {
7586                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7587                         if (result != ISC_R_SUCCESS)
7588                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7589                 }
7590                 if (result != ISC_R_SUCCESS) {
7591                         while (i-- > 0) {
7592                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7593                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7594                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7595                         }
7596                         goto cleanup_deadnodes;
7597                 }
7598                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7599         }
7600
7601         /*
7602          * Attach to the mctx.  The database will persist so long as there
7603          * are references to it, and attaching to the mctx ensures that our
7604          * mctx won't disappear out from under us.
7605          */
7606         isc_mem_attach(mctx, &rbtdb->common.mctx);
7607         isc_mem_attach(hmctx, &rbtdb->hmctx);
7608
7609         /*
7610          * Must be initialized before free_rbtdb() is called.
7611          */
7612         isc_ondestroy_init(&rbtdb->common.ondest);
7613
7614         /*
7615          * Make a copy of the origin name.
7616          */
7617         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7618         if (result != ISC_R_SUCCESS) {
7619                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7620                 return (result);
7621         }
7622
7623         /*
7624          * Make the Red-Black Trees.
7625          */
7626         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7627         if (result != ISC_R_SUCCESS) {
7628                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7629                 return (result);
7630         }
7631
7632         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7633         if (result != ISC_R_SUCCESS) {
7634                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7635                 return (result);
7636         }
7637
7638         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7639         if (result != ISC_R_SUCCESS) {
7640                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7641                 return (result);
7642         }
7643
7644 #ifdef BIND9
7645         /*
7646          * Get ready for response policy IP address searching if at least one
7647          * zone has been configured as a response policy zone and this
7648          * is not a cache zone.
7649          * It would be better to know that this database is for a policy
7650          * zone named for a view, but that would require knowledge from
7651          * above such as an argv[] set from data in the zone.
7652          */
7653         if (type == dns_dbtype_zone && !dns_name_equal(origin, dns_rootname)) {
7654                 result = dns_rpz_new_cidr(mctx, origin, &rbtdb->rpz_cidr);
7655                 if (result != ISC_R_SUCCESS) {
7656                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7657                         return (result);
7658                 }
7659         }
7660 #endif
7661
7662         /*
7663          * In order to set the node callback bit correctly in zone databases,
7664          * we need to know if the node has the origin name of the zone.
7665          * In loading_addrdataset() we could simply compare the new name
7666          * to the origin name, but this is expensive.  Also, we don't know the
7667          * node name in addrdataset(), so we need another way of knowing the
7668          * zone's top.
7669          *
7670          * We now explicitly create a node for the zone's origin, and then
7671          * we simply remember the node's address.  This is safe, because
7672          * the top-of-zone node can never be deleted, nor can its address
7673          * change.
7674          */
7675         if (!IS_CACHE(rbtdb)) {
7676                 dns_rbtnode_t *nsec3node;
7677
7678                 rbtdb->origin_node = NULL;
7679                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7680                                          &rbtdb->origin_node);
7681                 if (result != ISC_R_SUCCESS) {
7682                         INSIST(result != ISC_R_EXISTS);
7683                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7684                         return (result);
7685                 }
7686                 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7687                 /*
7688                  * We need to give the origin node the right locknum.
7689                  */
7690                 dns_name_init(&name, NULL);
7691                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7692 #ifdef DNS_RBT_USEHASH
7693                 rbtdb->origin_node->locknum =
7694                         rbtdb->origin_node->hashval %
7695                         rbtdb->node_lock_count;
7696 #else
7697                 rbtdb->origin_node->locknum =
7698                         dns_name_hash(&name, ISC_TRUE) %
7699                         rbtdb->node_lock_count;
7700 #endif
7701                 /*
7702                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7703                  * return partial matches when there is only a single NSEC3
7704                  * record in the tree.
7705                  */
7706                 nsec3node = NULL;
7707                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7708                                          &nsec3node);
7709                 if (result != ISC_R_SUCCESS) {
7710                         INSIST(result != ISC_R_EXISTS);
7711                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7712                         return (result);
7713                 }
7714                 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
7715                 /*
7716                  * We need to give the nsec3 origin node the right locknum.
7717                  */
7718                 dns_name_init(&name, NULL);
7719                 dns_rbt_namefromnode(nsec3node, &name);
7720 #ifdef DNS_RBT_USEHASH
7721                 nsec3node->locknum = nsec3node->hashval %
7722                         rbtdb->node_lock_count;
7723 #else
7724                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7725                         rbtdb->node_lock_count;
7726 #endif
7727         }
7728
7729         /*
7730          * Misc. Initialization.
7731          */
7732         result = isc_refcount_init(&rbtdb->references, 1);
7733         if (result != ISC_R_SUCCESS) {
7734                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7735                 return (result);
7736         }
7737         rbtdb->attributes = 0;
7738         rbtdb->task = NULL;
7739
7740         /*
7741          * Version Initialization.
7742          */
7743         rbtdb->current_serial = 1;
7744         rbtdb->least_serial = 1;
7745         rbtdb->next_serial = 2;
7746         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7747         if (rbtdb->current_version == NULL) {
7748                 isc_refcount_decrement(&rbtdb->references, NULL);
7749                 isc_refcount_destroy(&rbtdb->references);
7750                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7751                 return (ISC_R_NOMEMORY);
7752         }
7753         rbtdb->current_version->secure = dns_db_insecure;
7754         rbtdb->current_version->havensec3 = ISC_FALSE;
7755         rbtdb->current_version->flags = 0;
7756         rbtdb->current_version->iterations = 0;
7757         rbtdb->current_version->hash = 0;
7758         rbtdb->current_version->salt_length = 0;
7759         memset(rbtdb->current_version->salt, 0,
7760                sizeof(rbtdb->current_version->salt));
7761         rbtdb->future_version = NULL;
7762         ISC_LIST_INIT(rbtdb->open_versions);
7763         /*
7764          * Keep the current version in the open list so that list operation
7765          * won't happen in normal lookup operations.
7766          */
7767         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7768
7769         rbtdb->common.magic = DNS_DB_MAGIC;
7770         rbtdb->common.impmagic = RBTDB_MAGIC;
7771
7772         *dbp = (dns_db_t *)rbtdb;
7773
7774         return (ISC_R_SUCCESS);
7775
7776  cleanup_deadnodes:
7777         isc_mem_put(mctx, rbtdb->deadnodes,
7778                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7779
7780  cleanup_heaps:
7781         if (rbtdb->heaps != NULL) {
7782                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7783                         if (rbtdb->heaps[i] != NULL)
7784                                 isc_heap_destroy(&rbtdb->heaps[i]);
7785                 isc_mem_put(mctx, rbtdb->heaps,
7786                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7787         }
7788
7789  cleanup_rdatasets:
7790         if (rbtdb->rdatasets != NULL)
7791                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7792                             sizeof(rdatasetheaderlist_t));
7793  cleanup_rrsetstats:
7794         if (rbtdb->rrsetstats != NULL)
7795                 dns_stats_detach(&rbtdb->rrsetstats);
7796
7797  cleanup_node_locks:
7798         isc_mem_put(mctx, rbtdb->node_locks,
7799                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7800
7801  cleanup_tree_lock:
7802         isc_rwlock_destroy(&rbtdb->tree_lock);
7803
7804  cleanup_lock:
7805         RBTDB_DESTROYLOCK(&rbtdb->lock);
7806
7807  cleanup_rbtdb:
7808         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7809         return (result);
7810 }
7811
7812
7813 /*
7814  * Slabbed Rdataset Methods
7815  */
7816
7817 static void
7818 rdataset_disassociate(dns_rdataset_t *rdataset) {
7819         dns_db_t *db = rdataset->private1;
7820         dns_dbnode_t *node = rdataset->private2;
7821
7822         detachnode(db, &node);
7823 }
7824
7825 static isc_result_t
7826 rdataset_first(dns_rdataset_t *rdataset) {
7827         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7828         unsigned int count;
7829
7830         count = raw[0] * 256 + raw[1];
7831         if (count == 0) {
7832                 rdataset->private5 = NULL;
7833                 return (ISC_R_NOMORE);
7834         }
7835
7836 #if DNS_RDATASET_FIXED
7837         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7838                 raw += 2 + (4 * count);
7839         else
7840 #endif
7841                 raw += 2;
7842
7843         /*
7844          * The privateuint4 field is the number of rdata beyond the
7845          * cursor position, so we decrement the total count by one
7846          * before storing it.
7847          *
7848          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7849          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7850          * to the first entry in the offset table.
7851          */
7852         count--;
7853         rdataset->privateuint4 = count;
7854         rdataset->private5 = raw;
7855
7856         return (ISC_R_SUCCESS);
7857 }
7858
7859 static isc_result_t
7860 rdataset_next(dns_rdataset_t *rdataset) {
7861         unsigned int count;
7862         unsigned int length;
7863         unsigned char *raw;     /* RDATASLAB */
7864
7865         count = rdataset->privateuint4;
7866         if (count == 0)
7867                 return (ISC_R_NOMORE);
7868         count--;
7869         rdataset->privateuint4 = count;
7870
7871         /*
7872          * Skip forward one record (length + 4) or one offset (4).
7873          */
7874         raw = rdataset->private5;
7875 #if DNS_RDATASET_FIXED
7876         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7877 #endif
7878                 length = raw[0] * 256 + raw[1];
7879                 raw += length;
7880 #if DNS_RDATASET_FIXED
7881         }
7882         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7883 #else
7884         rdataset->private5 = raw + 2;           /* length(2) */
7885 #endif
7886
7887         return (ISC_R_SUCCESS);
7888 }
7889
7890 static void
7891 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7892         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7893 #if DNS_RDATASET_FIXED
7894         unsigned int offset;
7895 #endif
7896         unsigned int length;
7897         isc_region_t r;
7898         unsigned int flags = 0;
7899
7900         REQUIRE(raw != NULL);
7901
7902         /*
7903          * Find the start of the record if not already in private5
7904          * then skip the length and order fields.
7905          */
7906 #if DNS_RDATASET_FIXED
7907         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7908                 offset = (raw[0] << 24) + (raw[1] << 16) +
7909                          (raw[2] << 8) + raw[3];
7910                 raw = rdataset->private3;
7911                 raw += offset;
7912         }
7913 #endif
7914         length = raw[0] * 256 + raw[1];
7915 #if DNS_RDATASET_FIXED
7916         raw += 4;
7917 #else
7918         raw += 2;
7919 #endif
7920         if (rdataset->type == dns_rdatatype_rrsig) {
7921                 if (*raw & DNS_RDATASLAB_OFFLINE)
7922                         flags |= DNS_RDATA_OFFLINE;
7923                 length--;
7924                 raw++;
7925         }
7926         r.length = length;
7927         r.base = raw;
7928         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7929         rdata->flags |= flags;
7930 }
7931
7932 static void
7933 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7934         dns_db_t *db = source->private1;
7935         dns_dbnode_t *node = source->private2;
7936         dns_dbnode_t *cloned_node = NULL;
7937
7938         attachnode(db, node, &cloned_node);
7939         *target = *source;
7940
7941         /*
7942          * Reset iterator state.
7943          */
7944         target->privateuint4 = 0;
7945         target->private5 = NULL;
7946 }
7947
7948 static unsigned int
7949 rdataset_count(dns_rdataset_t *rdataset) {
7950         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7951         unsigned int count;
7952
7953         count = raw[0] * 256 + raw[1];
7954
7955         return (count);
7956 }
7957
7958 static isc_result_t
7959 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7960                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7961 {
7962         dns_db_t *db = rdataset->private1;
7963         dns_dbnode_t *node = rdataset->private2;
7964         dns_dbnode_t *cloned_node;
7965         struct noqname *noqname = rdataset->private6;
7966
7967         cloned_node = NULL;
7968         attachnode(db, node, &cloned_node);
7969         nsec->methods = &rdataset_methods;
7970         nsec->rdclass = db->rdclass;
7971         nsec->type = noqname->type;
7972         nsec->covers = 0;
7973         nsec->ttl = rdataset->ttl;
7974         nsec->trust = rdataset->trust;
7975         nsec->private1 = rdataset->private1;
7976         nsec->private2 = rdataset->private2;
7977         nsec->private3 = noqname->neg;
7978         nsec->privateuint4 = 0;
7979         nsec->private5 = NULL;
7980         nsec->private6 = NULL;
7981         nsec->private7 = NULL;
7982
7983         cloned_node = NULL;
7984         attachnode(db, node, &cloned_node);
7985         nsecsig->methods = &rdataset_methods;
7986         nsecsig->rdclass = db->rdclass;
7987         nsecsig->type = dns_rdatatype_rrsig;
7988         nsecsig->covers = noqname->type;
7989         nsecsig->ttl = rdataset->ttl;
7990         nsecsig->trust = rdataset->trust;
7991         nsecsig->private1 = rdataset->private1;
7992         nsecsig->private2 = rdataset->private2;
7993         nsecsig->private3 = noqname->negsig;
7994         nsecsig->privateuint4 = 0;
7995         nsecsig->private5 = NULL;
7996         nsec->private6 = NULL;
7997         nsec->private7 = NULL;
7998
7999         dns_name_clone(&noqname->name, name);
8000
8001         return (ISC_R_SUCCESS);
8002 }
8003
8004 static isc_result_t
8005 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
8006                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8007 {
8008         dns_db_t *db = rdataset->private1;
8009         dns_dbnode_t *node = rdataset->private2;
8010         dns_dbnode_t *cloned_node;
8011         struct noqname *closest = rdataset->private7;
8012
8013         cloned_node = NULL;
8014         attachnode(db, node, &cloned_node);
8015         nsec->methods = &rdataset_methods;
8016         nsec->rdclass = db->rdclass;
8017         nsec->type = closest->type;
8018         nsec->covers = 0;
8019         nsec->ttl = rdataset->ttl;
8020         nsec->trust = rdataset->trust;
8021         nsec->private1 = rdataset->private1;
8022         nsec->private2 = rdataset->private2;
8023         nsec->private3 = closest->neg;
8024         nsec->privateuint4 = 0;
8025         nsec->private5 = NULL;
8026         nsec->private6 = NULL;
8027         nsec->private7 = NULL;
8028
8029         cloned_node = NULL;
8030         attachnode(db, node, &cloned_node);
8031         nsecsig->methods = &rdataset_methods;
8032         nsecsig->rdclass = db->rdclass;
8033         nsecsig->type = dns_rdatatype_rrsig;
8034         nsecsig->covers = closest->type;
8035         nsecsig->ttl = rdataset->ttl;
8036         nsecsig->trust = rdataset->trust;
8037         nsecsig->private1 = rdataset->private1;
8038         nsecsig->private2 = rdataset->private2;
8039         nsecsig->private3 = closest->negsig;
8040         nsecsig->privateuint4 = 0;
8041         nsecsig->private5 = NULL;
8042         nsec->private6 = NULL;
8043         nsec->private7 = NULL;
8044
8045         dns_name_clone(&closest->name, name);
8046
8047         return (ISC_R_SUCCESS);
8048 }
8049
8050 static void
8051 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8052         dns_rbtdb_t *rbtdb = rdataset->private1;
8053         dns_rbtnode_t *rbtnode = rdataset->private2;
8054         rdatasetheader_t *header = rdataset->private3;
8055
8056         header--;
8057         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8058                   isc_rwlocktype_write);
8059         header->trust = rdataset->trust = trust;
8060         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8061                   isc_rwlocktype_write);
8062 }
8063
8064 static void
8065 rdataset_expire(dns_rdataset_t *rdataset) {
8066         dns_rbtdb_t *rbtdb = rdataset->private1;
8067         dns_rbtnode_t *rbtnode = rdataset->private2;
8068         rdatasetheader_t *header = rdataset->private3;
8069
8070         header--;
8071         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8072                   isc_rwlocktype_write);
8073         expire_header(rbtdb, header, ISC_FALSE);
8074         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8075                   isc_rwlocktype_write);
8076 }
8077
8078 /*
8079  * Rdataset Iterator Methods
8080  */
8081
8082 static void
8083 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8084         rbtdb_rdatasetiter_t *rbtiterator;
8085
8086         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
8087
8088         if (rbtiterator->common.version != NULL)
8089                 closeversion(rbtiterator->common.db,
8090                              &rbtiterator->common.version, ISC_FALSE);
8091         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
8092         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8093                     sizeof(*rbtiterator));
8094
8095         *iteratorp = NULL;
8096 }
8097
8098 static isc_result_t
8099 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8100         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8101         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8102         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8103         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8104         rdatasetheader_t *header, *top_next;
8105         rbtdb_serial_t serial;
8106         isc_stdtime_t now;
8107
8108         if (IS_CACHE(rbtdb)) {
8109                 serial = 1;
8110                 now = rbtiterator->common.now;
8111         } else {
8112                 serial = rbtversion->serial;
8113                 now = 0;
8114         }
8115
8116         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8117                   isc_rwlocktype_read);
8118
8119         for (header = rbtnode->data; header != NULL; header = top_next) {
8120                 top_next = header->next;
8121                 do {
8122                         if (header->serial <= serial && !IGNORE(header)) {
8123                                 /*
8124                                  * Is this a "this rdataset doesn't exist"
8125                                  * record?  Or is it too old in the cache?
8126                                  *
8127                                  * Note: unlike everywhere else, we
8128                                  * check for now > header->rdh_ttl instead
8129                                  * of now >= header->rdh_ttl.  This allows
8130                                  * ANY and RRSIG queries for 0 TTL
8131                                  * rdatasets to work.
8132                                  */
8133                                 if (NONEXISTENT(header) ||
8134                                     (now != 0 && now > header->rdh_ttl))
8135                                         header = NULL;
8136                                 break;
8137                         } else
8138                                 header = header->down;
8139                 } while (header != NULL);
8140                 if (header != NULL)
8141                         break;
8142         }
8143
8144         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8145                     isc_rwlocktype_read);
8146
8147         rbtiterator->current = header;
8148
8149         if (header == NULL)
8150                 return (ISC_R_NOMORE);
8151
8152         return (ISC_R_SUCCESS);
8153 }
8154
8155 static isc_result_t
8156 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8157         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8158         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8159         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8160         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8161         rdatasetheader_t *header, *top_next;
8162         rbtdb_serial_t serial;
8163         isc_stdtime_t now;
8164         rbtdb_rdatatype_t type, negtype;
8165         dns_rdatatype_t rdtype, covers;
8166
8167         header = rbtiterator->current;
8168         if (header == NULL)
8169                 return (ISC_R_NOMORE);
8170
8171         if (IS_CACHE(rbtdb)) {
8172                 serial = 1;
8173                 now = rbtiterator->common.now;
8174         } else {
8175                 serial = rbtversion->serial;
8176                 now = 0;
8177         }
8178
8179         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8180                   isc_rwlocktype_read);
8181
8182         type = header->type;
8183         rdtype = RBTDB_RDATATYPE_BASE(header->type);
8184         if (NEGATIVE(header)) {
8185                 covers = RBTDB_RDATATYPE_EXT(header->type);
8186                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8187         } else
8188                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
8189         for (header = header->next; header != NULL; header = top_next) {
8190                 top_next = header->next;
8191                 /*
8192                  * If not walking back up the down list.
8193                  */
8194                 if (header->type != type && header->type != negtype) {
8195                         do {
8196                                 if (header->serial <= serial &&
8197                                     !IGNORE(header)) {
8198                                         /*
8199                                          * Is this a "this rdataset doesn't
8200                                          * exist" record?
8201                                          *
8202                                          * Note: unlike everywhere else, we
8203                                          * check for now > header->ttl instead
8204                                          * of now >= header->ttl.  This allows
8205                                          * ANY and RRSIG queries for 0 TTL
8206                                          * rdatasets to work.
8207                                          */
8208                                         if ((header->attributes &
8209                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
8210                                             (now != 0 && now > header->rdh_ttl))
8211                                                 header = NULL;
8212                                         break;
8213                                 } else
8214                                         header = header->down;
8215                         } while (header != NULL);
8216                         if (header != NULL)
8217                                 break;
8218                 }
8219         }
8220
8221         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8222                     isc_rwlocktype_read);
8223
8224         rbtiterator->current = header;
8225
8226         if (header == NULL)
8227                 return (ISC_R_NOMORE);
8228
8229         return (ISC_R_SUCCESS);
8230 }
8231
8232 static void
8233 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8234         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8235         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8236         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8237         rdatasetheader_t *header;
8238
8239         header = rbtiterator->current;
8240         REQUIRE(header != NULL);
8241
8242         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8243                   isc_rwlocktype_read);
8244
8245         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8246                       rdataset);
8247
8248         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8249                     isc_rwlocktype_read);
8250 }
8251
8252
8253 /*
8254  * Database Iterator Methods
8255  */
8256
8257 static inline void
8258 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8259         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8260         dns_rbtnode_t *node = rbtdbiter->node;
8261
8262         if (node == NULL)
8263                 return;
8264
8265         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8266         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
8267 }
8268
8269 static inline void
8270 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8271         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8272         dns_rbtnode_t *node = rbtdbiter->node;
8273         nodelock_t *lock;
8274
8275         if (node == NULL)
8276                 return;
8277
8278         lock = &rbtdb->node_locks[node->locknum].lock;
8279         NODE_LOCK(lock, isc_rwlocktype_read);
8280         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8281                             rbtdbiter->tree_locked, ISC_FALSE);
8282         NODE_UNLOCK(lock, isc_rwlocktype_read);
8283
8284         rbtdbiter->node = NULL;
8285 }
8286
8287 static void
8288 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8289         dns_rbtnode_t *node;
8290         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8291         isc_boolean_t was_read_locked = ISC_FALSE;
8292         nodelock_t *lock;
8293         int i;
8294
8295         if (rbtdbiter->delete != 0) {
8296                 /*
8297                  * Note that "%d node of %d in tree" can report things like
8298                  * "flush_deletions: 59 nodes of 41 in tree".  This means
8299                  * That some nodes appear on the deletions list more than
8300                  * once.  Only the last occurence will actually be deleted.
8301                  */
8302                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8303                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8304                               "flush_deletions: %d nodes of %d in tree",
8305                               rbtdbiter->delete,
8306                               dns_rbt_nodecount(rbtdb->tree));
8307
8308                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8309                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8310                         was_read_locked = ISC_TRUE;
8311                 }
8312                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8313                 rbtdbiter->tree_locked = isc_rwlocktype_write;
8314
8315                 for (i = 0; i < rbtdbiter->delete; i++) {
8316                         node = rbtdbiter->deletions[i];
8317                         lock = &rbtdb->node_locks[node->locknum].lock;
8318
8319                         NODE_LOCK(lock, isc_rwlocktype_read);
8320                         decrement_reference(rbtdb, node, 0,
8321                                             isc_rwlocktype_read,
8322                                             rbtdbiter->tree_locked, ISC_FALSE);
8323                         NODE_UNLOCK(lock, isc_rwlocktype_read);
8324                 }
8325
8326                 rbtdbiter->delete = 0;
8327
8328                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8329                 if (was_read_locked) {
8330                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8331                         rbtdbiter->tree_locked = isc_rwlocktype_read;
8332
8333                 } else {
8334                         rbtdbiter->tree_locked = isc_rwlocktype_none;
8335                 }
8336         }
8337 }
8338
8339 static inline void
8340 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8341         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8342
8343         REQUIRE(rbtdbiter->paused);
8344         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8345
8346         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8347         rbtdbiter->tree_locked = isc_rwlocktype_read;
8348
8349         rbtdbiter->paused = ISC_FALSE;
8350 }
8351
8352 static void
8353 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8354         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8355         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8356         dns_db_t *db = NULL;
8357
8358         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8359                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8360                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8361         } else
8362                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8363
8364         dereference_iter_node(rbtdbiter);
8365
8366         flush_deletions(rbtdbiter);
8367
8368         dns_db_attach(rbtdbiter->common.db, &db);
8369         dns_db_detach(&rbtdbiter->common.db);
8370
8371         dns_rbtnodechain_reset(&rbtdbiter->chain);
8372         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8373         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
8374         dns_db_detach(&db);
8375
8376         *iteratorp = NULL;
8377 }
8378
8379 static isc_result_t
8380 dbiterator_first(dns_dbiterator_t *iterator) {
8381         isc_result_t result;
8382         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8383         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8384         dns_name_t *name, *origin;
8385
8386         if (rbtdbiter->result != ISC_R_SUCCESS &&
8387             rbtdbiter->result != ISC_R_NOMORE)
8388                 return (rbtdbiter->result);
8389
8390         if (rbtdbiter->paused)
8391                 resume_iteration(rbtdbiter);
8392
8393         dereference_iter_node(rbtdbiter);
8394
8395         name = dns_fixedname_name(&rbtdbiter->name);
8396         origin = dns_fixedname_name(&rbtdbiter->origin);
8397         dns_rbtnodechain_reset(&rbtdbiter->chain);
8398         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8399
8400         if (rbtdbiter->nsec3only) {
8401                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8402                 result = dns_rbtnodechain_first(rbtdbiter->current,
8403                                                 rbtdb->nsec3, name, origin);
8404         } else {
8405                 rbtdbiter->current = &rbtdbiter->chain;
8406                 result = dns_rbtnodechain_first(rbtdbiter->current,
8407                                                 rbtdb->tree, name, origin);
8408                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8409                         rbtdbiter->current = &rbtdbiter->nsec3chain;
8410                         result = dns_rbtnodechain_first(rbtdbiter->current,
8411                                                         rbtdb->nsec3, name,
8412                                                         origin);
8413                 }
8414         }
8415         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8416                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8417                                                   NULL, &rbtdbiter->node);
8418                 if (result == ISC_R_SUCCESS) {
8419                         rbtdbiter->new_origin = ISC_TRUE;
8420                         reference_iter_node(rbtdbiter);
8421                 }
8422         } else {
8423                 INSIST(result == ISC_R_NOTFOUND);
8424                 result = ISC_R_NOMORE; /* The tree is empty. */
8425         }
8426
8427         rbtdbiter->result = result;
8428
8429         return (result);
8430 }
8431
8432 static isc_result_t
8433 dbiterator_last(dns_dbiterator_t *iterator) {
8434         isc_result_t result;
8435         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8436         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8437         dns_name_t *name, *origin;
8438
8439         if (rbtdbiter->result != ISC_R_SUCCESS &&
8440             rbtdbiter->result != ISC_R_NOMORE)
8441                 return (rbtdbiter->result);
8442
8443         if (rbtdbiter->paused)
8444                 resume_iteration(rbtdbiter);
8445
8446         dereference_iter_node(rbtdbiter);
8447
8448         name = dns_fixedname_name(&rbtdbiter->name);
8449         origin = dns_fixedname_name(&rbtdbiter->origin);
8450         dns_rbtnodechain_reset(&rbtdbiter->chain);
8451         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8452
8453         result = ISC_R_NOTFOUND;
8454         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8455                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8456                 result = dns_rbtnodechain_last(rbtdbiter->current,
8457                                                rbtdb->nsec3, name, origin);
8458         }
8459         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8460                 rbtdbiter->current = &rbtdbiter->chain;
8461                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8462                                                name, origin);
8463         }
8464         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8465                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8466                                                   NULL, &rbtdbiter->node);
8467                 if (result == ISC_R_SUCCESS) {
8468                         rbtdbiter->new_origin = ISC_TRUE;
8469                         reference_iter_node(rbtdbiter);
8470                 }
8471         } else {
8472                 INSIST(result == ISC_R_NOTFOUND);
8473                 result = ISC_R_NOMORE; /* The tree is empty. */
8474         }
8475
8476         rbtdbiter->result = result;
8477
8478         return (result);
8479 }
8480
8481 static isc_result_t
8482 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8483         isc_result_t result;
8484         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8485         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8486         dns_name_t *iname, *origin;
8487
8488         if (rbtdbiter->result != ISC_R_SUCCESS &&
8489             rbtdbiter->result != ISC_R_NOTFOUND &&
8490             rbtdbiter->result != ISC_R_NOMORE)
8491                 return (rbtdbiter->result);
8492
8493         if (rbtdbiter->paused)
8494                 resume_iteration(rbtdbiter);
8495
8496         dereference_iter_node(rbtdbiter);
8497
8498         iname = dns_fixedname_name(&rbtdbiter->name);
8499         origin = dns_fixedname_name(&rbtdbiter->origin);
8500         dns_rbtnodechain_reset(&rbtdbiter->chain);
8501         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8502
8503         if (rbtdbiter->nsec3only) {
8504                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8505                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8506                                           &rbtdbiter->node,
8507                                           rbtdbiter->current,
8508                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8509         } else if (rbtdbiter->nonsec3) {
8510                 rbtdbiter->current = &rbtdbiter->chain;
8511                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8512                                           &rbtdbiter->node,
8513                                           rbtdbiter->current,
8514                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8515         } else {
8516                 /*
8517                  * Stay on main chain if not found on either chain.
8518                  */
8519                 rbtdbiter->current = &rbtdbiter->chain;
8520                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8521                                           &rbtdbiter->node,
8522                                           rbtdbiter->current,
8523                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8524                 if (result == DNS_R_PARTIALMATCH) {
8525                         dns_rbtnode_t *node = NULL;
8526                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8527                                                   &node, &rbtdbiter->nsec3chain,
8528                                                   DNS_RBTFIND_EMPTYDATA,
8529                                                   NULL, NULL);
8530                         if (result == ISC_R_SUCCESS) {
8531                                 rbtdbiter->node = node;
8532                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8533                         }
8534                 }
8535         }
8536
8537 #if 1
8538         if (result == ISC_R_SUCCESS) {
8539                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8540                                                   origin, NULL);
8541                 if (result == ISC_R_SUCCESS) {
8542                         rbtdbiter->new_origin = ISC_TRUE;
8543                         reference_iter_node(rbtdbiter);
8544                 }
8545         } else if (result == DNS_R_PARTIALMATCH) {
8546                 result = ISC_R_NOTFOUND;
8547                 rbtdbiter->node = NULL;
8548         }
8549
8550         rbtdbiter->result = result;
8551 #else
8552         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8553                 isc_result_t tresult;
8554                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8555                                                    origin, NULL);
8556                 if (tresult == ISC_R_SUCCESS) {
8557                         rbtdbiter->new_origin = ISC_TRUE;
8558                         reference_iter_node(rbtdbiter);
8559                 } else {
8560                         result = tresult;
8561                         rbtdbiter->node = NULL;
8562                 }
8563         } else
8564                 rbtdbiter->node = NULL;
8565
8566         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8567                             ISC_R_SUCCESS : result;
8568 #endif
8569
8570         return (result);
8571 }
8572
8573 static isc_result_t
8574 dbiterator_prev(dns_dbiterator_t *iterator) {
8575         isc_result_t result;
8576         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8577         dns_name_t *name, *origin;
8578         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8579
8580         REQUIRE(rbtdbiter->node != NULL);
8581
8582         if (rbtdbiter->result != ISC_R_SUCCESS)
8583                 return (rbtdbiter->result);
8584
8585         if (rbtdbiter->paused)
8586                 resume_iteration(rbtdbiter);
8587
8588         name = dns_fixedname_name(&rbtdbiter->name);
8589         origin = dns_fixedname_name(&rbtdbiter->origin);
8590         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8591         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8592             !rbtdbiter->nonsec3 &&
8593             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8594                 rbtdbiter->current = &rbtdbiter->chain;
8595                 dns_rbtnodechain_reset(rbtdbiter->current);
8596                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8597                                                name, origin);
8598                 if (result == ISC_R_NOTFOUND)
8599                         result = ISC_R_NOMORE;
8600         }
8601
8602         dereference_iter_node(rbtdbiter);
8603
8604         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8605                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8606                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8607                                                   NULL, &rbtdbiter->node);
8608         }
8609
8610         if (result == ISC_R_SUCCESS)
8611                 reference_iter_node(rbtdbiter);
8612
8613         rbtdbiter->result = result;
8614
8615         return (result);
8616 }
8617
8618 static isc_result_t
8619 dbiterator_next(dns_dbiterator_t *iterator) {
8620         isc_result_t result;
8621         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8622         dns_name_t *name, *origin;
8623         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8624
8625         REQUIRE(rbtdbiter->node != NULL);
8626
8627         if (rbtdbiter->result != ISC_R_SUCCESS)
8628                 return (rbtdbiter->result);
8629
8630         if (rbtdbiter->paused)
8631                 resume_iteration(rbtdbiter);
8632
8633         name = dns_fixedname_name(&rbtdbiter->name);
8634         origin = dns_fixedname_name(&rbtdbiter->origin);
8635         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8636         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8637             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8638                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8639                 dns_rbtnodechain_reset(rbtdbiter->current);
8640                 result = dns_rbtnodechain_first(rbtdbiter->current,
8641                                                 rbtdb->nsec3, name, origin);
8642                 if (result == ISC_R_NOTFOUND)
8643                         result = ISC_R_NOMORE;
8644         }
8645
8646         dereference_iter_node(rbtdbiter);
8647
8648         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8649                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8650                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8651                                                   NULL, &rbtdbiter->node);
8652         }
8653         if (result == ISC_R_SUCCESS)
8654                 reference_iter_node(rbtdbiter);
8655
8656         rbtdbiter->result = result;
8657
8658         return (result);
8659 }
8660
8661 static isc_result_t
8662 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8663                    dns_name_t *name)
8664 {
8665         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8666         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8667         dns_rbtnode_t *node = rbtdbiter->node;
8668         isc_result_t result;
8669         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8670         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8671
8672         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8673         REQUIRE(rbtdbiter->node != NULL);
8674
8675         if (rbtdbiter->paused)
8676                 resume_iteration(rbtdbiter);
8677
8678         if (name != NULL) {
8679                 if (rbtdbiter->common.relative_names)
8680                         origin = NULL;
8681                 result = dns_name_concatenate(nodename, origin, name, NULL);
8682                 if (result != ISC_R_SUCCESS)
8683                         return (result);
8684                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8685                         result = DNS_R_NEWORIGIN;
8686         } else
8687                 result = ISC_R_SUCCESS;
8688
8689         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8690         new_reference(rbtdb, node);
8691         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8692
8693         *nodep = rbtdbiter->node;
8694
8695         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8696                 isc_result_t expire_result;
8697
8698                 /*
8699                  * If the deletion array is full, flush it before trying
8700                  * to expire the current node.  The current node can't
8701                  * fully deleted while the iteration cursor is still on it.
8702                  */
8703                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8704                         flush_deletions(rbtdbiter);
8705
8706                 expire_result = expirenode(iterator->db, *nodep, 0);
8707
8708                 /*
8709                  * expirenode() currently always returns success.
8710                  */
8711                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8712                         unsigned int refs;
8713
8714                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8715                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8716                         dns_rbtnode_refincrement(node, &refs);
8717                         INSIST(refs != 0);
8718                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8719                 }
8720         }
8721
8722         return (result);
8723 }
8724
8725 static isc_result_t
8726 dbiterator_pause(dns_dbiterator_t *iterator) {
8727         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8728         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8729
8730         if (rbtdbiter->result != ISC_R_SUCCESS &&
8731             rbtdbiter->result != ISC_R_NOMORE)
8732                 return (rbtdbiter->result);
8733
8734         if (rbtdbiter->paused)
8735                 return (ISC_R_SUCCESS);
8736
8737         rbtdbiter->paused = ISC_TRUE;
8738
8739         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8740                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8741                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8742                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8743         }
8744
8745         flush_deletions(rbtdbiter);
8746
8747         return (ISC_R_SUCCESS);
8748 }
8749
8750 static isc_result_t
8751 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8752         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8753         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8754
8755         if (rbtdbiter->result != ISC_R_SUCCESS)
8756                 return (rbtdbiter->result);
8757
8758         return (dns_name_copy(origin, name, NULL));
8759 }
8760
8761 /*%
8762  * Additional cache routines.
8763  */
8764 static isc_result_t
8765 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8766                        dns_rdatatype_t qtype, dns_acache_t *acache,
8767                        dns_zone_t **zonep, dns_db_t **dbp,
8768                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8769                        dns_name_t *fname, dns_message_t *msg,
8770                        isc_stdtime_t now)
8771 {
8772 #ifndef BIND9
8773         UNUSED(rdataset);
8774         UNUSED(type);
8775         UNUSED(qtype);
8776         UNUSED(acache);
8777         UNUSED(zonep);
8778         UNUSED(dbp);
8779         UNUSED(versionp);
8780         UNUSED(nodep);
8781         UNUSED(fname);
8782         UNUSED(msg);
8783         UNUSED(now);
8784
8785         return (ISC_R_NOTIMPLEMENTED);
8786 #else
8787         dns_rbtdb_t *rbtdb = rdataset->private1;
8788         dns_rbtnode_t *rbtnode = rdataset->private2;
8789         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8790         unsigned int current_count = rdataset->privateuint4;
8791         unsigned int count;
8792         rdatasetheader_t *header;
8793         nodelock_t *nodelock;
8794         unsigned int total_count;
8795         acachectl_t *acarray;
8796         dns_acacheentry_t *entry;
8797         isc_result_t result;
8798
8799         UNUSED(qtype); /* we do not use this value at least for now */
8800         UNUSED(acache);
8801
8802         header = (struct rdatasetheader *)(raw - sizeof(*header));
8803
8804         total_count = raw[0] * 256 + raw[1];
8805         INSIST(total_count > current_count);
8806         count = total_count - current_count - 1;
8807
8808         acarray = NULL;
8809
8810         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8811         NODE_LOCK(nodelock, isc_rwlocktype_read);
8812
8813         switch (type) {
8814         case dns_rdatasetadditional_fromauth:
8815                 acarray = header->additional_auth;
8816                 break;
8817         case dns_rdatasetadditional_fromcache:
8818                 acarray = NULL;
8819                 break;
8820         case dns_rdatasetadditional_fromglue:
8821                 acarray = header->additional_glue;
8822                 break;
8823         default:
8824                 INSIST(0);
8825         }
8826
8827         if (acarray == NULL) {
8828                 if (type != dns_rdatasetadditional_fromcache)
8829                         dns_acache_countquerymiss(acache);
8830                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8831                 return (ISC_R_NOTFOUND);
8832         }
8833
8834         if (acarray[count].entry == NULL) {
8835                 dns_acache_countquerymiss(acache);
8836                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8837                 return (ISC_R_NOTFOUND);
8838         }
8839
8840         entry = NULL;
8841         dns_acache_attachentry(acarray[count].entry, &entry);
8842
8843         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8844
8845         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8846                                      nodep, fname, msg, now);
8847
8848         dns_acache_detachentry(&entry);
8849
8850         return (result);
8851 }
8852
8853 static void
8854 acache_callback(dns_acacheentry_t *entry, void **arg) {
8855         dns_rbtdb_t *rbtdb;
8856         dns_rbtnode_t *rbtnode;
8857         nodelock_t *nodelock;
8858         acachectl_t *acarray = NULL;
8859         acache_cbarg_t *cbarg;
8860         unsigned int count;
8861
8862         REQUIRE(arg != NULL);
8863         cbarg = *arg;
8864
8865         /*
8866          * The caller must hold the entry lock.
8867          */
8868
8869         rbtdb = (dns_rbtdb_t *)cbarg->db;
8870         rbtnode = (dns_rbtnode_t *)cbarg->node;
8871
8872         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8873         NODE_LOCK(nodelock, isc_rwlocktype_write);
8874
8875         switch (cbarg->type) {
8876         case dns_rdatasetadditional_fromauth:
8877                 acarray = cbarg->header->additional_auth;
8878                 break;
8879         case dns_rdatasetadditional_fromglue:
8880                 acarray = cbarg->header->additional_glue;
8881                 break;
8882         default:
8883                 INSIST(0);
8884         }
8885
8886         count = cbarg->count;
8887         if (acarray != NULL && acarray[count].entry == entry) {
8888                 acarray[count].entry = NULL;
8889                 INSIST(acarray[count].cbarg == cbarg);
8890                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8891                 acarray[count].cbarg = NULL;
8892         } else
8893                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8894
8895         dns_acache_detachentry(&entry);
8896
8897         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8898
8899         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8900         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8901
8902         *arg = NULL;
8903 #endif /* BIND9 */
8904 }
8905
#ifdef BIND9
/*%
 * Cancel acache entry 'entry' and dispose of its callback argument.
 *
 * The node and database references held by '*cbargp' are released, the
 * argument itself is returned to 'mctx', and '*cbargp' is set to NULL.
 * The caller's own reference on 'entry' is not released here.
 */
static void
acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
                      acache_cbarg_t **cbargp)
{
        acache_cbarg_t *doomed;

        REQUIRE(mctx != NULL);
        REQUIRE(entry != NULL);
        REQUIRE(cbargp != NULL && *cbargp != NULL);

        doomed = *cbargp;
        *cbargp = NULL;

        dns_acache_cancelentry(entry);

        dns_db_detachnode(doomed->db, &doomed->node);
        dns_db_detach(&doomed->db);
        isc_mem_put(mctx, doomed, sizeof(acache_cbarg_t));
}
#endif /* BIND9 */
8928
8929 static isc_result_t
8930 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8931                        dns_rdatatype_t qtype, dns_acache_t *acache,
8932                        dns_zone_t *zone, dns_db_t *db,
8933                        dns_dbversion_t *version, dns_dbnode_t *node,
8934                        dns_name_t *fname)
8935 {
8936 #ifndef BIND9
8937         UNUSED(rdataset);
8938         UNUSED(type);
8939         UNUSED(qtype);
8940         UNUSED(acache);
8941         UNUSED(zone);
8942         UNUSED(db);
8943         UNUSED(version);
8944         UNUSED(node);
8945         UNUSED(fname);
8946
8947         return (ISC_R_NOTIMPLEMENTED);
8948 #else
8949         dns_rbtdb_t *rbtdb = rdataset->private1;
8950         dns_rbtnode_t *rbtnode = rdataset->private2;
8951         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8952         unsigned int current_count = rdataset->privateuint4;
8953         rdatasetheader_t *header;
8954         unsigned int total_count, count;
8955         nodelock_t *nodelock;
8956         isc_result_t result;
8957         acachectl_t *acarray;
8958         dns_acacheentry_t *newentry, *oldentry = NULL;
8959         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8960
8961         UNUSED(qtype);
8962
8963         if (type == dns_rdatasetadditional_fromcache)
8964                 return (ISC_R_SUCCESS);
8965
8966         header = (struct rdatasetheader *)(raw - sizeof(*header));
8967
8968         total_count = raw[0] * 256 + raw[1];
8969         INSIST(total_count > current_count);
8970         count = total_count - current_count - 1; /* should be private data */
8971
8972         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8973         if (newcbarg == NULL)
8974                 return (ISC_R_NOMEMORY);
8975         newcbarg->type = type;
8976         newcbarg->count = count;
8977         newcbarg->header = header;
8978         newcbarg->db = NULL;
8979         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8980         newcbarg->node = NULL;
8981         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8982                           &newcbarg->node);
8983         newentry = NULL;
8984         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8985                                         acache_callback, newcbarg, &newentry);
8986         if (result != ISC_R_SUCCESS)
8987                 goto fail;
8988         /* Set cache data in the new entry. */
8989         result = dns_acache_setentry(acache, newentry, zone, db,
8990                                      version, node, fname);
8991         if (result != ISC_R_SUCCESS)
8992                 goto fail;
8993
8994         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8995         NODE_LOCK(nodelock, isc_rwlocktype_write);
8996
8997         acarray = NULL;
8998         switch (type) {
8999         case dns_rdatasetadditional_fromauth:
9000                 acarray = header->additional_auth;
9001                 break;
9002         case dns_rdatasetadditional_fromglue:
9003                 acarray = header->additional_glue;
9004                 break;
9005         default:
9006                 INSIST(0);
9007         }
9008
9009         if (acarray == NULL) {
9010                 unsigned int i;
9011
9012                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
9013                                       sizeof(acachectl_t));
9014
9015                 if (acarray == NULL) {
9016                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9017                         goto fail;
9018                 }
9019
9020                 for (i = 0; i < total_count; i++) {
9021                         acarray[i].entry = NULL;
9022                         acarray[i].cbarg = NULL;
9023                 }
9024         }
9025         switch (type) {
9026         case dns_rdatasetadditional_fromauth:
9027                 header->additional_auth = acarray;
9028                 break;
9029         case dns_rdatasetadditional_fromglue:
9030                 header->additional_glue = acarray;
9031                 break;
9032         default:
9033                 INSIST(0);
9034         }
9035
9036         if (acarray[count].entry != NULL) {
9037                 /*
9038                  * Swap the entry.  Delay cleaning-up the old entry since
9039                  * it would require a node lock.
9040                  */
9041                 oldentry = acarray[count].entry;
9042                 INSIST(acarray[count].cbarg != NULL);
9043                 oldcbarg = acarray[count].cbarg;
9044         }
9045         acarray[count].entry = newentry;
9046         acarray[count].cbarg = newcbarg;
9047
9048         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9049
9050         if (oldentry != NULL) {
9051                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9052                 dns_acache_detachentry(&oldentry);
9053         }
9054
9055         return (ISC_R_SUCCESS);
9056
9057  fail:
9058         if (newcbarg != NULL) {
9059                 if (newentry != NULL) {
9060                         acache_cancelentry(rbtdb->common.mctx, newentry,
9061                                            &newcbarg);
9062                         dns_acache_detachentry(&newentry);
9063                 } else {
9064                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9065                         dns_db_detach(&newcbarg->db);
9066                         isc_mem_put(rbtdb->common.mctx, newcbarg,
9067                             sizeof(*newcbarg));
9068                 }
9069         }
9070
9071         return (result);
9072 #endif
9073 }
9074
9075 static isc_result_t
9076 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9077                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9078 {
9079 #ifndef BIND9
9080         UNUSED(acache);
9081         UNUSED(rdataset);
9082         UNUSED(type);
9083         UNUSED(qtype);
9084
9085         return (ISC_R_NOTIMPLEMENTED);
9086 #else
9087         dns_rbtdb_t *rbtdb = rdataset->private1;
9088         dns_rbtnode_t *rbtnode = rdataset->private2;
9089         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
9090         unsigned int current_count = rdataset->privateuint4;
9091         rdatasetheader_t *header;
9092         nodelock_t *nodelock;
9093         unsigned int total_count, count;
9094         acachectl_t *acarray;
9095         dns_acacheentry_t *entry;
9096         acache_cbarg_t *cbarg;
9097
9098         UNUSED(qtype);          /* we do not use this value at least for now */
9099         UNUSED(acache);
9100
9101         if (type == dns_rdatasetadditional_fromcache)
9102                 return (ISC_R_SUCCESS);
9103
9104         header = (struct rdatasetheader *)(raw - sizeof(*header));
9105
9106         total_count = raw[0] * 256 + raw[1];
9107         INSIST(total_count > current_count);
9108         count = total_count - current_count - 1;
9109
9110         acarray = NULL;
9111         entry = NULL;
9112
9113         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9114         NODE_LOCK(nodelock, isc_rwlocktype_write);
9115
9116         switch (type) {
9117         case dns_rdatasetadditional_fromauth:
9118                 acarray = header->additional_auth;
9119                 break;
9120         case dns_rdatasetadditional_fromglue:
9121                 acarray = header->additional_glue;
9122                 break;
9123         default:
9124                 INSIST(0);
9125         }
9126
9127         if (acarray == NULL) {
9128                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9129                 return (ISC_R_NOTFOUND);
9130         }
9131
9132         entry = acarray[count].entry;
9133         if (entry == NULL) {
9134                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9135                 return (ISC_R_NOTFOUND);
9136         }
9137
9138         acarray[count].entry = NULL;
9139         cbarg = acarray[count].cbarg;
9140         acarray[count].cbarg = NULL;
9141
9142         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9143
9144         if (entry != NULL) {
9145                 if (cbarg != NULL)
9146                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9147                 dns_acache_detachentry(&entry);
9148         }
9149
9150         return (ISC_R_SUCCESS);
9151 #endif
9152 }
9153
9154 /*%
9155  * Routines for LRU-based cache management.
9156  */
9157
9158 /*%
9159  * See if a given cache entry that is being reused needs to be updated
9160  * in the LRU-list.  From the LRU management point of view, this function is
9161  * expected to return true for almost all cases.  When used with threads,
9162  * however, this may cause a non-negligible performance penalty because a
9163  * writer lock will have to be acquired before updating the list.
9164  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9165  * function returns true if the entry has not been updated for some period of
9166  * time.  We differentiate the NS or glue address case and the others since
9167  * experiments have shown that the former tends to be accessed relatively
9168  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
9169  * may cause external queries at a higher level zone, involving more
9170  * transactions).
9171  *
9172  * Caller must hold the node (read or write) lock.
9173  */
9174 static inline isc_boolean_t
9175 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
9176         if ((header->attributes &
9177              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9178                 return (ISC_FALSE);
9179
9180 #if DNS_RBTDB_LIMITLRUUPDATE
9181         if (header->type == dns_rdatatype_ns ||
9182             (header->trust == dns_trust_glue &&
9183              (header->type == dns_rdatatype_a ||
9184               header->type == dns_rdatatype_aaaa))) {
9185                 /*
9186                  * Glue records are updated if at least 60 seconds have passed
9187                  * since the previous update time.
9188                  */
9189                 return (header->last_used + 60 <= now);
9190         }
9191
9192         /* Other records are updated if 5 minutes have passed. */
9193         return (header->last_used + 300 <= now);
9194 #else
9195         UNUSED(now);
9196
9197         return (ISC_TRUE);
9198 #endif
9199 }
9200
9201 /*%
9202  * Update the timestamp of a given cache entry and move it to the head
9203  * of the corresponding LRU list.
9204  *
9205  * Caller must hold the node (write) lock.
9206  *
9207  * Note that the we do NOT touch the heap here, as the TTL has not changed.
9208  */
9209 static void
9210 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9211               isc_stdtime_t now)
9212 {
9213         INSIST(IS_CACHE(rbtdb));
9214
9215         /* To be checked: can we really assume this? XXXMLG */
9216         INSIST(ISC_LINK_LINKED(header, link));
9217
9218         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9219         header->last_used = now;
9220         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9221 }
9222
9223 /*%
9224  * Purge some expired and/or stale (i.e. unused for some period) cache entries
9225  * under an overmem condition.  To recover from this condition quickly, up to
9226  * 2 entries will be purged.  This process is triggered while adding a new
9227  * entry, and we specifically avoid purging entries in the same LRU bucket as
9228  * the one to which the new entry will belong.  Otherwise, we might purge
9229  * entries of the same name of different RR types while adding RRsets from a
9230  * single response (consider the case where we're adding A and AAAA glue records
9231  * of the same NS name).
9232  */
9233 static void
9234 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9235               isc_stdtime_t now, isc_boolean_t tree_locked)
9236 {
9237         rdatasetheader_t *header, *header_prev;
9238         unsigned int locknum;
9239         int purgecount = 2;
9240
9241         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9242              locknum != locknum_start && purgecount > 0;
9243              locknum = (locknum + 1) % rbtdb->node_lock_count) {
9244                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9245                           isc_rwlocktype_write);
9246
9247                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
9248                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
9249                         expire_header(rbtdb, header, tree_locked);
9250                         purgecount--;
9251                 }
9252
9253                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9254                      header != NULL && purgecount > 0;
9255                      header = header_prev) {
9256                         header_prev = ISC_LIST_PREV(header, link);
9257                         /*
9258                          * Unlink the entry at this point to avoid checking it
9259                          * again even if it's currently used someone else and
9260                          * cannot be purged at this moment.  This entry won't be
9261                          * referenced any more (so unlinking is safe) since the
9262                          * TTL was reset to 0.
9263                          */
9264                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9265                                         link);
9266                         expire_header(rbtdb, header, tree_locked);
9267                         purgecount--;
9268                 }
9269
9270                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9271                                     isc_rwlocktype_write);
9272         }
9273 }
9274
9275 static void
9276 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9277               isc_boolean_t tree_locked)
9278 {
9279         set_ttl(rbtdb, header, 0);
9280         header->attributes |= RDATASET_ATTR_STALE;
9281         header->node->dirty = 1;
9282
9283         /*
9284          * Caller must hold the node (write) lock.
9285          */
9286
9287         if (dns_rbtnode_refcurrent(header->node) == 0) {
9288                 /*
9289                  * If no one else is using the node, we can clean it up now.
9290                  * We first need to gain a new reference to the node to meet a
9291                  * requirement of decrement_reference().
9292                  */
9293                 new_reference(rbtdb, header->node);
9294                 decrement_reference(rbtdb, header->node, 0,
9295                                     isc_rwlocktype_write,
9296                                     tree_locked ? isc_rwlocktype_write :
9297                                     isc_rwlocktype_none, ISC_FALSE);
9298         }
9299 }