]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/bind9/lib/dns/rbtdb.c
IFH@204581
[FreeBSD/FreeBSD.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2009  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.6.10.2 2009/12/31 21:44:36 each Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into one 32-bit value:
 * the "base" type lives in the low 16 bits and the "ext" type in the high
 * 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)   extracts the low half.
 * RBTDB_RDATATYPE_EXT(type)    extracts the high half.
 * RBTDB_RDATATYPE_VALUE(b, e)  builds a packed value from base 'b' and
 *                              ext 'e'.
 *
 * In RBTDB_RDATATYPE_VALUE the ext operand is widened to the unsigned
 * 32-bit rbtdb_rdatatype_t *before* it is shifted.  Shifting the
 * int-promoted 16-bit value directly overflows a signed int (undefined
 * behaviour) for any ext type >= 0x8000.  The base operand is masked so
 * it can never spill into the ext half.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | \
                             ((rbtdb_rdatatype_t)(b) & 0xFFFF)))
104
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
128 #else
129 #define DNS_RBTDB_USERWLOCK 0
130 #endif
131
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137 #else
138 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t)        LOCK(l)
141 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 #endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
202 /*
203  * Allow clients with a virtual time of up to 5 minutes in the past to see
204  * records that would otherwise have expired.
205  */
206 #define RBTDB_VIRTUAL 300
207
208 struct noqname {
209         dns_name_t      name;
210         void *          neg;
211         void *          negsig;
212         dns_rdatatype_t type;
213 };
214
215 typedef struct acachectl acachectl_t;
216
/*
 * Header describing one version of one rdataset stored at a node.
 *
 * 'type' is a packed rbtdb_rdatatype_t (see the RBTDB_RDATATYPE_BASE,
 * RBTDB_RDATATYPE_EXT and RBTDB_RDATATYPE_VALUE macros) and 'attributes'
 * holds RDATASET_ATTR_* bits, which are tested through the EXISTS(),
 * NONEXISTENT(), IGNORE(), RETAIN(), NXDOMAIN(), RESIGN() and OPTOUT()
 * macros.  Top headers for different types are chained through 'next';
 * older versions of the same rdataset hang off 'down'.
 *
 * 'heap_index' is written by set_index(); set_ttl() skips its heap
 * adjustment when the index is 0.  'resign' is the key compared by
 * resign_sooner(), and 'rdh_ttl' the key compared by ttl_sooner().
 */
217 typedef struct rdatasetheader {
218         /*%
219          * Locked by the owning node's lock.
220          */
221         rbtdb_serial_t                  serial;
222         dns_ttl_t                       rdh_ttl;
223         rbtdb_rdatatype_t               type;
224         isc_uint16_t                    attributes;
225         dns_trust_t                     trust;
226         struct noqname                  *noqname;
227         struct noqname                  *closest;
228         /*%<
229          * We don't use the LIST macros for the 'next'/'down' links below,
230          * because the LIST structure has both head and tail pointers, and
231          * is doubly linked.
231          */
232
233         struct rdatasetheader           *next;
234         /*%<
235          * If this is the top header for an rdataset, 'next' points
236          * to the top header for the next rdataset (i.e., the next type).
237          * Otherwise, it points up to the header whose down pointer points
238          * at this header.
239          */
240
241         struct rdatasetheader           *down;
242         /*%<
243          * Points to the header for the next older version of
244          * this rdataset.
245          */
246
247         isc_uint32_t                    count;
248         /*%<
249          * Monotonically increased every time this rdataset is bound so that
250          * it is used as the base of the starting point in DNS responses
251          * when the "cyclic" rrset-order is required.  Since the ordering
252          * should not be so crucial, no lock is set for the counter for
253          * performance reasons.
254          */
255
256         acachectl_t                     *additional_auth;
257         acachectl_t                     *additional_glue;
258
259         dns_rbtnode_t                   *node;
260         isc_stdtime_t                   last_used;
261         ISC_LINK(struct rdatasetheader) lru_link;
262         /*%<
263          * Used for LRU-based cache management.  We should probably make
264          * these cache-DB specific.  We might also make it a pointer and
265          * ensure only the top header has a valid link to save memory.
266          * The linked-list is locked by the rbtdb->lrulock.
267          */
268
269         /*
270          * It's possible this should not be here anymore, but instead
271          * referenced from the bucket's heap directly.
272          */
273 #if 0
274         isc_heap_t                      *heap;
275 #endif
276         unsigned int                    heap_index;
277         /*%<
278          * Used for TTL-based cache cleaning.
279          */
280         isc_stdtime_t                   resign;
281 } rdatasetheader_t;
282
283 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
284 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
285
286 #define RDATASET_ATTR_NONEXISTENT       0x0001
287 #define RDATASET_ATTR_STALE             0x0002
288 #define RDATASET_ATTR_IGNORE            0x0004
289 #define RDATASET_ATTR_RETAIN            0x0008
290 #define RDATASET_ATTR_NXDOMAIN          0x0010
291 #define RDATASET_ATTR_RESIGN            0x0020
292 #define RDATASET_ATTR_STATCOUNT         0x0040
293 #define RDATASET_ATTR_OPTOUT            0x0080
294
295 typedef struct acache_cbarg {
296         dns_rdatasetadditional_t        type;
297         unsigned int                    count;
298         dns_db_t                        *db;
299         dns_dbnode_t                    *node;
300         rdatasetheader_t                *header;
301 } acache_cbarg_t;
302
303 struct acachectl {
304         dns_acacheentry_t               *entry;
305         acache_cbarg_t                  *cbarg;
306 };
307
308 /*
309  * XXX
310  * When the cache will pre-expire data (due to memory low or other
311  * situations) before the rdataset's TTL has expired, it MUST
312  * respect the RETAIN bit and not expire the data until its TTL is
313  * expired.
314  */
315
316 #undef IGNORE                   /* WIN32 winbase.h defines this. */
317
/*
 * Predicates over the RDATASET_ATTR_* bits of a header's 'attributes'
 * field.  Each one evaluates to 1 or 0.  EXISTS() is the negation of
 * NONEXISTENT(); all the others are true when the named bit is set.
 */
#define HEADER_HAS_ATTR(header, attr) \
        (((header)->attributes & (attr)) != 0)

#define EXISTS(header) \
        (!HEADER_HAS_ATTR(header, RDATASET_ATTR_NONEXISTENT))
#define NONEXISTENT(header) \
        HEADER_HAS_ATTR(header, RDATASET_ATTR_NONEXISTENT)
#define IGNORE(header) \
        HEADER_HAS_ATTR(header, RDATASET_ATTR_IGNORE)
#define RETAIN(header) \
        HEADER_HAS_ATTR(header, RDATASET_ATTR_RETAIN)
#define NXDOMAIN(header) \
        HEADER_HAS_ATTR(header, RDATASET_ATTR_NXDOMAIN)
#define RESIGN(header) \
        HEADER_HAS_ATTR(header, RDATASET_ATTR_RESIGN)
#define OPTOUT(header) \
        HEADER_HAS_ATTR(header, RDATASET_ATTR_OPTOUT)
332
333 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
334
335 /*%
336  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
337  * There is a tradeoff issue about configuring this value: if this is too
338  * small, it may cause heavier contention between threads; if this is too large,
339  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
340  * The default value should work well for most environments, but this can
341  * also be configurable at compilation time via the
342  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
343  * 1 due to the assumption of overmem_purge().
344  */
345 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
346 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
347 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
348 #else
349 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
350 #endif
351 #else
352 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
353 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
354
/*
 * One node-lock bucket.  dns_rbtdb_t keeps an array of these in
 * 'node_locks', sized by 'node_lock_count'.  Each bucket bundles the lock
 * itself (an rwlock or a mutex, depending on how nodelock_t is defined
 * above), a reference counter, and an 'exiting' flag (presumably set
 * while the database is being torn down -- verify against the users of
 * this field).
 */
355 typedef struct {
356         nodelock_t                      lock;
357         /* Protected in the refcount routines. */
358         isc_refcount_t                  references;
359         /* Locked by lock. */
360         isc_boolean_t                   exiting;
361 } rbtdb_nodelock_t;
362
363 typedef struct rbtdb_changed {
364         dns_rbtnode_t *                 node;
365         isc_boolean_t                   dirty;
366         ISC_LINK(struct rbtdb_changed)  link;
367 } rbtdb_changed_t;
368
369 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
370
371 typedef enum {
372         dns_db_insecure,
373         dns_db_partial,
374         dns_db_secure
375 } dns_db_secure_t;
376
377 typedef struct rbtdb_version {
378         /* Not locked */
379         rbtdb_serial_t                  serial;
380         /*
381          * Protected in the refcount routines.
382          * XXXJT: should we change the lock policy based on the refcount
383          * performance?
384          */
385         isc_refcount_t                  references;
386         /* Locked by database lock. */
387         isc_boolean_t                   writer;
388         isc_boolean_t                   commit_ok;
389         rbtdb_changedlist_t             changed_list;
390         rdatasetheaderlist_t            resigned_list;
391         ISC_LINK(struct rbtdb_version)  link;
392         dns_db_secure_t                 secure;
393         isc_boolean_t                   havensec3;
394         /* NSEC3 parameters */
395         dns_hash_t                      hash;
396         isc_uint8_t                     flags;
397         isc_uint16_t                    iterations;
398         isc_uint8_t                     salt_length;
399         unsigned char                   salt[NSEC3_MAX_HASH_LENGTH];
400 } rbtdb_version_t;
401
402 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
403
404 typedef struct {
405         /* Unlocked. */
406         dns_db_t                        common;
407 #if DNS_RBTDB_USERWLOCK
408         isc_rwlock_t                    lock;
409 #else
410         isc_mutex_t                     lock;
411 #endif
412         isc_rwlock_t                    tree_lock;
413         unsigned int                    node_lock_count;
414         rbtdb_nodelock_t *              node_locks;
415         dns_rbtnode_t *                 origin_node;
416         dns_stats_t *                   rrsetstats; /* cache DB only */
417         /* Locked by lock. */
418         unsigned int                    active;
419         isc_refcount_t                  references;
420         unsigned int                    attributes;
421         rbtdb_serial_t                  current_serial;
422         rbtdb_serial_t                  least_serial;
423         rbtdb_serial_t                  next_serial;
424         rbtdb_version_t *               current_version;
425         rbtdb_version_t *               future_version;
426         rbtdb_versionlist_t             open_versions;
427         isc_boolean_t                   overmem;
428         isc_task_t *                    task;
429         dns_dbnode_t                    *soanode;
430         dns_dbnode_t                    *nsnode;
431
432         /*
433          * This is a linked list used to implement the LRU cache.  There will
434          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
435          * placed on the linked list rdatasets[1].
436          */
437         rdatasetheaderlist_t            *rdatasets;
438
439         /*%
440          * Temporary storage for stale cache nodes and dynamically deleted
441          * nodes that await being cleaned up.
442          */
443         rbtnodelist_t                   *deadnodes;
444
445         /*
446          * Heaps.  Each of these is used for TTL based expiry.
447          */
448         isc_heap_t                      **heaps;
449
450         /* Locked by tree_lock. */
451         dns_rbt_t *                     tree;
452         dns_rbt_t *                     nsec3;
453
454         /* Unlocked */
455         unsigned int                    quantum;
456 } dns_rbtdb_t;
457
458 #define RBTDB_ATTR_LOADED               0x01
459 #define RBTDB_ATTR_LOADING              0x02
460
461 /*%
462  * Search Context
463  */
464 typedef struct {
465         dns_rbtdb_t *           rbtdb;
466         rbtdb_version_t *       rbtversion;
467         rbtdb_serial_t          serial;
468         unsigned int            options;
469         dns_rbtnodechain_t      chain;
470         isc_boolean_t           copy_name;
471         isc_boolean_t           need_cleanup;
472         isc_boolean_t           wild;
473         dns_rbtnode_t *         zonecut;
474         rdatasetheader_t *      zonecut_rdataset;
475         rdatasetheader_t *      zonecut_sigrdataset;
476         dns_fixedname_t         zonecut_name;
477         isc_stdtime_t           now;
478 } rbtdb_search_t;
479
480 /*%
481  * Load Context
482  */
483 typedef struct {
484         dns_rbtdb_t *           rbtdb;
485         isc_stdtime_t           now;
486 } rbtdb_load_t;
487
488 static void rdataset_disassociate(dns_rdataset_t *rdataset);
489 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
490 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
491 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
492 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
493 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
494 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
495                                         dns_name_t *name,
496                                         dns_rdataset_t *neg,
497                                         dns_rdataset_t *negsig);
498 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
499                                         dns_name_t *name,
500                                         dns_rdataset_t *neg,
501                                         dns_rdataset_t *negsig);
502 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
503                                            dns_rdatasetadditional_t type,
504                                            dns_rdatatype_t qtype,
505                                            dns_acache_t *acache,
506                                            dns_zone_t **zonep,
507                                            dns_db_t **dbp,
508                                            dns_dbversion_t **versionp,
509                                            dns_dbnode_t **nodep,
510                                            dns_name_t *fname,
511                                            dns_message_t *msg,
512                                            isc_stdtime_t now);
513 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
514                                            dns_rdatasetadditional_t type,
515                                            dns_rdatatype_t qtype,
516                                            dns_acache_t *acache,
517                                            dns_zone_t *zone,
518                                            dns_db_t *db,
519                                            dns_dbversion_t *version,
520                                            dns_dbnode_t *node,
521                                            dns_name_t *fname);
522 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
523                                            dns_rdataset_t *rdataset,
524                                            dns_rdatasetadditional_t type,
525                                            dns_rdatatype_t qtype);
526 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
527                                               isc_stdtime_t now);
528 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
529                           isc_stdtime_t now);
530 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
531                           isc_boolean_t tree_locked);
532 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
533                           isc_stdtime_t now, isc_boolean_t tree_locked);
534 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
535                                   rdatasetheader_t *newheader);
536 static void prune_tree(isc_task_t *task, isc_event_t *event);
537
538 static dns_rdatasetmethods_t rdataset_methods = {
539         rdataset_disassociate,
540         rdataset_first,
541         rdataset_next,
542         rdataset_current,
543         rdataset_clone,
544         rdataset_count,
545         NULL,
546         rdataset_getnoqname,
547         NULL,
548         rdataset_getclosest,
549         rdataset_getadditional,
550         rdataset_setadditional,
551         rdataset_putadditional
552 };
553
554 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
555 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
556 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
557 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
558                                  dns_rdataset_t *rdataset);
559
560 static dns_rdatasetitermethods_t rdatasetiter_methods = {
561         rdatasetiter_destroy,
562         rdatasetiter_first,
563         rdatasetiter_next,
564         rdatasetiter_current
565 };
566
567 typedef struct rbtdb_rdatasetiter {
568         dns_rdatasetiter_t              common;
569         rdatasetheader_t *              current;
570 } rbtdb_rdatasetiter_t;
571
572 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
573 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
574 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
575 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
576                                         dns_name_t *name);
577 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
578 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
579 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
580                                            dns_dbnode_t **nodep,
581                                            dns_name_t *name);
582 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
583 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
584                                           dns_name_t *name);
585
586 static dns_dbiteratormethods_t dbiterator_methods = {
587         dbiterator_destroy,
588         dbiterator_first,
589         dbiterator_last,
590         dbiterator_seek,
591         dbiterator_prev,
592         dbiterator_next,
593         dbiterator_current,
594         dbiterator_pause,
595         dbiterator_origin
596 };
597
598 #define DELETION_BATCH_MAX 64
599
600 /*
 * Database iterator state.
 *
 * The iterator carries two node chains, 'chain' and 'nsec3chain';
 * 'current' points at a dns_rbtnodechain_t (presumably whichever of the
 * two is being walked at the moment -- verify against the iterator
 * methods).  'deletions' can hold at most DELETION_BATCH_MAX node
 * pointers; 'delete' looks like the number of slots in use (TODO
 * confirm).
 *
601  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
602  */
603 typedef struct rbtdb_dbiterator {
604         dns_dbiterator_t                common;
605         isc_boolean_t                   paused;
606         isc_boolean_t                   new_origin;
607         isc_rwlocktype_t                tree_locked;
608         isc_result_t                    result;
609         dns_fixedname_t                 name;
610         dns_fixedname_t                 origin;
611         dns_rbtnodechain_t              chain;
612         dns_rbtnodechain_t              nsec3chain;
613         dns_rbtnodechain_t              *current;
614         dns_rbtnode_t                   *node;
615         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
616         int                             delete;
617         isc_boolean_t                   nsec3only;
618         isc_boolean_t                   nonsec3;
619 } rbtdb_dbiterator_t;
620
621
622 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
623 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
624
625 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
626                        isc_event_t *event);
627 static void overmem(dns_db_t *db, isc_boolean_t overmem);
628 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
629                                isc_boolean_t *nsec3createflag);
630
631 /*%
632  * 'init_count' is used to initialize 'newheader->count', which in turn
633  * is used to determine where in the cycle "cyclic" rrset-order starts.
634  * We don't lock this as we don't care about simultaneous updates.
635  *
636  * Note:
637  *      Both init_count and header->count can be ISC_UINT32_MAX.
638  *      The count on the returned rdataset however can't be as
639  *      that indicates that the database does not implement cyclic
640  *      processing.
641  */
642 static unsigned int init_count;
643
644 /*
645  * Locking
646  *
647  * If a routine is going to lock more than one lock in this module, then
648  * the locking must be done in the following order:
649  *
650  *      Tree Lock
651  *
652  *      Node Lock       (Only one from the set may be locked at one time by
653  *                       any caller)
654  *
655  *      Database Lock
656  *
657  * Failure to follow this hierarchy can result in deadlock.
658  */
659
660 /*
661  * Deleting Nodes
662  *
663  * For zone databases the node for the origin of the zone MUST NOT be deleted.
664  */
665
666
667 /*
668  * DB Routines
669  */
670
671 static void
672 attach(dns_db_t *source, dns_db_t **targetp) {
673         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
674
675         REQUIRE(VALID_RBTDB(rbtdb));
676
677         isc_refcount_increment(&rbtdb->references, NULL);
678
679         *targetp = source;
680 }
681
682 static void
683 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
684         dns_rbtdb_t *rbtdb = event->ev_arg;
685
686         UNUSED(task);
687
688         free_rbtdb(rbtdb, ISC_TRUE, event);
689 }
690
691 static void
692 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
693                   isc_boolean_t increment)
694 {
695         dns_rdatastatstype_t statattributes = 0;
696         dns_rdatastatstype_t base = 0;
697         dns_rdatastatstype_t type;
698
699         /* At the moment we count statistics only for cache DB */
700         INSIST(IS_CACHE(rbtdb));
701
702         if (NXDOMAIN(header))
703                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
704         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
705                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
706                 base = RBTDB_RDATATYPE_EXT(header->type);
707         } else
708                 base = RBTDB_RDATATYPE_BASE(header->type);
709
710         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
711         if (increment)
712                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
713         else
714                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
715 }
716
717 static void
718 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
719         int idx;
720         isc_heap_t *heap;
721         dns_ttl_t oldttl;
722
723         oldttl = header->rdh_ttl;
724         header->rdh_ttl = newttl;
725
726         if (!IS_CACHE(rbtdb))
727                 return;
728
729         /*
730          * It's possible the rbtdb is not a cache.  If this is the case,
731          * we will not have a heap, and we move on.  If we do, though,
732          * we might need to adjust things.
733          */
734         if (header->heap_index == 0 || newttl == oldttl)
735                 return;
736         idx = header->node->locknum;
737         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
738             return;
739         heap = rbtdb->heaps[idx];
740
741         if (newttl < oldttl)
742                 isc_heap_increased(heap, header->heap_index);
743         else
744                 isc_heap_decreased(heap, header->heap_index);
745 }
746
747 /*%
748  * These functions allow the heap code to rank the priority of each
749  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
750  */
751 static isc_boolean_t
752 ttl_sooner(void *v1, void *v2) {
753         rdatasetheader_t *h1 = v1;
754         rdatasetheader_t *h2 = v2;
755
756         if (h1->rdh_ttl < h2->rdh_ttl)
757                 return (ISC_TRUE);
758         return (ISC_FALSE);
759 }
760
761 static isc_boolean_t
762 resign_sooner(void *v1, void *v2) {
763         rdatasetheader_t *h1 = v1;
764         rdatasetheader_t *h2 = v2;
765
766         if (h1->resign < h2->resign)
767                 return (ISC_TRUE);
768         return (ISC_FALSE);
769 }
770
771 /*%
772  * This function sets the heap index into the header.
773  */
774 static void
775 set_index(void *what, unsigned int index) {
776         rdatasetheader_t *h = what;
777
778         h->heap_index = index;
779 }
780
781 /*%
782  * Work out how many nodes can be deleted in the time between two
783  * requests to the nameserver.  Smooth the resulting number and use it
784  * as a estimate for the number of nodes to be deleted in the next
785  * iteration.
786  */
787 static unsigned int
788 adjust_quantum(unsigned int old, isc_time_t *start) {
789         unsigned int pps = dns_pps;     /* packets per second */
790         unsigned int interval;
791         isc_uint64_t usecs;
792         isc_time_t end;
793         unsigned int new;
794
795         if (pps < 100)
796                 pps = 100;
797         isc_time_now(&end);
798
799         interval = 1000000 / pps;       /* interval in usec */
800         if (interval == 0)
801                 interval = 1;
802         usecs = isc_time_microdiff(&end, start);
803         if (usecs == 0) {
804                 /*
805                  * We were unable to measure the amount of time taken.
806                  * Double the nodes deleted next time.
807                  */
808                 old *= 2;
809                 if (old > 1000)
810                         old = 1000;
811                 return (old);
812         }
813         new = old * interval;
814         new /= (unsigned int)usecs;
815         if (new == 0)
816                 new = 1;
817         else if (new > 1000)
818                 new = 1000;
819
820         /* Smooth */
821         new = (new + old * 3) / 4;
822
823         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
824                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
825
826         return (new);
827 }
828
829 static void
830 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
831         unsigned int i;
832         isc_ondestroy_t ondest;
833         isc_result_t result;
834         char buf[DNS_NAME_FORMATSIZE];
835         isc_time_t start;
836
837         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
838                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
839
840         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
841         REQUIRE(rbtdb->future_version == NULL);
842
843         if (rbtdb->current_version != NULL) {
844                 unsigned int refs;
845
846                 isc_refcount_decrement(&rbtdb->current_version->references,
847                                        &refs);
848                 INSIST(refs == 0);
849                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
850                 isc_refcount_destroy(&rbtdb->current_version->references);
851                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
852                             sizeof(rbtdb_version_t));
853         }
854
855         /*
856          * We assume the number of remaining dead nodes is reasonably small;
857          * the overhead of unlinking all nodes here should be negligible.
858          */
859         for (i = 0; i < rbtdb->node_lock_count; i++) {
860                 dns_rbtnode_t *node;
861
862                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
863                 while (node != NULL) {
864                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
865                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
866                 }
867         }
868
869         if (event == NULL)
870                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
871  again:
872         if (rbtdb->tree != NULL) {
873                 isc_time_now(&start);
874                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
875                 if (result == ISC_R_QUOTA) {
876                         INSIST(rbtdb->task != NULL);
877                         if (rbtdb->quantum != 0)
878                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
879                                                                 &start);
880                         if (event == NULL)
881                                 event = isc_event_allocate(rbtdb->common.mctx,
882                                                            NULL,
883                                                          DNS_EVENT_FREESTORAGE,
884                                                            free_rbtdb_callback,
885                                                            rbtdb,
886                                                            sizeof(isc_event_t));
887                         if (event == NULL)
888                                 goto again;
889                         isc_task_send(rbtdb->task, &event);
890                         return;
891                 }
892                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
893         }
894
895         if (rbtdb->nsec3 != NULL) {
896                 isc_time_now(&start);
897                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
898                 if (result == ISC_R_QUOTA) {
899                         INSIST(rbtdb->task != NULL);
900                         if (rbtdb->quantum != 0)
901                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
902                                                                 &start);
903                         if (event == NULL)
904                                 event = isc_event_allocate(rbtdb->common.mctx,
905                                                            NULL,
906                                                          DNS_EVENT_FREESTORAGE,
907                                                            free_rbtdb_callback,
908                                                            rbtdb,
909                                                            sizeof(isc_event_t));
910                         if (event == NULL)
911                                 goto again;
912                         isc_task_send(rbtdb->task, &event);
913                         return;
914                 }
915                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
916         }
917
918         if (event != NULL)
919                 isc_event_free(&event);
920         if (log) {
921                 if (dns_name_dynamic(&rbtdb->common.origin))
922                         dns_name_format(&rbtdb->common.origin, buf,
923                                         sizeof(buf));
924                 else
925                         strcpy(buf, "<UNKNOWN>");
926                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
927                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
928                               "done free_rbtdb(%s)", buf);
929         }
930         if (dns_name_dynamic(&rbtdb->common.origin))
931                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
932         for (i = 0; i < rbtdb->node_lock_count; i++) {
933                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
934                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
935         }
936
937         /*
938          * Clean up LRU / re-signing order lists.
939          */
940         if (rbtdb->rdatasets != NULL) {
941                 for (i = 0; i < rbtdb->node_lock_count; i++)
942                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
943                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
944                             rbtdb->node_lock_count *
945                             sizeof(rdatasetheaderlist_t));
946         }
947         /*
948          * Clean up dead node buckets.
949          */
950         if (rbtdb->deadnodes != NULL) {
951                 for (i = 0; i < rbtdb->node_lock_count; i++)
952                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
953                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
954                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
955         }
956         /*
957          * Clean up heap objects.
958          */
959         if (rbtdb->heaps != NULL) {
960                 for (i = 0; i < rbtdb->node_lock_count; i++)
961                         isc_heap_destroy(&rbtdb->heaps[i]);
962                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
963                             rbtdb->node_lock_count *
964                             sizeof(isc_heap_t *));
965         }
966
967         if (rbtdb->rrsetstats != NULL)
968                 dns_stats_detach(&rbtdb->rrsetstats);
969
970         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
971                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
972         isc_rwlock_destroy(&rbtdb->tree_lock);
973         isc_refcount_destroy(&rbtdb->references);
974         if (rbtdb->task != NULL)
975                 isc_task_detach(&rbtdb->task);
976
977         RBTDB_DESTROYLOCK(&rbtdb->lock);
978         rbtdb->common.magic = 0;
979         rbtdb->common.impmagic = 0;
980         ondest = rbtdb->common.ondest;
981         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
982         isc_ondestroy_notify(&ondest, rbtdb);
983 }
984
985 static inline void
986 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
987         isc_boolean_t want_free = ISC_FALSE;
988         unsigned int i;
989         unsigned int inactive = 0;
990
991         /* XXX check for open versions here */
992
993         if (rbtdb->soanode != NULL)
994                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
995         if (rbtdb->nsnode != NULL)
996                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
997
998         /*
999          * Even though there are no external direct references, there still
1000          * may be nodes in use.
1001          */
1002         for (i = 0; i < rbtdb->node_lock_count; i++) {
1003                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1004                 rbtdb->node_locks[i].exiting = ISC_TRUE;
1005                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1006                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1007                     == 0) {
1008                         inactive++;
1009                 }
1010         }
1011
1012         if (inactive != 0) {
1013                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1014                 rbtdb->active -= inactive;
1015                 if (rbtdb->active == 0)
1016                         want_free = ISC_TRUE;
1017                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1018                 if (want_free) {
1019                         char buf[DNS_NAME_FORMATSIZE];
1020                         if (dns_name_dynamic(&rbtdb->common.origin))
1021                                 dns_name_format(&rbtdb->common.origin, buf,
1022                                                 sizeof(buf));
1023                         else
1024                                 strcpy(buf, "<UNKNOWN>");
1025                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1026                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1027                                       "calling free_rbtdb(%s)", buf);
1028                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1029                 }
1030         }
1031 }
1032
1033 static void
1034 detach(dns_db_t **dbp) {
1035         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1036         unsigned int refs;
1037
1038         REQUIRE(VALID_RBTDB(rbtdb));
1039
1040         isc_refcount_decrement(&rbtdb->references, &refs);
1041
1042         if (refs == 0)
1043                 maybe_free_rbtdb(rbtdb);
1044
1045         *dbp = NULL;
1046 }
1047
1048 static void
1049 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1050         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1051         rbtdb_version_t *version;
1052         unsigned int refs;
1053
1054         REQUIRE(VALID_RBTDB(rbtdb));
1055
1056         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1057         version = rbtdb->current_version;
1058         isc_refcount_increment(&version->references, &refs);
1059         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1060
1061         *versionp = (dns_dbversion_t *)version;
1062 }
1063
1064 static inline rbtdb_version_t *
1065 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1066                  unsigned int references, isc_boolean_t writer)
1067 {
1068         isc_result_t result;
1069         rbtdb_version_t *version;
1070
1071         version = isc_mem_get(mctx, sizeof(*version));
1072         if (version == NULL)
1073                 return (NULL);
1074         version->serial = serial;
1075         result = isc_refcount_init(&version->references, references);
1076         if (result != ISC_R_SUCCESS) {
1077                 isc_mem_put(mctx, version, sizeof(*version));
1078                 return (NULL);
1079         }
1080         version->writer = writer;
1081         version->commit_ok = ISC_FALSE;
1082         ISC_LIST_INIT(version->changed_list);
1083         ISC_LIST_INIT(version->resigned_list);
1084         ISC_LINK_INIT(version, link);
1085
1086         return (version);
1087 }
1088
1089 static isc_result_t
1090 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1091         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1092         rbtdb_version_t *version;
1093
1094         REQUIRE(VALID_RBTDB(rbtdb));
1095         REQUIRE(versionp != NULL && *versionp == NULL);
1096         REQUIRE(rbtdb->future_version == NULL);
1097
1098         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1099         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1100         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1101                                    ISC_TRUE);
1102         if (version != NULL) {
1103                 version->commit_ok = ISC_TRUE;
1104                 version->secure = rbtdb->current_version->secure;
1105                 version->havensec3 = rbtdb->current_version->havensec3;
1106                 if (version->havensec3) {
1107                         version->flags = rbtdb->current_version->flags;
1108                         version->iterations =
1109                                 rbtdb->current_version->iterations;
1110                         version->hash = rbtdb->current_version->hash;
1111                         version->salt_length =
1112                                 rbtdb->current_version->salt_length;
1113                         memcpy(version->salt, rbtdb->current_version->salt,
1114                                version->salt_length);
1115                 } else {
1116                         version->flags = 0;
1117                         version->iterations = 0;
1118                         version->hash = 0;
1119                         version->salt_length = 0;
1120                         memset(version->salt, 0, sizeof(version->salt));
1121                 }
1122                 rbtdb->next_serial++;
1123                 rbtdb->future_version = version;
1124         }
1125         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1126
1127         if (version == NULL)
1128                 return (ISC_R_NOMEMORY);
1129
1130         *versionp = version;
1131
1132         return (ISC_R_SUCCESS);
1133 }
1134
1135 static void
1136 attachversion(dns_db_t *db, dns_dbversion_t *source,
1137               dns_dbversion_t **targetp)
1138 {
1139         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1140         rbtdb_version_t *rbtversion = source;
1141         unsigned int refs;
1142
1143         REQUIRE(VALID_RBTDB(rbtdb));
1144
1145         isc_refcount_increment(&rbtversion->references, &refs);
1146         INSIST(refs > 1);
1147
1148         *targetp = rbtversion;
1149 }
1150
1151 static rbtdb_changed_t *
1152 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1153             dns_rbtnode_t *node)
1154 {
1155         rbtdb_changed_t *changed;
1156         unsigned int refs;
1157
1158         /*
1159          * Caller must be holding the node lock if its reference must be
1160          * protected by the lock.
1161          */
1162
1163         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1164
1165         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1166
1167         REQUIRE(version->writer);
1168
1169         if (changed != NULL) {
1170                 dns_rbtnode_refincrement(node, &refs);
1171                 INSIST(refs != 0);
1172                 changed->node = node;
1173                 changed->dirty = ISC_FALSE;
1174                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1175         } else
1176                 version->commit_ok = ISC_FALSE;
1177
1178         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1179
1180         return (changed);
1181 }
1182
1183 static void
1184 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1185                  acachectl_t *array)
1186 {
1187         unsigned int count;
1188         unsigned int i;
1189         unsigned char *raw;     /* RDATASLAB */
1190
1191         /*
1192          * The caller must be holding the corresponding node lock.
1193          */
1194
1195         if (array == NULL)
1196                 return;
1197
1198         raw = (unsigned char *)header + sizeof(*header);
1199         count = raw[0] * 256 + raw[1];
1200
1201         /*
1202          * Sanity check: since an additional cache entry has a reference to
1203          * the original DB node (in the callback arg), there should be no
1204          * acache entries when the node can be freed.
1205          */
1206         for (i = 0; i < count; i++)
1207                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1208
1209         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1210 }
1211
1212 static inline void
1213 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1214
1215         if (dns_name_dynamic(&(*noqname)->name))
1216                 dns_name_free(&(*noqname)->name, mctx);
1217         if ((*noqname)->neg != NULL)
1218                 isc_mem_put(mctx, (*noqname)->neg,
1219                             dns_rdataslab_size((*noqname)->neg, 0));
1220         if ((*noqname)->negsig != NULL)
1221                 isc_mem_put(mctx, (*noqname)->negsig,
1222                             dns_rdataslab_size((*noqname)->negsig, 0));
1223         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1224         *noqname = NULL;
1225 }
1226
1227 static inline void
1228 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1229 {
1230         ISC_LINK_INIT(h, lru_link);
1231         h->heap_index = 0;
1232
1233 #if TRACE_HEADER
1234         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1235                 fprintf(stderr, "initialized header: %p\n", h);
1236 #else
1237         UNUSED(rbtdb);
1238 #endif
1239 }
1240
1241 static inline rdatasetheader_t *
1242 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1243 {
1244         rdatasetheader_t *h;
1245
1246         h = isc_mem_get(mctx, sizeof(*h));
1247         if (h == NULL)
1248                 return (NULL);
1249
1250 #if TRACE_HEADER
1251         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1252                 fprintf(stderr, "allocated header: %p\n", h);
1253 #endif
1254         init_rdataset(rbtdb, h);
1255         return (h);
1256 }
1257
1258 static inline void
1259 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1260 {
1261         unsigned int size;
1262         int idx;
1263
1264         if (EXISTS(rdataset) &&
1265             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1266                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1267         }
1268
1269         idx = rdataset->node->locknum;
1270         if (ISC_LINK_LINKED(rdataset, lru_link))
1271                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, lru_link);
1272         if (rdataset->heap_index != 0)
1273                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1274         rdataset->heap_index = 0;
1275
1276         if (rdataset->noqname != NULL)
1277                 free_noqname(mctx, &rdataset->noqname);
1278         if (rdataset->closest != NULL)
1279                 free_noqname(mctx, &rdataset->closest);
1280
1281         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1282         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1283
1284         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1285                 size = sizeof(*rdataset);
1286         else
1287                 size = dns_rdataslab_size((unsigned char *)rdataset,
1288                                           sizeof(*rdataset));
1289         isc_mem_put(mctx, rdataset, size);
1290 }
1291
1292 static inline void
1293 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1294         rdatasetheader_t *header, *dcurrent;
1295         isc_boolean_t make_dirty = ISC_FALSE;
1296
1297         /*
1298          * Caller must hold the node lock.
1299          */
1300
1301         /*
1302          * We set the IGNORE attribute on rdatasets with serial number
1303          * 'serial'.  When the reference count goes to zero, these rdatasets
1304          * will be cleaned up; until that time, they will be ignored.
1305          */
1306         for (header = node->data; header != NULL; header = header->next) {
1307                 if (header->serial == serial) {
1308                         header->attributes |= RDATASET_ATTR_IGNORE;
1309                         make_dirty = ISC_TRUE;
1310                 }
1311                 for (dcurrent = header->down;
1312                      dcurrent != NULL;
1313                      dcurrent = dcurrent->down) {
1314                         if (dcurrent->serial == serial) {
1315                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1316                                 make_dirty = ISC_TRUE;
1317                         }
1318                 }
1319         }
1320         if (make_dirty)
1321                 node->dirty = 1;
1322 }
1323
1324 static inline void
1325 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1326 {
1327         rdatasetheader_t *d, *down_next;
1328
1329         for (d = top->down; d != NULL; d = down_next) {
1330                 down_next = d->down;
1331                 free_rdataset(rbtdb, mctx, d);
1332         }
1333         top->down = NULL;
1334 }
1335
1336 static inline void
1337 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1338         rdatasetheader_t *current, *top_prev, *top_next;
1339         isc_mem_t *mctx = rbtdb->common.mctx;
1340
1341         /*
1342          * Caller must be holding the node lock.
1343          */
1344
1345         top_prev = NULL;
1346         for (current = node->data; current != NULL; current = top_next) {
1347                 top_next = current->next;
1348                 clean_stale_headers(rbtdb, mctx, current);
1349                 /*
1350                  * If current is nonexistent or stale, we can clean it up.
1351                  */
1352                 if ((current->attributes &
1353                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1354                         if (top_prev != NULL)
1355                                 top_prev->next = current->next;
1356                         else
1357                                 node->data = current->next;
1358                         free_rdataset(rbtdb, mctx, current);
1359                 } else
1360                         top_prev = current;
1361         }
1362         node->dirty = 0;
1363 }
1364
1365 static inline void
1366 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1367                 rbtdb_serial_t least_serial)
1368 {
1369         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1370         rdatasetheader_t *top_prev, *top_next;
1371         isc_mem_t *mctx = rbtdb->common.mctx;
1372         isc_boolean_t still_dirty = ISC_FALSE;
1373
1374         /*
1375          * Caller must be holding the node lock.
1376          */
1377         REQUIRE(least_serial != 0);
1378
1379         top_prev = NULL;
1380         for (current = node->data; current != NULL; current = top_next) {
1381                 top_next = current->next;
1382
1383                 /*
1384                  * First, we clean up any instances of multiple rdatasets
1385                  * with the same serial number, or that have the IGNORE
1386                  * attribute.
1387                  */
1388                 dparent = current;
1389                 for (dcurrent = current->down;
1390                      dcurrent != NULL;
1391                      dcurrent = down_next) {
1392                         down_next = dcurrent->down;
1393                         INSIST(dcurrent->serial <= dparent->serial);
1394                         if (dcurrent->serial == dparent->serial ||
1395                             IGNORE(dcurrent)) {
1396                                 if (down_next != NULL)
1397                                         down_next->next = dparent;
1398                                 dparent->down = down_next;
1399                                 free_rdataset(rbtdb, mctx, dcurrent);
1400                         } else
1401                                 dparent = dcurrent;
1402                 }
1403
1404                 /*
1405                  * We've now eliminated all IGNORE datasets with the possible
1406                  * exception of current, which we now check.
1407                  */
1408                 if (IGNORE(current)) {
1409                         down_next = current->down;
1410                         if (down_next == NULL) {
1411                                 if (top_prev != NULL)
1412                                         top_prev->next = current->next;
1413                                 else
1414                                         node->data = current->next;
1415                                 free_rdataset(rbtdb, mctx, current);
1416                                 /*
1417                                  * current no longer exists, so we can
1418                                  * just continue with the loop.
1419                                  */
1420                                 continue;
1421                         } else {
1422                                 /*
1423                                  * Pull up current->down, making it the new
1424                                  * current.
1425                                  */
1426                                 if (top_prev != NULL)
1427                                         top_prev->next = down_next;
1428                                 else
1429                                         node->data = down_next;
1430                                 down_next->next = top_next;
1431                                 free_rdataset(rbtdb, mctx, current);
1432                                 current = down_next;
1433                         }
1434                 }
1435
1436                 /*
1437                  * We now try to find the first down node less than the
1438                  * least serial.
1439                  */
1440                 dparent = current;
1441                 for (dcurrent = current->down;
1442                      dcurrent != NULL;
1443                      dcurrent = down_next) {
1444                         down_next = dcurrent->down;
1445                         if (dcurrent->serial < least_serial)
1446                                 break;
1447                         dparent = dcurrent;
1448                 }
1449
1450                 /*
1451                  * If there is a such an rdataset, delete it and any older
1452                  * versions.
1453                  */
1454                 if (dcurrent != NULL) {
1455                         do {
1456                                 down_next = dcurrent->down;
1457                                 INSIST(dcurrent->serial <= least_serial);
1458                                 free_rdataset(rbtdb, mctx, dcurrent);
1459                                 dcurrent = down_next;
1460                         } while (dcurrent != NULL);
1461                         dparent->down = NULL;
1462                 }
1463
1464                 /*
1465                  * Note.  The serial number of 'current' might be less than
1466                  * least_serial too, but we cannot delete it because it is
1467                  * the most recent version, unless it is a NONEXISTENT
1468                  * rdataset.
1469                  */
1470                 if (current->down != NULL) {
1471                         still_dirty = ISC_TRUE;
1472                         top_prev = current;
1473                 } else {
1474                         /*
1475                          * If this is a NONEXISTENT rdataset, we can delete it.
1476                          */
1477                         if (NONEXISTENT(current)) {
1478                                 if (top_prev != NULL)
1479                                         top_prev->next = current->next;
1480                                 else
1481                                         node->data = current->next;
1482                                 free_rdataset(rbtdb, mctx, current);
1483                         } else
1484                                 top_prev = current;
1485                 }
1486         }
1487         if (!still_dirty)
1488                 node->dirty = 0;
1489 }
1490
1491 /*%
1492  * Clean up dead nodes.  These are nodes which have no references, and
1493  * have no data.  They are dead but we could not or chose not to delete
1494  * them when we deleted all the data at that node because we did not want
1495  * to wait for the tree write lock.
1496  *
1497  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1498  */
1499 static void
1500 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1501         dns_rbtnode_t *node;
1502         isc_result_t result;
1503         int count = 10;         /* XXXJT: should be adjustable */
1504
1505         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1506         while (node != NULL && count > 0) {
1507                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1508
1509                 /*
1510                  * Since we're holding a tree write lock, it should be
1511                  * impossible for this node to be referenced by others.
1512                  */
1513                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1514                        node->data == NULL);
1515
1516                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1517                 if (node->nsec3)
1518                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1519                                                     ISC_FALSE);
1520                 else
1521                         result = dns_rbt_deletenode(rbtdb->tree, node,
1522                                                     ISC_FALSE);
1523                 if (result != ISC_R_SUCCESS)
1524                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1525                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1526                                       "cleanup_dead_nodes: "
1527                                       "dns_rbt_deletenode: %s",
1528                                       isc_result_totext(result));
1529                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1530                 count--;
1531         }
1532 }
1533
1534 /*
1535  * Caller must be holding the node lock if its reference must be protected
1536  * by the lock.
1537  */
1538 static inline void
1539 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1540         unsigned int lockrefs, noderefs;
1541         isc_refcount_t *lockref;
1542
1543         dns_rbtnode_refincrement0(node, &noderefs);
1544         if (noderefs == 1) {    /* this is the first reference to the node */
1545                 lockref = &rbtdb->node_locks[node->locknum].references;
1546                 isc_refcount_increment0(lockref, &lockrefs);
1547                 INSIST(lockrefs != 0);
1548         }
1549         INSIST(noderefs != 0);
1550 }
1551
1552 /*
1553  * This function is assumed to be called when a node is newly referenced
1554  * and can be in the deadnode list.  In that case the node must be retrieved
1555  * from the list because it is going to be used.  In addition, if the caller
1556  * happens to hold a write lock on the tree, it's a good chance to purge dead
1557  * nodes.
1558  * Note: while a new reference is gained in multiple places, there are only very
1559  * few cases where the node can be in the deadnode list (only empty nodes can
1560  * have been added to the list).
1561  */
1562 static inline void
1563 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1564                 isc_rwlocktype_t treelocktype)
1565 {
1566         isc_boolean_t need_relock = ISC_FALSE;
1567
1568         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1569         new_reference(rbtdb, node);
1570
1571         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1572                       isc_rwlocktype_read);
1573         if (ISC_LINK_LINKED(node, deadlink))
1574                 need_relock = ISC_TRUE;
1575         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1576                  treelocktype == isc_rwlocktype_write)
1577                 need_relock = ISC_TRUE;
1578         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1579                         isc_rwlocktype_read);
1580         if (need_relock) {
1581                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1582                               isc_rwlocktype_write);
1583                 if (ISC_LINK_LINKED(node, deadlink))
1584                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1585                                         node, deadlink);
1586                 if (treelocktype == isc_rwlocktype_write)
1587                         cleanup_dead_nodes(rbtdb, node->locknum);
1588                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1589                                 isc_rwlocktype_write);
1590         }
1591
1592         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1593 }
1594
1595 /*
1596  * Caller must be holding the node lock; either the "strong", read or write
1597  * lock.  Note that the lock must be held even when node references are
1598  * atomically modified; in that case the decrement operation itself does not
1599  * have to be protected, but we must avoid a race condition where multiple
1600  * threads are decreasing the reference to zero simultaneously and at least
1601  * one of them is going to free the node.
1602  * This function returns ISC_TRUE if and only if the node reference decreases
1603  * to zero.
1604  */
     /*
      * Parameters, as used by the code below:
      *   least_serial - for zone databases, the serial handed to
      *                  clean_zone_node(); when 0, it is read from
      *                  rbtdb->least_serial under the database lock.
      *   nlock        - type of node lock held by the caller.  A read lock
      *                  is released and re-acquired as a write lock on the
      *                  slow path, and downgraded again before returning.
      *   tlock        - type of tree lock held by the caller.  Unless it is
      *                  already a write lock, a try-upgrade (from read) or
      *                  try-lock (from none) is attempted; the caller's
      *                  original lock state is restored before returning.
      *   pruning      - prune_tree() passes ISC_TRUE; it suppresses the
      *                  dispatch of another pruning event.
      *
      * Note on the return value: ISC_FALSE is also returned when the count
      * reached zero but the node is referenced again by the time this
      * function returns, i.e. when a pruning event was dispatched (which
      * takes a new reference) or when the final check finds a non-zero
      * reference count.
      */
1605 static isc_boolean_t
1606 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1607                     rbtdb_serial_t least_serial,
1608                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1609                     isc_boolean_t pruning)
1610 {
1611         isc_result_t result;
1612         isc_boolean_t write_locked;
1613         rbtdb_nodelock_t *nodelock;
1614         unsigned int refs, nrefs;
1615         int bucket = node->locknum;
1616         isc_boolean_t no_reference;
1617
1618         nodelock = &rbtdb->node_locks[bucket];
1619
1620         /* Handle easy and typical case first. */
             /*
              * A clean node that still has data or a down pointer needs no
              * cleanup and cannot be deleted, so no lock upgrade is needed.
              */
1621         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1622                 dns_rbtnode_refdecrement(node, &nrefs);
1623                 INSIST((int)nrefs >= 0);
1624                 if (nrefs == 0) {
1625                         isc_refcount_decrement(&nodelock->references, &refs);
1626                         INSIST((int)refs >= 0);
1627                 }
1628                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1629         }
1630
1631         /* Upgrade the lock? */
1632         if (nlock == isc_rwlocktype_read) {
1633                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1634                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1635         }
1636         dns_rbtnode_refdecrement(node, &nrefs);
1637         INSIST((int)nrefs >= 0);
1638         if (nrefs > 0) {
1639                 /* Restore the lock? */
1640                 if (nlock == isc_rwlocktype_read)
1641                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1642                 return (ISC_FALSE);
1643         }
1644
             /* Last reference gone: clean a dirty node before going on. */
1645         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1646                 if (IS_CACHE(rbtdb))
1647                         clean_cache_node(rbtdb, node);
1648                 else {
1649                         if (least_serial == 0) {
1650                                 /*
1651                                  * Caller doesn't know the least serial.
1652                                  * Get it.
1653                                  */
1654                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1655                                 least_serial = rbtdb->least_serial;
1656                                 RBTDB_UNLOCK(&rbtdb->lock,
1657                                              isc_rwlocktype_read);
1658                         }
1659                         clean_zone_node(rbtdb, node, least_serial);
1660                 }
1661         }
1662
             /*
              * The node no longer counts as referenced; drop its share of
              * the lock bucket's reference count.
              */
1663         isc_refcount_decrement(&nodelock->references, &refs);
1664         INSIST((int)refs >= 0);
1665
1666         /*
1667          * XXXDCL should this only be done for cache zones?
1668          */
             /* A node that still has data or children stays in the tree. */
1669         if (node->data != NULL || node->down != NULL) {
1670                 /* Restore the lock? */
1671                 if (nlock == isc_rwlocktype_read)
1672                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1673                 return (ISC_TRUE);
1674         }
1675
1676         /*
1677          * Attempt to switch to a write lock on the tree.  If this fails,
1678          * we will add this node to a linked list of nodes in this locking
1679          * bucket which we will free later.
1680          */
1681         if (tlock != isc_rwlocktype_write) {
1682                 /*
1683                  * Locking hierarchy notwithstanding, we don't need to free
1684                  * the node lock before acquiring the tree write lock because
1685                  * we only do a trylock.
1686                  */
1687                 if (tlock == isc_rwlocktype_read)
1688                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1689                 else
1690                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1691                                                     isc_rwlocktype_write);
1692                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1693                               result == ISC_R_LOCKBUSY);
1694
1695                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1696         } else
1697                 write_locked = ISC_TRUE;
1698
             /*
              * Start from "no reference remains"; this is cleared below if a
              * pruning event takes a new reference or if the node turns out
              * to be referenced again.
              */
1699         no_reference = ISC_TRUE;
1700         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1701                 /*
1702                  * We can now delete the node if the reference counter is
1703                  * zero.  This should be typically the case, but a different
1704                  * thread may still gain a (new) reference just before the
1705                  * current thread locks the tree (e.g., in findnode()).
1706                  */
1707
1708                 /*
1709                  * If this node is the only one in the level it's in, deleting
1710                  * this node may recursively make its parent the only node in
1711                  * the parent level; if so, and if no one is currently using
1712                  * the parent node, this is almost the only opportunity to
1713                  * clean it up.  But the recursive cleanup is not that trivial
1714                  * since the child and parent may be in different lock buckets,
1715                  * which would cause a lock order reversal problem.  To avoid
1716                  * the trouble, we'll dispatch a separate event for batch
1717                  * cleaning.  We need to check whether we're deleting the node
1718                  * as a result of pruning to avoid infinite dispatching.
1719                  * Note: pruning happens only when a task has been set for the
1720                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1721                  * it's their responsibility to purge stale leaves (e.g. by
1722                  * periodic walk-through).
1723                  */
1724                 if (!pruning && node->parent != NULL &&
1725                     node->parent->down == node && node->left == NULL &&
1726                     node->right == NULL && rbtdb->task != NULL) {
1727                         isc_event_t *ev;
1728                         dns_db_t *db;
1729
1730                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1731                                                 DNS_EVENT_RBTPRUNE,
1732                                                 prune_tree, node,
1733                                                 sizeof(isc_event_t));
1734                         if (ev != NULL) {
                                     /*
                                      * The event carries its own reference to
                                      * the node and an attached database
                                      * pointer as sender; prune_tree() drops
                                      * both.
                                      */
1735                                 new_reference(rbtdb, node);
1736                                 db = NULL;
1737                                 attach((dns_db_t *)rbtdb, &db);
1738                                 ev->ev_sender = db;
1739                                 isc_task_send(rbtdb->task, &ev);
1740                                 no_reference = ISC_FALSE;
1741                         } else {
1742                                 /*
1743                                  * XXX: this is a weird situation.  We could
1744                                  * ignore this error case, but then the stale
1745                                  * node will unlikely be purged except via a
1746                                  * rare condition such as manual cleanup.  So
1747                                  * we queue it in the deadnodes list, hoping
1748                                  * the memory shortage is temporary and the node
1749                                  * will be deleted later.
1750                                  */
1751                                 isc_log_write(dns_lctx,
1752                                               DNS_LOGCATEGORY_DATABASE,
1753                                               DNS_LOGMODULE_CACHE,
1754                                               ISC_LOG_INFO,
1755                                               "decrement_reference: failed to "
1756                                               "allocate pruning event");
1757                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1758                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1759                                                 deadlink);
1760                         }
1761                 } else {
1762                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1763                                 char printname[DNS_NAME_FORMATSIZE];
1764
1765                                 isc_log_write(dns_lctx,
1766                                               DNS_LOGCATEGORY_DATABASE,
1767                                               DNS_LOGMODULE_CACHE,
1768                                               ISC_LOG_DEBUG(1),
1769                                               "decrement_reference: "
1770                                               "delete from rbt: %p %s",
1771                                               node,
1772                                               dns_rbt_formatnodename(node,
1773                                                         printname,
1774                                                         sizeof(printname)));
1775                         }
1776
                             /* Delete from whichever tree the node lives in. */
1777                         INSIST(!ISC_LINK_LINKED(node, deadlink));
1778                         if (node->nsec3)
1779                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1780                                                             ISC_FALSE);
1781                         else
1782                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1783                                                             ISC_FALSE);
1784                         if (result != ISC_R_SUCCESS) {
1785                                 isc_log_write(dns_lctx,
1786                                               DNS_LOGCATEGORY_DATABASE,
1787                                               DNS_LOGMODULE_CACHE,
1788                                               ISC_LOG_WARNING,
1789                                               "decrement_reference: "
1790                                               "dns_rbt_deletenode: %s",
1791                                               isc_result_totext(result));
1792                         }
1793                 }
1794         } else if (dns_rbtnode_refcurrent(node) == 0) {
                     /*
                      * Still unreferenced, but the tree write lock could not
                      * be obtained: queue the node on this bucket's dead-node
                      * list so it can be deleted later.
                      */
1795                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1796                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1797         } else
1798                 no_reference = ISC_FALSE;
1799
1800         /* Restore the lock? */
1801         if (nlock == isc_rwlocktype_read)
1802                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1803
1804         /*
1805          * Relock a read lock, or unlock the write lock if no lock was held.
1806          */
1807         if (tlock == isc_rwlocktype_none)
1808                 if (write_locked)
1809                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1810
1811         if (tlock == isc_rwlocktype_read)
1812                 if (write_locked)
1813                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1814
1815         return (no_reference);
1816 }
1817
1818 /*
1819  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1820  * case, the number of iteration is the number of tree levels, which is at
1821  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1822  * should be much smaller (only a few times), and even the worst case would be
1823  * acceptable for a single event.
1824  */
1825 static void
1826 prune_tree(isc_task_t *task, isc_event_t *event) {
1827         dns_rbtdb_t *rbtdb = event->ev_sender;
1828         dns_rbtnode_t *node = event->ev_arg;
1829         dns_rbtnode_t *parent;
1830         unsigned int locknum;
1831
1832         UNUSED(task);
1833
1834         isc_event_free(&event);
1835
1836         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1837         locknum = node->locknum;
1838         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1839         do {
1840                 parent = node->parent;
1841                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1842                                     isc_rwlocktype_write, ISC_TRUE);
1843
1844                 if (parent != NULL && parent->down == NULL) {
1845                         /*
1846                          * node was the only down child of the parent and has
1847                          * just been removed.  We'll then need to examine the
1848                          * parent.  Keep the lock if possible; otherwise,
1849                          * release the old lock and acquire one for the parent.
1850                          */
1851                         if (parent->locknum != locknum) {
1852                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1853                                             isc_rwlocktype_write);
1854                                 locknum = parent->locknum;
1855                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1856                                           isc_rwlocktype_write);
1857                         }
1858
1859                         /*
1860                          * We need to gain a reference to the node before
1861                          * decrementing it in the next iteration.  In addition,
1862                          * if the node is in the dead-nodes list, extract it
1863                          * from the list beforehand as we do in
1864                          * reactivate_node().
1865                          */
1866                         new_reference(rbtdb, parent);
1867                         if (ISC_LINK_LINKED(parent, deadlink)) {
1868                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1869                                                 parent, deadlink);
1870                         }
1871                 } else
1872                         parent = NULL;
1873
1874                 node = parent;
1875         } while (node != NULL);
1876         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1877         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1878
1879         detach((dns_db_t **)&rbtdb);
1880 }
1881
1882 static inline void
1883 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1884                    rbtdb_changedlist_t *cleanup_list)
1885 {
1886         /*
1887          * Caller must be holding the database lock.
1888          */
1889
1890         rbtdb->least_serial = version->serial;
1891         *cleanup_list = version->changed_list;
1892         ISC_LIST_INIT(version->changed_list);
1893 }
1894
1895 static inline void
1896 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1897         rbtdb_changed_t *changed, *next_changed;
1898
1899         /*
1900          * If the changed record is dirty, then
1901          * an update created multiple versions of
1902          * a given rdataset.  We keep this list
1903          * until we're the least open version, at
1904          * which point it's safe to get rid of any
1905          * older versions.
1906          *
1907          * If the changed record isn't dirty, then
1908          * we don't need it anymore since we're
1909          * committing and not rolling back.
1910          *
1911          * The caller must be holding the database lock.
1912          */
1913         for (changed = HEAD(version->changed_list);
1914              changed != NULL;
1915              changed = next_changed) {
1916                 next_changed = NEXT(changed, link);
1917                 if (!changed->dirty) {
1918                         UNLINK(version->changed_list,
1919                                changed, link);
1920                         APPEND(*cleanup_list,
1921                                changed, link);
1922                 }
1923         }
1924 }
1925
1926 static void
1927 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1928         dns_rdataset_t keyset;
1929         dns_rdataset_t nsecset, signsecset;
1930         dns_rdata_t rdata = DNS_RDATA_INIT;
1931         isc_boolean_t haszonekey = ISC_FALSE;
1932         isc_boolean_t hasnsec = ISC_FALSE;
1933         isc_boolean_t hasoptbit = ISC_FALSE;
1934         isc_boolean_t nsec3createflag = ISC_FALSE;
1935         isc_result_t result;
1936
1937         dns_rdataset_init(&keyset);
1938         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1939                                      0, 0, &keyset, NULL);
1940         if (result == ISC_R_SUCCESS) {
1941                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1942                 result = dns_rdataset_first(&keyset);
1943                 while (result == ISC_R_SUCCESS) {
1944                         dns_rdataset_current(&keyset, &keyrdata);
1945                         if (dns_zonekey_iszonekey(&keyrdata)) {
1946                                 haszonekey = ISC_TRUE;
1947                                 break;
1948                         }
1949                         result = dns_rdataset_next(&keyset);
1950                 }
1951                 dns_rdataset_disassociate(&keyset);
1952         }
1953         if (!haszonekey) {
1954                 version->secure = dns_db_insecure;
1955                 version->havensec3 = ISC_FALSE;
1956                 return;
1957         }
1958
1959         dns_rdataset_init(&nsecset);
1960         dns_rdataset_init(&signsecset);
1961         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1962                                      0, 0, &nsecset, &signsecset);
1963         if (result == ISC_R_SUCCESS) {
1964                 if (dns_rdataset_isassociated(&signsecset)) {
1965                         hasnsec = ISC_TRUE;
1966                         result = dns_rdataset_first(&nsecset);
1967                         if (result == ISC_R_SUCCESS) {
1968                                 dns_rdataset_current(&nsecset, &rdata);
1969                                 hasoptbit = dns_nsec_typepresent(&rdata,
1970                                                              dns_rdatatype_opt);
1971                         }
1972                         dns_rdataset_disassociate(&signsecset);
1973                 }
1974                 dns_rdataset_disassociate(&nsecset);
1975         }
1976
1977         setnsec3parameters(db, version, &nsec3createflag);
1978
1979         /*
1980          * Do we have a valid NSEC/NSEC3 chain?
1981          */
1982         if (version->havensec3 || (hasnsec && !hasoptbit))
1983                 version->secure = dns_db_secure;
1984         /*
1985          * Do we have a NSEC/NSEC3 chain under creation?
1986          */
1987         else if (hasoptbit || nsec3createflag)
1988                 version->secure = dns_db_partial;
1989         else
1990                 version->secure = dns_db_insecure;
1991 }
1992
1993 /*%<
1994  * Walk the origin node looking for NSEC3PARAM records.
1995  * Cache the nsec3 parameters.
1996  */
1997 static void
1998 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1999                    isc_boolean_t *nsec3createflag)
2000 {
2001         dns_rbtnode_t *node;
2002         dns_rdata_nsec3param_t nsec3param;
2003         dns_rdata_t rdata = DNS_RDATA_INIT;
2004         isc_region_t region;
2005         isc_result_t result;
2006         rdatasetheader_t *header, *header_next;
2007         unsigned char *raw;             /* RDATASLAB */
2008         unsigned int count, length;
2009         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2010
2011         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2012         version->havensec3 = ISC_FALSE;
2013         node = rbtdb->origin_node;
2014         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2015                   isc_rwlocktype_read);
2016         for (header = node->data;
2017              header != NULL;
2018              header = header_next) {
2019                 header_next = header->next;
2020                 do {
2021                         if (header->serial <= version->serial &&
2022                             !IGNORE(header)) {
2023                                 if (NONEXISTENT(header))
2024                                         header = NULL;
2025                                 break;
2026                         } else
2027                                 header = header->down;
2028                 } while (header != NULL);
2029
2030                 if (header != NULL &&
2031                     header->type == dns_rdatatype_nsec3param) {
2032                         /*
2033                          * Find A NSEC3PARAM with a supported algorithm.
2034                          */
2035                         raw = (unsigned char *)header + sizeof(*header);
2036                         count = raw[0] * 256 + raw[1]; /* count */
2037 #if DNS_RDATASET_FIXED
2038                         raw += count * 4 + 2;
2039 #else
2040                         raw += 2;
2041 #endif
2042                         while (count-- > 0U) {
2043                                 length = raw[0] * 256 + raw[1];
2044 #if DNS_RDATASET_FIXED
2045                                 raw += 4;
2046 #else
2047                                 raw += 2;
2048 #endif
2049                                 region.base = raw;
2050                                 region.length = length;
2051                                 raw += length;
2052                                 dns_rdata_fromregion(&rdata,
2053                                                      rbtdb->common.rdclass,
2054                                                      dns_rdatatype_nsec3param,
2055                                                      &region);
2056                                 result = dns_rdata_tostruct(&rdata,
2057                                                             &nsec3param,
2058                                                             NULL);
2059                                 INSIST(result == ISC_R_SUCCESS);
2060                                 dns_rdata_reset(&rdata);
2061
2062                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2063                                     !dns_nsec3_supportedhash(nsec3param.hash))
2064                                         continue;
2065
2066 #ifdef RFC5155_STRICT
2067                                 if (nsec3param.flags != 0)
2068                                         continue;
2069 #else
2070                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2071                                     != 0)
2072                                         *nsec3createflag = ISC_TRUE;
2073                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2074                                     != 0)
2075                                         continue;
2076 #endif
2077
2078                                 INSIST(nsec3param.salt_length <=
2079                                        sizeof(version->salt));
2080                                 memcpy(version->salt, nsec3param.salt,
2081                                        nsec3param.salt_length);
2082                                 version->hash = nsec3param.hash;
2083                                 version->salt_length = nsec3param.salt_length;
2084                                 version->iterations = nsec3param.iterations;
2085                                 version->flags = nsec3param.flags;
2086                                 version->havensec3 = ISC_TRUE;
2087                                 /*
2088                                  * Look for a better algorithm than the
2089                                  * unknown test algorithm.
2090                                  */
2091                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2092                                         goto unlock;
2093                         }
2094                 }
2095         }
2096  unlock:
2097         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2098                     isc_rwlocktype_read);
2099         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2100 }
2101
2102 static void
2103 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2104         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2105         rbtdb_version_t *version, *cleanup_version, *least_greater;
2106         isc_boolean_t rollback = ISC_FALSE;
2107         rbtdb_changedlist_t cleanup_list;
2108         rdatasetheaderlist_t resigned_list;
2109         rbtdb_changed_t *changed, *next_changed;
2110         rbtdb_serial_t serial, least_serial;
2111         dns_rbtnode_t *rbtnode;
2112         unsigned int refs;
2113         rdatasetheader_t *header;
2114         isc_boolean_t writer;
2115
2116         REQUIRE(VALID_RBTDB(rbtdb));
2117         version = (rbtdb_version_t *)*versionp;
2118
2119         cleanup_version = NULL;
2120         ISC_LIST_INIT(cleanup_list);
2121         ISC_LIST_INIT(resigned_list);
2122
2123         isc_refcount_decrement(&version->references, &refs);
2124         if (refs > 0) {         /* typical and easy case first */
2125                 if (commit) {
2126                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2127                         INSIST(!version->writer);
2128                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2129                 }
2130                 goto end;
2131         }
2132
2133         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2134         serial = version->serial;
2135         writer = version->writer;
2136         if (version->writer) {
2137                 if (commit) {
2138                         unsigned cur_ref;
2139                         rbtdb_version_t *cur_version;
2140
2141                         INSIST(version->commit_ok);
2142                         INSIST(version == rbtdb->future_version);
2143                         /*
2144                          * The current version is going to be replaced.
2145                          * Release the (likely last) reference to it from the
2146                          * DB itself and unlink it from the open list.
2147                          */
2148                         cur_version = rbtdb->current_version;
2149                         isc_refcount_decrement(&cur_version->references,
2150                                                &cur_ref);
2151                         if (cur_ref == 0) {
2152                                 if (cur_version->serial == rbtdb->least_serial)
2153                                         INSIST(EMPTY(cur_version->changed_list));
2154                                 UNLINK(rbtdb->open_versions,
2155                                        cur_version, link);
2156                         }
2157                         if (EMPTY(rbtdb->open_versions)) {
2158                                 /*
2159                                  * We're going to become the least open
2160                                  * version.
2161                                  */
2162                                 make_least_version(rbtdb, version,
2163                                                    &cleanup_list);
2164                         } else {
2165                                 /*
2166                                  * Some other open version is the
2167                                  * least version.  We can't cleanup
2168                                  * records that were changed in this
2169                                  * version because the older versions
2170                                  * may still be in use by an open
2171                                  * version.
2172                                  *
2173                                  * We can, however, discard the
2174                                  * changed records for things that
2175                                  * we've added that didn't exist in
2176                                  * prior versions.
2177                                  */
2178                                 cleanup_nondirty(version, &cleanup_list);
2179                         }
2180                         /*
2181                          * If the (soon to be former) current version
2182                          * isn't being used by anyone, we can clean
2183                          * it up.
2184                          */
2185                         if (cur_ref == 0) {
2186                                 cleanup_version = cur_version;
2187                                 APPENDLIST(version->changed_list,
2188                                            cleanup_version->changed_list,
2189                                            link);
2190                         }
2191                         /*
2192                          * Become the current version.
2193                          */
2194                         version->writer = ISC_FALSE;
2195                         rbtdb->current_version = version;
2196                         rbtdb->current_serial = version->serial;
2197                         rbtdb->future_version = NULL;
2198
2199                         /*
2200                          * Keep the current version in the open list, and
2201                          * gain a reference for the DB itself (see the DB
2202                          * creation function below).  This must be the only
2203                          * case where we need to increment the counter from
2204                          * zero and need to use isc_refcount_increment0().
2205                          */
2206                         isc_refcount_increment0(&version->references,
2207                                                 &cur_ref);
2208                         INSIST(cur_ref == 1);
2209                         PREPEND(rbtdb->open_versions,
2210                                 rbtdb->current_version, link);
2211                         resigned_list = version->resigned_list;
2212                         ISC_LIST_INIT(version->resigned_list);
2213                 } else {
2214                         /*
2215                          * We're rolling back this transaction.
2216                          */
2217                         cleanup_list = version->changed_list;
2218                         ISC_LIST_INIT(version->changed_list);
2219                         resigned_list = version->resigned_list;
2220                         ISC_LIST_INIT(version->resigned_list);
2221                         rollback = ISC_TRUE;
2222                         cleanup_version = version;
2223                         rbtdb->future_version = NULL;
2224                 }
2225         } else {
2226                 if (version != rbtdb->current_version) {
2227                         /*
2228                          * There are no external or internal references
2229                          * to this version and it can be cleaned up.
2230                          */
2231                         cleanup_version = version;
2232
2233                         /*
2234                          * Find the version with the least serial
2235                          * number greater than ours.
2236                          */
2237                         least_greater = PREV(version, link);
2238                         if (least_greater == NULL)
2239                                 least_greater = rbtdb->current_version;
2240
2241                         INSIST(version->serial < least_greater->serial);
2242                         /*
2243                          * Is this the least open version?
2244                          */
2245                         if (version->serial == rbtdb->least_serial) {
2246                                 /*
2247                                  * Yes.  Install the new least open
2248                                  * version.
2249                                  */
2250                                 make_least_version(rbtdb,
2251                                                    least_greater,
2252                                                    &cleanup_list);
2253                         } else {
2254                                 /*
2255                                  * Add any unexecuted cleanups to
2256                                  * those of the least greater version.
2257                                  */
2258                                 APPENDLIST(least_greater->changed_list,
2259                                            version->changed_list,
2260                                            link);
2261                         }
2262                 } else if (version->serial == rbtdb->least_serial)
2263                         INSIST(EMPTY(version->changed_list));
2264                 UNLINK(rbtdb->open_versions, version, link);
2265         }
2266         least_serial = rbtdb->least_serial;
2267         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2268
2269         /*
2270          * Update the zone's secure status.
2271          */
2272         if (writer && commit && !IS_CACHE(rbtdb))
2273                 iszonesecure(db, version, rbtdb->origin_node);
2274
2275         if (cleanup_version != NULL) {
2276                 INSIST(EMPTY(cleanup_version->changed_list));
2277                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2278                             sizeof(*cleanup_version));
2279         }
2280
2281         /*
2282          * Commit/rollback re-signed headers.
2283          */
2284         for (header = HEAD(resigned_list);
2285              header != NULL;
2286              header = HEAD(resigned_list)) {
2287                 ISC_LIST_UNLINK(resigned_list, header, lru_link);
2288                 if (rollback) {
2289                         nodelock_t *lock;
2290                         lock = &rbtdb->node_locks[header->node->locknum].lock;
2291                         NODE_LOCK(lock, isc_rwlocktype_write);
2292                         resign_insert(rbtdb, header->node->locknum, header);
2293                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2294                 }
2295                 decrement_reference(rbtdb, header->node, least_serial,
2296                                     isc_rwlocktype_write, isc_rwlocktype_none,
2297                                     ISC_FALSE);
2298         }
2299
2300         if (!EMPTY(cleanup_list)) {
2301                 /*
2302                  * We acquire a tree write lock here in order to make sure
2303                  * that stale nodes will be removed in decrement_reference().
2304                  * If we didn't have the lock, those nodes could miss the
2305                  * chance to be removed until the server stops.  The write lock
2306                  * is expensive, but this event should be rare enough to justify
2307                  * the cost.
2308                  */
2309                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2310                 for (changed = HEAD(cleanup_list);
2311                      changed != NULL;
2312                      changed = next_changed) {
2313                         nodelock_t *lock;
2314
2315                         next_changed = NEXT(changed, link);
2316                         rbtnode = changed->node;
2317                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2318
2319                         NODE_LOCK(lock, isc_rwlocktype_write);
2320                         /*
2321                          * This is a good opportunity to purge any dead nodes,
2322                          * so use it.
2323                          */
2324                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2325
2326                         if (rollback)
2327                                 rollback_node(rbtnode, serial);
2328                         decrement_reference(rbtdb, rbtnode, least_serial,
2329                                             isc_rwlocktype_write,
2330                                             isc_rwlocktype_write, ISC_FALSE);
2331
2332                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2333
2334                         isc_mem_put(rbtdb->common.mctx, changed,
2335                                     sizeof(*changed));
2336                 }
2337                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2338         }
2339
2340  end:
2341         *versionp = NULL;
2342 }
2343
2344 /*
2345  * Add the necessary magic for the wildcard name 'name'
2346  * to be found in 'rbtdb'.
2347  *
2348  * In order for wildcard matching to work correctly in
2349  * zone_find(), we must ensure that a node for the wildcarding
2350  * level exists in the database, and has its 'find_callback'
2351  * and 'wild' bits set.
2352  *
2353  * E.g. if the wildcard name is "*.sub.example." then we
2354  * must ensure that "sub.example." exists and is marked as
2355  * a wildcard level.
2356  */
2357 static isc_result_t
2358 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2359         isc_result_t result;
2360         dns_name_t foundname;
2361         dns_offsets_t offsets;
2362         unsigned int n;
2363         dns_rbtnode_t *node = NULL;
2364
2365         dns_name_init(&foundname, offsets);
2366         n = dns_name_countlabels(name);
2367         INSIST(n >= 2);
2368         n--;
2369         dns_name_getlabelsequence(name, 1, n, &foundname);
2370         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2371         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2372                 return (result);
2373         node->nsec3 = 0;
2374         node->find_callback = 1;
2375         node->wild = 1;
2376         return (ISC_R_SUCCESS);
2377 }
2378
2379 static isc_result_t
2380 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2381         isc_result_t result;
2382         dns_name_t foundname;
2383         dns_offsets_t offsets;
2384         unsigned int n, l, i;
2385
2386         dns_name_init(&foundname, offsets);
2387         n = dns_name_countlabels(name);
2388         l = dns_name_countlabels(&rbtdb->common.origin);
2389         i = l + 1;
2390         while (i < n) {
2391                 dns_rbtnode_t *node = NULL;     /* dummy */
2392                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2393                 if (dns_name_iswildcard(&foundname)) {
2394                         result = add_wildcard_magic(rbtdb, &foundname);
2395                         if (result != ISC_R_SUCCESS)
2396                                 return (result);
2397                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2398                                                  &node);
2399                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2400                                 return (result);
2401                         node->nsec3 = 0;
2402                 }
2403                 i++;
2404         }
2405         return (ISC_R_SUCCESS);
2406 }
2407
2408 static isc_result_t
2409 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2410          dns_dbnode_t **nodep)
2411 {
2412         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2413         dns_rbtnode_t *node = NULL;
2414         dns_name_t nodename;
2415         isc_result_t result;
2416         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2417
2418         REQUIRE(VALID_RBTDB(rbtdb));
2419
2420         dns_name_init(&nodename, NULL);
2421         RWLOCK(&rbtdb->tree_lock, locktype);
2422         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2423                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2424         if (result != ISC_R_SUCCESS) {
2425                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2426                 if (!create) {
2427                         if (result == DNS_R_PARTIALMATCH)
2428                                 result = ISC_R_NOTFOUND;
2429                         return (result);
2430                 }
2431                 /*
2432                  * It would be nice to try to upgrade the lock instead of
2433                  * unlocking then relocking.
2434                  */
2435                 locktype = isc_rwlocktype_write;
2436                 RWLOCK(&rbtdb->tree_lock, locktype);
2437                 node = NULL;
2438                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2439                 if (result == ISC_R_SUCCESS) {
2440                         dns_rbt_namefromnode(node, &nodename);
2441 #ifdef DNS_RBT_USEHASH
2442                         node->locknum = node->hashval % rbtdb->node_lock_count;
2443 #else
2444                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2445                                 rbtdb->node_lock_count;
2446 #endif
2447                         node->nsec3 = 0;
2448                         add_empty_wildcards(rbtdb, name);
2449
2450                         if (dns_name_iswildcard(name)) {
2451                                 result = add_wildcard_magic(rbtdb, name);
2452                                 if (result != ISC_R_SUCCESS) {
2453                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2454                                         return (result);
2455                                 }
2456                         }
2457                 } else if (result != ISC_R_EXISTS) {
2458                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2459                         return (result);
2460                 }
2461         }
2462         reactivate_node(rbtdb, node, locktype);
2463         RWUNLOCK(&rbtdb->tree_lock, locktype);
2464
2465         *nodep = (dns_dbnode_t *)node;
2466
2467         return (ISC_R_SUCCESS);
2468 }
2469
2470 static isc_result_t
2471 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2472               dns_dbnode_t **nodep)
2473 {
2474         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2475         dns_rbtnode_t *node = NULL;
2476         dns_name_t nodename;
2477         isc_result_t result;
2478         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2479
2480         REQUIRE(VALID_RBTDB(rbtdb));
2481
2482         dns_name_init(&nodename, NULL);
2483         RWLOCK(&rbtdb->tree_lock, locktype);
2484         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2485                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2486         if (result != ISC_R_SUCCESS) {
2487                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2488                 if (!create) {
2489                         if (result == DNS_R_PARTIALMATCH)
2490                                 result = ISC_R_NOTFOUND;
2491                         return (result);
2492                 }
2493                 /*
2494                  * It would be nice to try to upgrade the lock instead of
2495                  * unlocking then relocking.
2496                  */
2497                 locktype = isc_rwlocktype_write;
2498                 RWLOCK(&rbtdb->tree_lock, locktype);
2499                 node = NULL;
2500                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2501                 if (result == ISC_R_SUCCESS) {
2502                         dns_rbt_namefromnode(node, &nodename);
2503 #ifdef DNS_RBT_USEHASH
2504                         node->locknum = node->hashval % rbtdb->node_lock_count;
2505 #else
2506                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2507                                 rbtdb->node_lock_count;
2508 #endif
2509                         node->nsec3 = 1U;
2510                 } else if (result != ISC_R_EXISTS) {
2511                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2512                         return (result);
2513                 }
2514         } else
2515                 INSIST(node->nsec3);
2516         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2517         new_reference(rbtdb, node);
2518         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2519         RWUNLOCK(&rbtdb->tree_lock, locktype);
2520
2521         *nodep = (dns_dbnode_t *)node;
2522
2523         return (ISC_R_SUCCESS);
2524 }
2525
2526 static isc_result_t
2527 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2528         rbtdb_search_t *search = arg;
2529         rdatasetheader_t *header, *header_next;
2530         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2531         rdatasetheader_t *found;
2532         isc_result_t result;
2533         dns_rbtnode_t *onode;
2534
2535         /*
2536          * We only want to remember the topmost zone cut, since it's the one
2537          * that counts, so we'll just continue if we've already found a
2538          * zonecut.
2539          */
2540         if (search->zonecut != NULL)
2541                 return (DNS_R_CONTINUE);
2542
2543         found = NULL;
2544         result = DNS_R_CONTINUE;
2545         onode = search->rbtdb->origin_node;
2546
2547         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2548                   isc_rwlocktype_read);
2549
2550         /*
2551          * Look for an NS or DNAME rdataset active in our version.
2552          */
2553         ns_header = NULL;
2554         dname_header = NULL;
2555         sigdname_header = NULL;
2556         for (header = node->data; header != NULL; header = header_next) {
2557                 header_next = header->next;
2558                 if (header->type == dns_rdatatype_ns ||
2559                     header->type == dns_rdatatype_dname ||
2560                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2561                         do {
2562                                 if (header->serial <= search->serial &&
2563                                     !IGNORE(header)) {
2564                                         /*
2565                                          * Is this a "this rdataset doesn't
2566                                          * exist" record?
2567                                          */
2568                                         if (NONEXISTENT(header))
2569                                                 header = NULL;
2570                                         break;
2571                                 } else
2572                                         header = header->down;
2573                         } while (header != NULL);
2574                         if (header != NULL) {
2575                                 if (header->type == dns_rdatatype_dname)
2576                                         dname_header = header;
2577                                 else if (header->type ==
2578                                            RBTDB_RDATATYPE_SIGDNAME)
2579                                         sigdname_header = header;
2580                                 else if (node != onode ||
2581                                          IS_STUB(search->rbtdb)) {
2582                                         /*
2583                                          * We've found an NS rdataset that
2584                                          * isn't at the origin node.  We check
2585                                          * that they're not at the origin node,
2586                                          * because otherwise we'd erroneously
2587                                          * treat the zone top as if it were
2588                                          * a delegation.
2589                                          */
2590                                         ns_header = header;
2591                                 }
2592                         }
2593                 }
2594         }
2595
2596         /*
2597          * Did we find anything?
2598          */
2599         if (dname_header != NULL) {
2600                 /*
2601                  * Note that DNAME has precedence over NS if both exist.
2602                  */
2603                 found = dname_header;
2604                 search->zonecut_sigrdataset = sigdname_header;
2605         } else if (ns_header != NULL) {
2606                 found = ns_header;
2607                 search->zonecut_sigrdataset = NULL;
2608         }
2609
2610         if (found != NULL) {
2611                 /*
2612                  * We increment the reference count on node to ensure that
2613                  * search->zonecut_rdataset will still be valid later.
2614                  */
2615                 new_reference(search->rbtdb, node);
2616                 search->zonecut = node;
2617                 search->zonecut_rdataset = found;
2618                 search->need_cleanup = ISC_TRUE;
2619                 /*
2620                  * Since we've found a zonecut, anything beneath it is
2621                  * glue and is not subject to wildcard matching, so we
2622                  * may clear search->wild.
2623                  */
2624                 search->wild = ISC_FALSE;
2625                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2626                         /*
2627                          * If the caller does not want to find glue, then
2628                          * this is the best answer and the search should
2629                          * stop now.
2630                          */
2631                         result = DNS_R_PARTIALMATCH;
2632                 } else {
2633                         dns_name_t *zcname;
2634
2635                         /*
2636                          * The search will continue beneath the zone cut.
2637                          * This may or may not be the best match.  In case it
2638                          * is, we need to remember the node name.
2639                          */
2640                         zcname = dns_fixedname_name(&search->zonecut_name);
2641                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2642                                       ISC_R_SUCCESS);
2643                         search->copy_name = ISC_TRUE;
2644                 }
2645         } else {
2646                 /*
2647                  * There is no zonecut at this node which is active in this
2648                  * version.
2649                  *
2650                  * If this is a "wild" node and the caller hasn't disabled
2651                  * wildcard matching, remember that we've seen a wild node
2652                  * in case we need to go searching for wildcard matches
2653                  * later on.
2654                  */
2655                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2656                         search->wild = ISC_TRUE;
2657         }
2658
2659         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2660                     isc_rwlocktype_read);
2661
2662         return (result);
2663 }
2664
2665 static inline void
2666 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2667               rdatasetheader_t *header, isc_stdtime_t now,
2668               dns_rdataset_t *rdataset)
2669 {
2670         unsigned char *raw;     /* RDATASLAB */
2671
2672         /*
2673          * Caller must be holding the node reader lock.
2674          * XXXJT: technically, we need a writer lock, since we'll increment
2675          * the header count below.  However, since the actual counter value
2676          * doesn't matter, we prioritize performance here.  (We may want to
2677          * use atomic increment when available).
2678          */
2679
2680         if (rdataset == NULL)
2681                 return;
2682
2683         new_reference(rbtdb, node);
2684
2685         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2686
2687         rdataset->methods = &rdataset_methods;
2688         rdataset->rdclass = rbtdb->common.rdclass;
2689         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2690         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2691         rdataset->ttl = header->rdh_ttl - now;
2692         rdataset->trust = header->trust;
2693         if (NXDOMAIN(header))
2694                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2695         if (OPTOUT(header))
2696                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2697         rdataset->private1 = rbtdb;
2698         rdataset->private2 = node;
2699         raw = (unsigned char *)header + sizeof(*header);
2700         rdataset->private3 = raw;
2701         rdataset->count = header->count++;
2702         if (rdataset->count == ISC_UINT32_MAX)
2703                 rdataset->count = 0;
2704
2705         /*
2706          * Reset iterator state.
2707          */
2708         rdataset->privateuint4 = 0;
2709         rdataset->private5 = NULL;
2710
2711         /*
2712          * Add noqname proof.
2713          */
2714         rdataset->private6 = header->noqname;
2715         if (rdataset->private6 != NULL)
2716                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2717         rdataset->private7 = header->closest;
2718         if (rdataset->private7 != NULL)
2719                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2720
2721         /*
2722          * Copy out re-signing information.
2723          */
2724         if (RESIGN(header)) {
2725                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2726                 rdataset->resign = header->resign;
2727         } else
2728                 rdataset->resign = 0;
2729 }
2730
2731 static inline isc_result_t
2732 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2733                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2734                  dns_rdataset_t *sigrdataset)
2735 {
2736         isc_result_t result;
2737         dns_name_t *zcname;
2738         rbtdb_rdatatype_t type;
2739         dns_rbtnode_t *node;
2740
2741         /*
2742          * The caller MUST NOT be holding any node locks.
2743          */
2744
2745         node = search->zonecut;
2746         type = search->zonecut_rdataset->type;
2747
2748         /*
2749          * If we have to set foundname, we do it before anything else.
2750          * If we were to set foundname after we had set nodep or bound the
2751          * rdataset, then we'd have to undo that work if dns_name_copy()
2752          * failed.  By setting foundname first, there's nothing to undo if
2753          * we have trouble.
2754          */
2755         if (foundname != NULL && search->copy_name) {
2756                 zcname = dns_fixedname_name(&search->zonecut_name);
2757                 result = dns_name_copy(zcname, foundname, NULL);
2758                 if (result != ISC_R_SUCCESS)
2759                         return (result);
2760         }
2761         if (nodep != NULL) {
2762                 /*
2763                  * Note that we don't have to increment the node's reference
2764                  * count here because we're going to use the reference we
2765                  * already have in the search block.
2766                  */
2767                 *nodep = node;
2768                 search->need_cleanup = ISC_FALSE;
2769         }
2770         if (rdataset != NULL) {
2771                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2772                           isc_rwlocktype_read);
2773                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2774                               search->now, rdataset);
2775                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2776                         bind_rdataset(search->rbtdb, node,
2777                                       search->zonecut_sigrdataset,
2778                                       search->now, sigrdataset);
2779                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2780                             isc_rwlocktype_read);
2781         }
2782
2783         if (type == dns_rdatatype_dname)
2784                 return (DNS_R_DNAME);
2785         return (DNS_R_DELEGATION);
2786 }
2787
2788 static inline isc_boolean_t
2789 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2790            dns_rbtnode_t *node)
2791 {
2792         unsigned char *raw;     /* RDATASLAB */
2793         unsigned int count, size;
2794         dns_name_t ns_name;
2795         isc_boolean_t valid = ISC_FALSE;
2796         dns_offsets_t offsets;
2797         isc_region_t region;
2798         rdatasetheader_t *header;
2799
2800         /*
2801          * No additional locking is required.
2802          */
2803
2804         /*
2805          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2806          * if it occurs at a zone cut, but is not valid below it.
2807          */
2808         if (type == dns_rdatatype_ns) {
2809                 if (node != search->zonecut) {
2810                         return (ISC_FALSE);
2811                 }
2812         } else if (type != dns_rdatatype_a &&
2813                    type != dns_rdatatype_aaaa &&
2814                    type != dns_rdatatype_a6) {
2815                 return (ISC_FALSE);
2816         }
2817
2818         header = search->zonecut_rdataset;
2819         raw = (unsigned char *)header + sizeof(*header);
2820         count = raw[0] * 256 + raw[1];
2821 #if DNS_RDATASET_FIXED
2822         raw += 2 + (4 * count);
2823 #else
2824         raw += 2;
2825 #endif
2826
2827         while (count > 0) {
2828                 count--;
2829                 size = raw[0] * 256 + raw[1];
2830 #if DNS_RDATASET_FIXED
2831                 raw += 4;
2832 #else
2833                 raw += 2;
2834 #endif
2835                 region.base = raw;
2836                 region.length = size;
2837                 raw += size;
2838                 /*
2839                  * XXX Until we have rdata structures, we have no choice but
2840                  * to directly access the rdata format.
2841                  */
2842                 dns_name_init(&ns_name, offsets);
2843                 dns_name_fromregion(&ns_name, &region);
2844                 if (dns_name_compare(&ns_name, name) == 0) {
2845                         valid = ISC_TRUE;
2846                         break;
2847                 }
2848         }
2849
2850         return (valid);
2851 }
2852
2853 static inline isc_boolean_t
2854 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2855             dns_name_t *name)
2856 {
2857         dns_fixedname_t fnext;
2858         dns_fixedname_t forigin;
2859         dns_name_t *next;
2860         dns_name_t *origin;
2861         dns_name_t prefix;
2862         dns_rbtdb_t *rbtdb;
2863         dns_rbtnode_t *node;
2864         isc_result_t result;
2865         isc_boolean_t answer = ISC_FALSE;
2866         rdatasetheader_t *header;
2867
2868         rbtdb = search->rbtdb;
2869
2870         dns_name_init(&prefix, NULL);
2871         dns_fixedname_init(&fnext);
2872         next = dns_fixedname_name(&fnext);
2873         dns_fixedname_init(&forigin);
2874         origin = dns_fixedname_name(&forigin);
2875
2876         result = dns_rbtnodechain_next(chain, NULL, NULL);
2877         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2878                 node = NULL;
2879                 result = dns_rbtnodechain_current(chain, &prefix,
2880                                                   origin, &node);
2881                 if (result != ISC_R_SUCCESS)
2882                         break;
2883                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2884                           isc_rwlocktype_read);
2885                 for (header = node->data;
2886                      header != NULL;
2887                      header = header->next) {
2888                         if (header->serial <= search->serial &&
2889                             !IGNORE(header) && EXISTS(header))
2890                                 break;
2891                 }
2892                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2893                             isc_rwlocktype_read);
2894                 if (header != NULL)
2895                         break;
2896                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2897         }
2898         if (result == ISC_R_SUCCESS)
2899                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2900         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2901                 answer = ISC_TRUE;
2902         return (answer);
2903 }
2904
2905 static inline isc_boolean_t
2906 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2907         dns_fixedname_t fnext;
2908         dns_fixedname_t forigin;
2909         dns_fixedname_t fprev;
2910         dns_name_t *next;
2911         dns_name_t *origin;
2912         dns_name_t *prev;
2913         dns_name_t name;
2914         dns_name_t rname;
2915         dns_name_t tname;
2916         dns_rbtdb_t *rbtdb;
2917         dns_rbtnode_t *node;
2918         dns_rbtnodechain_t chain;
2919         isc_boolean_t check_next = ISC_TRUE;
2920         isc_boolean_t check_prev = ISC_TRUE;
2921         isc_boolean_t answer = ISC_FALSE;
2922         isc_result_t result;
2923         rdatasetheader_t *header;
2924         unsigned int n;
2925
2926         rbtdb = search->rbtdb;
2927
2928         dns_name_init(&name, NULL);
2929         dns_name_init(&tname, NULL);
2930         dns_name_init(&rname, NULL);
2931         dns_fixedname_init(&fnext);
2932         next = dns_fixedname_name(&fnext);
2933         dns_fixedname_init(&fprev);
2934         prev = dns_fixedname_name(&fprev);
2935         dns_fixedname_init(&forigin);
2936         origin = dns_fixedname_name(&forigin);
2937
2938         /*
2939          * Find if qname is at or below a empty node.
2940          * Use our own copy of the chain.
2941          */
2942
2943         chain = search->chain;
2944         do {
2945                 node = NULL;
2946                 result = dns_rbtnodechain_current(&chain, &name,
2947                                                   origin, &node);
2948                 if (result != ISC_R_SUCCESS)
2949                         break;
2950                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2951                           isc_rwlocktype_read);
2952                 for (header = node->data;
2953                      header != NULL;
2954                      header = header->next) {
2955                         if (header->serial <= search->serial &&
2956                             !IGNORE(header) && EXISTS(header))
2957                                 break;
2958                 }
2959                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2960                             isc_rwlocktype_read);
2961                 if (header != NULL)
2962                         break;
2963                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2964         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2965         if (result == ISC_R_SUCCESS)
2966                 result = dns_name_concatenate(&name, origin, prev, NULL);
2967         if (result != ISC_R_SUCCESS)
2968                 check_prev = ISC_FALSE;
2969
2970         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2971         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2972                 node = NULL;
2973                 result = dns_rbtnodechain_current(&chain, &name,
2974                                                   origin, &node);
2975                 if (result != ISC_R_SUCCESS)
2976                         break;
2977                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2978                           isc_rwlocktype_read);
2979                 for (header = node->data;
2980                      header != NULL;
2981                      header = header->next) {
2982                         if (header->serial <= search->serial &&
2983                             !IGNORE(header) && EXISTS(header))
2984                                 break;
2985                 }
2986                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2987                             isc_rwlocktype_read);
2988                 if (header != NULL)
2989                         break;
2990                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2991         }
2992         if (result == ISC_R_SUCCESS)
2993                 result = dns_name_concatenate(&name, origin, next, NULL);
2994         if (result != ISC_R_SUCCESS)
2995                 check_next = ISC_FALSE;
2996
2997         dns_name_clone(qname, &rname);
2998
2999         /*
3000          * Remove the wildcard label to find the terminal name.
3001          */
3002         n = dns_name_countlabels(wname);
3003         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3004
3005         do {
3006                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3007                     (check_next && dns_name_issubdomain(next, &rname))) {
3008                         answer = ISC_TRUE;
3009                         break;
3010                 }
3011                 /*
3012                  * Remove the left hand label.
3013                  */
3014                 n = dns_name_countlabels(&rname);
3015                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3016         } while (!dns_name_equal(&rname, &tname));
3017         return (answer);
3018 }
3019
3020 static inline isc_result_t
3021 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3022               dns_name_t *qname)
3023 {
3024         unsigned int i, j;
3025         dns_rbtnode_t *node, *level_node, *wnode;
3026         rdatasetheader_t *header;
3027         isc_result_t result = ISC_R_NOTFOUND;
3028         dns_name_t name;
3029         dns_name_t *wname;
3030         dns_fixedname_t fwname;
3031         dns_rbtdb_t *rbtdb;
3032         isc_boolean_t done, wild, active;
3033         dns_rbtnodechain_t wchain;
3034
3035         /*
3036          * Caller must be holding the tree lock and MUST NOT be holding
3037          * any node locks.
3038          */
3039
3040         /*
3041          * Examine each ancestor level.  If the level's wild bit
3042          * is set, then construct the corresponding wildcard name and
3043          * search for it.  If the wildcard node exists, and is active in
3044          * this version, we're done.  If not, then we next check to see
3045          * if the ancestor is active in this version.  If so, then there
3046          * can be no possible wildcard match and again we're done.  If not,
3047          * continue the search.
3048          */
3049
3050         rbtdb = search->rbtdb;
3051         i = search->chain.level_matches;
3052         done = ISC_FALSE;
3053         node = *nodep;
3054         do {
3055                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3056                           isc_rwlocktype_read);
3057
3058                 /*
3059                  * First we try to figure out if this node is active in
3060                  * the search's version.  We do this now, even though we
3061                  * may not need the information, because it simplifies the
3062                  * locking and code flow.
3063                  */
3064                 for (header = node->data;
3065                      header != NULL;
3066                      header = header->next) {
3067                         if (header->serial <= search->serial &&
3068                             !IGNORE(header) && EXISTS(header))
3069                                 break;
3070                 }
3071                 if (header != NULL)
3072                         active = ISC_TRUE;
3073                 else
3074                         active = ISC_FALSE;
3075
3076                 if (node->wild)
3077                         wild = ISC_TRUE;
3078                 else
3079                         wild = ISC_FALSE;
3080
3081                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3082                             isc_rwlocktype_read);
3083
3084                 if (wild) {
3085                         /*
3086                          * Construct the wildcard name for this level.
3087                          */
3088                         dns_name_init(&name, NULL);
3089                         dns_rbt_namefromnode(node, &name);
3090                         dns_fixedname_init(&fwname);
3091                         wname = dns_fixedname_name(&fwname);
3092                         result = dns_name_concatenate(dns_wildcardname, &name,
3093                                                       wname, NULL);
3094                         j = i;
3095                         while (result == ISC_R_SUCCESS && j != 0) {
3096                                 j--;
3097                                 level_node = search->chain.levels[j];
3098                                 dns_name_init(&name, NULL);
3099                                 dns_rbt_namefromnode(level_node, &name);
3100                                 result = dns_name_concatenate(wname,
3101                                                               &name,
3102                                                               wname,
3103                                                               NULL);
3104                         }
3105                         if (result != ISC_R_SUCCESS)
3106                                 break;
3107
3108                         wnode = NULL;
3109                         dns_rbtnodechain_init(&wchain, NULL);
3110                         result = dns_rbt_findnode(rbtdb->tree, wname,
3111                                                   NULL, &wnode, &wchain,
3112                                                   DNS_RBTFIND_EMPTYDATA,
3113                                                   NULL, NULL);
3114                         if (result == ISC_R_SUCCESS) {
3115                                 nodelock_t *lock;
3116
3117                                 /*
3118                                  * We have found the wildcard node.  If it
3119                                  * is active in the search's version, we're
3120                                  * done.
3121                                  */
3122                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3123                                 NODE_LOCK(lock, isc_rwlocktype_read);
3124                                 for (header = wnode->data;
3125                                      header != NULL;
3126                                      header = header->next) {
3127                                         if (header->serial <= search->serial &&
3128                                             !IGNORE(header) && EXISTS(header))
3129                                                 break;
3130                                 }
3131                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3132                                 if (header != NULL ||
3133                                     activeempty(search, &wchain, wname)) {
3134                                         if (activeemtpynode(search, qname,
3135                                                             wname)) {
3136                                                 return (ISC_R_NOTFOUND);
3137                                         }
3138                                         /*
3139                                          * The wildcard node is active!
3140                                          *
3141                                          * Note: result is still ISC_R_SUCCESS
3142                                          * so we don't have to set it.
3143                                          */
3144                                         *nodep = wnode;
3145                                         break;
3146                                 }
3147                         } else if (result != ISC_R_NOTFOUND &&
3148                                    result != DNS_R_PARTIALMATCH) {
3149                                 /*
3150                                  * An error has occurred.  Bail out.
3151                                  */
3152                                 break;
3153                         }
3154                 }
3155
3156                 if (active) {
3157                         /*
3158                          * The level node is active.  Any wildcarding
3159                          * present at higher levels has no
3160                          * effect and we're done.
3161                          */
3162                         result = ISC_R_NOTFOUND;
3163                         break;
3164                 }
3165
3166                 if (i > 0) {
3167                         i--;
3168                         node = search->chain.levels[i];
3169                 } else
3170                         done = ISC_TRUE;
3171         } while (!done);
3172
3173         return (result);
3174 }
3175
3176 static isc_boolean_t
3177 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3178 {
3179         dns_rdata_t rdata = DNS_RDATA_INIT;
3180         dns_rdata_nsec3_t nsec3;
3181         unsigned char *raw;                     /* RDATASLAB */
3182         unsigned int rdlen, count;
3183         isc_region_t region;
3184         isc_result_t result;
3185
3186         REQUIRE(header->type == dns_rdatatype_nsec3);
3187
3188         raw = (unsigned char *)header + sizeof(*header);
3189         count = raw[0] * 256 + raw[1]; /* count */
3190 #if DNS_RDATASET_FIXED
3191         raw += count * 4 + 2;
3192 #else
3193         raw += 2;
3194 #endif
3195         while (count-- > 0) {
3196                 rdlen = raw[0] * 256 + raw[1];
3197 #if DNS_RDATASET_FIXED
3198                 raw += 4;
3199 #else
3200                 raw += 2;
3201 #endif
3202                 region.base = raw;
3203                 region.length = rdlen;
3204                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3205                                      dns_rdatatype_nsec3, &region);
3206                 raw += rdlen;
3207                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3208                 INSIST(result == ISC_R_SUCCESS);
3209                 if (nsec3.hash == search->rbtversion->hash &&
3210                     nsec3.iterations == search->rbtversion->iterations &&
3211                     nsec3.salt_length == search->rbtversion->salt_length &&
3212                     memcmp(nsec3.salt, search->rbtversion->salt,
3213                            nsec3.salt_length) == 0)
3214                         return (ISC_TRUE);
3215                 dns_rdata_reset(&rdata);
3216         }
3217         return (ISC_FALSE);
3218 }
3219
3220 static inline isc_result_t
3221 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3222                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3223                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3224                   dns_db_secure_t secure)
3225 {
3226         dns_rbtnode_t *node;
3227         rdatasetheader_t *header, *header_next, *found, *foundsig;
3228         isc_boolean_t empty_node;
3229         isc_result_t result;
3230         dns_fixedname_t fname, forigin;
3231         dns_name_t *name, *origin;
3232         dns_rdatatype_t type;
3233         rbtdb_rdatatype_t sigtype;
3234         isc_boolean_t wraps;
3235         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3236
3237         if (tree == search->rbtdb->nsec3) {
3238                 type = dns_rdatatype_nsec3;
3239                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3240                 wraps = ISC_TRUE;
3241         } else {
3242                 type = dns_rdatatype_nsec;
3243                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3244                 wraps = ISC_FALSE;
3245         }
3246
3247  again:
3248         do {
3249                 node = NULL;
3250                 dns_fixedname_init(&fname);
3251                 name = dns_fixedname_name(&fname);
3252                 dns_fixedname_init(&forigin);
3253                 origin = dns_fixedname_name(&forigin);
3254                 result = dns_rbtnodechain_current(&search->chain, name,
3255                                                   origin, &node);
3256                 if (result != ISC_R_SUCCESS)
3257                         return (result);
3258                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3259                           isc_rwlocktype_read);
3260                 found = NULL;
3261                 foundsig = NULL;
3262                 empty_node = ISC_TRUE;
3263                 for (header = node->data;
3264                      header != NULL;
3265                      header = header_next) {
3266                         header_next = header->next;
3267                         /*
3268                          * Look for an active, extant NSEC or RRSIG NSEC.
3269                          */
3270                         do {
3271                                 if (header->serial <= search->serial &&
3272                                     !IGNORE(header)) {
3273                                         /*
3274                                          * Is this a "this rdataset doesn't
3275                                          * exist" record?
3276                                          */
3277                                         if (NONEXISTENT(header))
3278                                                 header = NULL;
3279                                         break;
3280                                 } else
3281                                         header = header->down;
3282                         } while (header != NULL);
3283                         if (header != NULL) {
3284                                 /*
3285                                  * We now know that there is at least one
3286                                  * active rdataset at this node.
3287                                  */
3288                                 empty_node = ISC_FALSE;
3289                                 if (header->type == type) {
3290                                         found = header;
3291                                         if (foundsig != NULL)
3292                                                 break;
3293                                 } else if (header->type == sigtype) {
3294                                         foundsig = header;
3295                                         if (found != NULL)
3296                                                 break;
3297                                 }
3298                         }
3299                 }
3300                 if (!empty_node) {
3301                         if (found != NULL && search->rbtversion->havensec3 &&
3302                             found->type == dns_rdatatype_nsec3 &&
3303                             !matchparams(found, search)) {
3304                                 empty_node = ISC_TRUE;
3305                                 found = NULL;
3306                                 foundsig = NULL;
3307                                 result = dns_rbtnodechain_prev(&search->chain,
3308                                                                NULL, NULL);
3309                         } else if (found != NULL &&
3310                                    (foundsig != NULL || !need_sig))
3311                         {
3312                                 /*
3313                                  * We've found the right NSEC/NSEC3 record.
3314                                  *
3315                                  * Note: for this to really be the right
3316                                  * NSEC record, it's essential that the NSEC
3317                                  * records of any nodes obscured by a zone
3318                                  * cut have been removed; we assume this is
3319                                  * the case.
3320                                  */
3321                                 result = dns_name_concatenate(name, origin,
3322                                                               foundname, NULL);
3323                                 if (result == ISC_R_SUCCESS) {
3324                                         if (nodep != NULL) {
3325                                                 new_reference(search->rbtdb,
3326                                                               node);
3327                                                 *nodep = node;
3328                                         }
3329                                         bind_rdataset(search->rbtdb, node,
3330                                                       found, search->now,
3331                                                       rdataset);
3332                                         if (foundsig != NULL)
3333                                                 bind_rdataset(search->rbtdb,
3334                                                               node,
3335                                                               foundsig,
3336                                                               search->now,
3337                                                               sigrdataset);
3338                                 }
3339                         } else if (found == NULL && foundsig == NULL) {
3340                                 /*
3341                                  * This node is active, but has no NSEC or
3342                                  * RRSIG NSEC.  That means it's glue or
3343                                  * other obscured zone data that isn't
3344                                  * relevant for our search.  Treat the
3345                                  * node as if it were empty and keep looking.
3346                                  */
3347                                 empty_node = ISC_TRUE;
3348                                 result = dns_rbtnodechain_prev(&search->chain,
3349                                                                NULL, NULL);
3350                         } else {
3351                                 /*
3352                                  * We found an active node, but either the
3353                                  * NSEC or the RRSIG NSEC is missing.  This
3354                                  * shouldn't happen.
3355                                  */
3356                                 result = DNS_R_BADDB;
3357                         }
3358                 } else {
3359                         /*
3360                          * This node isn't active.  We've got to keep
3361                          * looking.
3362                          */
3363                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3364                                                        NULL);
3365                 }
3366                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3367                             isc_rwlocktype_read);
3368         } while (empty_node && result == ISC_R_SUCCESS);
3369
3370         if (result == ISC_R_NOMORE && wraps) {
3371                 result = dns_rbtnodechain_last(&search->chain, tree,
3372                                                NULL, NULL);
3373                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3374                         wraps = ISC_FALSE;
3375                         goto again;
3376                 }
3377         }
3378
3379         /*
3380          * If the result is ISC_R_NOMORE, then we got to the beginning of
3381          * the database and didn't find a NSEC record.  This shouldn't
3382          * happen.
3383          */
3384         if (result == ISC_R_NOMORE)
3385                 result = DNS_R_BADDB;
3386
3387         return (result);
3388 }
3389
3390 static isc_result_t
3391 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3392           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3393           dns_dbnode_t **nodep, dns_name_t *foundname,
3394           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3395 {
3396         dns_rbtnode_t *node = NULL;
3397         isc_result_t result;
3398         rbtdb_search_t search;
3399         isc_boolean_t cname_ok = ISC_TRUE;
3400         isc_boolean_t close_version = ISC_FALSE;
3401         isc_boolean_t maybe_zonecut = ISC_FALSE;
3402         isc_boolean_t at_zonecut = ISC_FALSE;
3403         isc_boolean_t wild;
3404         isc_boolean_t empty_node;
3405         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3406         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3407         rbtdb_rdatatype_t sigtype;
3408         isc_boolean_t active;
3409         dns_rbtnodechain_t chain;
3410         nodelock_t *lock;
3411         dns_rbt_t *tree;
3412
3413         search.rbtdb = (dns_rbtdb_t *)db;
3414
3415         REQUIRE(VALID_RBTDB(search.rbtdb));
3416
3417         /*
3418          * We don't care about 'now'.
3419          */
3420         UNUSED(now);
3421
3422         /*
3423          * If the caller didn't supply a version, attach to the current
3424          * version.
3425          */
3426         if (version == NULL) {
3427                 currentversion(db, &version);
3428                 close_version = ISC_TRUE;
3429         }
3430
3431         search.rbtversion = version;
3432         search.serial = search.rbtversion->serial;
3433         search.options = options;
3434         search.copy_name = ISC_FALSE;
3435         search.need_cleanup = ISC_FALSE;
3436         search.wild = ISC_FALSE;
3437         search.zonecut = NULL;
3438         dns_fixedname_init(&search.zonecut_name);
3439         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3440         search.now = 0;
3441
3442         /*
3443          * 'wild' will be true iff. we've matched a wildcard.
3444          */
3445         wild = ISC_FALSE;
3446
3447         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3448
3449         /*
3450          * Search down from the root of the tree.  If, while going down, we
3451          * encounter a callback node, zone_zonecut_callback() will search the
3452          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3453          */
3454         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3455                                                          search.rbtdb->tree;
3456         result = dns_rbt_findnode(tree, name, foundname, &node,
3457                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3458                                   zone_zonecut_callback, &search);
3459
3460         if (result == DNS_R_PARTIALMATCH) {
3461         partial_match:
3462                 if (search.zonecut != NULL) {
3463                     result = setup_delegation(&search, nodep, foundname,
3464                                               rdataset, sigrdataset);
3465                     goto tree_exit;
3466                 }
3467
3468                 if (search.wild) {
3469                         /*
3470                          * At least one of the levels in the search chain
3471                          * potentially has a wildcard.  For each such level,
3472                          * we must see if there's a matching wildcard active
3473                          * in the current version.
3474                          */
3475                         result = find_wildcard(&search, &node, name);
3476                         if (result == ISC_R_SUCCESS) {
3477                                 result = dns_name_copy(name, foundname, NULL);
3478                                 if (result != ISC_R_SUCCESS)
3479                                         goto tree_exit;
3480                                 wild = ISC_TRUE;
3481                                 goto found;
3482                         }
3483                         else if (result != ISC_R_NOTFOUND)
3484                                 goto tree_exit;
3485                 }
3486
3487                 chain = search.chain;
3488                 active = activeempty(&search, &chain, name);
3489
3490                 /*
3491                  * If we're here, then the name does not exist, is not
3492                  * beneath a zonecut, and there's no matching wildcard.
3493                  */
3494                 if ((search.rbtversion->secure == dns_db_secure &&
3495                      !search.rbtversion->havensec3) ||
3496                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3497                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3498                 {
3499                         result = find_closest_nsec(&search, nodep, foundname,
3500                                                    rdataset, sigrdataset, tree,
3501                                                    search.rbtversion->secure);
3502                         if (result == ISC_R_SUCCESS)
3503                                 result = active ? DNS_R_EMPTYNAME :
3504                                                   DNS_R_NXDOMAIN;
3505                 } else
3506                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3507                 goto tree_exit;
3508         } else if (result != ISC_R_SUCCESS)
3509                 goto tree_exit;
3510
3511  found:
3512         /*
3513          * We have found a node whose name is the desired name, or we
3514          * have matched a wildcard.
3515          */
3516
3517         if (search.zonecut != NULL) {
3518                 /*
3519                  * If we're beneath a zone cut, we don't want to look for
3520                  * CNAMEs because they're not legitimate zone glue.
3521                  */
3522                 cname_ok = ISC_FALSE;
3523         } else {
3524                 /*
3525                  * The node may be a zone cut itself.  If it might be one,
3526                  * make sure we check for it later.
3527                  */
3528                 if (node->find_callback &&
3529                     (node != search.rbtdb->origin_node ||
3530                      IS_STUB(search.rbtdb)) &&
3531                     !dns_rdatatype_atparent(type))
3532                         maybe_zonecut = ISC_TRUE;
3533         }
3534
3535         /*
3536          * Certain DNSSEC types are not subject to CNAME matching
3537          * (RFC4035, section 2.5 and RFC3007).
3538          *
3539          * We don't check for RRSIG, because we don't store RRSIG records
3540          * directly.
3541          */
3542         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3543                 cname_ok = ISC_FALSE;
3544
3545         /*
3546          * We now go looking for rdata...
3547          */
3548
3549         NODE_LOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3550                   isc_rwlocktype_read);
3551
3552         found = NULL;
3553         foundsig = NULL;
3554         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3555         nsecheader = NULL;
3556         nsecsig = NULL;
3557         cnamesig = NULL;
3558         empty_node = ISC_TRUE;
3559         for (header = node->data; header != NULL; header = header_next) {
3560                 header_next = header->next;
3561                 /*
3562                  * Look for an active, extant rdataset.
3563                  */
3564                 do {
3565                         if (header->serial <= search.serial &&
3566                             !IGNORE(header)) {
3567                                 /*
3568                                  * Is this a "this rdataset doesn't
3569                                  * exist" record?
3570                                  */
3571                                 if (NONEXISTENT(header))
3572                                         header = NULL;
3573                                 break;
3574                         } else
3575                                 header = header->down;
3576                 } while (header != NULL);
3577                 if (header != NULL) {
3578                         /*
3579                          * We now know that there is at least one active
3580                          * rdataset at this node.
3581                          */
3582                         empty_node = ISC_FALSE;
3583
3584                         /*
3585                          * Do special zone cut handling, if requested.
3586                          */
3587                         if (maybe_zonecut &&
3588                             header->type == dns_rdatatype_ns) {
3589                                 /*
3590                                  * We increment the reference count on node to
3591                                  * ensure that search->zonecut_rdataset will
3592                                  * still be valid later.
3593                                  */
3594                                 new_reference(search.rbtdb, node);
3595                                 search.zonecut = node;
3596                                 search.zonecut_rdataset = header;
3597                                 search.zonecut_sigrdataset = NULL;
3598                                 search.need_cleanup = ISC_TRUE;
3599                                 maybe_zonecut = ISC_FALSE;
3600                                 at_zonecut = ISC_TRUE;
3601                                 /*
3602                                  * It is not clear if KEY should still be
3603                                  * allowed at the parent side of the zone
3604                                  * cut or not.  It is needed for RFC3007
3605                                  * validated updates.
3606                                  */
3607                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3608                                     && type != dns_rdatatype_nsec
3609                                     && type != dns_rdatatype_key) {
3610                                         /*
3611                                          * Glue is not OK, but any answer we
3612                                          * could return would be glue.  Return
3613                                          * the delegation.
3614                                          */
3615                                         found = NULL;
3616                                         break;
3617                                 }
3618                                 if (found != NULL && foundsig != NULL)
3619                                         break;
3620                         }
3621
3622
3623                         /*
3624                          * If the NSEC3 record doesn't match the chain
3625                          * we are using behave as if it isn't here.
3626                          */
3627                         if (header->type == dns_rdatatype_nsec3 &&
3628                             !matchparams(header, &search))
3629                                 goto partial_match;
3630                         /*
3631                          * If we found a type we were looking for,
3632                          * remember it.
3633                          */
3634                         if (header->type == type ||
3635                             type == dns_rdatatype_any ||
3636                             (header->type == dns_rdatatype_cname &&
3637                              cname_ok)) {
3638                                 /*
3639                                  * We've found the answer!
3640                                  */
3641                                 found = header;
3642                                 if (header->type == dns_rdatatype_cname &&
3643                                     cname_ok) {
3644                                         /*
3645                                          * We may be finding a CNAME instead
3646                                          * of the desired type.
3647                                          *
3648                                          * If we've already got the CNAME RRSIG,
3649                                          * use it, otherwise change sigtype
3650                                          * so that we find it.
3651                                          */
3652                                         if (cnamesig != NULL)
3653                                                 foundsig = cnamesig;
3654                                         else
3655                                                 sigtype =
3656                                                     RBTDB_RDATATYPE_SIGCNAME;
3657                                 }
3658                                 /*
3659                                  * If we've got all we need, end the search.
3660                                  */
3661                                 if (!maybe_zonecut && foundsig != NULL)
3662                                         break;
3663                         } else if (header->type == sigtype) {
3664                                 /*
3665                                  * We've found the RRSIG rdataset for our
3666                                  * target type.  Remember it.
3667                                  */
3668                                 foundsig = header;
3669                                 /*
3670                                  * If we've got all we need, end the search.
3671                                  */
3672                                 if (!maybe_zonecut && found != NULL)
3673                                         break;
3674                         } else if (header->type == dns_rdatatype_nsec &&
3675                                    !search.rbtversion->havensec3) {
3676                                 /*
3677                                  * Remember a NSEC rdataset even if we're
3678                                  * not specifically looking for it, because
3679                                  * we might need it later.
3680                                  */
3681                                 nsecheader = header;
3682                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3683                                    !search.rbtversion->havensec3) {
3684                                 /*
3685                                  * If we need the NSEC rdataset, we'll also
3686                                  * need its signature.
3687                                  */
3688                                 nsecsig = header;
3689                         } else if (cname_ok &&
3690                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3691                                 /*
3692                                  * If we get a CNAME match, we'll also need
3693                                  * its signature.
3694                                  */
3695                                 cnamesig = header;
3696                         }
3697                 }
3698         }
3699
3700         if (empty_node) {
3701                 /*
3702                  * We have an exact match for the name, but there are no
3703                  * active rdatasets in the desired version.  That means that
3704                  * this node doesn't exist in the desired version, and that
3705                  * we really have a partial match.
3706                  */
3707                 if (!wild) {
3708                         lock = &search.rbtdb->node_locks[node->locknum].lock;
3709                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3710                         goto partial_match;
3711                 }
3712         }
3713
3714         /*
3715          * If we didn't find what we were looking for...
3716          */
3717         if (found == NULL) {
3718                 if (search.zonecut != NULL) {
3719                         /*
3720                          * We were trying to find glue at a node beneath a
3721                          * zone cut, but didn't.
3722                          *
3723                          * Return the delegation.
3724                          */
3725                         lock = &search.rbtdb->node_locks[node->locknum].lock;
3726                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3727                         result = setup_delegation(&search, nodep, foundname,
3728                                                   rdataset, sigrdataset);
3729                         goto tree_exit;
3730                 }
3731                 /*
3732                  * The desired type doesn't exist.
3733                  */
3734                 result = DNS_R_NXRRSET;
3735                 if (search.rbtversion->secure == dns_db_secure &&
3736                     !search.rbtversion->havensec3 &&
3737                     (nsecheader == NULL || nsecsig == NULL)) {
3738                         /*
3739                          * The zone is secure but there's no NSEC,
3740                          * or the NSEC has no signature!
3741                          */
3742                         if (!wild) {
3743                                 result = DNS_R_BADDB;
3744                                 goto node_exit;
3745                         }
3746
3747                         lock = &search.rbtdb->node_locks[node->locknum].lock;
3748                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3749                         result = find_closest_nsec(&search, nodep, foundname,
3750                                                    rdataset, sigrdataset,
3751                                                    search.rbtdb->tree,
3752                                                    search.rbtversion->secure);
3753                         if (result == ISC_R_SUCCESS)
3754                                 result = DNS_R_EMPTYWILD;
3755                         goto tree_exit;
3756                 }
3757                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3758                     nsecheader == NULL)
3759                 {
3760                         /*
3761                          * There's no NSEC record, and we were told
3762                          * to find one.
3763                          */
3764                         result = DNS_R_BADDB;
3765                         goto node_exit;
3766                 }
3767                 if (nodep != NULL) {
3768                         new_reference(search.rbtdb, node);
3769                         *nodep = node;
3770                 }
3771                 if ((search.rbtversion->secure == dns_db_secure &&
3772                      !search.rbtversion->havensec3) ||
3773                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3774                 {
3775                         bind_rdataset(search.rbtdb, node, nsecheader,
3776                                       0, rdataset);
3777                         if (nsecsig != NULL)
3778                                 bind_rdataset(search.rbtdb, node,
3779                                               nsecsig, 0, sigrdataset);
3780                 }
3781                 if (wild)
3782                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3783                 goto node_exit;
3784         }
3785
3786         /*
3787          * We found what we were looking for, or we found a CNAME.
3788          */
3789
3790         if (type != found->type &&
3791             type != dns_rdatatype_any &&
3792             found->type == dns_rdatatype_cname) {
3793                 /*
3794                  * We weren't doing an ANY query and we found a CNAME instead
3795                  * of the type we were looking for, so we need to indicate
3796                  * that result to the caller.
3797                  */
3798                 result = DNS_R_CNAME;
3799         } else if (search.zonecut != NULL) {
3800                 /*
3801                  * If we're beneath a zone cut, we must indicate that the
3802                  * result is glue, unless we're actually at the zone cut
3803                  * and the type is NSEC or KEY.
3804                  */
3805                 if (search.zonecut == node) {
3806                         /*
3807                          * It is not clear if KEY should still be
3808                          * allowed at the parent side of the zone
3809                          * cut or not.  It is needed for RFC3007
3810                          * validated updates.
3811                          */
3812                         if (type == dns_rdatatype_nsec ||
3813                             type == dns_rdatatype_nsec3 ||
3814                             type == dns_rdatatype_key)
3815                                 result = ISC_R_SUCCESS;
3816                         else if (type == dns_rdatatype_any)
3817                                 result = DNS_R_ZONECUT;
3818                         else
3819                                 result = DNS_R_GLUE;
3820                 } else
3821                         result = DNS_R_GLUE;
3822                 /*
3823                  * We might have found data that isn't glue, but was occluded
3824                  * by a dynamic update.  If the caller cares about this, they
3825                  * will have told us to validate glue.
3826                  *
3827                  * XXX We should cache the glue validity state!
3828                  */
3829                 if (result == DNS_R_GLUE &&
3830                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3831                     !valid_glue(&search, foundname, type, node)) {
3832                         lock = &search.rbtdb->node_locks[node->locknum].lock;
3833                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3834                         result = setup_delegation(&search, nodep, foundname,
3835                                                   rdataset, sigrdataset);
3836                     goto tree_exit;
3837                 }
3838         } else {
3839                 /*
3840                  * An ordinary successful query!
3841                  */
3842                 result = ISC_R_SUCCESS;
3843         }
3844
3845         if (nodep != NULL) {
3846                 if (!at_zonecut)
3847                         new_reference(search.rbtdb, node);
3848                 else
3849                         search.need_cleanup = ISC_FALSE;
3850                 *nodep = node;
3851         }
3852
3853         if (type != dns_rdatatype_any) {
3854                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3855                 if (foundsig != NULL)
3856                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3857                                       sigrdataset);
3858         }
3859
3860         if (wild)
3861                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3862
3863  node_exit:
3864         NODE_UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3865                     isc_rwlocktype_read);
3866
3867  tree_exit:
3868         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3869
3870         /*
3871          * If we found a zonecut but aren't going to use it, we have to
3872          * let go of it.
3873          */
3874         if (search.need_cleanup) {
3875                 node = search.zonecut;
3876                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3877
3878                 NODE_LOCK(lock, isc_rwlocktype_read);
3879                 decrement_reference(search.rbtdb, node, 0,
3880                                     isc_rwlocktype_read, isc_rwlocktype_none,
3881                                     ISC_FALSE);
3882                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3883         }
3884
3885         if (close_version)
3886                 closeversion(db, &version, ISC_FALSE);
3887
3888         dns_rbtnodechain_reset(&search.chain);
3889
3890         return (result);
3891 }
3892
3893 static isc_result_t
3894 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3895                  isc_stdtime_t now, dns_dbnode_t **nodep,
3896                  dns_name_t *foundname,
3897                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3898 {
3899         UNUSED(db);
3900         UNUSED(name);
3901         UNUSED(options);
3902         UNUSED(now);
3903         UNUSED(nodep);
3904         UNUSED(foundname);
3905         UNUSED(rdataset);
3906         UNUSED(sigrdataset);
3907
3908         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3909
3910         return (ISC_R_NOTIMPLEMENTED);
3911 }
3912
3913 static isc_result_t
3914 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3915         rbtdb_search_t *search = arg;
3916         rdatasetheader_t *header, *header_prev, *header_next;
3917         rdatasetheader_t *dname_header, *sigdname_header;
3918         isc_result_t result;
3919         nodelock_t *lock;
3920         isc_rwlocktype_t locktype;
3921
3922         /* XXX comment */
3923
3924         REQUIRE(search->zonecut == NULL);
3925
3926         /*
3927          * Keep compiler silent.
3928          */
3929         UNUSED(name);
3930
3931         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3932         locktype = isc_rwlocktype_read;
3933         NODE_LOCK(lock, locktype);
3934
3935         /*
3936          * Look for a DNAME or RRSIG DNAME rdataset.
3937          */
3938         dname_header = NULL;
3939         sigdname_header = NULL;
3940         header_prev = NULL;
3941         for (header = node->data; header != NULL; header = header_next) {
3942                 header_next = header->next;
3943                 if (header->rdh_ttl <= search->now) {
3944                         /*
3945                          * This rdataset is stale.  If no one else is
3946                          * using the node, we can clean it up right
3947                          * now, otherwise we mark it as stale, and
3948                          * the node as dirty, so it will get cleaned
3949                          * up later.
3950                          */
3951                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3952                             (locktype == isc_rwlocktype_write ||
3953                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3954                                 /*
3955                                  * We update the node's status only when we
3956                                  * can get write access; otherwise, we leave
3957                                  * others to this work.  Periodical cleaning
3958                                  * will eventually take the job as the last
3959                                  * resort.
3960                                  * We won't downgrade the lock, since other
3961                                  * rdatasets are probably stale, too.
3962                                  */
3963                                 locktype = isc_rwlocktype_write;
3964
3965                                 if (dns_rbtnode_refcurrent(node) == 0) {
3966                                         isc_mem_t *mctx;
3967
3968                                         /*
3969                                          * header->down can be non-NULL if the
3970                                          * refcount has just decremented to 0
3971                                          * but decrement_reference() has not
3972                                          * performed clean_cache_node(), in
3973                                          * which case we need to purge the
3974                                          * stale headers first.
3975                                          */
3976                                         mctx = search->rbtdb->common.mctx;
3977                                         clean_stale_headers(search->rbtdb,
3978                                                             mctx,
3979                                                             header);
3980                                         if (header_prev != NULL)
3981                                                 header_prev->next =
3982                                                         header->next;
3983                                         else
3984                                                 node->data = header->next;
3985                                         free_rdataset(search->rbtdb, mctx,
3986                                                       header);
3987                                 } else {
3988                                         header->attributes |=
3989                                                 RDATASET_ATTR_STALE;
3990                                         node->dirty = 1;
3991                                         header_prev = header;
3992                                 }
3993                         } else
3994                                 header_prev = header;
3995                 } else if (header->type == dns_rdatatype_dname &&
3996                            EXISTS(header)) {
3997                         dname_header = header;
3998                         header_prev = header;
3999                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4000                          EXISTS(header)) {
4001                         sigdname_header = header;
4002                         header_prev = header;
4003                 } else
4004                         header_prev = header;
4005         }
4006
4007         if (dname_header != NULL &&
4008             (!DNS_TRUST_PENDING(dname_header->trust) ||
4009              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4010                 /*
4011                  * We increment the reference count on node to ensure that
4012                  * search->zonecut_rdataset will still be valid later.
4013                  */
4014                 new_reference(search->rbtdb, node);
4015                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4016                 search->zonecut = node;
4017                 search->zonecut_rdataset = dname_header;
4018                 search->zonecut_sigrdataset = sigdname_header;
4019                 search->need_cleanup = ISC_TRUE;
4020                 result = DNS_R_PARTIALMATCH;
4021         } else
4022                 result = DNS_R_CONTINUE;
4023
4024         NODE_UNLOCK(lock, locktype);
4025
4026         return (result);
4027 }
4028
4029 static inline isc_result_t
4030 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4031                      dns_dbnode_t **nodep, dns_name_t *foundname,
4032                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4033 {
4034         unsigned int i;
4035         dns_rbtnode_t *level_node;
4036         rdatasetheader_t *header, *header_prev, *header_next;
4037         rdatasetheader_t *found, *foundsig;
4038         isc_result_t result = ISC_R_NOTFOUND;
4039         dns_name_t name;
4040         dns_rbtdb_t *rbtdb;
4041         isc_boolean_t done;
4042         nodelock_t *lock;
4043         isc_rwlocktype_t locktype;
4044
4045         /*
4046          * Caller must be holding the tree lock.
4047          */
4048
4049         rbtdb = search->rbtdb;
4050         i = search->chain.level_matches;
4051         done = ISC_FALSE;
4052         do {
4053                 locktype = isc_rwlocktype_read;
4054                 lock = &rbtdb->node_locks[node->locknum].lock;
4055                 NODE_LOCK(lock, locktype);
4056
4057                 /*
4058                  * Look for NS and RRSIG NS rdatasets.
4059                  */
4060                 found = NULL;
4061                 foundsig = NULL;
4062                 header_prev = NULL;
4063                 for (header = node->data;
4064                      header != NULL;
4065                      header = header_next) {
4066                         header_next = header->next;
4067                         if (header->rdh_ttl <= search->now) {
4068                                 /*
4069                                  * This rdataset is stale.  If no one else is
4070                                  * using the node, we can clean it up right
4071                                  * now, otherwise we mark it as stale, and
4072                                  * the node as dirty, so it will get cleaned
4073                                  * up later.
4074                                  */
4075                                 if ((header->rdh_ttl <= search->now -
4076                                                     RBTDB_VIRTUAL) &&
4077                                     (locktype == isc_rwlocktype_write ||
4078                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4079                                         /*
4080                                          * We update the node's status only
4081                                          * when we can get write access.
4082                                          */
4083                                         locktype = isc_rwlocktype_write;
4084
4085                                         if (dns_rbtnode_refcurrent(node)
4086                                             == 0) {
4087                                                 isc_mem_t *m;
4088
4089                                                 m = search->rbtdb->common.mctx;
4090                                                 clean_stale_headers(
4091                                                         search->rbtdb,
4092                                                         m, header);
4093                                                 if (header_prev != NULL)
4094                                                         header_prev->next =
4095                                                                 header->next;
4096                                                 else
4097                                                         node->data =
4098                                                                 header->next;
4099                                                 free_rdataset(rbtdb, m,
4100                                                               header);
4101                                         } else {
4102                                                 header->attributes |=
4103                                                         RDATASET_ATTR_STALE;
4104                                                 node->dirty = 1;
4105                                                 header_prev = header;
4106                                         }
4107                                 } else
4108                                         header_prev = header;
4109                         } else if (EXISTS(header)) {
4110                                 /*
4111                                  * We've found an extant rdataset.  See if
4112                                  * we're interested in it.
4113                                  */
4114                                 if (header->type == dns_rdatatype_ns) {
4115                                         found = header;
4116                                         if (foundsig != NULL)
4117                                                 break;
4118                                 } else if (header->type ==
4119                                            RBTDB_RDATATYPE_SIGNS) {
4120                                         foundsig = header;
4121                                         if (found != NULL)
4122                                                 break;
4123                                 }
4124                                 header_prev = header;
4125                         } else
4126                                 header_prev = header;
4127                 }
4128
4129                 if (found != NULL) {
4130                         /*
4131                          * If we have to set foundname, we do it before
4132                          * anything else.  If we were to set foundname after
4133                          * we had set nodep or bound the rdataset, then we'd
4134                          * have to undo that work if dns_name_concatenate()
4135                          * failed.  By setting foundname first, there's
4136                          * nothing to undo if we have trouble.
4137                          */
4138                         if (foundname != NULL) {
4139                                 dns_name_init(&name, NULL);
4140                                 dns_rbt_namefromnode(node, &name);
4141                                 result = dns_name_copy(&name, foundname, NULL);
4142                                 while (result == ISC_R_SUCCESS && i > 0) {
4143                                         i--;
4144                                         level_node = search->chain.levels[i];
4145                                         dns_name_init(&name, NULL);
4146                                         dns_rbt_namefromnode(level_node,
4147                                                              &name);
4148                                         result =
4149                                                 dns_name_concatenate(foundname,
4150                                                                      &name,
4151                                                                      foundname,
4152                                                                      NULL);
4153                                 }
4154                                 if (result != ISC_R_SUCCESS) {
4155                                         *nodep = NULL;
4156                                         goto node_exit;
4157                                 }
4158                         }
4159                         result = DNS_R_DELEGATION;
4160                         if (nodep != NULL) {
4161                                 new_reference(search->rbtdb, node);
4162                                 *nodep = node;
4163                         }
4164                         bind_rdataset(search->rbtdb, node, found, search->now,
4165                                       rdataset);
4166                         if (foundsig != NULL)
4167                                 bind_rdataset(search->rbtdb, node, foundsig,
4168                                               search->now, sigrdataset);
4169                         if (need_headerupdate(found, search->now) ||
4170                             (foundsig != NULL &&
4171                              need_headerupdate(foundsig, search->now))) {
4172                                 if (locktype != isc_rwlocktype_write) {
4173                                         NODE_UNLOCK(lock, locktype);
4174                                         NODE_LOCK(lock, isc_rwlocktype_write);
4175                                         locktype = isc_rwlocktype_write;
4176                                 }
4177                                 if (need_headerupdate(found, search->now))
4178                                         update_header(search->rbtdb, found,
4179                                                       search->now);
4180                                 if (foundsig != NULL &&
4181                                     need_headerupdate(foundsig, search->now)) {
4182                                         update_header(search->rbtdb, foundsig,
4183                                                       search->now);
4184                                 }
4185                         }
4186                 }
4187
4188         node_exit:
4189                 NODE_UNLOCK(lock, locktype);
4190
4191                 if (found == NULL && i > 0) {
4192                         i--;
4193                         node = search->chain.levels[i];
4194                 } else
4195                         done = ISC_TRUE;
4196
4197         } while (!done);
4198
4199         return (result);
4200 }
4201
4202 static isc_result_t
4203 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4204                   isc_stdtime_t now, dns_name_t *foundname,
4205                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4206 {
4207         dns_rbtnode_t *node;
4208         rdatasetheader_t *header, *header_next, *header_prev;
4209         rdatasetheader_t *found, *foundsig;
4210         isc_boolean_t empty_node;
4211         isc_result_t result;
4212         dns_fixedname_t fname, forigin;
4213         dns_name_t *name, *origin;
4214         rbtdb_rdatatype_t matchtype, sigmatchtype;
4215         nodelock_t *lock;
4216         isc_rwlocktype_t locktype;
4217
4218         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4219         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4220                                              dns_rdatatype_nsec);
4221
4222         do {
4223                 node = NULL;
4224                 dns_fixedname_init(&fname);
4225                 name = dns_fixedname_name(&fname);
4226                 dns_fixedname_init(&forigin);
4227                 origin = dns_fixedname_name(&forigin);
4228                 result = dns_rbtnodechain_current(&search->chain, name,
4229                                                   origin, &node);
4230                 if (result != ISC_R_SUCCESS)
4231                         return (result);
4232                 locktype = isc_rwlocktype_read;
4233                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4234                 NODE_LOCK(lock, locktype);
4235                 found = NULL;
4236                 foundsig = NULL;
4237                 empty_node = ISC_TRUE;
4238                 header_prev = NULL;
4239                 for (header = node->data;
4240                      header != NULL;
4241                      header = header_next) {
4242                         header_next = header->next;
4243                         if (header->rdh_ttl <= now) {
4244                                 /*
4245                                  * This rdataset is stale.  If no one else is
4246                                  * using the node, we can clean it up right
4247                                  * now, otherwise we mark it as stale, and the
4248                                  * node as dirty, so it will get cleaned up
4249                                  * later.
4250                                  */
4251                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4252                                     (locktype == isc_rwlocktype_write ||
4253                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4254                                         /*
4255                                          * We update the node's status only
4256                                          * when we can get write access.
4257                                          */
4258                                         locktype = isc_rwlocktype_write;
4259
4260                                         if (dns_rbtnode_refcurrent(node)
4261                                             == 0) {
4262                                                 isc_mem_t *m;
4263
4264                                                 m = search->rbtdb->common.mctx;
4265                                                 clean_stale_headers(
4266                                                         search->rbtdb,
4267                                                         m, header);
4268                                                 if (header_prev != NULL)
4269                                                         header_prev->next =
4270                                                                 header->next;
4271                                                 else
4272                                                         node->data = header->next;
4273                                                 free_rdataset(search->rbtdb, m,
4274                                                               header);
4275                                         } else {
4276                                                 header->attributes |=
4277                                                         RDATASET_ATTR_STALE;
4278                                                 node->dirty = 1;
4279                                                 header_prev = header;
4280                                         }
4281                                 } else
4282                                         header_prev = header;
4283                                 continue;
4284                         }
4285                         if (NONEXISTENT(header) ||
4286                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4287                                 header_prev = header;
4288                                 continue;
4289                         }
4290                         empty_node = ISC_FALSE;
4291                         if (header->type == matchtype)
4292                                 found = header;
4293                         else if (header->type == sigmatchtype)
4294                                 foundsig = header;
4295                         header_prev = header;
4296                 }
4297                 if (found != NULL) {
4298                         result = dns_name_concatenate(name, origin,
4299                                                       foundname, NULL);
4300                         if (result != ISC_R_SUCCESS)
4301                                 goto unlock_node;
4302                         bind_rdataset(search->rbtdb, node, found,
4303                                       now, rdataset);
4304                         if (foundsig != NULL)
4305                                 bind_rdataset(search->rbtdb, node, foundsig,
4306                                               now, sigrdataset);
4307                         new_reference(search->rbtdb, node);
4308                         *nodep = node;
4309                         result = DNS_R_COVERINGNSEC;
4310                 } else if (!empty_node) {
4311                         result = ISC_R_NOTFOUND;
4312                 } else
4313                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4314                                                        NULL);
4315  unlock_node:
4316                 NODE_UNLOCK(lock, locktype);
4317         } while (empty_node && result == ISC_R_SUCCESS);
4318         return (result);
4319 }
4320
4321 static isc_result_t
4322 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4323            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4324            dns_dbnode_t **nodep, dns_name_t *foundname,
4325            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4326 {
4327         dns_rbtnode_t *node = NULL;
4328         isc_result_t result;
4329         rbtdb_search_t search;
4330         isc_boolean_t cname_ok = ISC_TRUE;
4331         isc_boolean_t empty_node;
4332         nodelock_t *lock;
4333         isc_rwlocktype_t locktype;
4334         rdatasetheader_t *header, *header_prev, *header_next;
4335         rdatasetheader_t *found, *nsheader;
4336         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4337         rdatasetheader_t *update, *updatesig;
4338         rbtdb_rdatatype_t sigtype, negtype;
4339
4340         UNUSED(version);
4341
4342         search.rbtdb = (dns_rbtdb_t *)db;
4343
4344         REQUIRE(VALID_RBTDB(search.rbtdb));
4345         REQUIRE(version == NULL);
4346
4347         if (now == 0)
4348                 isc_stdtime_get(&now);
4349
4350         search.rbtversion = NULL;
4351         search.serial = 1;
4352         search.options = options;
4353         search.copy_name = ISC_FALSE;
4354         search.need_cleanup = ISC_FALSE;
4355         search.wild = ISC_FALSE;
4356         search.zonecut = NULL;
4357         dns_fixedname_init(&search.zonecut_name);
4358         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4359         search.now = now;
4360         update = NULL;
4361         updatesig = NULL;
4362
4363         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4364
4365         /*
4366          * Search down from the root of the tree.  If, while going down, we
4367          * encounter a callback node, cache_zonecut_callback() will search the
4368          * rdatasets at the zone cut for a DNAME rdataset.
4369          */
4370         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4371                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4372                                   cache_zonecut_callback, &search);
4373
4374         if (result == DNS_R_PARTIALMATCH) {
4375                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4376                         result = find_coveringnsec(&search, nodep, now,
4377                                                    foundname, rdataset,
4378                                                    sigrdataset);
4379                         if (result == DNS_R_COVERINGNSEC)
4380                                 goto tree_exit;
4381                 }
4382                 if (search.zonecut != NULL) {
4383                     result = setup_delegation(&search, nodep, foundname,
4384                                               rdataset, sigrdataset);
4385                     goto tree_exit;
4386                 } else {
4387                 find_ns:
4388                         result = find_deepest_zonecut(&search, node, nodep,
4389                                                       foundname, rdataset,
4390                                                       sigrdataset);
4391                         goto tree_exit;
4392                 }
4393         } else if (result != ISC_R_SUCCESS)
4394                 goto tree_exit;
4395
4396         /*
4397          * Certain DNSSEC types are not subject to CNAME matching
4398          * (RFC4035, section 2.5 and RFC3007).
4399          *
4400          * We don't check for RRSIG, because we don't store RRSIG records
4401          * directly.
4402          */
4403         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4404                 cname_ok = ISC_FALSE;
4405
4406         /*
4407          * We now go looking for rdata...
4408          */
4409
4410         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4411         locktype = isc_rwlocktype_read;
4412         NODE_LOCK(lock, locktype);
4413
4414         found = NULL;
4415         foundsig = NULL;
4416         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4417         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4418         nsheader = NULL;
4419         nssig = NULL;
4420         cnamesig = NULL;
4421         empty_node = ISC_TRUE;
4422         header_prev = NULL;
4423         for (header = node->data; header != NULL; header = header_next) {
4424                 header_next = header->next;
4425                 if (header->rdh_ttl <= now) {
4426                         /*
4427                          * This rdataset is stale.  If no one else is using the
4428                          * node, we can clean it up right now, otherwise we
4429                          * mark it as stale, and the node as dirty, so it will
4430                          * get cleaned up later.
4431                          */
4432                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4433                             (locktype == isc_rwlocktype_write ||
4434                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4435                                 /*
4436                                  * We update the node's status only when we
4437                                  * can get write access.
4438                                  */
4439                                 locktype = isc_rwlocktype_write;
4440
4441                                 if (dns_rbtnode_refcurrent(node) == 0) {
4442                                         isc_mem_t *mctx;
4443
4444                                         mctx = search.rbtdb->common.mctx;
4445                                         clean_stale_headers(search.rbtdb, mctx,
4446                                                             header);
4447                                         if (header_prev != NULL)
4448                                                 header_prev->next =
4449                                                         header->next;
4450                                         else
4451                                                 node->data = header->next;
4452                                         free_rdataset(search.rbtdb, mctx,
4453                                                       header);
4454                                 } else {
4455                                         header->attributes |=
4456                                                 RDATASET_ATTR_STALE;
4457                                         node->dirty = 1;
4458                                         header_prev = header;
4459                                 }
4460                         } else
4461                                 header_prev = header;
4462                 } else if (EXISTS(header)) {
4463                         /*
4464                          * We now know that there is at least one active
4465                          * non-stale rdataset at this node.
4466                          */
4467                         empty_node = ISC_FALSE;
4468
4469                         /*
4470                          * If we found a type we were looking for, remember
4471                          * it.
4472                          */
4473                         if (header->type == type ||
4474                             (type == dns_rdatatype_any &&
4475                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4476                             (cname_ok && header->type ==
4477                              dns_rdatatype_cname)) {
4478                                 /*
4479                                  * We've found the answer.
4480                                  */
4481                                 found = header;
4482                                 if (header->type == dns_rdatatype_cname &&
4483                                     cname_ok &&
4484                                     cnamesig != NULL) {
4485                                         /*
4486                                          * If we've already got the CNAME RRSIG,
4487                                          * use it, otherwise change sigtype
4488                                          * so that we find it.
4489                                          */
4490                                         if (cnamesig != NULL)
4491                                                 foundsig = cnamesig;
4492                                         else
4493                                                 sigtype =
4494                                                     RBTDB_RDATATYPE_SIGCNAME;
4495                                         foundsig = cnamesig;
4496                                 }
4497                         } else if (header->type == sigtype) {
4498                                 /*
4499                                  * We've found the RRSIG rdataset for our
4500                                  * target type.  Remember it.
4501                                  */
4502                                 foundsig = header;
4503                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4504                                    header->type == negtype) {
4505                                 /*
4506                                  * We've found a negative cache entry.
4507                                  */
4508                                 found = header;
4509                         } else if (header->type == dns_rdatatype_ns) {
4510                                 /*
4511                                  * Remember a NS rdataset even if we're
4512                                  * not specifically looking for it, because
4513                                  * we might need it later.
4514                                  */
4515                                 nsheader = header;
4516                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4517                                 /*
4518                                  * If we need the NS rdataset, we'll also
4519                                  * need its signature.
4520                                  */
4521                                 nssig = header;
4522                         } else if (cname_ok &&
4523                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4524                                 /*
4525                                  * If we get a CNAME match, we'll also need
4526                                  * its signature.
4527                                  */
4528                                 cnamesig = header;
4529                         }
4530                         header_prev = header;
4531                 } else
4532                         header_prev = header;
4533         }
4534
4535         if (empty_node) {
4536                 /*
4537                  * We have an exact match for the name, but there are no
4538                  * extant rdatasets.  That means that this node doesn't
4539                  * meaningfully exist, and that we really have a partial match.
4540                  */
4541                 NODE_UNLOCK(lock, locktype);
4542                 goto find_ns;
4543         }
4544
4545         /*
4546          * If we didn't find what we were looking for...
4547          */
4548         if (found == NULL ||
4549             (DNS_TRUST_ADDITIONAL(found->trust) &&
4550              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4551             (found->trust == dns_trust_glue &&
4552              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4553             (DNS_TRUST_PENDING(found->trust) &&
4554              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4555                 /*
4556                  * If there is an NS rdataset at this node, then this is the
4557                  * deepest zone cut.
4558                  */
4559                 if (nsheader != NULL) {
4560                         if (nodep != NULL) {
4561                                 new_reference(search.rbtdb, node);
4562                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4563                                 *nodep = node;
4564                         }
4565                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4566                                       rdataset);
4567                         if (need_headerupdate(nsheader, search.now))
4568                                 update = nsheader;
4569                         if (nssig != NULL) {
4570                                 bind_rdataset(search.rbtdb, node, nssig,
4571                                               search.now, sigrdataset);
4572                                 if (need_headerupdate(nssig, search.now))
4573                                         updatesig = nssig;
4574                         }
4575                         result = DNS_R_DELEGATION;
4576                         goto node_exit;
4577                 }
4578
4579                 /*
4580                  * Go find the deepest zone cut.
4581                  */
4582                 NODE_UNLOCK(lock, locktype);
4583                 goto find_ns;
4584         }
4585
4586         /*
4587          * We found what we were looking for, or we found a CNAME.
4588          */
4589
4590         if (nodep != NULL) {
4591                 new_reference(search.rbtdb, node);
4592                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4593                 *nodep = node;
4594         }
4595
4596         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4597                 /*
4598                  * We found a negative cache entry.
4599                  */
4600                 if (NXDOMAIN(found))
4601                         result = DNS_R_NCACHENXDOMAIN;
4602                 else
4603                         result = DNS_R_NCACHENXRRSET;
4604         } else if (type != found->type &&
4605                    type != dns_rdatatype_any &&
4606                    found->type == dns_rdatatype_cname) {
4607                 /*
4608                  * We weren't doing an ANY query and we found a CNAME instead
4609                  * of the type we were looking for, so we need to indicate
4610                  * that result to the caller.
4611                  */
4612                 result = DNS_R_CNAME;
4613         } else {
4614                 /*
4615                  * An ordinary successful query!
4616                  */
4617                 result = ISC_R_SUCCESS;
4618         }
4619
4620         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4621             result == DNS_R_NCACHENXRRSET) {
4622                 bind_rdataset(search.rbtdb, node, found, search.now,
4623                               rdataset);
4624                 if (need_headerupdate(found, search.now))
4625                         update = found;
4626                 if (foundsig != NULL) {
4627                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4628                                       sigrdataset);
4629                         if (need_headerupdate(foundsig, search.now))
4630                                 updatesig = foundsig;
4631                 }
4632         }
4633
4634  node_exit:
4635         if ((update != NULL || updatesig != NULL) &&
4636             locktype != isc_rwlocktype_write) {
4637                 NODE_UNLOCK(lock, locktype);
4638                 NODE_LOCK(lock, isc_rwlocktype_write);
4639                 locktype = isc_rwlocktype_write;
4640         }
4641         if (update != NULL && need_headerupdate(update, search.now))
4642                 update_header(search.rbtdb, update, search.now);
4643         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4644                 update_header(search.rbtdb, updatesig, search.now);
4645
4646         NODE_UNLOCK(lock, locktype);
4647
4648  tree_exit:
4649         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4650
4651         /*
4652          * If we found a zonecut but aren't going to use it, we have to
4653          * let go of it.
4654          */
4655         if (search.need_cleanup) {
4656                 node = search.zonecut;
4657                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4658
4659                 NODE_LOCK(lock, isc_rwlocktype_read);
4660                 decrement_reference(search.rbtdb, node, 0,
4661                                     isc_rwlocktype_read, isc_rwlocktype_none,
4662                                     ISC_FALSE);
4663                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4664         }
4665
4666         dns_rbtnodechain_reset(&search.chain);
4667
4668         return (result);
4669 }
4670
4671 static isc_result_t
4672 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4673                   isc_stdtime_t now, dns_dbnode_t **nodep,
4674                   dns_name_t *foundname,
4675                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4676 {
4677         dns_rbtnode_t *node = NULL;
4678         nodelock_t *lock;
4679         isc_result_t result;
4680         rbtdb_search_t search;
4681         rdatasetheader_t *header, *header_prev, *header_next;
4682         rdatasetheader_t *found, *foundsig;
4683         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4684         isc_rwlocktype_t locktype;
4685
4686         search.rbtdb = (dns_rbtdb_t *)db;
4687
4688         REQUIRE(VALID_RBTDB(search.rbtdb));
4689
4690         if (now == 0)
4691                 isc_stdtime_get(&now);
4692
4693         search.rbtversion = NULL;
4694         search.serial = 1;
4695         search.options = options;
4696         search.copy_name = ISC_FALSE;
4697         search.need_cleanup = ISC_FALSE;
4698         search.wild = ISC_FALSE;
4699         search.zonecut = NULL;
4700         dns_fixedname_init(&search.zonecut_name);
4701         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4702         search.now = now;
4703
4704         if ((options & DNS_DBFIND_NOEXACT) != 0)
4705                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4706
4707         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4708
4709         /*
4710          * Search down from the root of the tree.
4711          */
4712         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4713                                   &search.chain, rbtoptions, NULL, &search);
4714
4715         if (result == DNS_R_PARTIALMATCH) {
4716         find_ns:
4717                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4718                                               rdataset, sigrdataset);
4719                 goto tree_exit;
4720         } else if (result != ISC_R_SUCCESS)
4721                 goto tree_exit;
4722
4723         /*
4724          * We now go looking for an NS rdataset at the node.
4725          */
4726
4727         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4728         locktype = isc_rwlocktype_read;
4729         NODE_LOCK(lock, locktype);
4730
4731         found = NULL;
4732         foundsig = NULL;
4733         header_prev = NULL;
4734         for (header = node->data; header != NULL; header = header_next) {
4735                 header_next = header->next;
4736                 if (header->rdh_ttl <= now) {
4737                         /*
4738                          * This rdataset is stale.  If no one else is using the
4739                          * node, we can clean it up right now, otherwise we
4740                          * mark it as stale, and the node as dirty, so it will
4741                          * get cleaned up later.
4742                          */
4743                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4744                             (locktype == isc_rwlocktype_write ||
4745                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4746                                 /*
4747                                  * We update the node's status only when we
4748                                  * can get write access.
4749                                  */
4750                                 locktype = isc_rwlocktype_write;
4751
4752                                 if (dns_rbtnode_refcurrent(node) == 0) {
4753                                         isc_mem_t *mctx;
4754
4755                                         mctx = search.rbtdb->common.mctx;
4756                                         clean_stale_headers(search.rbtdb, mctx,
4757                                                             header);
4758                                         if (header_prev != NULL)
4759                                                 header_prev->next =
4760                                                         header->next;
4761                                         else
4762                                                 node->data = header->next;
4763                                         free_rdataset(search.rbtdb, mctx,
4764                                                       header);
4765                                 } else {
4766                                         header->attributes |=
4767                                                 RDATASET_ATTR_STALE;
4768                                         node->dirty = 1;
4769                                         header_prev = header;
4770                                 }
4771                         } else
4772                                 header_prev = header;
4773                 } else if (EXISTS(header)) {
4774                         /*
4775                          * If we found a type we were looking for, remember
4776                          * it.
4777                          */
4778                         if (header->type == dns_rdatatype_ns) {
4779                                 /*
4780                                  * Remember a NS rdataset even if we're
4781                                  * not specifically looking for it, because
4782                                  * we might need it later.
4783                                  */
4784                                 found = header;
4785                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4786                                 /*
4787                                  * If we need the NS rdataset, we'll also
4788                                  * need its signature.
4789                                  */
4790                                 foundsig = header;
4791                         }
4792                         header_prev = header;
4793                 } else
4794                         header_prev = header;
4795         }
4796
4797         if (found == NULL) {
4798                 /*
4799                  * No NS records here.
4800                  */
4801                 NODE_UNLOCK(lock, locktype);
4802                 goto find_ns;
4803         }
4804
4805         if (nodep != NULL) {
4806                 new_reference(search.rbtdb, node);
4807                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4808                 *nodep = node;
4809         }
4810
4811         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4812         if (foundsig != NULL)
4813                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4814                               sigrdataset);
4815
4816         if (need_headerupdate(found, search.now) ||
4817             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4818                 if (locktype != isc_rwlocktype_write) {
4819                         NODE_UNLOCK(lock, locktype);
4820                         NODE_LOCK(lock, isc_rwlocktype_write);
4821                         locktype = isc_rwlocktype_write;
4822                 }
4823                 if (need_headerupdate(found, search.now))
4824                         update_header(search.rbtdb, found, search.now);
4825                 if (foundsig != NULL &&
4826                     need_headerupdate(foundsig, search.now)) {
4827                         update_header(search.rbtdb, foundsig, search.now);
4828                 }
4829         }
4830
4831         NODE_UNLOCK(lock, locktype);
4832
4833  tree_exit:
4834         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4835
4836         INSIST(!search.need_cleanup);
4837
4838         dns_rbtnodechain_reset(&search.chain);
4839
4840         if (result == DNS_R_DELEGATION)
4841                 result = ISC_R_SUCCESS;
4842
4843         return (result);
4844 }
4845
4846 static void
4847 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4848         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4849         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4850         unsigned int refs;
4851
4852         REQUIRE(VALID_RBTDB(rbtdb));
4853         REQUIRE(targetp != NULL && *targetp == NULL);
4854
4855         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4856         dns_rbtnode_refincrement(node, &refs);
4857         INSIST(refs != 0);
4858         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4859
4860         *targetp = source;
4861 }
4862
/*
 * Release the node reference held in '*targetp' and set '*targetp' to NULL.
 *
 * The reference is dropped with decrement_reference() while a read lock on
 * the node's lock bucket is held.  If that call returns true, and the bucket
 * has no references left and is marked 'exiting', the database's 'active'
 * counter is decremented under the database write lock.  When the counter
 * reaches zero the database is destroyed with free_rbtdb().
 */
4863 static void
4864 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4865         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4866         dns_rbtnode_t *node;
4867         isc_boolean_t want_free = ISC_FALSE;
4868         isc_boolean_t inactive = ISC_FALSE;
4869         rbtdb_nodelock_t *nodelock;
4870
4871         REQUIRE(VALID_RBTDB(rbtdb));
4872         REQUIRE(targetp != NULL && *targetp != NULL);
4873
4874         node = (dns_rbtnode_t *)(*targetp);
4875         nodelock = &rbtdb->node_locks[node->locknum];
4876
4877         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4878
4879         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4880                                 isc_rwlocktype_none, ISC_FALSE)) {
4881                 if (isc_refcount_current(&nodelock->references) == 0 &&
4882                     nodelock->exiting) {
4883                         inactive = ISC_TRUE;
4884                 }
4885         }
4886
4887         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4888
4889         *targetp = NULL;
4890
             /*
              * The node lock has been released at this point; only the
              * database lock is taken from here on.
              */
4891         if (inactive) {
4892                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4893                 rbtdb->active--;
4894                 if (rbtdb->active == 0)
4895                         want_free = ISC_TRUE;
4896                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4897                 if (want_free) {
4898                         char buf[DNS_NAME_FORMATSIZE];
                             /*
                              * The origin is formatted for the log message
                              * only when dns_name_dynamic() reports true;
                              * otherwise a placeholder string is logged.
                              */
4899                         if (dns_name_dynamic(&rbtdb->common.origin))
4900                                 dns_name_format(&rbtdb->common.origin, buf,
4901                                                 sizeof(buf));
4902                         else
4903                                 strcpy(buf, "<UNKNOWN>");
4904                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4905                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4906                                       "calling free_rbtdb(%s)", buf);
4907                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4908                 }
4909         }
4910 }
4911
/*
 * Mark rdataset headers at 'node' as stale.
 *
 * If 'now' is 0 the current time is fetched.  When rbtdb->overmem is set, a
 * random value decides whether expiry is forced: it is forced only if the
 * node has no 'down' pointer and the random value is a multiple of 4.
 *
 * Under a write lock on the node's lock bucket, each header in the node's
 * top-level list is examined:
 *   - rdh_ttl <= now - RBTDB_VIRTUAL: the header is flagged
 *     RDATASET_ATTR_STALE and the node is marked dirty;
 *   - otherwise, if expiry is forced and RETAIN(header) is false: the TTL is
 *     set to 0, the header is flagged stale and the node is marked dirty.
 *
 * Always returns ISC_R_SUCCESS.
 */
4912 static isc_result_t
4913 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4914         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4915         dns_rbtnode_t *rbtnode = node;
4916         rdatasetheader_t *header;
4917         isc_boolean_t force_expire = ISC_FALSE;
4918         /*
4919          * These are the category and module used by the cache cleaner.
4920          */
4921         isc_boolean_t log = ISC_FALSE;
4922         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4923         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4924         int level = ISC_LOG_DEBUG(2);
             /*
              * 'printname' is handed to dns_rbt_formatnodename() only when
              * 'log' is true; every later use of it is guarded by 'log'.
              */
4925         char printname[DNS_NAME_FORMATSIZE];
4926
4927         REQUIRE(VALID_RBTDB(rbtdb));
4928
4929         /*
4930          * Caller must hold a tree lock.
4931          */
4932
4933         if (now == 0)
4934                 isc_stdtime_get(&now);
4935
4936         if (rbtdb->overmem) {
4937                 isc_uint32_t val;
4938
4939                 isc_random_get(&val);
4940                 /*
4941                  * XXXDCL Could stand to have a better policy, like LRU.
4942                  */
4943                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4944
4945                 /*
4946                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
4947                  * rbtdb->overmem can currently only be true for cache
4948                  * databases -- hence all of the "overmem cache" log strings.
4949                  */
4950                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4951                 if (log)
4952                         isc_log_write(dns_lctx, category, module, level,
4953                                       "overmem cache: %s %s",
4954                                       force_expire ? "FORCE" : "check",
4955                                       dns_rbt_formatnodename(rbtnode,
4956                                                            printname,
4957                                                            sizeof(printname)));
4958         }
4959
4960         /*
4961          * We may not need write access, but this code path is not performance
4962          * sensitive, so it should be okay to always lock as a writer.
4963          */
4964         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4965                   isc_rwlocktype_write);
4966
             /* Only the top-level headers (the 'next' chain) are visited. */
4967         for (header = rbtnode->data; header != NULL; header = header->next)
4968                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4969                         /*
4970                          * We don't check if refcurrent(rbtnode) == 0 and try
4971                          * to free like we do in cache_find(), because
4972                          * refcurrent(rbtnode) must be non-zero.  This is so
4973                          * because 'node' is an argument to the function.
4974                          */
4975                         header->attributes |= RDATASET_ATTR_STALE;
4976                         rbtnode->dirty = 1;
4977                         if (log)
4978                                 isc_log_write(dns_lctx, category, module,
4979                                               level, "overmem cache: stale %s",
4980                                               printname);
4981                 } else if (force_expire) {
4982                         if (! RETAIN(header)) {
4983                                 set_ttl(rbtdb, header, 0);
4984                                 header->attributes |= RDATASET_ATTR_STALE;
4985                                 rbtnode->dirty = 1;
4986                         } else if (log) {
4987                                 isc_log_write(dns_lctx, category, module,
4988                                               level, "overmem cache: "
4989                                               "reprieve by RETAIN() %s",
4990                                               printname);
4991                         }
4992                 } else if (rbtdb->overmem && log)
4993                         isc_log_write(dns_lctx, category, module, level,
4994                                       "overmem cache: saved %s", printname);
4995
4996         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4997                     isc_rwlocktype_write);
4998
4999         return (ISC_R_SUCCESS);
5000 }
5001
/*
 * Record the over-memory flag 'overmem' in the database.
 *
 * The flag is stored only when IS_CACHE(rbtdb) is true; for any other
 * database the call has no effect.
 */
5002 static void
5003 overmem(dns_db_t *db, isc_boolean_t overmem) {
5004         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5005
5006         if (IS_CACHE(rbtdb))
5007                 rbtdb->overmem = overmem;
5008 }
5009
/*
 * Write a human-readable description of 'node' to the stream 'out'.
 *
 * The first line shows the node pointer, its current reference count and its
 * lock number.  Then, for each top-level rdataset header, the type is
 * printed followed by one line per header on that header's 'down' chain
 * giving serial, ttl, trust, attributes and resign.  A node without data
 * prints "(empty)".  The node's lock bucket is held for reading throughout.
 */
5010 static void
5011 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5012         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5013         dns_rbtnode_t *rbtnode = node;
5014         isc_boolean_t first;
5015
5016         REQUIRE(VALID_RBTDB(rbtdb));
5017
5018         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5019                   isc_rwlocktype_read);
5020
5021         fprintf(out, "node %p, %u references, locknum = %u\n",
5022                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5023                 rbtnode->locknum);
5024         if (rbtnode->data != NULL) {
5025                 rdatasetheader_t *current, *top_next;
5026
5027                 for (current = rbtnode->data; current != NULL;
5028                      current = top_next) {
                             /*
                              * Save the next top-level header first: the
                              * inner loop advances 'current' along the
                              * 'down' chain until it becomes NULL.
                              */
5029                         top_next = current->next;
5030                         first = ISC_TRUE;
5031                         fprintf(out, "\ttype %u", current->type);
5032                         do {
                                     /*
                                      * Every entry after the first gets an
                                      * extra leading tab.
                                      */
5033                                 if (!first)
5034                                         fprintf(out, "\t");
5035                                 first = ISC_FALSE;
5036                                 fprintf(out,
5037                                         "\tserial = %lu, ttl = %u, "
5038                                         "trust = %u, attributes = %u, "
5039                                         "resign = %u\n",
5040                                         (unsigned long)current->serial,
5041                                         current->rdh_ttl,
5042                                         current->trust,
5043                                         current->attributes,
5044                                         current->resign);
5045                                 current = current->down;
5046                         } while (current != NULL);
5047                 }
5048         } else
5049                 fprintf(out, "(empty)\n");
5050
5051         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5052                     isc_rwlocktype_read);
5053 }
5054
/*
 * Allocate and initialize a database iterator for 'db' and store it in
 * '*iteratorp'.
 *
 * 'options' is consulted for three flags: DNS_DB_RELATIVENAMES sets
 * common.relative_names, DNS_DB_NSEC3ONLY sets 'nsec3only' and
 * DNS_DB_NONSEC3 sets 'nonsec3'.  The iterator starts paused, with no tree
 * lock held, and holds its own attachment to 'db'.
 *
 * Returns ISC_R_NOMEMORY if the iterator cannot be allocated, otherwise
 * ISC_R_SUCCESS.
 *
 * NOTE(review): 'iteratorp' is written without being checked by a REQUIRE
 * in this function.
 */
5055 static isc_result_t
5056 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5057 {
5058         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5059         rbtdb_dbiterator_t *rbtdbiter;
5060
5061         REQUIRE(VALID_RBTDB(rbtdb));
5062
5063         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5064         if (rbtdbiter == NULL)
5065                 return (ISC_R_NOMEMORY);
5066
5067         rbtdbiter->common.methods = &dbiterator_methods;
5068         rbtdbiter->common.db = NULL;
5069         dns_db_attach(db, &rbtdbiter->common.db);
5070         rbtdbiter->common.relative_names =
5071                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5072         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5073         rbtdbiter->common.cleaning = ISC_FALSE;
5074         rbtdbiter->paused = ISC_TRUE;
5075         rbtdbiter->tree_locked = isc_rwlocktype_none;
5076         rbtdbiter->result = ISC_R_SUCCESS;
5077         dns_fixedname_init(&rbtdbiter->name);
5078         dns_fixedname_init(&rbtdbiter->origin);
5079         rbtdbiter->node = NULL;
5080         rbtdbiter->delete = 0;
5081         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5082         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5083         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5084         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5085         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
             /*
              * Both chains are initialized; 'current' selects the NSEC3 chain
              * only when the caller asked for NSEC3 nodes exclusively.
              */
5086         if (rbtdbiter->nsec3only)
5087                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5088         else
5089                 rbtdbiter->current = &rbtdbiter->chain;
5090
5091         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5092
5093         return (ISC_R_SUCCESS);
5094 }
5095
/*
 * Look up the rdataset of type 'type' (covering 'covers') at 'node', as seen
 * in 'version', and bind it to 'rdataset'.
 *
 * If 'version' is NULL the current version is obtained with currentversion()
 * and closed again before returning.  'type' must not be dns_rdatatype_any.
 * The caller-supplied 'now' is not used: it is overwritten with 0.
 *
 * When 'covers' is 0 the matching RRSIG rdataset is looked for as well and,
 * if both it and the requested rdataset are found, bound to 'sigrdataset'.
 *
 * Returns ISC_R_SUCCESS if the requested rdataset was found, otherwise
 * ISC_R_NOTFOUND.
 */
5096 static isc_result_t
5097 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5098                   dns_rdatatype_t type, dns_rdatatype_t covers,
5099                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5100                   dns_rdataset_t *sigrdataset)
5101 {
5102         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5103         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5104         rdatasetheader_t *header, *header_next, *found, *foundsig;
5105         rbtdb_serial_t serial;
5106         rbtdb_version_t *rbtversion = version;
5107         isc_boolean_t close_version = ISC_FALSE;
5108         rbtdb_rdatatype_t matchtype, sigmatchtype;
5109
5110         REQUIRE(VALID_RBTDB(rbtdb));
5111         REQUIRE(type != dns_rdatatype_any);
5112
5113         if (rbtversion == NULL) {
5114                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5115                 close_version = ISC_TRUE;
5116         }
5117         serial = rbtversion->serial;
             /*
              * The caller-supplied 'now' is overwritten; bind_rdataset() below
              * always receives 0.
              */
5118         now = 0;
5119
5120         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5121                   isc_rwlocktype_read);
5122
5123         found = NULL;
5124         foundsig = NULL;
5125         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5126         if (covers == 0)
5127                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5128         else
5129                 sigmatchtype = 0;
5130
5131         for (header = rbtnode->data; header != NULL; header = header_next) {
5132                 header_next = header->next;
                     /*
                      * Walk the 'down' chain to the first header whose serial
                      * is not newer than ours and which is not IGNOREd.
                      */
5133                 do {
5134                         if (header->serial <= serial &&
5135                             !IGNORE(header)) {
5136                                 /*
5137                                  * Is this a "this rdataset doesn't
5138                                  * exist" record?
5139                                  */
5140                                 if (NONEXISTENT(header))
5141                                         header = NULL;
5142                                 break;
5143                         } else
5144                                 header = header->down;
5145                 } while (header != NULL);
5146                 if (header != NULL) {
5147                         /*
5148                          * We have an active, extant rdataset.  If it's a
5149                          * type we're looking for, remember it.
5150                          */
5151                         if (header->type == matchtype) {
5152                                 found = header;
5153                                 if (foundsig != NULL)
5154                                         break;
5155                         } else if (header->type == sigmatchtype) {
5156                                 foundsig = header;
5157                                 if (found != NULL)
5158                                         break;
5159                         }
5160                 }
5161         }
             /* The signature is bound only together with the rdataset itself. */
5162         if (found != NULL) {
5163                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5164                 if (foundsig != NULL)
5165                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5166                                       sigrdataset);
5167         }
5168
5169         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5170                     isc_rwlocktype_read);
5171
5172         if (close_version)
5173                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5174                              ISC_FALSE);
5175
5176         if (found == NULL)
5177                 return (ISC_R_NOTFOUND);
5178
5179         return (ISC_R_SUCCESS);
5180 }
5181
/*
 * Look up the rdataset of type 'type' (covering 'covers') at 'node' and bind
 * it to 'rdataset'.  'version' is unused.
 *
 * If 'now' is 0 the current time is fetched.  'type' must not be
 * dns_rdatatype_any.  Headers with rdh_ttl <= now are never returned; those
 * with rdh_ttl <= now - RBTDB_VIRTUAL are additionally flagged stale, but
 * only if the node lock is already held for writing or can be upgraded.
 *
 * A header matches if its type equals the requested type, or if it is
 * RBTDB_RDATATYPE_NCACHEANY or the negative type for 'type'.  When 'covers'
 * is 0 the matching RRSIG rdataset is looked for too and, if an rdataset was
 * found, bound to 'sigrdataset'.
 *
 * Returns:
 *   ISC_R_NOTFOUND         nothing matched;
 *   DNS_R_NCACHENXDOMAIN   the match has base type 0 and NXDOMAIN() is true;
 *   DNS_R_NCACHENXRRSET    the match has base type 0 otherwise;
 *   ISC_R_SUCCESS          any other match.
 */
5182 static isc_result_t
5183 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5184                    dns_rdatatype_t type, dns_rdatatype_t covers,
5185                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5186                    dns_rdataset_t *sigrdataset)
5187 {
5188         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5189         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5190         rdatasetheader_t *header, *header_next, *found, *foundsig;
5191         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5192         isc_result_t result;
5193         nodelock_t *lock;
5194         isc_rwlocktype_t locktype;
5195
5196         REQUIRE(VALID_RBTDB(rbtdb));
5197         REQUIRE(type != dns_rdatatype_any);
5198
5199         UNUSED(version);
5200
5201         result = ISC_R_SUCCESS;
5202
5203         if (now == 0)
5204                 isc_stdtime_get(&now);
5205
             /*
              * 'locktype' tracks the mode the lock is currently held in, so
              * that the final NODE_UNLOCK matches a possible upgrade.
              */
5206         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5207         locktype = isc_rwlocktype_read;
5208         NODE_LOCK(lock, locktype);
5209
5210         found = NULL;
5211         foundsig = NULL;
5212         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5213         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5214         if (covers == 0)
5215                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5216         else
5217                 sigmatchtype = 0;
5218
             /*
              * There is no early exit: the whole list is scanned, so a later
              * matching header replaces an earlier one in 'found'.
              */
5219         for (header = rbtnode->data; header != NULL; header = header_next) {
5220                 header_next = header->next;
5221                 if (header->rdh_ttl <= now) {
5222                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5223                             (locktype == isc_rwlocktype_write ||
5224                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5225                                 /*
5226                                  * We update the node's status only when we
5227                                  * can get write access.
5228                                  */
5229                                 locktype = isc_rwlocktype_write;
5230
5231                                 /*
5232                                  * We don't check if refcurrent(rbtnode) == 0
5233                                  * and try to free like we do in cache_find(),
5234                                  * because refcurrent(rbtnode) must be
5235                                  * non-zero.  This is so because 'node' is an
5236                                  * argument to the function.
5237                                  */
5238                                 header->attributes |= RDATASET_ATTR_STALE;
5239                                 rbtnode->dirty = 1;
5240                         }
5241                 } else if (EXISTS(header)) {
5242                         if (header->type == matchtype)
5243                                 found = header;
5244                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5245                                  header->type == negtype)
5246                                 found = header;
5247                         else if (header->type == sigmatchtype)
5248                                 foundsig = header;
5249                 }
5250         }
5251         if (found != NULL) {
5252                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5253                 if (foundsig != NULL)
5254                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5255                                       sigrdataset);
5256         }
5257
5258         NODE_UNLOCK(lock, locktype);
5259
5260         if (found == NULL)
5261                 return (ISC_R_NOTFOUND);
5262
             /*
              * NOTE(review): 'found' is dereferenced below after the node lock
              * has been released -- confirm the header cannot go away while
              * the caller holds its reference on 'node'.
              */
5263         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5264                 /*
5265                  * We found a negative cache entry.
5266                  */
5267                 if (NXDOMAIN(found))
5268                         result = DNS_R_NCACHENXDOMAIN;
5269                 else
5270                         result = DNS_R_NCACHENXRRSET;
5271         }
5272
5273         return (result);
5274 }
5275
/*
 * Create an rdataset iterator for 'node' and store it in '*iteratorp'.
 *
 * For a database without DNS_DBATTR_CACHE, 'now' is forced to 0 and the
 * iterator records a version: the current version if 'version' is NULL,
 * otherwise the supplied version, on which an extra reference is taken.
 * For a cache database, a 'now' of 0 is replaced by the current time and no
 * version is recorded.
 *
 * A reference on 'node' is taken (under NODE_STRONGLOCK) on behalf of the
 * iterator.
 *
 * Returns ISC_R_NOMEMORY if the iterator cannot be allocated, otherwise
 * ISC_R_SUCCESS.
 */
5276 static isc_result_t
5277 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5278              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5279 {
5280         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5281         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5282         rbtdb_version_t *rbtversion = version;
5283         rbtdb_rdatasetiter_t *iterator;
5284         unsigned int refs;
5285
5286         REQUIRE(VALID_RBTDB(rbtdb));
5287
5288         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5289         if (iterator == NULL)
5290                 return (ISC_R_NOMEMORY);
5291
5292         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5293                 now = 0;
5294                 if (rbtversion == NULL)
5295                         currentversion(db,
5296                                  (dns_dbversion_t **) (void *)(&rbtversion));
5297                 else {
                             /*
                              * NOTE(review): this declaration shadows the
                              * function-scope 'refs' declared above.
                              */
5298                         unsigned int refs;
5299
5300                         isc_refcount_increment(&rbtversion->references,
5301                                                &refs);
                             /*
                              * The caller's own reference plus the one just
                              * taken must give a count above 1.
                              */
5302                         INSIST(refs > 1);
5303                 }
5304         } else {
5305                 if (now == 0)
5306                         isc_stdtime_get(&now);
5307                 rbtversion = NULL;
5308         }
5309
5310         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5311         iterator->common.methods = &rdatasetiter_methods;
5312         iterator->common.db = db;
5313         iterator->common.node = node;
5314         iterator->common.version = (dns_dbversion_t *)rbtversion;
5315         iterator->common.now = now;
5316
5317         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5318
5319         dns_rbtnode_refincrement(rbtnode, &refs);
5320         INSIST(refs != 0);
5321
5322         iterator->current = NULL;
5323
5324         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5325
5326         *iteratorp = (dns_rdatasetiter_t *)iterator;
5327
5328         return (ISC_R_SUCCESS);
5329 }
5330
/*
 * Report whether 'node' holds, as of 'serial', both an active extant CNAME
 * rdataset and active extant "other data".
 *
 * For each top-level header the 'down' chain is walked to the first header
 * whose serial is <= 'serial' and which is not IGNOREd; a NONEXISTENT header
 * found that way counts as absent.  Types whose base type is KEY, SIG, NSEC
 * or RRSIG are not treated as "other data".
 *
 * Returns ISC_TRUE only if both kinds of data are present, else ISC_FALSE.
 */
5331 static isc_boolean_t
5332 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5333         rdatasetheader_t *header, *header_next;
5334         isc_boolean_t cname, other_data;
5335         dns_rdatatype_t rdtype;
5336
5337         /*
5338          * The caller must hold the node lock.
5339          */
5340
5341         /*
5342          * Look for CNAME and "other data" rdatasets active in our version.
5343          */
5344         cname = ISC_FALSE;
5345         other_data = ISC_FALSE;
5346         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Saved up front because 'header' is moved along its
                      * 'down' chain (possibly to NULL) below.
                      */
5347                 header_next = header->next;
5348                 if (header->type == dns_rdatatype_cname) {
5349                         /*
5350                          * Look for an active extant CNAME.
5351                          */
5352                         do {
5353                                 if (header->serial <= serial &&
5354                                     !IGNORE(header)) {
5355                                         /*
5356                                          * Is this a "this rdataset doesn't
5357                                          * exist" record?
5358                                          */
5359                                         if (NONEXISTENT(header))
5360                                                 header = NULL;
5361                                         break;
5362                                 } else
5363                                         header = header->down;
5364                         } while (header != NULL);
5365                         if (header != NULL)
5366                                 cname = ISC_TRUE;
5367                 } else {
5368                         /*
5369                          * Look for active extant "other data".
5370                          *
5371                          * "Other data" is any rdataset whose type is not
5372                          * KEY, NSEC, SIG or RRSIG.
5373                          */
5374                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5375                         if (rdtype != dns_rdatatype_key &&
5376                             rdtype != dns_rdatatype_sig &&
5377                             rdtype != dns_rdatatype_nsec &&
5378                             rdtype != dns_rdatatype_rrsig) {
5379                                 /*
5380                                  * Is it active and extant?
5381                                  */
5382                                 do {
5383                                         if (header->serial <= serial &&
5384                                             !IGNORE(header)) {
5385                                                 /*
5386                                                  * Is this a "this rdataset
5387                                                  * doesn't exist" record?
5388                                                  */
5389                                                 if (NONEXISTENT(header))
5390                                                         header = NULL;
5391                                                 break;
5392                                         } else
5393                                                 header = header->down;
5394                                 } while (header != NULL);
5395                                 if (header != NULL)
5396                                         other_data = ISC_TRUE;
5397                         }
5398                 }
5399         }
5400
5401         if (cname && other_data)
5402                 return (ISC_TRUE);
5403
5404         return (ISC_FALSE);
5405 }
5406
/*
 * Insert 'newheader' into the heap rbtdb->heaps[idx].
 *
 * The header must not already carry a heap index (heap_index == 0) and must
 * not be linked on 'lru_link'; both conditions are enforced with INSIST.
 *
 * Returns whatever isc_heap_insert() returns.
 */
5407 static isc_result_t
5408 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5409         isc_result_t result;
5410
5411         INSIST(newheader->heap_index == 0);
5412         INSIST(!ISC_LINK_LINKED(newheader, lru_link));
5413         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5414         return (result);
5415 }
5416
5417 static isc_result_t
5418 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5419     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5420     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5421 {
5422         rbtdb_changed_t *changed = NULL;
5423         rdatasetheader_t *topheader, *topheader_prev, *header;
5424         unsigned char *merged;
5425         isc_result_t result;
5426         isc_boolean_t header_nx;
5427         isc_boolean_t newheader_nx;
5428         isc_boolean_t merge;
5429         dns_rdatatype_t rdtype, covers;
5430         rbtdb_rdatatype_t negtype;
5431         dns_trust_t trust;
5432         int idx;
5433
5434         /*
5435          * Add an rdatasetheader_t to a node.
5436          */
5437
5438         /*
5439          * Caller must be holding the node lock.
5440          */
5441
5442         if ((options & DNS_DBADD_MERGE) != 0) {
5443                 REQUIRE(rbtversion != NULL);
5444                 merge = ISC_TRUE;
5445         } else
5446                 merge = ISC_FALSE;
5447
5448         if ((options & DNS_DBADD_FORCE) != 0)
5449                 trust = dns_trust_ultimate;
5450         else
5451                 trust = newheader->trust;
5452
5453         if (rbtversion != NULL && !loading) {
5454                 /*
5455                  * We always add a changed record, even if no changes end up
5456                  * being made to this node, because it's harmless and
5457                  * simplifies the code.
5458                  */
5459                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5460                 if (changed == NULL) {
5461                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5462                         return (ISC_R_NOMEMORY);
5463                 }
5464         }
5465
5466         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5467         topheader_prev = NULL;
5468
5469         negtype = 0;
5470         if (rbtversion == NULL && !newheader_nx) {
5471                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5472                 if (rdtype == 0) {
5473                         /*
5474                          * We're adding a negative cache entry.
5475                          */
5476                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5477                         if (covers == dns_rdatatype_any) {
5478                                 /*
5479                                  * We're adding a negative cache entry
5480                                  * which covers all types (NXDOMAIN,
5481                                  * NODATA(QTYPE=ANY)).
5482                                  *
5483                                  * We make all other data stale so that the
5484                                  * only rdataset that can be found at this
5485                                  * node is the negative cache entry.
5486                                  */
5487                                 for (topheader = rbtnode->data;
5488                                      topheader != NULL;
5489                                      topheader = topheader->next) {
5490                                         set_ttl(rbtdb, topheader, 0);
5491                                         topheader->attributes |=
5492                                                 RDATASET_ATTR_STALE;
5493                                 }
5494                                 rbtnode->dirty = 1;
5495                                 goto find_header;
5496                         }
5497                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5498                 } else {
5499                         /*
5500                          * We're adding something that isn't a
5501                          * negative cache entry.  Look for an extant
5502                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5503                          * cache entry.
5504                          */
5505                         for (topheader = rbtnode->data;
5506                              topheader != NULL;
5507                              topheader = topheader->next) {
5508                                 if (topheader->type ==
5509                                     RBTDB_RDATATYPE_NCACHEANY)
5510                                         break;
5511                         }
5512                         if (topheader != NULL && EXISTS(topheader) &&
5513                             topheader->rdh_ttl > now) {
5514                                 /*
5515                                  * Found one.
5516                                  */
5517                                 if (trust < topheader->trust) {
5518                                         /*
5519                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5520                                          * is more trusted.
5521                                          */
5522                                         free_rdataset(rbtdb,
5523                                                       rbtdb->common.mctx,
5524                                                       newheader);
5525                                         if (addedrdataset != NULL)
5526                                                 bind_rdataset(rbtdb, rbtnode,
5527                                                               topheader, now,
5528                                                               addedrdataset);
5529                                         return (DNS_R_UNCHANGED);
5530                                 }
5531                                 /*
5532                                  * The new rdataset is better.  Expire the
5533                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5534                                  */
5535                                 set_ttl(rbtdb, topheader, 0);
5536                                 topheader->attributes |= RDATASET_ATTR_STALE;
5537                                 rbtnode->dirty = 1;
5538                                 topheader = NULL;
5539                                 goto find_header;
5540                         }
5541                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5542                 }
5543         }
5544
5545         for (topheader = rbtnode->data;
5546              topheader != NULL;
5547              topheader = topheader->next) {
5548                 if (topheader->type == newheader->type ||
5549                     topheader->type == negtype)
5550                         break;
5551                 topheader_prev = topheader;
5552         }
5553
5554  find_header:
5555         /*
5556          * If header isn't NULL, we've found the right type.  There may be
5557          * IGNORE rdatasets between the top of the chain and the first real
5558          * data.  We skip over them.
5559          */
5560         header = topheader;
5561         while (header != NULL && IGNORE(header))
5562                 header = header->down;
5563         if (header != NULL) {
5564                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5565
5566                 /*
5567                  * Deleting an already non-existent rdataset has no effect.
5568                  */
5569                 if (header_nx && newheader_nx) {
5570                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5571                         return (DNS_R_UNCHANGED);
5572                 }
5573
5574                 /*
5575                  * Trying to add an rdataset with lower trust to a cache DB
5576                  * has no effect, provided that the cache data isn't stale.
5577                  */
5578                 if (rbtversion == NULL && trust < header->trust &&
5579                     (header->rdh_ttl > now || header_nx)) {
5580                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5581                         if (addedrdataset != NULL)
5582                                 bind_rdataset(rbtdb, rbtnode, header, now,
5583                                               addedrdataset);
5584                         return (DNS_R_UNCHANGED);
5585                 }
5586
5587                 /*
5588                  * Don't merge if a nonexistent rdataset is involved.
5589                  */
5590                 if (merge && (header_nx || newheader_nx))
5591                         merge = ISC_FALSE;
5592
5593                 /*
5594                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5595                  * that is the union of 'newheader' and 'header'.
5596                  */
5597                 if (merge) {
5598                         unsigned int flags = 0;
5599                         INSIST(rbtversion->serial >= header->serial);
5600                         merged = NULL;
5601                         result = ISC_R_SUCCESS;
5602
5603                         if ((options & DNS_DBADD_EXACT) != 0)
5604                                 flags |= DNS_RDATASLAB_EXACT;
5605                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5606                              newheader->rdh_ttl != header->rdh_ttl)
5607                                         result = DNS_R_NOTEXACT;
5608                         else if (newheader->rdh_ttl != header->rdh_ttl)
5609                                 flags |= DNS_RDATASLAB_FORCE;
5610                         if (result == ISC_R_SUCCESS)
5611                                 result = dns_rdataslab_merge(
5612                                              (unsigned char *)header,
5613                                              (unsigned char *)newheader,
5614                                              (unsigned int)(sizeof(*newheader)),
5615                                              rbtdb->common.mctx,
5616                                              rbtdb->common.rdclass,
5617                                              (dns_rdatatype_t)header->type,
5618                                              flags, &merged);
5619                         if (result == ISC_R_SUCCESS) {
5620                                 /*
5621                                  * If 'header' has the same serial number as
5622                                  * we do, we could clean it up now if we knew
5623                                  * that our caller had no references to it.
5624                                  * We don't know this, however, so we leave it
5625                                  * alone.  It will get cleaned up when
5626                                  * clean_zone_node() runs.
5627                                  */
5628                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5629                                               newheader);
5630                                 newheader = (rdatasetheader_t *)merged;
5631                                 if (loading && RESIGN(newheader) &&
5632                                     RESIGN(header) &&
5633                                     header->resign < newheader->resign)
5634                                         newheader->resign = header->resign;
5635                         } else {
5636                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5637                                               newheader);
5638                                 return (result);
5639                         }
5640                 }
5641                 /*
5642                  * Don't replace existing NS, A and AAAA RRsets
5643                  * in the cache if they already exist.  This
5644                  * prevents named being locked to old servers.
5645                  * Don't lower trust of existing record if the
5646                  * update is forced.
5647                  */
5648                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5649                     header->type == dns_rdatatype_ns &&
5650                     !header_nx && !newheader_nx &&
5651                     header->trust >= newheader->trust &&
5652                     dns_rdataslab_equalx((unsigned char *)header,
5653                                          (unsigned char *)newheader,
5654                                          (unsigned int)(sizeof(*newheader)),
5655                                          rbtdb->common.rdclass,
5656                                          (dns_rdatatype_t)header->type)) {
5657                         /*
5658                          * Honour the new ttl if it is less than the
5659                          * older one.
5660                          */
5661                         if (header->rdh_ttl > newheader->rdh_ttl)
5662                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5663                         if (header->noqname == NULL &&
5664                             newheader->noqname != NULL) {
5665                                 header->noqname = newheader->noqname;
5666                                 newheader->noqname = NULL;
5667                         }
5668                         if (header->closest == NULL &&
5669                             newheader->closest != NULL) {
5670                                 header->closest = newheader->closest;
5671                                 newheader->closest = NULL;
5672                         }
5673                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5674                         if (addedrdataset != NULL)
5675                                 bind_rdataset(rbtdb, rbtnode, header, now,
5676                                               addedrdataset);
5677                         return (ISC_R_SUCCESS);
5678                 }
5679                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5680                     (header->type == dns_rdatatype_a ||
5681                      header->type == dns_rdatatype_aaaa) &&
5682                     !header_nx && !newheader_nx &&
5683                     header->trust >= newheader->trust &&
5684                     dns_rdataslab_equal((unsigned char *)header,
5685                                         (unsigned char *)newheader,
5686                                         (unsigned int)(sizeof(*newheader)))) {
5687                         /*
5688                          * Honour the new ttl if it is less than the
5689                          * older one.
5690                          */
5691                         if (header->rdh_ttl > newheader->rdh_ttl)
5692                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5693                         if (header->noqname == NULL &&
5694                             newheader->noqname != NULL) {
5695                                 header->noqname = newheader->noqname;
5696                                 newheader->noqname = NULL;
5697                         }
5698                         if (header->closest == NULL &&
5699                             newheader->closest != NULL) {
5700                                 header->closest = newheader->closest;
5701                                 newheader->closest = NULL;
5702                         }
5703                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5704                         if (addedrdataset != NULL)
5705                                 bind_rdataset(rbtdb, rbtnode, header, now,
5706                                               addedrdataset);
5707                         return (ISC_R_SUCCESS);
5708                 }
5709                 INSIST(rbtversion == NULL ||
5710                        rbtversion->serial >= topheader->serial);
5711                 if (topheader_prev != NULL)
5712                         topheader_prev->next = newheader;
5713                 else
5714                         rbtnode->data = newheader;
5715                 newheader->next = topheader->next;
5716                 if (loading) {
5717                         /*
5718                          * There are no other references to 'header' when
5719                          * loading, so we MAY clean up 'header' now.
5720                          * Since we don't generate changed records when
5721                          * loading, we MUST clean up 'header' now.
5722                          */
5723                         newheader->down = NULL;
5724                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5725                 } else {
5726                         newheader->down = topheader;
5727                         topheader->next = newheader;
5728                         rbtnode->dirty = 1;
5729                         if (changed != NULL)
5730                                 changed->dirty = ISC_TRUE;
5731                         if (rbtversion == NULL) {
5732                                 set_ttl(rbtdb, header, 0);
5733                                 header->attributes |= RDATASET_ATTR_STALE;
5734                         }
5735                         idx = newheader->node->locknum;
5736                         if (IS_CACHE(rbtdb)) {
5737                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5738                                                  newheader, lru_link);
5739                                 /*
5740                                  * XXXMLG We don't check the return value
5741                                  * here.  If it fails, we will not do TTL
5742                                  * based expiry on this node.  However, we
5743                                  * will do it on the LRU side, so memory
5744                                  * will not leak... for long.
5745                                  */
5746                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5747                         } else if (RESIGN(newheader))
5748                                 resign_insert(rbtdb, idx, newheader);
5749                 }
5750         } else {
5751                 /*
5752                  * No non-IGNORED rdatasets of the given type exist at
5753                  * this node.
5754                  */
5755
5756                 /*
5757                  * If we're trying to delete the type, don't bother.
5758                  */
5759                 if (newheader_nx) {
5760                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5761                         return (DNS_R_UNCHANGED);
5762                 }
5763
5764                 if (topheader != NULL) {
5765                         /*
5766                          * We have a list of rdatasets of the given type,
5767                          * but they're all marked IGNORE.  We simply insert
5768                          * the new rdataset at the head of the list.
5769                          *
5770                          * Ignored rdatasets cannot occur during loading, so
5771                          * we INSIST on it.
5772                          */
5773                         INSIST(!loading);
5774                         INSIST(rbtversion == NULL ||
5775                                rbtversion->serial >= topheader->serial);
5776                         if (topheader_prev != NULL)
5777                                 topheader_prev->next = newheader;
5778                         else
5779                                 rbtnode->data = newheader;
5780                         newheader->next = topheader->next;
5781                         newheader->down = topheader;
5782                         topheader->next = newheader;
5783                         rbtnode->dirty = 1;
5784                         if (changed != NULL)
5785                                 changed->dirty = ISC_TRUE;
5786                 } else {
5787                         /*
5788                          * No rdatasets of the given type exist at the node.
5789                          */
5790                         newheader->next = rbtnode->data;
5791                         newheader->down = NULL;
5792                         rbtnode->data = newheader;
5793                 }
5794                 idx = newheader->node->locknum;
5795                 if (IS_CACHE(rbtdb)) {
5796                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5797                                          newheader, lru_link);
5798                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5799                 } else if (RESIGN(newheader)) {
5800                         resign_insert(rbtdb, idx, newheader);
5801                 }
5802         }
5803
5804         /*
5805          * Check if the node now contains CNAME and other data.
5806          */
5807         if (rbtversion != NULL &&
5808             cname_and_other_data(rbtnode, rbtversion->serial))
5809                 return (DNS_R_CNAMEANDOTHER);
5810
5811         if (addedrdataset != NULL)
5812                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5813
5814         return (ISC_R_SUCCESS);
5815 }
5816
5817 static inline isc_boolean_t
5818 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5819                 rbtdb_rdatatype_t type)
5820 {
5821         if (IS_CACHE(rbtdb)) {
5822                 if (type == dns_rdatatype_dname)
5823                         return (ISC_TRUE);
5824                 else
5825                         return (ISC_FALSE);
5826         } else if (type == dns_rdatatype_dname ||
5827                    (type == dns_rdatatype_ns &&
5828                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5829                 return (ISC_TRUE);
5830         return (ISC_FALSE);
5831 }
5832
5833 static inline isc_result_t
5834 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5835            dns_rdataset_t *rdataset)
5836 {
5837         struct noqname *noqname;
5838         isc_mem_t *mctx = rbtdb->common.mctx;
5839         dns_name_t name;
5840         dns_rdataset_t neg, negsig;
5841         isc_result_t result;
5842         isc_region_t r;
5843
5844         dns_name_init(&name, NULL);
5845         dns_rdataset_init(&neg);
5846         dns_rdataset_init(&negsig);
5847
5848         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5849         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5850
5851         noqname = isc_mem_get(mctx, sizeof(*noqname));
5852         if (noqname == NULL) {
5853                 result = ISC_R_NOMEMORY;
5854                 goto cleanup;
5855         }
5856         dns_name_init(&noqname->name, NULL);
5857         noqname->neg = NULL;
5858         noqname->negsig = NULL;
5859         noqname->type = neg.type;
5860         result = dns_name_dup(&name, mctx, &noqname->name);
5861         if (result != ISC_R_SUCCESS)
5862                 goto cleanup;
5863         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5864         if (result != ISC_R_SUCCESS)
5865                 goto cleanup;
5866         noqname->neg = r.base;
5867         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5868         if (result != ISC_R_SUCCESS)
5869                 goto cleanup;
5870         noqname->negsig = r.base;
5871         dns_rdataset_disassociate(&neg);
5872         dns_rdataset_disassociate(&negsig);
5873         newheader->noqname = noqname;
5874         return (ISC_R_SUCCESS);
5875
5876 cleanup:
5877         dns_rdataset_disassociate(&neg);
5878         dns_rdataset_disassociate(&negsig);
5879         free_noqname(mctx, &noqname);
5880         return(result);
5881 }
5882
5883 static inline isc_result_t
5884 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5885            dns_rdataset_t *rdataset)
5886 {
5887         struct noqname *closest;
5888         isc_mem_t *mctx = rbtdb->common.mctx;
5889         dns_name_t name;
5890         dns_rdataset_t neg, negsig;
5891         isc_result_t result;
5892         isc_region_t r;
5893
5894         dns_name_init(&name, NULL);
5895         dns_rdataset_init(&neg);
5896         dns_rdataset_init(&negsig);
5897
5898         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5899         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5900
5901         closest = isc_mem_get(mctx, sizeof(*closest));
5902         if (closest == NULL) {
5903                 result = ISC_R_NOMEMORY;
5904                 goto cleanup;
5905         }
5906         dns_name_init(&closest->name, NULL);
5907         closest->neg = NULL;
5908         closest->negsig = NULL;
5909         closest->type = neg.type;
5910         result = dns_name_dup(&name, mctx, &closest->name);
5911         if (result != ISC_R_SUCCESS)
5912                 goto cleanup;
5913         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5914         if (result != ISC_R_SUCCESS)
5915                 goto cleanup;
5916         closest->neg = r.base;
5917         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5918         if (result != ISC_R_SUCCESS)
5919                 goto cleanup;
5920         closest->negsig = r.base;
5921         dns_rdataset_disassociate(&neg);
5922         dns_rdataset_disassociate(&negsig);
5923         newheader->closest = closest;
5924         return (ISC_R_SUCCESS);
5925
5926  cleanup:
5927         dns_rdataset_disassociate(&neg);
5928         dns_rdataset_disassociate(&negsig);
5929         free_noqname(mctx, &closest);
5930         return(result);
5931 }
5932
5933 static dns_dbmethods_t zone_methods;
5934
5935 static isc_result_t
5936 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5937             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5938             dns_rdataset_t *addedrdataset)
5939 {
5940         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5941         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5942         rbtdb_version_t *rbtversion = version;
5943         isc_region_t region;
5944         rdatasetheader_t *newheader;
5945         rdatasetheader_t *header;
5946         isc_result_t result;
5947         isc_boolean_t delegating;
5948         isc_boolean_t tree_locked = ISC_FALSE;
5949
5950         REQUIRE(VALID_RBTDB(rbtdb));
5951
5952         if (rbtdb->common.methods == &zone_methods)
5953                 REQUIRE(((rbtnode->nsec3 &&
5954                           (rdataset->type == dns_rdatatype_nsec3 ||
5955                            rdataset->covers == dns_rdatatype_nsec3)) ||
5956                          (!rbtnode->nsec3 &&
5957                            rdataset->type != dns_rdatatype_nsec3 &&
5958                            rdataset->covers != dns_rdatatype_nsec3)));
5959
5960         if (rbtversion == NULL) {
5961                 if (now == 0)
5962                         isc_stdtime_get(&now);
5963         } else
5964                 now = 0;
5965
5966         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5967                                             &region,
5968                                             sizeof(rdatasetheader_t));
5969         if (result != ISC_R_SUCCESS)
5970                 return (result);
5971
5972         newheader = (rdatasetheader_t *)region.base;
5973         init_rdataset(rbtdb, newheader);
5974         set_ttl(rbtdb, newheader, rdataset->ttl + now);
5975         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5976                                                 rdataset->covers);
5977         newheader->attributes = 0;
5978         newheader->noqname = NULL;
5979         newheader->closest = NULL;
5980         newheader->count = init_count++;
5981         newheader->trust = rdataset->trust;
5982         newheader->additional_auth = NULL;
5983         newheader->additional_glue = NULL;
5984         newheader->last_used = now;
5985         newheader->node = rbtnode;
5986         if (rbtversion != NULL) {
5987                 newheader->serial = rbtversion->serial;
5988                 now = 0;
5989
5990                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5991                         newheader->attributes |= RDATASET_ATTR_RESIGN;
5992                         newheader->resign = rdataset->resign;
5993                 } else
5994                         newheader->resign = 0;
5995         } else {
5996                 newheader->serial = 1;
5997                 newheader->resign = 0;
5998                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5999                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6000                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6001                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6002                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6003                         result = addnoqname(rbtdb, newheader, rdataset);
6004                         if (result != ISC_R_SUCCESS) {
6005                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6006                                               newheader);
6007                                 return (result);
6008                         }
6009                 }
6010                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6011                         result = addclosest(rbtdb, newheader, rdataset);
6012                         if (result != ISC_R_SUCCESS) {
6013                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6014                                               newheader);
6015                                 return (result);
6016                         }
6017                 }
6018         }
6019
6020         /*
6021          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6022          * just DNAME for the cache), then we need to set the callback bit
6023          * on the node.
6024          */
6025         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6026                 delegating = ISC_TRUE;
6027         else
6028                 delegating = ISC_FALSE;
6029
6030         /*
6031          * If we're adding a delegation type or the DB is a cache in an overmem
6032          * state, hold an exclusive lock on the tree.  In the latter case
6033          * the lock does not necessarily have to be acquired but it will help
6034          * purge stale entries more effectively.
6035          */
6036         if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6037                 tree_locked = ISC_TRUE;
6038                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6039         }
6040
6041         if (IS_CACHE(rbtdb) && rbtdb->overmem)
6042                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6043
6044         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6045                   isc_rwlocktype_write);
6046
6047         if (rbtdb->rrsetstats != NULL) {
6048                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6049                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6050         }
6051
6052         if (IS_CACHE(rbtdb)) {
6053                 if (tree_locked)
6054                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6055
6056                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6057                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6058                         expire_header(rbtdb, header, tree_locked);
6059
6060                 /*
6061                  * If we've been holding a write lock on the tree just for
6062                  * cleaning, we can release it now.  However, we still need the
6063                  * node lock.
6064                  */
6065                 if (tree_locked && !delegating) {
6066                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6067                         tree_locked = ISC_FALSE;
6068                 }
6069         }
6070
6071         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6072                      addedrdataset, now);
6073         if (result == ISC_R_SUCCESS && delegating)
6074                 rbtnode->find_callback = 1;
6075
6076         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6077                     isc_rwlocktype_write);
6078
6079         if (tree_locked)
6080                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6081
6082         /*
6083          * Update the zone's secure status.  If version is non-NULL
6084          * this is deferred until closeversion() is called.
6085          */
6086         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6087                 iszonesecure(db, version, rbtdb->origin_node);
6088
6089         return (result);
6090 }
6091
6092 static isc_result_t
6093 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6094                  dns_rdataset_t *rdataset, unsigned int options,
6095                  dns_rdataset_t *newrdataset)
6096 {
6097         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6098         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6099         rbtdb_version_t *rbtversion = version;
6100         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6101         unsigned char *subresult;
6102         isc_region_t region;
6103         isc_result_t result;
6104         rbtdb_changed_t *changed;
6105
6106         REQUIRE(VALID_RBTDB(rbtdb));
6107
6108         if (rbtdb->common.methods == &zone_methods)
6109                 REQUIRE(((rbtnode->nsec3 &&
6110                           (rdataset->type == dns_rdatatype_nsec3 ||
6111                            rdataset->covers == dns_rdatatype_nsec3)) ||
6112                          (!rbtnode->nsec3 &&
6113                            rdataset->type != dns_rdatatype_nsec3 &&
6114                            rdataset->covers != dns_rdatatype_nsec3)));
6115
6116         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6117                                             &region,
6118                                             sizeof(rdatasetheader_t));
6119         if (result != ISC_R_SUCCESS)
6120                 return (result);
6121         newheader = (rdatasetheader_t *)region.base;
6122         init_rdataset(rbtdb, newheader);
6123         set_ttl(rbtdb, newheader, rdataset->ttl);
6124         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6125                                                 rdataset->covers);
6126         newheader->attributes = 0;
6127         newheader->serial = rbtversion->serial;
6128         newheader->trust = 0;
6129         newheader->noqname = NULL;
6130         newheader->closest = NULL;
6131         newheader->count = init_count++;
6132         newheader->additional_auth = NULL;
6133         newheader->additional_glue = NULL;
6134         newheader->last_used = 0;
6135         newheader->node = rbtnode;
6136         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6137                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6138                 newheader->resign = rdataset->resign;
6139         } else
6140                 newheader->resign = 0;
6141
6142         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6143                   isc_rwlocktype_write);
6144
6145         changed = add_changed(rbtdb, rbtversion, rbtnode);
6146         if (changed == NULL) {
6147                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6148                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6149                             isc_rwlocktype_write);
6150                 return (ISC_R_NOMEMORY);
6151         }
6152
6153         topheader_prev = NULL;
6154         for (topheader = rbtnode->data;
6155              topheader != NULL;
6156              topheader = topheader->next) {
6157                 if (topheader->type == newheader->type)
6158                         break;
6159                 topheader_prev = topheader;
6160         }
6161         /*
6162          * If header isn't NULL, we've found the right type.  There may be
6163          * IGNORE rdatasets between the top of the chain and the first real
6164          * data.  We skip over them.
6165          */
6166         header = topheader;
6167         while (header != NULL && IGNORE(header))
6168                 header = header->down;
6169         if (header != NULL && EXISTS(header)) {
6170                 unsigned int flags = 0;
6171                 subresult = NULL;
6172                 result = ISC_R_SUCCESS;
6173                 if ((options & DNS_DBSUB_EXACT) != 0) {
6174                         flags |= DNS_RDATASLAB_EXACT;
6175                         if (newheader->rdh_ttl != header->rdh_ttl)
6176                                 result = DNS_R_NOTEXACT;
6177                 }
6178                 if (result == ISC_R_SUCCESS)
6179                         result = dns_rdataslab_subtract(
6180                                         (unsigned char *)header,
6181                                         (unsigned char *)newheader,
6182                                         (unsigned int)(sizeof(*newheader)),
6183                                         rbtdb->common.mctx,
6184                                         rbtdb->common.rdclass,
6185                                         (dns_rdatatype_t)header->type,
6186                                         flags, &subresult);
6187                 if (result == ISC_R_SUCCESS) {
6188                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6189                         newheader = (rdatasetheader_t *)subresult;
6190                         init_rdataset(rbtdb, newheader);
6191                         /*
6192                          * We have to set the serial since the rdataslab
6193                          * subtraction routine copies the reserved portion of
6194                          * header, not newheader.
6195                          */
6196                         newheader->serial = rbtversion->serial;
6197                         /*
6198                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6199                          * to additional info.  We need to clear these fields
6200                          * to avoid having duplicated references.
6201                          */
6202                         newheader->additional_auth = NULL;
6203                         newheader->additional_glue = NULL;
6204                 } else if (result == DNS_R_NXRRSET) {
6205                         /*
6206                          * This subtraction would remove all of the rdata;
6207                          * add a nonexistent header instead.
6208                          */
6209                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6210                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6211                         if (newheader == NULL) {
6212                                 result = ISC_R_NOMEMORY;
6213                                 goto unlock;
6214                         }
6215                         set_ttl(rbtdb, newheader, 0);
6216                         newheader->type = topheader->type;
6217                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6218                         newheader->trust = 0;
6219                         newheader->serial = rbtversion->serial;
6220                         newheader->noqname = NULL;
6221                         newheader->closest = NULL;
6222                         newheader->count = 0;
6223                         newheader->additional_auth = NULL;
6224                         newheader->additional_glue = NULL;
6225                         newheader->node = rbtnode;
6226                         newheader->resign = 0;
6227                         newheader->last_used = 0;
6228                 } else {
6229                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6230                         goto unlock;
6231                 }
6232
6233                 /*
6234                  * If we're here, we want to link newheader in front of
6235                  * topheader.
6236                  */
6237                 INSIST(rbtversion->serial >= topheader->serial);
6238                 if (topheader_prev != NULL)
6239                         topheader_prev->next = newheader;
6240                 else
6241                         rbtnode->data = newheader;
6242                 newheader->next = topheader->next;
6243                 newheader->down = topheader;
6244                 topheader->next = newheader;
6245                 rbtnode->dirty = 1;
6246                 changed->dirty = ISC_TRUE;
6247         } else {
6248                 /*
6249                  * The rdataset doesn't exist, so we don't need to do anything
6250                  * to satisfy the deletion request.
6251                  */
6252                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6253                 if ((options & DNS_DBSUB_EXACT) != 0)
6254                         result = DNS_R_NOTEXACT;
6255                 else
6256                         result = DNS_R_UNCHANGED;
6257         }
6258
6259         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6260                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6261
6262  unlock:
6263         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6264                     isc_rwlocktype_write);
6265
6266         /*
6267          * Update the zone's secure status.  If version is non-NULL
6268          * this is deferred until closeversion() is called.
6269          */
6270         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6271                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6272
6273         return (result);
6274 }
6275
6276 static isc_result_t
6277 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6278                dns_rdatatype_t type, dns_rdatatype_t covers)
6279 {
6280         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6281         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6282         rbtdb_version_t *rbtversion = version;
6283         isc_result_t result;
6284         rdatasetheader_t *newheader;
6285
6286         REQUIRE(VALID_RBTDB(rbtdb));
6287
6288         if (type == dns_rdatatype_any)
6289                 return (ISC_R_NOTIMPLEMENTED);
6290         if (type == dns_rdatatype_rrsig && covers == 0)
6291                 return (ISC_R_NOTIMPLEMENTED);
6292
6293         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6294         if (newheader == NULL)
6295                 return (ISC_R_NOMEMORY);
6296         set_ttl(rbtdb, newheader, 0);
6297         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6298         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6299         newheader->trust = 0;
6300         newheader->noqname = NULL;
6301         newheader->closest = NULL;
6302         newheader->additional_auth = NULL;
6303         newheader->additional_glue = NULL;
6304         if (rbtversion != NULL)
6305                 newheader->serial = rbtversion->serial;
6306         else
6307                 newheader->serial = 0;
6308         newheader->count = 0;
6309         newheader->last_used = 0;
6310         newheader->node = rbtnode;
6311
6312         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6313                   isc_rwlocktype_write);
6314
6315         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6316                      ISC_FALSE, NULL, 0);
6317
6318         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6319                     isc_rwlocktype_write);
6320
6321         /*
6322          * Update the zone's secure status.  If version is non-NULL
6323          * this is deferred until closeversion() is called.
6324          */
6325         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6326                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6327
6328         return (result);
6329 }
6330
6331 static isc_result_t
6332 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6333         rbtdb_load_t *loadctx = arg;
6334         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6335         dns_rbtnode_t *node;
6336         isc_result_t result;
6337         isc_region_t region;
6338         rdatasetheader_t *newheader;
6339
6340         /*
6341          * This routine does no node locking.  See comments in
6342          * 'load' below for more information on loading and
6343          * locking.
6344          */
6345
6346
6347         /*
6348          * SOA records are only allowed at top of zone.
6349          */
6350         if (rdataset->type == dns_rdatatype_soa &&
6351             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6352                 return (DNS_R_NOTZONETOP);
6353
6354         if (rdataset->type != dns_rdatatype_nsec3 &&
6355             rdataset->covers != dns_rdatatype_nsec3)
6356                 add_empty_wildcards(rbtdb, name);
6357
6358         if (dns_name_iswildcard(name)) {
6359                 /*
6360                  * NS record owners cannot legally be wild cards.
6361                  */
6362                 if (rdataset->type == dns_rdatatype_ns)
6363                         return (DNS_R_INVALIDNS);
6364                 /*
6365                  * NSEC3 record owners cannot legally be wild cards.
6366                  */
6367                 if (rdataset->type == dns_rdatatype_nsec3)
6368                         return (DNS_R_INVALIDNSEC3);
6369                 result = add_wildcard_magic(rbtdb, name);
6370                 if (result != ISC_R_SUCCESS)
6371                         return (result);
6372         }
6373
6374         node = NULL;
6375         if (rdataset->type == dns_rdatatype_nsec3 ||
6376             rdataset->covers == dns_rdatatype_nsec3) {
6377                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6378                 if (result == ISC_R_SUCCESS)
6379                         node->nsec3 = 1;
6380         } else {
6381                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6382                 if (result == ISC_R_SUCCESS)
6383                         node->nsec3 = 0;
6384         }
6385         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6386                 return (result);
6387         if (result != ISC_R_EXISTS) {
6388                 dns_name_t foundname;
6389                 dns_name_init(&foundname, NULL);
6390                 dns_rbt_namefromnode(node, &foundname);
6391 #ifdef DNS_RBT_USEHASH
6392                 node->locknum = node->hashval % rbtdb->node_lock_count;
6393 #else
6394                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6395                         rbtdb->node_lock_count;
6396 #endif
6397         }
6398
6399         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6400                                             &region,
6401                                             sizeof(rdatasetheader_t));
6402         if (result != ISC_R_SUCCESS)
6403                 return (result);
6404         newheader = (rdatasetheader_t *)region.base;
6405         init_rdataset(rbtdb, newheader);
6406         set_ttl(rbtdb, newheader,
6407                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6408         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6409                                                 rdataset->covers);
6410         newheader->attributes = 0;
6411         newheader->trust = rdataset->trust;
6412         newheader->serial = 1;
6413         newheader->noqname = NULL;
6414         newheader->closest = NULL;
6415         newheader->count = init_count++;
6416         newheader->additional_auth = NULL;
6417         newheader->additional_glue = NULL;
6418         newheader->last_used = 0;
6419         newheader->node = node;
6420         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6421                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6422                 newheader->resign = rdataset->resign;
6423         } else
6424                 newheader->resign = 0;
6425
6426         result = add(rbtdb, node, rbtdb->current_version, newheader,
6427                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6428         if (result == ISC_R_SUCCESS &&
6429             delegating_type(rbtdb, node, rdataset->type))
6430                 node->find_callback = 1;
6431         else if (result == DNS_R_UNCHANGED)
6432                 result = ISC_R_SUCCESS;
6433
6434         return (result);
6435 }
6436
6437 static isc_result_t
6438 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6439         rbtdb_load_t *loadctx;
6440         dns_rbtdb_t *rbtdb;
6441
6442         rbtdb = (dns_rbtdb_t *)db;
6443
6444         REQUIRE(VALID_RBTDB(rbtdb));
6445
6446         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6447         if (loadctx == NULL)
6448                 return (ISC_R_NOMEMORY);
6449
6450         loadctx->rbtdb = rbtdb;
6451         if (IS_CACHE(rbtdb))
6452                 isc_stdtime_get(&loadctx->now);
6453         else
6454                 loadctx->now = 0;
6455
6456         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6457
6458         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6459                 == 0);
6460         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6461
6462         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6463
6464         *addp = loading_addrdataset;
6465         *dbloadp = loadctx;
6466
6467         return (ISC_R_SUCCESS);
6468 }
6469
6470 static isc_result_t
6471 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6472         rbtdb_load_t *loadctx;
6473         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6474
6475         REQUIRE(VALID_RBTDB(rbtdb));
6476         REQUIRE(dbloadp != NULL);
6477         loadctx = *dbloadp;
6478         REQUIRE(loadctx->rbtdb == rbtdb);
6479
6480         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6481
6482         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6483         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6484
6485         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6486         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6487
6488         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6489
6490         /*
6491          * If there's a KEY rdataset at the zone origin containing a
6492          * zone key, we consider the zone secure.
6493          */
6494         if (! IS_CACHE(rbtdb))
6495                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6496
6497         *dbloadp = NULL;
6498
6499         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6500
6501         return (ISC_R_SUCCESS);
6502 }
6503
6504 static isc_result_t
6505 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6506      dns_masterformat_t masterformat) {
6507         dns_rbtdb_t *rbtdb;
6508
6509         rbtdb = (dns_rbtdb_t *)db;
6510
6511         REQUIRE(VALID_RBTDB(rbtdb));
6512
6513         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6514                                  &dns_master_style_default,
6515                                  filename, masterformat));
6516 }
6517
6518 static void
6519 delete_callback(void *data, void *arg) {
6520         dns_rbtdb_t *rbtdb = arg;
6521         rdatasetheader_t *current, *next;
6522
6523         for (current = data; current != NULL; current = next) {
6524                 next = current->next;
6525                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6526         }
6527 }
6528
6529 static isc_boolean_t
6530 issecure(dns_db_t *db) {
6531         dns_rbtdb_t *rbtdb;
6532         isc_boolean_t secure;
6533
6534         rbtdb = (dns_rbtdb_t *)db;
6535
6536         REQUIRE(VALID_RBTDB(rbtdb));
6537
6538         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6539         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6540         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6541
6542         return (secure);
6543 }
6544
6545 static isc_boolean_t
6546 isdnssec(dns_db_t *db) {
6547         dns_rbtdb_t *rbtdb;
6548         isc_boolean_t dnssec;
6549
6550         rbtdb = (dns_rbtdb_t *)db;
6551
6552         REQUIRE(VALID_RBTDB(rbtdb));
6553
6554         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6555         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6556         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6557
6558         return (dnssec);
6559 }
6560
6561 static unsigned int
6562 nodecount(dns_db_t *db) {
6563         dns_rbtdb_t *rbtdb;
6564         unsigned int count;
6565
6566         rbtdb = (dns_rbtdb_t *)db;
6567
6568         REQUIRE(VALID_RBTDB(rbtdb));
6569
6570         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6571         count = dns_rbt_nodecount(rbtdb->tree);
6572         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6573
6574         return (count);
6575 }
6576
6577 static void
6578 settask(dns_db_t *db, isc_task_t *task) {
6579         dns_rbtdb_t *rbtdb;
6580
6581         rbtdb = (dns_rbtdb_t *)db;
6582
6583         REQUIRE(VALID_RBTDB(rbtdb));
6584
6585         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6586         if (rbtdb->task != NULL)
6587                 isc_task_detach(&rbtdb->task);
6588         if (task != NULL)
6589                 isc_task_attach(task, &rbtdb->task);
6590         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6591 }
6592
6593 static isc_boolean_t
6594 ispersistent(dns_db_t *db) {
6595         UNUSED(db);
6596         return (ISC_FALSE);
6597 }
6598
6599 static isc_result_t
6600 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6601         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6602         dns_rbtnode_t *onode;
6603         isc_result_t result = ISC_R_SUCCESS;
6604
6605         REQUIRE(VALID_RBTDB(rbtdb));
6606         REQUIRE(nodep != NULL && *nodep == NULL);
6607
6608         /* Note that the access to origin_node doesn't require a DB lock */
6609         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6610         if (onode != NULL) {
6611                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6612                 new_reference(rbtdb, onode);
6613                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6614
6615                 *nodep = rbtdb->origin_node;
6616         } else {
6617                 INSIST(IS_CACHE(rbtdb));
6618                 result = ISC_R_NOTFOUND;
6619         }
6620
6621         return (result);
6622 }
6623
6624 static isc_result_t
6625 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6626                    isc_uint8_t *flags, isc_uint16_t *iterations,
6627                    unsigned char *salt, size_t *salt_length)
6628 {
6629         dns_rbtdb_t *rbtdb;
6630         isc_result_t result = ISC_R_NOTFOUND;
6631         rbtdb_version_t *rbtversion = version;
6632
6633         rbtdb = (dns_rbtdb_t *)db;
6634
6635         REQUIRE(VALID_RBTDB(rbtdb));
6636
6637         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6638
6639         if (rbtversion == NULL)
6640                 rbtversion = rbtdb->current_version;
6641
6642         if (rbtversion->havensec3) {
6643                 if (hash != NULL)
6644                         *hash = rbtversion->hash;
6645                 if (salt != NULL && salt_length != 0) {
6646                         REQUIRE(*salt_length > rbtversion->salt_length);
6647                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6648                 }
6649                 if (salt_length != NULL)
6650                         *salt_length = rbtversion->salt_length;
6651                 if (iterations != NULL)
6652                         *iterations = rbtversion->iterations;
6653                 if (flags != NULL)
6654                         *flags = rbtversion->flags;
6655                 result = ISC_R_SUCCESS;
6656         }
6657         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6658
6659         return (result);
6660 }
6661
6662 static isc_result_t
6663 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6664         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6665         isc_stdtime_t oldresign;
6666         isc_result_t result = ISC_R_SUCCESS;
6667         rdatasetheader_t *header;
6668
6669         REQUIRE(VALID_RBTDB(rbtdb));
6670         REQUIRE(!IS_CACHE(rbtdb));
6671         REQUIRE(rdataset != NULL);
6672
6673         header = rdataset->private3;
6674         header--;
6675
6676         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6677                   isc_rwlocktype_write);
6678
6679         oldresign = header->resign;
6680         header->resign = resign;
6681         if (header->heap_index != 0) {
6682                 INSIST(RESIGN(header));
6683                 if (resign == 0) {
6684                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6685                                         header->heap_index);
6686                         header->heap_index = 0;
6687                 } else if (resign < oldresign)
6688                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6689                                            header->heap_index);
6690                 else
6691                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6692                                            header->heap_index);
6693         } else if (resign && header->heap_index == 0) {
6694                 header->attributes |= RDATASET_ATTR_RESIGN;
6695                 result = resign_insert(rbtdb, header->node->locknum, header);
6696         }
6697         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6698                     isc_rwlocktype_write);
6699         return (result);
6700 }
6701
6702 static isc_result_t
6703 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6704                dns_name_t *foundname)
6705 {
6706         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6707         rdatasetheader_t *header = NULL, *this;
6708         unsigned int i;
6709         isc_result_t result = ISC_R_NOTFOUND;
6710
6711         REQUIRE(VALID_RBTDB(rbtdb));
6712
6713         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6714
6715         for (i = 0; i < rbtdb->node_lock_count; i++) {
6716                 this = isc_heap_element(rbtdb->heaps[i], 1);
6717                 if (this == NULL)
6718                         continue;
6719                 if (header == NULL)
6720                         header = this;
6721                 else if (isc_serial_lt(this->resign, header->resign))
6722                         header = this;
6723         }
6724
6725         if (header == NULL)
6726                 goto unlock;
6727
6728         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6729                   isc_rwlocktype_read);
6730
6731         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6732
6733         if (foundname != NULL)
6734                 dns_rbt_fullnamefromnode(header->node, foundname);
6735
6736         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6737                     isc_rwlocktype_read);
6738
6739         result = ISC_R_SUCCESS;
6740
6741  unlock:
6742         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6743
6744         return (result);
6745 }
6746
6747 static void
6748 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6749 {
6750         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6751         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6752         dns_rbtnode_t *node;
6753         rdatasetheader_t *header;
6754
6755         REQUIRE(VALID_RBTDB(rbtdb));
6756         REQUIRE(rdataset != NULL);
6757         REQUIRE(rbtdb->future_version == rbtversion);
6758         REQUIRE(rbtversion->writer);
6759
6760         node = rdataset->private2;
6761         header = rdataset->private3;
6762         header--;
6763
6764         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6765         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6766                   isc_rwlocktype_write);
6767         /*
6768          * Delete from heap and save to re-signed list so that it can
6769          * be restored if we backout of this change.
6770          */
6771         new_reference(rbtdb, node);
6772         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6773         header->heap_index = 0;
6774         ISC_LIST_APPEND(rbtversion->resigned_list, header, lru_link);
6775
6776         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6777                     isc_rwlocktype_write);
6778         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6779 }
6780
6781 static dns_stats_t *
6782 getrrsetstats(dns_db_t *db) {
6783         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6784
6785         REQUIRE(VALID_RBTDB(rbtdb));
6786         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6787
6788         return (rbtdb->rrsetstats);
6789 }
6790
6791 static dns_dbmethods_t zone_methods = {
6792         attach,
6793         detach,
6794         beginload,
6795         endload,
6796         dump,
6797         currentversion,
6798         newversion,
6799         attachversion,
6800         closeversion,
6801         findnode,
6802         zone_find,
6803         zone_findzonecut,
6804         attachnode,
6805         detachnode,
6806         expirenode,
6807         printnode,
6808         createiterator,
6809         zone_findrdataset,
6810         allrdatasets,
6811         addrdataset,
6812         subtractrdataset,
6813         deleterdataset,
6814         issecure,
6815         nodecount,
6816         ispersistent,
6817         overmem,
6818         settask,
6819         getoriginnode,
6820         NULL,
6821         getnsec3parameters,
6822         findnsec3node,
6823         setsigningtime,
6824         getsigningtime,
6825         resigned,
6826         isdnssec,
6827         NULL
6828 };
6829
6830 static dns_dbmethods_t cache_methods = {
6831         attach,
6832         detach,
6833         beginload,
6834         endload,
6835         dump,
6836         currentversion,
6837         newversion,
6838         attachversion,
6839         closeversion,
6840         findnode,
6841         cache_find,
6842         cache_findzonecut,
6843         attachnode,
6844         detachnode,
6845         expirenode,
6846         printnode,
6847         createiterator,
6848         cache_findrdataset,
6849         allrdatasets,
6850         addrdataset,
6851         subtractrdataset,
6852         deleterdataset,
6853         issecure,
6854         nodecount,
6855         ispersistent,
6856         overmem,
6857         settask,
6858         getoriginnode,
6859         NULL,
6860         NULL,
6861         NULL,
6862         NULL,
6863         NULL,
6864         NULL,
6865         isdnssec,
6866         getrrsetstats
6867 };
6868
6869 isc_result_t
6870 #ifdef DNS_RBTDB_VERSION64
6871 dns_rbtdb64_create
6872 #else
6873 dns_rbtdb_create
6874 #endif
6875                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6876                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6877                  void *driverarg, dns_db_t **dbp)
6878 {
6879         dns_rbtdb_t *rbtdb;
6880         isc_result_t result;
6881         int i;
6882         dns_name_t name;
6883         isc_boolean_t (*sooner)(void *, void *);
6884
6885         /* Keep the compiler happy. */
6886         UNUSED(argc);
6887         UNUSED(argv);
6888         UNUSED(driverarg);
6889
6890         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6891         if (rbtdb == NULL)
6892                 return (ISC_R_NOMEMORY);
6893
6894         memset(rbtdb, '\0', sizeof(*rbtdb));
6895         dns_name_init(&rbtdb->common.origin, NULL);
6896         rbtdb->common.attributes = 0;
6897         if (type == dns_dbtype_cache) {
6898                 rbtdb->common.methods = &cache_methods;
6899                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6900         } else if (type == dns_dbtype_stub) {
6901                 rbtdb->common.methods = &zone_methods;
6902                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6903         } else
6904                 rbtdb->common.methods = &zone_methods;
6905         rbtdb->common.rdclass = rdclass;
6906         rbtdb->common.mctx = NULL;
6907
6908         result = RBTDB_INITLOCK(&rbtdb->lock);
6909         if (result != ISC_R_SUCCESS)
6910                 goto cleanup_rbtdb;
6911
6912         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6913         if (result != ISC_R_SUCCESS)
6914                 goto cleanup_lock;
6915
6916         /*
6917          * Initialize node_lock_count in a generic way to support future
6918          * extension which allows the user to specify this value on creation.
6919          * Note that when specified for a cache DB it must be larger than 1
6920          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6921          */
6922         if (rbtdb->node_lock_count == 0) {
6923                 if (IS_CACHE(rbtdb))
6924                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6925                 else
6926                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6927         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6928                 result = ISC_R_RANGE;
6929                 goto cleanup_tree_lock;
6930         }
6931         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6932         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6933                                         sizeof(rbtdb_nodelock_t));
6934         if (rbtdb->node_locks == NULL) {
6935                 result = ISC_R_NOMEMORY;
6936                 goto cleanup_tree_lock;
6937         }
6938
6939         rbtdb->rrsetstats = NULL;
6940         if (IS_CACHE(rbtdb)) {
6941                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6942                 if (result != ISC_R_SUCCESS)
6943                         goto cleanup_node_locks;
6944                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6945                                                sizeof(rdatasetheaderlist_t));
6946                 if (rbtdb->rdatasets == NULL) {
6947                         result = ISC_R_NOMEMORY;
6948                         goto cleanup_rrsetstats;
6949                 }
6950                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6951                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
6952         } else
6953                 rbtdb->rdatasets = NULL;
6954
6955         /*
6956          * Create the heaps.
6957          */
6958         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6959                                    sizeof(isc_heap_t *));
6960         if (rbtdb->heaps == NULL) {
6961                 result = ISC_R_NOMEMORY;
6962                 goto cleanup_rdatasets;
6963         }
6964         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6965                 rbtdb->heaps[i] = NULL;
6966         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6967         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6968                 result = isc_heap_create(mctx, sooner, set_index, 0,
6969                                          &rbtdb->heaps[i]);
6970                 if (result != ISC_R_SUCCESS)
6971                         goto cleanup_heaps;
6972         }
6973
6974         /*
6975          * Create deadnode lists.
6976          */
6977         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6978                                        sizeof(rbtnodelist_t));
6979         if (rbtdb->deadnodes == NULL) {
6980                 result = ISC_R_NOMEMORY;
6981                 goto cleanup_heaps;
6982         }
6983         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6984                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6985
6986         rbtdb->active = rbtdb->node_lock_count;
6987
6988         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
6989                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
6990                 if (result == ISC_R_SUCCESS) {
6991                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
6992                         if (result != ISC_R_SUCCESS)
6993                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
6994                 }
6995                 if (result != ISC_R_SUCCESS) {
6996                         while (i-- > 0) {
6997                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
6998                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
6999                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7000                         }
7001                         goto cleanup_deadnodes;
7002                 }
7003                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7004         }
7005
7006         /*
7007          * Attach to the mctx.  The database will persist so long as there
7008          * are references to it, and attaching to the mctx ensures that our
7009          * mctx won't disappear out from under us.
7010          */
7011         isc_mem_attach(mctx, &rbtdb->common.mctx);
7012
7013         /*
7014          * Must be initialized before free_rbtdb() is called.
7015          */
7016         isc_ondestroy_init(&rbtdb->common.ondest);
7017
7018         /*
7019          * Make a copy of the origin name.
7020          */
7021         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7022         if (result != ISC_R_SUCCESS) {
7023                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7024                 return (result);
7025         }
7026
7027         /*
7028          * Make the Red-Black Trees.
7029          */
7030         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7031         if (result != ISC_R_SUCCESS) {
7032                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7033                 return (result);
7034         }
7035
7036         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7037         if (result != ISC_R_SUCCESS) {
7038                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7039                 return (result);
7040         }
7041
7042         /*
7043          * In order to set the node callback bit correctly in zone databases,
7044          * we need to know if the node has the origin name of the zone.
7045          * In loading_addrdataset() we could simply compare the new name
7046          * to the origin name, but this is expensive.  Also, we don't know the
7047          * node name in addrdataset(), so we need another way of knowing the
7048          * zone's top.
7049          *
7050          * We now explicitly create a node for the zone's origin, and then
7051          * we simply remember the node's address.  This is safe, because
7052          * the top-of-zone node can never be deleted, nor can its address
7053          * change.
7054          */
7055         if (!IS_CACHE(rbtdb)) {
7056                 rbtdb->origin_node = NULL;
7057                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7058                                          &rbtdb->origin_node);
7059                 if (result != ISC_R_SUCCESS) {
7060                         INSIST(result != ISC_R_EXISTS);
7061                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7062                         return (result);
7063                 }
7064                 rbtdb->origin_node->nsec3 = 0;
7065                 /*
7066                  * We need to give the origin node the right locknum.
7067                  */
7068                 dns_name_init(&name, NULL);
7069                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7070 #ifdef DNS_RBT_USEHASH
7071                 rbtdb->origin_node->locknum =
7072                         rbtdb->origin_node->hashval %
7073                         rbtdb->node_lock_count;
7074 #else
7075                 rbtdb->origin_node->locknum =
7076                         dns_name_hash(&name, ISC_TRUE) %
7077                         rbtdb->node_lock_count;
7078 #endif
7079         }
7080
7081         /*
7082          * Misc. Initialization.
7083          */
7084         result = isc_refcount_init(&rbtdb->references, 1);
7085         if (result != ISC_R_SUCCESS) {
7086                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7087                 return (result);
7088         }
7089         rbtdb->attributes = 0;
7090         rbtdb->overmem = ISC_FALSE;
7091         rbtdb->task = NULL;
7092
7093         /*
7094          * Version Initialization.
7095          */
7096         rbtdb->current_serial = 1;
7097         rbtdb->least_serial = 1;
7098         rbtdb->next_serial = 2;
7099         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7100         if (rbtdb->current_version == NULL) {
7101                 isc_refcount_decrement(&rbtdb->references, NULL);
7102                 isc_refcount_destroy(&rbtdb->references);
7103                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7104                 return (ISC_R_NOMEMORY);
7105         }
7106         rbtdb->current_version->secure = dns_db_insecure;
7107         rbtdb->current_version->havensec3 = ISC_FALSE;
7108         rbtdb->current_version->flags = 0;
7109         rbtdb->current_version->iterations = 0;
7110         rbtdb->current_version->hash = 0;
7111         rbtdb->current_version->salt_length = 0;
7112         memset(rbtdb->current_version->salt, 0,
7113                sizeof(rbtdb->current_version->salt));
7114         rbtdb->future_version = NULL;
7115         ISC_LIST_INIT(rbtdb->open_versions);
7116         /*
7117          * Keep the current version in the open list so that list operation
7118          * won't happen in normal lookup operations.
7119          */
7120         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7121
7122         rbtdb->common.magic = DNS_DB_MAGIC;
7123         rbtdb->common.impmagic = RBTDB_MAGIC;
7124
7125         *dbp = (dns_db_t *)rbtdb;
7126
7127         return (ISC_R_SUCCESS);
7128
7129  cleanup_deadnodes:
7130         isc_mem_put(mctx, rbtdb->deadnodes,
7131                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7132
7133  cleanup_heaps:
7134         if (rbtdb->heaps != NULL) {
7135                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7136                         if (rbtdb->heaps[i] != NULL)
7137                                 isc_heap_destroy(&rbtdb->heaps[i]);
7138                 isc_mem_put(mctx, rbtdb->heaps,
7139                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7140         }
7141
7142  cleanup_rdatasets:
7143         if (rbtdb->rdatasets != NULL)
7144                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7145                             sizeof(rdatasetheaderlist_t));
7146  cleanup_rrsetstats:
7147         if (rbtdb->rrsetstats != NULL)
7148                 dns_stats_detach(&rbtdb->rrsetstats);
7149
7150  cleanup_node_locks:
7151         isc_mem_put(mctx, rbtdb->node_locks,
7152                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7153
7154  cleanup_tree_lock:
7155         isc_rwlock_destroy(&rbtdb->tree_lock);
7156
7157  cleanup_lock:
7158         RBTDB_DESTROYLOCK(&rbtdb->lock);
7159
7160  cleanup_rbtdb:
7161         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7162         return (result);
7163 }
7164
7165
7166 /*
7167  * Slabbed Rdataset Methods
7168  */
7169
7170 static void
7171 rdataset_disassociate(dns_rdataset_t *rdataset) {
7172         dns_db_t *db = rdataset->private1;
7173         dns_dbnode_t *node = rdataset->private2;
7174
7175         detachnode(db, &node);
7176 }
7177
7178 static isc_result_t
7179 rdataset_first(dns_rdataset_t *rdataset) {
7180         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7181         unsigned int count;
7182
7183         count = raw[0] * 256 + raw[1];
7184         if (count == 0) {
7185                 rdataset->private5 = NULL;
7186                 return (ISC_R_NOMORE);
7187         }
7188
7189 #if DNS_RDATASET_FIXED
7190         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7191                 raw += 2 + (4 * count);
7192         else
7193 #endif
7194                 raw += 2;
7195
7196         /*
7197          * The privateuint4 field is the number of rdata beyond the
7198          * cursor position, so we decrement the total count by one
7199          * before storing it.
7200          *
7201          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7202          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7203          * to the first entry in the offset table.
7204          */
7205         count--;
7206         rdataset->privateuint4 = count;
7207         rdataset->private5 = raw;
7208
7209         return (ISC_R_SUCCESS);
7210 }
7211
7212 static isc_result_t
7213 rdataset_next(dns_rdataset_t *rdataset) {
7214         unsigned int count;
7215         unsigned int length;
7216         unsigned char *raw;     /* RDATASLAB */
7217
7218         count = rdataset->privateuint4;
7219         if (count == 0)
7220                 return (ISC_R_NOMORE);
7221         count--;
7222         rdataset->privateuint4 = count;
7223
7224         /*
7225          * Skip forward one record (length + 4) or one offset (4).
7226          */
7227         raw = rdataset->private5;
7228 #if DNS_RDATASET_FIXED
7229         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7230 #endif
7231                 length = raw[0] * 256 + raw[1];
7232                 raw += length;
7233 #if DNS_RDATASET_FIXED
7234         }
7235         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7236 #else
7237         rdataset->private5 = raw + 2;           /* length(2) */
7238 #endif
7239
7240         return (ISC_R_SUCCESS);
7241 }
7242
7243 static void
7244 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7245         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7246 #if DNS_RDATASET_FIXED
7247         unsigned int offset;
7248 #endif
7249         unsigned int length;
7250         isc_region_t r;
7251         unsigned int flags = 0;
7252
7253         REQUIRE(raw != NULL);
7254
7255         /*
7256          * Find the start of the record if not already in private5
7257          * then skip the length and order fields.
7258          */
7259 #if DNS_RDATASET_FIXED
7260         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7261                 offset = (raw[0] << 24) + (raw[1] << 16) +
7262                          (raw[2] << 8) + raw[3];
7263                 raw = rdataset->private3;
7264                 raw += offset;
7265         }
7266 #endif
7267         length = raw[0] * 256 + raw[1];
7268 #if DNS_RDATASET_FIXED
7269         raw += 4;
7270 #else
7271         raw += 2;
7272 #endif
7273         if (rdataset->type == dns_rdatatype_rrsig) {
7274                 if (*raw & DNS_RDATASLAB_OFFLINE)
7275                         flags |= DNS_RDATA_OFFLINE;
7276                 length--;
7277                 raw++;
7278         }
7279         r.length = length;
7280         r.base = raw;
7281         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7282         rdata->flags |= flags;
7283 }
7284
7285 static void
7286 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7287         dns_db_t *db = source->private1;
7288         dns_dbnode_t *node = source->private2;
7289         dns_dbnode_t *cloned_node = NULL;
7290
7291         attachnode(db, node, &cloned_node);
7292         *target = *source;
7293
7294         /*
7295          * Reset iterator state.
7296          */
7297         target->privateuint4 = 0;
7298         target->private5 = NULL;
7299 }
7300
7301 static unsigned int
7302 rdataset_count(dns_rdataset_t *rdataset) {
7303         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7304         unsigned int count;
7305
7306         count = raw[0] * 256 + raw[1];
7307
7308         return (count);
7309 }
7310
7311 static isc_result_t
7312 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7313                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7314 {
7315         dns_db_t *db = rdataset->private1;
7316         dns_dbnode_t *node = rdataset->private2;
7317         dns_dbnode_t *cloned_node;
7318         struct noqname *noqname = rdataset->private6;
7319
7320         cloned_node = NULL;
7321         attachnode(db, node, &cloned_node);
7322         nsec->methods = &rdataset_methods;
7323         nsec->rdclass = db->rdclass;
7324         nsec->type = noqname->type;
7325         nsec->covers = 0;
7326         nsec->ttl = rdataset->ttl;
7327         nsec->trust = rdataset->trust;
7328         nsec->private1 = rdataset->private1;
7329         nsec->private2 = rdataset->private2;
7330         nsec->private3 = noqname->neg;
7331         nsec->privateuint4 = 0;
7332         nsec->private5 = NULL;
7333         nsec->private6 = NULL;
7334         nsec->private7 = NULL;
7335
7336         cloned_node = NULL;
7337         attachnode(db, node, &cloned_node);
7338         nsecsig->methods = &rdataset_methods;
7339         nsecsig->rdclass = db->rdclass;
7340         nsecsig->type = dns_rdatatype_rrsig;
7341         nsecsig->covers = noqname->type;
7342         nsecsig->ttl = rdataset->ttl;
7343         nsecsig->trust = rdataset->trust;
7344         nsecsig->private1 = rdataset->private1;
7345         nsecsig->private2 = rdataset->private2;
7346         nsecsig->private3 = noqname->negsig;
7347         nsecsig->privateuint4 = 0;
7348         nsecsig->private5 = NULL;
7349         nsec->private6 = NULL;
7350         nsec->private7 = NULL;
7351
7352         dns_name_clone(&noqname->name, name);
7353
7354         return (ISC_R_SUCCESS);
7355 }
7356
7357 static isc_result_t
7358 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7359                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7360 {
7361         dns_db_t *db = rdataset->private1;
7362         dns_dbnode_t *node = rdataset->private2;
7363         dns_dbnode_t *cloned_node;
7364         struct noqname *closest = rdataset->private7;
7365
7366         cloned_node = NULL;
7367         attachnode(db, node, &cloned_node);
7368         nsec->methods = &rdataset_methods;
7369         nsec->rdclass = db->rdclass;
7370         nsec->type = closest->type;
7371         nsec->covers = 0;
7372         nsec->ttl = rdataset->ttl;
7373         nsec->trust = rdataset->trust;
7374         nsec->private1 = rdataset->private1;
7375         nsec->private2 = rdataset->private2;
7376         nsec->private3 = closest->neg;
7377         nsec->privateuint4 = 0;
7378         nsec->private5 = NULL;
7379         nsec->private6 = NULL;
7380         nsec->private7 = NULL;
7381
7382         cloned_node = NULL;
7383         attachnode(db, node, &cloned_node);
7384         nsecsig->methods = &rdataset_methods;
7385         nsecsig->rdclass = db->rdclass;
7386         nsecsig->type = dns_rdatatype_rrsig;
7387         nsecsig->covers = closest->type;
7388         nsecsig->ttl = rdataset->ttl;
7389         nsecsig->trust = rdataset->trust;
7390         nsecsig->private1 = rdataset->private1;
7391         nsecsig->private2 = rdataset->private2;
7392         nsecsig->private3 = closest->negsig;
7393         nsecsig->privateuint4 = 0;
7394         nsecsig->private5 = NULL;
7395         nsec->private6 = NULL;
7396         nsec->private7 = NULL;
7397
7398         dns_name_clone(&closest->name, name);
7399
7400         return (ISC_R_SUCCESS);
7401 }
7402
7403 /*
7404  * Rdataset Iterator Methods
7405  */
7406
7407 static void
7408 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7409         rbtdb_rdatasetiter_t *rbtiterator;
7410
7411         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7412
7413         if (rbtiterator->common.version != NULL)
7414                 closeversion(rbtiterator->common.db,
7415                              &rbtiterator->common.version, ISC_FALSE);
7416         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7417         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7418                     sizeof(*rbtiterator));
7419
7420         *iteratorp = NULL;
7421 }
7422
7423 static isc_result_t
7424 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7425         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7426         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7427         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7428         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7429         rdatasetheader_t *header, *top_next;
7430         rbtdb_serial_t serial;
7431         isc_stdtime_t now;
7432
7433         if (IS_CACHE(rbtdb)) {
7434                 serial = 1;
7435                 now = rbtiterator->common.now;
7436         } else {
7437                 serial = rbtversion->serial;
7438                 now = 0;
7439         }
7440
7441         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7442                   isc_rwlocktype_read);
7443
7444         for (header = rbtnode->data; header != NULL; header = top_next) {
7445                 top_next = header->next;
7446                 do {
7447                         if (header->serial <= serial && !IGNORE(header)) {
7448                                 /*
7449                                  * Is this a "this rdataset doesn't exist"
7450                                  * record?  Or is it too old in the cache?
7451                                  *
7452                                  * Note: unlike everywhere else, we
7453                                  * check for now > header->rdh_ttl instead
7454                                  * of now >= header->rdh_ttl.  This allows
7455                                  * ANY and RRSIG queries for 0 TTL
7456                                  * rdatasets to work.
7457                                  */
7458                                 if (NONEXISTENT(header) ||
7459                                     (now != 0 && now > header->rdh_ttl))
7460                                         header = NULL;
7461                                 break;
7462                         } else
7463                                 header = header->down;
7464                 } while (header != NULL);
7465                 if (header != NULL)
7466                         break;
7467         }
7468
7469         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7470                     isc_rwlocktype_read);
7471
7472         rbtiterator->current = header;
7473
7474         if (header == NULL)
7475                 return (ISC_R_NOMORE);
7476
7477         return (ISC_R_SUCCESS);
7478 }
7479
7480 static isc_result_t
7481 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7482         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7483         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7484         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7485         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7486         rdatasetheader_t *header, *top_next;
7487         rbtdb_serial_t serial;
7488         isc_stdtime_t now;
7489         rbtdb_rdatatype_t type, negtype;
7490         dns_rdatatype_t rdtype, covers;
7491
7492         header = rbtiterator->current;
7493         if (header == NULL)
7494                 return (ISC_R_NOMORE);
7495
7496         if (IS_CACHE(rbtdb)) {
7497                 serial = 1;
7498                 now = rbtiterator->common.now;
7499         } else {
7500                 serial = rbtversion->serial;
7501                 now = 0;
7502         }
7503
7504         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7505                   isc_rwlocktype_read);
7506
7507         type = header->type;
7508         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7509         if (rdtype == 0) {
7510                 covers = RBTDB_RDATATYPE_EXT(header->type);
7511                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7512         } else
7513                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7514         for (header = header->next; header != NULL; header = top_next) {
7515                 top_next = header->next;
7516                 /*
7517                  * If not walking back up the down list.
7518                  */
7519                 if (header->type != type && header->type != negtype) {
7520                         do {
7521                                 if (header->serial <= serial &&
7522                                     !IGNORE(header)) {
7523                                         /*
7524                                          * Is this a "this rdataset doesn't
7525                                          * exist" record?
7526                                          *
7527                                          * Note: unlike everywhere else, we
7528                                          * check for now > header->ttl instead
7529                                          * of now >= header->ttl.  This allows
7530                                          * ANY and RRSIG queries for 0 TTL
7531                                          * rdatasets to work.
7532                                          */
7533                                         if ((header->attributes &
7534                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7535                                             (now != 0 && now > header->rdh_ttl))
7536                                                 header = NULL;
7537                                         break;
7538                                 } else
7539                                         header = header->down;
7540                         } while (header != NULL);
7541                         if (header != NULL)
7542                                 break;
7543                 }
7544         }
7545
7546         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7547                     isc_rwlocktype_read);
7548
7549         rbtiterator->current = header;
7550
7551         if (header == NULL)
7552                 return (ISC_R_NOMORE);
7553
7554         return (ISC_R_SUCCESS);
7555 }
7556
7557 static void
7558 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7559         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7560         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7561         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7562         rdatasetheader_t *header;
7563
7564         header = rbtiterator->current;
7565         REQUIRE(header != NULL);
7566
7567         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7568                   isc_rwlocktype_read);
7569
7570         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7571                       rdataset);
7572
7573         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7574                     isc_rwlocktype_read);
7575 }
7576
7577
7578 /*
7579  * Database Iterator Methods
7580  */
7581
7582 static inline void
7583 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7584         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7585         dns_rbtnode_t *node = rbtdbiter->node;
7586
7587         if (node == NULL)
7588                 return;
7589
7590         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7591         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7592 }
7593
7594 static inline void
7595 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7596         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7597         dns_rbtnode_t *node = rbtdbiter->node;
7598         nodelock_t *lock;
7599
7600         if (node == NULL)
7601                 return;
7602
7603         lock = &rbtdb->node_locks[node->locknum].lock;
7604         NODE_LOCK(lock, isc_rwlocktype_read);
7605         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7606                             rbtdbiter->tree_locked, ISC_FALSE);
7607         NODE_UNLOCK(lock, isc_rwlocktype_read);
7608
7609         rbtdbiter->node = NULL;
7610 }
7611
7612 static void
7613 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7614         dns_rbtnode_t *node;
7615         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7616         isc_boolean_t was_read_locked = ISC_FALSE;
7617         nodelock_t *lock;
7618         int i;
7619
7620         if (rbtdbiter->delete != 0) {
7621                 /*
7622                  * Note that "%d node of %d in tree" can report things like
7623                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7624                  * That some nodes appear on the deletions list more than
7625                  * once.  Only the last occurence will actually be deleted.
7626                  */
7627                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7628                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7629                               "flush_deletions: %d nodes of %d in tree",
7630                               rbtdbiter->delete,
7631                               dns_rbt_nodecount(rbtdb->tree));
7632
7633                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7634                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7635                         was_read_locked = ISC_TRUE;
7636                 }
7637                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7638                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7639
7640                 for (i = 0; i < rbtdbiter->delete; i++) {
7641                         node = rbtdbiter->deletions[i];
7642                         lock = &rbtdb->node_locks[node->locknum].lock;
7643
7644                         NODE_LOCK(lock, isc_rwlocktype_read);
7645                         decrement_reference(rbtdb, node, 0,
7646                                             isc_rwlocktype_read,
7647                                             rbtdbiter->tree_locked, ISC_FALSE);
7648                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7649                 }
7650
7651                 rbtdbiter->delete = 0;
7652
7653                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7654                 if (was_read_locked) {
7655                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7656                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7657
7658                 } else {
7659                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7660                 }
7661         }
7662 }
7663
7664 static inline void
7665 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7666         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7667
7668         REQUIRE(rbtdbiter->paused);
7669         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7670
7671         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7672         rbtdbiter->tree_locked = isc_rwlocktype_read;
7673
7674         rbtdbiter->paused = ISC_FALSE;
7675 }
7676
7677 static void
7678 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7679         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7680         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7681         dns_db_t *db = NULL;
7682
7683         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7684                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7685                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7686         } else
7687                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7688
7689         dereference_iter_node(rbtdbiter);
7690
7691         flush_deletions(rbtdbiter);
7692
7693         dns_db_attach(rbtdbiter->common.db, &db);
7694         dns_db_detach(&rbtdbiter->common.db);
7695
7696         dns_rbtnodechain_reset(&rbtdbiter->chain);
7697         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7698         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7699         dns_db_detach(&db);
7700
7701         *iteratorp = NULL;
7702 }
7703
7704 static isc_result_t
7705 dbiterator_first(dns_dbiterator_t *iterator) {
7706         isc_result_t result;
7707         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7708         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7709         dns_name_t *name, *origin;
7710
7711         if (rbtdbiter->result != ISC_R_SUCCESS &&
7712             rbtdbiter->result != ISC_R_NOMORE)
7713                 return (rbtdbiter->result);
7714
7715         if (rbtdbiter->paused)
7716                 resume_iteration(rbtdbiter);
7717
7718         dereference_iter_node(rbtdbiter);
7719
7720         name = dns_fixedname_name(&rbtdbiter->name);
7721         origin = dns_fixedname_name(&rbtdbiter->origin);
7722         dns_rbtnodechain_reset(&rbtdbiter->chain);
7723         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7724
7725         if (rbtdbiter->nsec3only) {
7726                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7727                 result = dns_rbtnodechain_first(rbtdbiter->current,
7728                                                 rbtdb->nsec3, name, origin);
7729         } else {
7730                 rbtdbiter->current = &rbtdbiter->chain;
7731                 result = dns_rbtnodechain_first(rbtdbiter->current,
7732                                                 rbtdb->tree, name, origin);
7733                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7734                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7735                         result = dns_rbtnodechain_first(rbtdbiter->current,
7736                                                         rbtdb->nsec3, name,
7737                                                         origin);
7738                 }
7739         }
7740         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7741                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7742                                                   NULL, &rbtdbiter->node);
7743                 if (result == ISC_R_SUCCESS) {
7744                         rbtdbiter->new_origin = ISC_TRUE;
7745                         reference_iter_node(rbtdbiter);
7746                 }
7747         } else {
7748                 INSIST(result == ISC_R_NOTFOUND);
7749                 result = ISC_R_NOMORE; /* The tree is empty. */
7750         }
7751
7752         rbtdbiter->result = result;
7753
7754         return (result);
7755 }
7756
7757 static isc_result_t
7758 dbiterator_last(dns_dbiterator_t *iterator) {
7759         isc_result_t result;
7760         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7761         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7762         dns_name_t *name, *origin;
7763
7764         if (rbtdbiter->result != ISC_R_SUCCESS &&
7765             rbtdbiter->result != ISC_R_NOMORE)
7766                 return (rbtdbiter->result);
7767
7768         if (rbtdbiter->paused)
7769                 resume_iteration(rbtdbiter);
7770
7771         dereference_iter_node(rbtdbiter);
7772
7773         name = dns_fixedname_name(&rbtdbiter->name);
7774         origin = dns_fixedname_name(&rbtdbiter->origin);
7775         dns_rbtnodechain_reset(&rbtdbiter->chain);
7776         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7777
7778         result = ISC_R_NOTFOUND;
7779         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7780                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7781                 result = dns_rbtnodechain_last(rbtdbiter->current,
7782                                                rbtdb->nsec3, name, origin);
7783         }
7784         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7785                 rbtdbiter->current = &rbtdbiter->chain;
7786                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7787                                                name, origin);
7788         }
7789         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7790                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7791                                                   NULL, &rbtdbiter->node);
7792                 if (result == ISC_R_SUCCESS) {
7793                         rbtdbiter->new_origin = ISC_TRUE;
7794                         reference_iter_node(rbtdbiter);
7795                 }
7796         } else {
7797                 INSIST(result == ISC_R_NOTFOUND);
7798                 result = ISC_R_NOMORE; /* The tree is empty. */
7799         }
7800
7801         rbtdbiter->result = result;
7802
7803         return (result);
7804 }
7805
7806 static isc_result_t
7807 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7808         isc_result_t result;
7809         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7810         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7811         dns_name_t *iname, *origin;
7812
7813         if (rbtdbiter->result != ISC_R_SUCCESS &&
7814             rbtdbiter->result != ISC_R_NOTFOUND &&
7815             rbtdbiter->result != ISC_R_NOMORE)
7816                 return (rbtdbiter->result);
7817
7818         if (rbtdbiter->paused)
7819                 resume_iteration(rbtdbiter);
7820
7821         dereference_iter_node(rbtdbiter);
7822
7823         iname = dns_fixedname_name(&rbtdbiter->name);
7824         origin = dns_fixedname_name(&rbtdbiter->origin);
7825         dns_rbtnodechain_reset(&rbtdbiter->chain);
7826         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7827
7828         if (rbtdbiter->nsec3only) {
7829                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7830                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7831                                           &rbtdbiter->node,
7832                                           rbtdbiter->current,
7833                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7834         } else if (rbtdbiter->nonsec3) {
7835                 rbtdbiter->current = &rbtdbiter->chain;
7836                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7837                                           &rbtdbiter->node,
7838                                           rbtdbiter->current,
7839                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7840         } else {
7841                 /*
7842                  * Stay on main chain if not found on either chain.
7843                  */
7844                 rbtdbiter->current = &rbtdbiter->chain;
7845                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7846                                           &rbtdbiter->node,
7847                                           rbtdbiter->current,
7848                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7849                 if (result == DNS_R_PARTIALMATCH) {
7850                         dns_rbtnode_t *node = NULL;
7851                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7852                                                   &node, &rbtdbiter->nsec3chain,
7853                                                   DNS_RBTFIND_EMPTYDATA,
7854                                                   NULL, NULL);
7855                         if (result == ISC_R_SUCCESS) {
7856                                 rbtdbiter->node = node;
7857                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7858                         }
7859                 }
7860         }
7861
7862 #if 1
7863         if (result == ISC_R_SUCCESS) {
7864                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7865                                                   origin, NULL);
7866                 if (result == ISC_R_SUCCESS) {
7867                         rbtdbiter->new_origin = ISC_TRUE;
7868                         reference_iter_node(rbtdbiter);
7869                 }
7870         } else if (result == DNS_R_PARTIALMATCH) {
7871                 result = ISC_R_NOTFOUND;
7872                 rbtdbiter->node = NULL;
7873         }
7874
7875         rbtdbiter->result = result;
7876 #else
7877         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7878                 isc_result_t tresult;
7879                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7880                                                    origin, NULL);
7881                 if (tresult == ISC_R_SUCCESS) {
7882                         rbtdbiter->new_origin = ISC_TRUE;
7883                         reference_iter_node(rbtdbiter);
7884                 } else {
7885                         result = tresult;
7886                         rbtdbiter->node = NULL;
7887                 }
7888         } else
7889                 rbtdbiter->node = NULL;
7890
7891         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7892                             ISC_R_SUCCESS : result;
7893 #endif
7894
7895         return (result);
7896 }
7897
7898 static isc_result_t
7899 dbiterator_prev(dns_dbiterator_t *iterator) {
7900         isc_result_t result;
7901         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7902         dns_name_t *name, *origin;
7903         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7904
7905         REQUIRE(rbtdbiter->node != NULL);
7906
7907         if (rbtdbiter->result != ISC_R_SUCCESS)
7908                 return (rbtdbiter->result);
7909
7910         if (rbtdbiter->paused)
7911                 resume_iteration(rbtdbiter);
7912
7913         name = dns_fixedname_name(&rbtdbiter->name);
7914         origin = dns_fixedname_name(&rbtdbiter->origin);
7915         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
7916         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7917             !rbtdbiter->nonsec3 &&
7918             &rbtdbiter->nsec3chain == rbtdbiter->current) {
7919                 rbtdbiter->current = &rbtdbiter->chain;
7920                 dns_rbtnodechain_reset(rbtdbiter->current);
7921                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7922                                                name, origin);
7923                 if (result == ISC_R_NOTFOUND)
7924                         result = ISC_R_NOMORE;
7925         }
7926
7927         dereference_iter_node(rbtdbiter);
7928
7929         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7930                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7931                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7932                                                   NULL, &rbtdbiter->node);
7933         }
7934
7935         if (result == ISC_R_SUCCESS)
7936                 reference_iter_node(rbtdbiter);
7937
7938         rbtdbiter->result = result;
7939
7940         return (result);
7941 }
7942
7943 static isc_result_t
7944 dbiterator_next(dns_dbiterator_t *iterator) {
7945         isc_result_t result;
7946         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7947         dns_name_t *name, *origin;
7948         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7949
7950         REQUIRE(rbtdbiter->node != NULL);
7951
7952         if (rbtdbiter->result != ISC_R_SUCCESS)
7953                 return (rbtdbiter->result);
7954
7955         if (rbtdbiter->paused)
7956                 resume_iteration(rbtdbiter);
7957
7958         name = dns_fixedname_name(&rbtdbiter->name);
7959         origin = dns_fixedname_name(&rbtdbiter->origin);
7960         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
7961         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7962             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
7963                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7964                 dns_rbtnodechain_reset(rbtdbiter->current);
7965                 result = dns_rbtnodechain_first(rbtdbiter->current,
7966                                                 rbtdb->nsec3, name, origin);
7967                 if (result == ISC_R_NOTFOUND)
7968                         result = ISC_R_NOMORE;
7969         }
7970
7971         dereference_iter_node(rbtdbiter);
7972
7973         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7974                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7975                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7976                                                   NULL, &rbtdbiter->node);
7977         }
7978         if (result == ISC_R_SUCCESS)
7979                 reference_iter_node(rbtdbiter);
7980
7981         rbtdbiter->result = result;
7982
7983         return (result);
7984 }
7985
7986 static isc_result_t
7987 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
7988                    dns_name_t *name)
7989 {
7990         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7991         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7992         dns_rbtnode_t *node = rbtdbiter->node;
7993         isc_result_t result;
7994         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
7995         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
7996
7997         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
7998         REQUIRE(rbtdbiter->node != NULL);
7999
8000         if (rbtdbiter->paused)
8001                 resume_iteration(rbtdbiter);
8002
8003         if (name != NULL) {
8004                 if (rbtdbiter->common.relative_names)
8005                         origin = NULL;
8006                 result = dns_name_concatenate(nodename, origin, name, NULL);
8007                 if (result != ISC_R_SUCCESS)
8008                         return (result);
8009                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8010                         result = DNS_R_NEWORIGIN;
8011         } else
8012                 result = ISC_R_SUCCESS;
8013
8014         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8015         new_reference(rbtdb, node);
8016         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8017
8018         *nodep = rbtdbiter->node;
8019
8020         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8021                 isc_result_t expire_result;
8022
8023                 /*
8024                  * If the deletion array is full, flush it before trying
8025                  * to expire the current node.  The current node can't
8026                  * fully deleted while the iteration cursor is still on it.
8027                  */
8028                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8029                         flush_deletions(rbtdbiter);
8030
8031                 expire_result = expirenode(iterator->db, *nodep, 0);
8032
8033                 /*
8034                  * expirenode() currently always returns success.
8035                  */
8036                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8037                         unsigned int refs;
8038
8039                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8040                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8041                         dns_rbtnode_refincrement(node, &refs);
8042                         INSIST(refs != 0);
8043                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8044                 }
8045         }
8046
8047         return (result);
8048 }
8049
8050 static isc_result_t
8051 dbiterator_pause(dns_dbiterator_t *iterator) {
8052         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8053         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8054
8055         if (rbtdbiter->result != ISC_R_SUCCESS &&
8056             rbtdbiter->result != ISC_R_NOMORE)
8057                 return (rbtdbiter->result);
8058
8059         if (rbtdbiter->paused)
8060                 return (ISC_R_SUCCESS);
8061
8062         rbtdbiter->paused = ISC_TRUE;
8063
8064         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8065                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8066                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8067                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8068         }
8069
8070         flush_deletions(rbtdbiter);
8071
8072         return (ISC_R_SUCCESS);
8073 }
8074
8075 static isc_result_t
8076 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8077         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8078         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8079
8080         if (rbtdbiter->result != ISC_R_SUCCESS)
8081                 return (rbtdbiter->result);
8082
8083         return (dns_name_copy(origin, name, NULL));
8084 }
8085
8086 /*%
8087  * Additional cache routines.
8088  */
8089 static isc_result_t
8090 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8091                        dns_rdatatype_t qtype, dns_acache_t *acache,
8092                        dns_zone_t **zonep, dns_db_t **dbp,
8093                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8094                        dns_name_t *fname, dns_message_t *msg,
8095                        isc_stdtime_t now)
8096 {
8097         dns_rbtdb_t *rbtdb = rdataset->private1;
8098         dns_rbtnode_t *rbtnode = rdataset->private2;
8099         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8100         unsigned int current_count = rdataset->privateuint4;
8101         unsigned int count;
8102         rdatasetheader_t *header;
8103         nodelock_t *nodelock;
8104         unsigned int total_count;
8105         acachectl_t *acarray;
8106         dns_acacheentry_t *entry;
8107         isc_result_t result;
8108
8109         UNUSED(qtype); /* we do not use this value at least for now */
8110         UNUSED(acache);
8111
8112         header = (struct rdatasetheader *)(raw - sizeof(*header));
8113
8114         total_count = raw[0] * 256 + raw[1];
8115         INSIST(total_count > current_count);
8116         count = total_count - current_count - 1;
8117
8118         acarray = NULL;
8119
8120         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8121         NODE_LOCK(nodelock, isc_rwlocktype_read);
8122
8123         switch (type) {
8124         case dns_rdatasetadditional_fromauth:
8125                 acarray = header->additional_auth;
8126                 break;
8127         case dns_rdatasetadditional_fromcache:
8128                 acarray = NULL;
8129                 break;
8130         case dns_rdatasetadditional_fromglue:
8131                 acarray = header->additional_glue;
8132                 break;
8133         default:
8134                 INSIST(0);
8135         }
8136
8137         if (acarray == NULL) {
8138                 if (type != dns_rdatasetadditional_fromcache)
8139                         dns_acache_countquerymiss(acache);
8140                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8141                 return (ISC_R_NOTFOUND);
8142         }
8143
8144         if (acarray[count].entry == NULL) {
8145                 dns_acache_countquerymiss(acache);
8146                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8147                 return (ISC_R_NOTFOUND);
8148         }
8149
8150         entry = NULL;
8151         dns_acache_attachentry(acarray[count].entry, &entry);
8152
8153         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8154
8155         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8156                                      nodep, fname, msg, now);
8157
8158         dns_acache_detachentry(&entry);
8159
8160         return (result);
8161 }
8162
8163 static void
8164 acache_callback(dns_acacheentry_t *entry, void **arg) {
8165         dns_rbtdb_t *rbtdb;
8166         dns_rbtnode_t *rbtnode;
8167         nodelock_t *nodelock;
8168         acachectl_t *acarray = NULL;
8169         acache_cbarg_t *cbarg;
8170         unsigned int count;
8171
8172         REQUIRE(arg != NULL);
8173         cbarg = *arg;
8174
8175         /*
8176          * The caller must hold the entry lock.
8177          */
8178
8179         rbtdb = (dns_rbtdb_t *)cbarg->db;
8180         rbtnode = (dns_rbtnode_t *)cbarg->node;
8181
8182         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8183         NODE_LOCK(nodelock, isc_rwlocktype_write);
8184
8185         switch (cbarg->type) {
8186         case dns_rdatasetadditional_fromauth:
8187                 acarray = cbarg->header->additional_auth;
8188                 break;
8189         case dns_rdatasetadditional_fromglue:
8190                 acarray = cbarg->header->additional_glue;
8191                 break;
8192         default:
8193                 INSIST(0);
8194         }
8195
8196         count = cbarg->count;
8197         if (acarray != NULL && acarray[count].entry == entry) {
8198                 acarray[count].entry = NULL;
8199                 INSIST(acarray[count].cbarg == cbarg);
8200                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8201                 acarray[count].cbarg = NULL;
8202         } else
8203                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8204
8205         dns_acache_detachentry(&entry);
8206
8207         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8208
8209         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8210         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8211
8212         *arg = NULL;
8213 }
8214
8215 static void
8216 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8217                       acache_cbarg_t **cbargp)
8218 {
8219         acache_cbarg_t *cbarg;
8220
8221         REQUIRE(mctx != NULL);
8222         REQUIRE(entry != NULL);
8223         REQUIRE(cbargp != NULL && *cbargp != NULL);
8224
8225         cbarg = *cbargp;
8226
8227         dns_acache_cancelentry(entry);
8228         dns_db_detachnode(cbarg->db, &cbarg->node);
8229         dns_db_detach(&cbarg->db);
8230
8231         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8232
8233         *cbargp = NULL;
8234 }
8235
8236 static isc_result_t
8237 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8238                        dns_rdatatype_t qtype, dns_acache_t *acache,
8239                        dns_zone_t *zone, dns_db_t *db,
8240                        dns_dbversion_t *version, dns_dbnode_t *node,
8241                        dns_name_t *fname)
8242 {
8243         dns_rbtdb_t *rbtdb = rdataset->private1;
8244         dns_rbtnode_t *rbtnode = rdataset->private2;
8245         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8246         unsigned int current_count = rdataset->privateuint4;
8247         rdatasetheader_t *header;
8248         unsigned int total_count, count;
8249         nodelock_t *nodelock;
8250         isc_result_t result;
8251         acachectl_t *acarray;
8252         dns_acacheentry_t *newentry, *oldentry = NULL;
8253         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8254
8255         UNUSED(qtype);
8256
8257         if (type == dns_rdatasetadditional_fromcache)
8258                 return (ISC_R_SUCCESS);
8259
8260         header = (struct rdatasetheader *)(raw - sizeof(*header));
8261
8262         total_count = raw[0] * 256 + raw[1];
8263         INSIST(total_count > current_count);
8264         count = total_count - current_count - 1; /* should be private data */
8265
8266         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8267         if (newcbarg == NULL)
8268                 return (ISC_R_NOMEMORY);
8269         newcbarg->type = type;
8270         newcbarg->count = count;
8271         newcbarg->header = header;
8272         newcbarg->db = NULL;
8273         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8274         newcbarg->node = NULL;
8275         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8276                           &newcbarg->node);
8277         newentry = NULL;
8278         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8279                                         acache_callback, newcbarg, &newentry);
8280         if (result != ISC_R_SUCCESS)
8281                 goto fail;
8282         /* Set cache data in the new entry. */
8283         result = dns_acache_setentry(acache, newentry, zone, db,
8284                                      version, node, fname);
8285         if (result != ISC_R_SUCCESS)
8286                 goto fail;
8287
8288         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8289         NODE_LOCK(nodelock, isc_rwlocktype_write);
8290
8291         acarray = NULL;
8292         switch (type) {
8293         case dns_rdatasetadditional_fromauth:
8294                 acarray = header->additional_auth;
8295                 break;
8296         case dns_rdatasetadditional_fromglue:
8297                 acarray = header->additional_glue;
8298                 break;
8299         default:
8300                 INSIST(0);
8301         }
8302
8303         if (acarray == NULL) {
8304                 unsigned int i;
8305
8306                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8307                                       sizeof(acachectl_t));
8308
8309                 if (acarray == NULL) {
8310                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8311                         goto fail;
8312                 }
8313
8314                 for (i = 0; i < total_count; i++) {
8315                         acarray[i].entry = NULL;
8316                         acarray[i].cbarg = NULL;
8317                 }
8318         }
8319         switch (type) {
8320         case dns_rdatasetadditional_fromauth:
8321                 header->additional_auth = acarray;
8322                 break;
8323         case dns_rdatasetadditional_fromglue:
8324                 header->additional_glue = acarray;
8325                 break;
8326         default:
8327                 INSIST(0);
8328         }
8329
8330         if (acarray[count].entry != NULL) {
8331                 /*
8332                  * Swap the entry.  Delay cleaning-up the old entry since
8333                  * it would require a node lock.
8334                  */
8335                 oldentry = acarray[count].entry;
8336                 INSIST(acarray[count].cbarg != NULL);
8337                 oldcbarg = acarray[count].cbarg;
8338         }
8339         acarray[count].entry = newentry;
8340         acarray[count].cbarg = newcbarg;
8341
8342         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8343
8344         if (oldentry != NULL) {
8345                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8346                 dns_acache_detachentry(&oldentry);
8347         }
8348
8349         return (ISC_R_SUCCESS);
8350
8351  fail:
8352         if (newcbarg != NULL) {
8353                 if (newentry != NULL) {
8354                         acache_cancelentry(rbtdb->common.mctx, newentry,
8355                                            &newcbarg);
8356                         dns_acache_detachentry(&newentry);
8357                 } else {
8358                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8359                         dns_db_detach(&newcbarg->db);
8360                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8361                             sizeof(*newcbarg));
8362                 }
8363         }
8364
8365         return (result);
8366 }
8367
8368 static isc_result_t
8369 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8370                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8371 {
8372         dns_rbtdb_t *rbtdb = rdataset->private1;
8373         dns_rbtnode_t *rbtnode = rdataset->private2;
8374         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8375         unsigned int current_count = rdataset->privateuint4;
8376         rdatasetheader_t *header;
8377         nodelock_t *nodelock;
8378         unsigned int total_count, count;
8379         acachectl_t *acarray;
8380         dns_acacheentry_t *entry;
8381         acache_cbarg_t *cbarg;
8382
8383         UNUSED(qtype);          /* we do not use this value at least for now */
8384         UNUSED(acache);
8385
8386         if (type == dns_rdatasetadditional_fromcache)
8387                 return (ISC_R_SUCCESS);
8388
8389         header = (struct rdatasetheader *)(raw - sizeof(*header));
8390
8391         total_count = raw[0] * 256 + raw[1];
8392         INSIST(total_count > current_count);
8393         count = total_count - current_count - 1;
8394
8395         acarray = NULL;
8396         entry = NULL;
8397
8398         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8399         NODE_LOCK(nodelock, isc_rwlocktype_write);
8400
8401         switch (type) {
8402         case dns_rdatasetadditional_fromauth:
8403                 acarray = header->additional_auth;
8404                 break;
8405         case dns_rdatasetadditional_fromglue:
8406                 acarray = header->additional_glue;
8407                 break;
8408         default:
8409                 INSIST(0);
8410         }
8411
8412         if (acarray == NULL) {
8413                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8414                 return (ISC_R_NOTFOUND);
8415         }
8416
8417         entry = acarray[count].entry;
8418         if (entry == NULL) {
8419                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8420                 return (ISC_R_NOTFOUND);
8421         }
8422
8423         acarray[count].entry = NULL;
8424         cbarg = acarray[count].cbarg;
8425         acarray[count].cbarg = NULL;
8426
8427         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8428
8429         if (entry != NULL) {
8430                 if (cbarg != NULL)
8431                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8432                 dns_acache_detachentry(&entry);
8433         }
8434
8435         return (ISC_R_SUCCESS);
8436 }
8437
8438 /*%
8439  * Routines for LRU-based cache management.
8440  */
8441
8442 /*%
8443  * See if a given cache entry that is being reused needs to be updated
8444  * in the LRU-list.  From the LRU management point of view, this function is
8445  * expected to return true for almost all cases.  When used with threads,
8446  * however, this may cause a non-negligible performance penalty because a
8447  * writer lock will have to be acquired before updating the list.
8448  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8449  * function returns true if the entry has not been updated for some period of
8450  * time.  We differentiate the NS or glue address case and the others since
8451  * experiments have shown that the former tends to be accessed relatively
8452  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8453  * may cause external queries at a higher level zone, involving more
8454  * transactions).
8455  *
8456  * Caller must hold the node (read or write) lock.
8457  */
8458 static inline isc_boolean_t
8459 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8460         if ((header->attributes &
8461              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8462                 return (ISC_FALSE);
8463
8464 #if DNS_RBTDB_LIMITLRUUPDATE
8465         if (header->type == dns_rdatatype_ns ||
8466             (header->trust == dns_trust_glue &&
8467              (header->type == dns_rdatatype_a ||
8468               header->type == dns_rdatatype_aaaa))) {
8469                 /*
8470                  * Glue records are updated if at least 60 seconds have passed
8471                  * since the previous update time.
8472                  */
8473                 return (header->last_used + 60 <= now);
8474         }
8475
8476         /* Other records are updated if 5 minutes have passed. */
8477         return (header->last_used + 300 <= now);
8478 #else
8479         UNUSED(now);
8480
8481         return (ISC_TRUE);
8482 #endif
8483 }
8484
8485 /*%
8486  * Update the timestamp of a given cache entry and move it to the head
8487  * of the corresponding LRU list.
8488  *
8489  * Caller must hold the node (write) lock.
8490  *
8491  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8492  */
8493 static void
8494 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8495               isc_stdtime_t now)
8496 {
8497         INSIST(IS_CACHE(rbtdb));
8498
8499         /* To be checked: can we really assume this? XXXMLG */
8500         INSIST(ISC_LINK_LINKED(header, lru_link));
8501
8502         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum],
8503                         header, lru_link);
8504         header->last_used = now;
8505         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum],
8506                          header, lru_link);
8507 }
8508
8509 /*%
8510  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8511  * under an overmem condition.  To recover from this condition quickly, up to
8512  * 2 entries will be purged.  This process is triggered while adding a new
8513  * entry, and we specifically avoid purging entries in the same LRU bucket as
8514  * the one to which the new entry will belong.  Otherwise, we might purge
8515  * entries of the same name of different RR types while adding RRsets from a
8516  * single response (consider the case where we're adding A and AAAA glue records
8517  * of the same NS name).
8518  */
8519 static void
8520 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8521               isc_stdtime_t now, isc_boolean_t tree_locked)
8522 {
8523         rdatasetheader_t *header, *header_prev;
8524         unsigned int locknum;
8525         int purgecount = 2;
8526
8527         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8528              locknum != locknum_start && purgecount > 0;
8529              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8530                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8531                           isc_rwlocktype_write);
8532
8533                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8534                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8535                         expire_header(rbtdb, header, tree_locked);
8536                         purgecount--;
8537                 }
8538
8539                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8540                      header != NULL && purgecount > 0;
8541                      header = header_prev) {
8542                         header_prev = ISC_LIST_PREV(header, lru_link);
8543                         /*
8544                          * Unlink the entry at this point to avoid checking it
8545                          * again even if it's currently used someone else and
8546                          * cannot be purged at this moment.  This entry won't be
8547                          * referenced any more (so unlinking is safe) since the
8548                          * TTL was reset to 0.
8549                          */
8550                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8551                                         lru_link);
8552                         expire_header(rbtdb, header, tree_locked);
8553                         purgecount--;
8554                 }
8555
8556                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8557                                     isc_rwlocktype_write);
8558         }
8559 }
8560
8561 static void
8562 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8563               isc_boolean_t tree_locked)
8564 {
8565         set_ttl(rbtdb, header, 0);
8566         header->attributes |= RDATASET_ATTR_STALE;
8567         header->node->dirty = 1;
8568
8569         /*
8570          * Caller must hold the node (write) lock.
8571          */
8572
8573         if (dns_rbtnode_refcurrent(header->node) == 0) {
8574                 /*
8575                  * If no one else is using the node, we can clean it up now.
8576                  * We first need to gain a new reference to the node to meet a
8577                  * requirement of decrement_reference().
8578                  */
8579                 new_reference(rbtdb, header->node);
8580                 decrement_reference(rbtdb, header->node, 0,
8581                                     isc_rwlocktype_write,
8582                                     tree_locked ? isc_rwlocktype_write :
8583                                     isc_rwlocktype_none, ISC_FALSE);
8584         }
8585 }