]> CyberLeo.Net >> Repos - FreeBSD/releng/8.0.git/blob - contrib/bind9/lib/dns/rbtdb.c
Fix BIND named(8) cache poisoning with DNSSEC validation.
[FreeBSD/releng/8.0.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2009  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.6 2009/05/06 23:34:30 jinmei Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t packs two 16-bit values into one word: the "base"
 * type in the low 16 bits and the "extended" type in the high 16 bits.
 * RBTDB_RDATATYPE_BASE() and RBTDB_RDATATYPE_EXT() extract the two halves;
 * RBTDB_RDATATYPE_VALUE() builds the packed value.
 *
 * Both operands of RBTDB_RDATATYPE_VALUE() are widened to rbtdb_rdatatype_t
 * before any arithmetic.  Without that, an argument narrower than int is
 * promoted to signed int and shifting a value >= 0x8000 left by 16 moves a
 * bit into the sign position, which is undefined.  The base is masked so it
 * can never spill into the extended half.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)((((rbtdb_rdatatype_t)(e)) << 16) | \
                             (((rbtdb_rdatatype_t)(b)) & 0xFFFF)))
104
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is defined.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
128 #else
129 #define DNS_RBTDB_USERWLOCK 0
130 #endif
131
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137 #else
138 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t)        LOCK(l)
141 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 #endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
202 /*
203  * Allow clients with a virtual time of up to 5 minutes in the past to see
204  * records that would otherwise have expired.
205  */
206 #define RBTDB_VIRTUAL 300
207
/*
 * Auxiliary record referenced from an rdatasetheader through its 'noqname'
 * and 'closest' members.
 * NOTE(review): 'neg' and 'negsig' are untyped here; presumably they point
 * at the stored negative-proof data and its signatures, of rdata type
 * 'type', owned by 'name' -- confirm against the code that fills them in.
 */
208 struct noqname {
209         dns_name_t      name;
210         void *          neg;
211         void *          negsig;
212         dns_rdatatype_t type;
213 };
214
215 typedef struct acachectl acachectl_t;
216
/*
 * Bookkeeping record kept for one rdataset.  Headers are chained through
 * 'next' (across types) and 'down' (older versions of the same type), and
 * carry the links used for LRU handling and heap-based cleaning.
 */
217 typedef struct rdatasetheader {
218         /*%
219          * Locked by the owning node's lock.
220          */
221         rbtdb_serial_t                  serial;
222         dns_ttl_t                       rdh_ttl;        /*%< Compared by ttl_sooner(). */
223         rbtdb_rdatatype_t               type;           /*%< Packed; see RBTDB_RDATATYPE_*. */
224         isc_uint16_t                    attributes;     /*%< RDATASET_ATTR_* bits. */
225         dns_trust_t                     trust;
226         struct noqname                  *noqname;
227         struct noqname                  *closest;
228         /*%
229          * For the 'next' and 'down' links below we don't use the LIST
230          * macros, because the LIST structure has both head and tail
231          * pointers, and is doubly linked.
232          */
233         struct rdatasetheader           *next;
234         /*%<
235          * If this is the top header for an rdataset, 'next' points
236          * to the top header for the next rdataset (i.e., the next type).
237          * Otherwise, it points up to the header whose down pointer points
238          * at this header.
239          */
240
241         struct rdatasetheader           *down;
242         /*%<
243          * Points to the header for the next older version of
244          * this rdataset.
245          */
246
247         isc_uint32_t                    count;
248         /*%<
249          * Monotonically increased every time this rdataset is bound so that
250          * it is used as the base of the starting point in DNS responses
251          * when the "cyclic" rrset-order is required.  Since the ordering
252          * should not be so crucial, no lock is set for the counter for
253          * performance reasons.
254          */
255
256         acachectl_t                     *additional_auth;
257         acachectl_t                     *additional_glue;
258
259         dns_rbtnode_t                   *node;          /*%< node->locknum picks the heap in set_ttl(). */
260         isc_stdtime_t                   last_used;
261         ISC_LINK(struct rdatasetheader) lru_link;
262         /*%<
263          * Used for LRU-based cache management.  We should probably make
264          * these cache-DB specific.  We might also make it a pointer and
265          * ensure only the top header has a valid link to save memory.
266          * The linked-list is locked by the rbtdb->lrulock.
267          */
268
269         /*
270          * It's possible this should not be here anymore, but instead
271          * referenced from the bucket's heap directly.
272          */
273 #if 0
274         isc_heap_t                      *heap;
275 #endif
276         unsigned int                    heap_index;
277         /*%<
278          * Used for TTL-based cache cleaning.  Written by set_index();
279          * set_ttl() leaves the heap alone when this is 0.
280          */
281         isc_stdtime_t                   resign;         /*%< Compared by resign_sooner(). */
282 } rdatasetheader_t;
282
283 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
284 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
285
286 #define RDATASET_ATTR_NONEXISTENT       0x0001
287 #define RDATASET_ATTR_STALE             0x0002
288 #define RDATASET_ATTR_IGNORE            0x0004
289 #define RDATASET_ATTR_RETAIN            0x0008
290 #define RDATASET_ATTR_NXDOMAIN          0x0010
291 #define RDATASET_ATTR_RESIGN            0x0020
292 #define RDATASET_ATTR_STATCOUNT         0x0040
293 #define RDATASET_ATTR_OPTOUT            0x0080
294
/*
 * Argument record stored alongside a dns_acacheentry_t in struct acachectl
 * (its 'cbarg' member).
 * NOTE(review): judging by the name this is handed to an acache callback;
 * the code that creates and consumes it is not part of this excerpt, so
 * confirm the meaning of 'type' and 'count' there.
 */
295 typedef struct acache_cbarg {
296         dns_rdatasetadditional_t        type;
297         unsigned int                    count;
298         dns_db_t                        *db;
299         dns_dbnode_t                    *node;
300         rdatasetheader_t                *header;
301 } acache_cbarg_t;
302
/*
 * Pairs an additional-cache entry with its acache_cbarg_t.  An
 * rdatasetheader points at these through its 'additional_auth' and
 * 'additional_glue' members.
 */
303 struct acachectl {
304         dns_acacheentry_t               *entry;
305         acache_cbarg_t                  *cbarg;
306 };
307
308 /*
309  * XXX
310  * When the cache will pre-expire data (due to memory low or other
311  * situations) before the rdataset's TTL has expired, it MUST
312  * respect the RETAIN bit and not expire the data until its TTL is
313  * expired.
314  */
315
316 #undef IGNORE                   /* WIN32 winbase.h defines this. */
317
318 #define EXISTS(header) \
319         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
320 #define NONEXISTENT(header) \
321         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
322 #define IGNORE(header) \
323         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
324 #define RETAIN(header) \
325         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
326 #define NXDOMAIN(header) \
327         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
328 #define RESIGN(header) \
329         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
330 #define OPTOUT(header) \
331         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
332
333 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
334
335 /*%
336  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
337  * There is a tradeoff issue about configuring this value: if this is too
338  * small, it may cause heavier contention between threads; if this is too large,
339  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
340  * The default value should work well for most environments, but this can
341  * also be configurable at compilation time via the
342  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
343  * 1 due to the assumption of overmem_purge().
344  */
345 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
346 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
347 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
348 #else
349 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
350 #endif
351 #else
352 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
353 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
354
/*
 * One node-lock bucket: the lock itself (an rwlock or a mutex, depending
 * on how nodelock_t was selected), a reference count and an 'exiting'
 * flag.  dns_rbtdb_t refers to these through 'node_locks', with
 * 'node_lock_count' recorded next to it.
 */
355 typedef struct {
356         nodelock_t                      lock;
357         /* Protected in the refcount routines. */
358         isc_refcount_t                  references;
359         /* Locked by lock. */
360         isc_boolean_t                   exiting;
361 } rbtdb_nodelock_t;
362
/*
 * Element of an rbtdb_changedlist_t, the list type used for
 * rbtdb_version_t's 'changed_list'.  Each element names one node.
 * NOTE(review): 'dirty' presumably marks a node that needs cleaning when
 * the version is closed -- confirm where it is set.
 */
363 typedef struct rbtdb_changed {
364         dns_rbtnode_t *                 node;
365         isc_boolean_t                   dirty;
366         ISC_LINK(struct rbtdb_changed)  link;
367 } rbtdb_changed_t;
368
369 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
370
/*
 * Value type of rbtdb_version_t's 'secure' member.
 * NOTE(review): the precise criteria for "partial" versus "secure" are
 * decided by code outside this excerpt -- confirm before relying on them.
 */
371 typedef enum {
372         dns_db_insecure,
373         dns_db_partial,
374         dns_db_secure
375 } dns_db_secure_t;
376
/*
 * State of one database version.  dns_rbtdb_t keeps versions on its
 * 'open_versions' list (linked through 'link' here) and also holds direct
 * 'current_version' and 'future_version' pointers.
 */
377 typedef struct rbtdb_version {
378         /* Not locked */
379         rbtdb_serial_t                  serial;
380         /*
381          * Protected in the refcount routines.
382          * XXXJT: should we change the lock policy based on the refcount
383          * performance?
384          */
385         isc_refcount_t                  references;
386         /* Locked by database lock. */
387         isc_boolean_t                   writer;
388         isc_boolean_t                   commit_ok;
389         rbtdb_changedlist_t             changed_list;   /* List of rbtdb_changed_t. */
390         rdatasetheaderlist_t            resigned_list;  /* List of rdatasetheader_t. */
391         ISC_LINK(struct rbtdb_version)  link;
392         dns_db_secure_t                 secure;
393         isc_boolean_t                   havensec3;
394         /* NSEC3 parameters */
395         dns_hash_t                      hash;
396         isc_uint8_t                     flags;
397         isc_uint16_t                    iterations;
398         isc_uint8_t                     salt_length;
399         unsigned char                   salt[NSEC3_MAX_HASH_LENGTH];
400 } rbtdb_version_t;
401
402 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
403
/*
 * The database object itself.  'common' is the generic dns_db_t part; its
 * 'impmagic' is what VALID_RBTDB() checks and its 'attributes' are what
 * IS_CACHE() and IS_STUB() test.  The type of 'lock' follows
 * DNS_RBTDB_USERWLOCK and must be used through the RBTDB_LOCK family of
 * macros.
 */
404 typedef struct {
405         /* Unlocked. */
406         dns_db_t                        common;
407 #if DNS_RBTDB_USERWLOCK
408         isc_rwlock_t                    lock;
409 #else
410         isc_mutex_t                     lock;
411 #endif
412         isc_rwlock_t                    tree_lock;
413         unsigned int                    node_lock_count;
414         rbtdb_nodelock_t *              node_locks;
415         dns_rbtnode_t *                 origin_node;
416         dns_stats_t *                   rrsetstats; /* cache DB only */
417         /* Locked by lock. */
418         unsigned int                    active;
419         isc_refcount_t                  references;
420         unsigned int                    attributes;     /* RBTDB_ATTR_* bits. */
421         rbtdb_serial_t                  current_serial;
422         rbtdb_serial_t                  least_serial;
423         rbtdb_serial_t                  next_serial;
424         rbtdb_version_t *               current_version;
425         rbtdb_version_t *               future_version;
426         rbtdb_versionlist_t             open_versions;
427         isc_boolean_t                   overmem;
428         isc_task_t *                    task;
429         dns_dbnode_t                    *soanode;
430         dns_dbnode_t                    *nsnode;
431
432         /*
433          * This is a linked list used to implement the LRU cache.  There will
434          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
435          * placed on the linked list rdatasets[1].
436          */
437         rdatasetheaderlist_t            *rdatasets;
438
439         /*%
440          * Temporary storage for stale cache nodes and dynamically deleted
441          * nodes that await being cleaned up.
442          */
443         rbtnodelist_t                   *deadnodes;
444
445         /*
446          * Heaps.  Each of these is used for TTL based expiry.  set_ttl()
447          * indexes this array by the node's locknum and tolerates both a
448          * NULL array and a NULL entry.
449          */
450         isc_heap_t                      **heaps;
451
452         /* Locked by tree_lock. */
453         dns_rbt_t *                     tree;
454         dns_rbt_t *                     nsec3;
455
456         /* Unlocked */
457         unsigned int                    quantum;
458 } dns_rbtdb_t;
457
458 #define RBTDB_ATTR_LOADED               0x01
459 #define RBTDB_ATTR_LOADING              0x02
460
461 /*%
462  * Search Context
 *
 * Working state for one lookup: the database and version being searched,
 * the serial and options in effect, the node chain, and what has been
 * remembered about a zone cut (node, rdataset header, signature header
 * and name).
 * NOTE(review): the routines that fill in and consume these members are
 * outside this excerpt; confirm the exact meaning of 'copy_name',
 * 'need_cleanup' and 'wild' there.
463  */
464 typedef struct {
465         dns_rbtdb_t *           rbtdb;
466         rbtdb_version_t *       rbtversion;
467         rbtdb_serial_t          serial;
468         unsigned int            options;
469         dns_rbtnodechain_t      chain;
470         isc_boolean_t           copy_name;
471         isc_boolean_t           need_cleanup;
472         isc_boolean_t           wild;
473         dns_rbtnode_t *         zonecut;
474         rdatasetheader_t *      zonecut_rdataset;
475         rdatasetheader_t *      zonecut_sigrdataset;
476         dns_fixedname_t         zonecut_name;
477         isc_stdtime_t           now;
478 } rbtdb_search_t;
479
480 /*%
481  * Load Context
 *
 * Holds the database being loaded and a timestamp.
 * NOTE(review): presumably 'now' is sampled once when loading begins --
 * confirm at the point where this structure is initialized.
482  */
483 typedef struct {
484         dns_rbtdb_t *           rbtdb;
485         isc_stdtime_t           now;
486 } rbtdb_load_t;
487
488 static void rdataset_disassociate(dns_rdataset_t *rdataset);
489 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
490 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
491 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
492 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
493 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
494 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
495                                         dns_name_t *name,
496                                         dns_rdataset_t *neg,
497                                         dns_rdataset_t *negsig);
498 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
499                                         dns_name_t *name,
500                                         dns_rdataset_t *neg,
501                                         dns_rdataset_t *negsig);
502 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
503                                            dns_rdatasetadditional_t type,
504                                            dns_rdatatype_t qtype,
505                                            dns_acache_t *acache,
506                                            dns_zone_t **zonep,
507                                            dns_db_t **dbp,
508                                            dns_dbversion_t **versionp,
509                                            dns_dbnode_t **nodep,
510                                            dns_name_t *fname,
511                                            dns_message_t *msg,
512                                            isc_stdtime_t now);
513 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
514                                            dns_rdatasetadditional_t type,
515                                            dns_rdatatype_t qtype,
516                                            dns_acache_t *acache,
517                                            dns_zone_t *zone,
518                                            dns_db_t *db,
519                                            dns_dbversion_t *version,
520                                            dns_dbnode_t *node,
521                                            dns_name_t *fname);
522 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
523                                            dns_rdataset_t *rdataset,
524                                            dns_rdatasetadditional_t type,
525                                            dns_rdatatype_t qtype);
526 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
527                                               isc_stdtime_t now);
528 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
529                           isc_stdtime_t now);
530 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
531                           isc_boolean_t tree_locked);
532 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
533                           isc_stdtime_t now, isc_boolean_t tree_locked);
534 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
535                                   rdatasetheader_t *newheader);
536 static void prune_tree(isc_task_t *task, isc_event_t *event);
537
/*
 * Method table of type dns_rdatasetmethods_t, filled with this file's
 * static rdataset_* routines.  The initializer is positional, so the order
 * of entries must match the member order of dns_rdatasetmethods_t.
 */
538 static dns_rdatasetmethods_t rdataset_methods = {
539         rdataset_disassociate,
540         rdataset_first,
541         rdataset_next,
542         rdataset_current,
543         rdataset_clone,
544         rdataset_count,
545         NULL,   /* Slot not provided.  NOTE(review): confirm which method. */
546         rdataset_getnoqname,
547         NULL,   /* Slot not provided.  NOTE(review): confirm which method. */
548         rdataset_getclosest,
549         rdataset_getadditional,
550         rdataset_setadditional,
551         rdataset_putadditional
552 };
553
554 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
555 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
556 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
557 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
558                                  dns_rdataset_t *rdataset);
559
/*
 * Method table of type dns_rdatasetitermethods_t, filled with this file's
 * static rdatasetiter_* routines.  The initializer is positional, so the
 * order of entries must match the member order of
 * dns_rdatasetitermethods_t.
 */
560 static dns_rdatasetitermethods_t rdatasetiter_methods = {
561         rdatasetiter_destroy,
562         rdatasetiter_first,
563         rdatasetiter_next,
564         rdatasetiter_current
565 };
566
/*
 * This file's rdataset iterator: the generic dns_rdatasetiter_t comes
 * first, followed by the header the iterator is currently positioned on.
 */
567 typedef struct rbtdb_rdatasetiter {
568         dns_rdatasetiter_t              common;
569         rdatasetheader_t *              current;
570 } rbtdb_rdatasetiter_t;
571
572 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
573 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
574 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
575 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
576                                         dns_name_t *name);
577 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
578 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
579 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
580                                            dns_dbnode_t **nodep,
581                                            dns_name_t *name);
582 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
583 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
584                                           dns_name_t *name);
585
/*
 * Method table of type dns_dbiteratormethods_t, filled with this file's
 * static dbiterator_* routines.  The initializer is positional, so the
 * order of entries must match the member order of dns_dbiteratormethods_t.
 */
586 static dns_dbiteratormethods_t dbiterator_methods = {
587         dbiterator_destroy,
588         dbiterator_first,
589         dbiterator_last,
590         dbiterator_seek,
591         dbiterator_prev,
592         dbiterator_next,
593         dbiterator_current,
594         dbiterator_pause,
595         dbiterator_origin
596 };
597
598 #define DELETION_BATCH_MAX 64
599
600 /*
 * This file's database iterator: the generic dns_dbiterator_t comes first,
 * followed by two node chains ('chain' and 'nsec3chain'), a 'current'
 * chain pointer and a fixed-size 'deletions' array of DELETION_BATCH_MAX
 * node pointers.
 * NOTE(review): 'current' presumably selects one of the two chains and
 * 'delete' presumably counts the used 'deletions' slots -- confirm in the
 * dbiterator_* routines.
 *
601  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
602  */
603 typedef struct rbtdb_dbiterator {
604         dns_dbiterator_t                common;
605         isc_boolean_t                   paused;
606         isc_boolean_t                   new_origin;
607         isc_rwlocktype_t                tree_locked;
608         isc_result_t                    result;
609         dns_fixedname_t                 name;
610         dns_fixedname_t                 origin;
611         dns_rbtnodechain_t              chain;
612         dns_rbtnodechain_t              nsec3chain;
613         dns_rbtnodechain_t              *current;
614         dns_rbtnode_t                   *node;
615         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
616         int                             delete;
617         isc_boolean_t                   nsec3only;
618         isc_boolean_t                   nonsec3;
619 } rbtdb_dbiterator_t;
620
621
622 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
623 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
624
625 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
626                        isc_event_t *event);
627 static void overmem(dns_db_t *db, isc_boolean_t overmem);
628 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
629                                isc_boolean_t *nsec3createflag);
630
631 /*%
632  * 'init_count' is used to initialize 'newheader->count' which in turn
633  * is used to determine where in the cycle rrset-order cyclic starts.
634  * We don't lock this as we don't care about simultaneous updates.
635  *
636  * Note:
637  *      Both init_count and header->count can be ISC_UINT32_MAX.
638  *      The count on the returned rdataset however can't be as
639  *      that indicates that the database does not implement cyclic
640  *      processing.
641  */
642 static unsigned int init_count;
643
644 /*
645  * Locking
646  *
647  * If a routine is going to lock more than one lock in this module, then
648  * the locking must be done in the following order:
649  *
650  *      Tree Lock
651  *
652  *      Node Lock       (Only one from the set may be locked at one time by
653  *                       any caller)
654  *
655  *      Database Lock
656  *
657  * Failure to follow this hierarchy can result in deadlock.
658  */
659
660 /*
661  * Deleting Nodes
662  *
663  * For zone databases the node for the origin of the zone MUST NOT be deleted.
664  */
665
666
667 /*
668  * DB Routines
669  */
670
671 static void
672 attach(dns_db_t *source, dns_db_t **targetp) {
673         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
674
675         REQUIRE(VALID_RBTDB(rbtdb));
676
677         isc_refcount_increment(&rbtdb->references, NULL);
678
679         *targetp = source;
680 }
681
682 static void
683 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
684         dns_rbtdb_t *rbtdb = event->ev_arg;
685
686         UNUSED(task);
687
688         free_rbtdb(rbtdb, ISC_TRUE, event);
689 }
690
691 static void
692 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
693                   isc_boolean_t increment)
694 {
695         dns_rdatastatstype_t statattributes = 0;
696         dns_rdatastatstype_t base = 0;
697         dns_rdatastatstype_t type;
698
699         /* At the moment we count statistics only for cache DB */
700         INSIST(IS_CACHE(rbtdb));
701
702         if (NXDOMAIN(header))
703                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
704         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
705                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
706                 base = RBTDB_RDATATYPE_EXT(header->type);
707         } else
708                 base = RBTDB_RDATATYPE_BASE(header->type);
709
710         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
711         if (increment)
712                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
713         else
714                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
715 }
716
717 static void
718 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
719         int idx;
720         isc_heap_t *heap;
721         dns_ttl_t oldttl;
722
723         oldttl = header->rdh_ttl;
724         header->rdh_ttl = newttl;
725
726         if (!IS_CACHE(rbtdb))
727                 return;
728
729         /*
730          * It's possible the rbtdb is not a cache.  If this is the case,
731          * we will not have a heap, and we move on.  If we do, though,
732          * we might need to adjust things.
733          */
734         if (header->heap_index == 0 || newttl == oldttl)
735                 return;
736         idx = header->node->locknum;
737         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
738             return;
739         heap = rbtdb->heaps[idx];
740
741         if (newttl < oldttl)
742                 isc_heap_increased(heap, header->heap_index);
743         else
744                 isc_heap_decreased(heap, header->heap_index);
745 }
746
747 /*%
748  * These functions allow the heap code to rank the priority of each
749  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
750  */
751 static isc_boolean_t
752 ttl_sooner(void *v1, void *v2) {
753         rdatasetheader_t *h1 = v1;
754         rdatasetheader_t *h2 = v2;
755
756         if (h1->rdh_ttl < h2->rdh_ttl)
757                 return (ISC_TRUE);
758         return (ISC_FALSE);
759 }
760
761 static isc_boolean_t
762 resign_sooner(void *v1, void *v2) {
763         rdatasetheader_t *h1 = v1;
764         rdatasetheader_t *h2 = v2;
765
766         if (h1->resign < h2->resign)
767                 return (ISC_TRUE);
768         return (ISC_FALSE);
769 }
770
771 /*%
772  * This function sets the heap index into the header.
773  */
774 static void
775 set_index(void *what, unsigned int index) {
776         rdatasetheader_t *h = what;
777
778         h->heap_index = index;
779 }
780
781 /*%
782  * Work out how many nodes can be deleted in the time between two
783  * requests to the nameserver.  Smooth the resulting number and use it
784  * as a estimate for the number of nodes to be deleted in the next
785  * iteration.
786  */
787 static unsigned int
788 adjust_quantum(unsigned int old, isc_time_t *start) {
789         unsigned int pps = dns_pps;     /* packets per second */
790         unsigned int interval;
791         isc_uint64_t usecs;
792         isc_time_t end;
793         unsigned int new;
794
795         if (pps < 100)
796                 pps = 100;
797         isc_time_now(&end);
798
799         interval = 1000000 / pps;       /* interval in usec */
800         if (interval == 0)
801                 interval = 1;
802         usecs = isc_time_microdiff(&end, start);
803         if (usecs == 0) {
804                 /*
805                  * We were unable to measure the amount of time taken.
806                  * Double the nodes deleted next time.
807                  */
808                 old *= 2;
809                 if (old > 1000)
810                         old = 1000;
811                 return (old);
812         }
813         new = old * interval;
814         new /= (unsigned int)usecs;
815         if (new == 0)
816                 new = 1;
817         else if (new > 1000)
818                 new = 1000;
819
820         /* Smooth */
821         new = (new + old * 3) / 4;
822
823         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
824                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
825
826         return (new);
827 }
828
/*%
 * Tear down 'rbtdb' and release the memory it owns.
 *
 * 'log' selects whether a "done free_rbtdb(...)" debug message is written
 * once both trees are gone.  'event' is NULL when the caller has no
 * continuation event yet; only in that case is rbtdb->quantum initialised.
 *
 * The main tree and the NSEC3 tree are destroyed with dns_rbt_destroy2()
 * using rbtdb->quantum.  When that call returns ISC_R_QUOTA, an event
 * carrying free_rbtdb_callback is sent to rbtdb->task and this function
 * returns with the database only partially freed.
 * NOTE(review): presumably the callback re-enters this function with that
 * event so the work continues -- the callback is not visible here; confirm.
 */
829 static void
830 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
831         unsigned int i;
832         isc_ondestroy_t ondest;
833         isc_result_t result;
834         char buf[DNS_NAME_FORMATSIZE];
835         isc_time_t start;
836
            /*
             * NOTE(review): the meaning of the (isc_boolean_t)-1 argument is
             * defined by overmem(), which is outside this block -- confirm.
             */
837         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
838                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
839
840         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
841         REQUIRE(rbtdb->future_version == NULL);
842
            /*
             * Drop the reference held on the current version; it must be the
             * last one.  Then unlink the version and free it.
             */
843         if (rbtdb->current_version != NULL) {
844                 unsigned int refs;
845
846                 isc_refcount_decrement(&rbtdb->current_version->references,
847                                        &refs);
848                 INSIST(refs == 0);
849                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
850                 isc_refcount_destroy(&rbtdb->current_version->references);
851                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
852                             sizeof(rbtdb_version_t));
853         }
854
855         /*
856          * We assume the number of remaining dead nodes is reasonably small;
857          * the overhead of unlinking all nodes here should be negligible.
858          */
859         for (i = 0; i < rbtdb->node_lock_count; i++) {
860                 dns_rbtnode_t *node;
861
862                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
863                 while (node != NULL) {
864                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
865                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
866                 }
867         }
868
            /*
             * No event yet: pick the starting quantum, 100 when there is a
             * task to defer to and 0 otherwise.
             * NOTE(review): presumably a quantum of 0 makes dns_rbt_destroy2()
             * run to completion -- confirm against that function.
             */
869         if (event == NULL)
870                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
871  again:
872         if (rbtdb->tree != NULL) {
873                 isc_time_now(&start);
874                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
875                 if (result == ISC_R_QUOTA) {
876                         INSIST(rbtdb->task != NULL);
877                         if (rbtdb->quantum != 0)
878                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
879                                                                 &start);
880                         if (event == NULL)
881                                 event = isc_event_allocate(rbtdb->common.mctx,
882                                                            NULL,
883                                                          DNS_EVENT_FREESTORAGE,
884                                                            free_rbtdb_callback,
885                                                            rbtdb,
886                                                            sizeof(isc_event_t));
                            /*
                             * Still no event: allocation failed, so the work
                             * cannot be deferred.  Loop back and keep
                             * destroying from here.
                             */
887                         if (event == NULL)
888                                 goto again;
889                         isc_task_send(rbtdb->task, &event);
890                         return;
891                 }
892                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
893         }
894
            /* Same stepwise destruction, this time for the NSEC3 tree. */
895         if (rbtdb->nsec3 != NULL) {
896                 isc_time_now(&start);
897                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
898                 if (result == ISC_R_QUOTA) {
899                         INSIST(rbtdb->task != NULL);
900                         if (rbtdb->quantum != 0)
901                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
902                                                                 &start);
903                         if (event == NULL)
904                                 event = isc_event_allocate(rbtdb->common.mctx,
905                                                            NULL,
906                                                          DNS_EVENT_FREESTORAGE,
907                                                            free_rbtdb_callback,
908                                                            rbtdb,
909                                                            sizeof(isc_event_t));
                            /*
                             * Jumping to 'again' re-checks rbtdb->tree first;
                             * the INSIST above has already established that
                             * it is NULL, so control falls through to this
                             * NSEC3 section again.
                             */
910                         if (event == NULL)
911                                 goto again;
912                         isc_task_send(rbtdb->task, &event);
913                         return;
914                 }
915                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
916         }
917
            /* Both trees are gone; a continuation event is no longer needed. */
918         if (event != NULL)
919                 isc_event_free(&event);
920         if (log) {
921                 if (dns_name_dynamic(&rbtdb->common.origin))
922                         dns_name_format(&rbtdb->common.origin, buf,
923                                         sizeof(buf));
924                 else
925                         strcpy(buf, "<UNKNOWN>");
926                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
927                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
928                               "done free_rbtdb(%s)", buf);
929         }
930         if (dns_name_dynamic(&rbtdb->common.origin))
931                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
932         for (i = 0; i < rbtdb->node_lock_count; i++) {
933                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
934                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
935         }
936
937         /*
938          * Clean up LRU / re-signing order lists.
939          */
940         if (rbtdb->rdatasets != NULL) {
941                 for (i = 0; i < rbtdb->node_lock_count; i++)
942                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
943                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
944                             rbtdb->node_lock_count *
945                             sizeof(rdatasetheaderlist_t));
946         }
947         /*
948          * Clean up dead node buckets.
949          */
950         if (rbtdb->deadnodes != NULL) {
951                 for (i = 0; i < rbtdb->node_lock_count; i++)
952                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
953                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
954                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
955         }
956         /*
957          * Clean up heap objects.
958          */
959         if (rbtdb->heaps != NULL) {
960                 for (i = 0; i < rbtdb->node_lock_count; i++)
961                         isc_heap_destroy(&rbtdb->heaps[i]);
962                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
963                             rbtdb->node_lock_count *
964                             sizeof(isc_heap_t *));
965         }
966
967         if (rbtdb->rrsetstats != NULL)
968                 dns_stats_detach(&rbtdb->rrsetstats);
969
970         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
971                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
972         isc_rwlock_destroy(&rbtdb->tree_lock);
973         isc_refcount_destroy(&rbtdb->references);
974         if (rbtdb->task != NULL)
975                 isc_task_detach(&rbtdb->task);
976
977         RBTDB_DESTROYLOCK(&rbtdb->lock);
978         rbtdb->common.magic = 0;
979         rbtdb->common.impmagic = 0;
            /*
             * Copy the ondestroy state out of the structure before its memory
             * is released; the notification is issued from the copy afterwards.
             */
980         ondest = rbtdb->common.ondest;
981         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
982         isc_ondestroy_notify(&ondest, rbtdb);
983 }
984
985 static inline void
986 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
987         isc_boolean_t want_free = ISC_FALSE;
988         unsigned int i;
989         unsigned int inactive = 0;
990
991         /* XXX check for open versions here */
992
993         if (rbtdb->soanode != NULL)
994                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
995         if (rbtdb->nsnode != NULL)
996                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
997
998         /*
999          * Even though there are no external direct references, there still
1000          * may be nodes in use.
1001          */
1002         for (i = 0; i < rbtdb->node_lock_count; i++) {
1003                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1004                 rbtdb->node_locks[i].exiting = ISC_TRUE;
1005                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1006                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1007                     == 0) {
1008                         inactive++;
1009                 }
1010         }
1011
1012         if (inactive != 0) {
1013                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1014                 rbtdb->active -= inactive;
1015                 if (rbtdb->active == 0)
1016                         want_free = ISC_TRUE;
1017                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1018                 if (want_free) {
1019                         char buf[DNS_NAME_FORMATSIZE];
1020                         if (dns_name_dynamic(&rbtdb->common.origin))
1021                                 dns_name_format(&rbtdb->common.origin, buf,
1022                                                 sizeof(buf));
1023                         else
1024                                 strcpy(buf, "<UNKNOWN>");
1025                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1026                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1027                                       "calling free_rbtdb(%s)", buf);
1028                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1029                 }
1030         }
1031 }
1032
1033 static void
1034 detach(dns_db_t **dbp) {
1035         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1036         unsigned int refs;
1037
1038         REQUIRE(VALID_RBTDB(rbtdb));
1039
1040         isc_refcount_decrement(&rbtdb->references, &refs);
1041
1042         if (refs == 0)
1043                 maybe_free_rbtdb(rbtdb);
1044
1045         *dbp = NULL;
1046 }
1047
1048 static void
1049 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1050         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1051         rbtdb_version_t *version;
1052         unsigned int refs;
1053
1054         REQUIRE(VALID_RBTDB(rbtdb));
1055
1056         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1057         version = rbtdb->current_version;
1058         isc_refcount_increment(&version->references, &refs);
1059         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1060
1061         *versionp = (dns_dbversion_t *)version;
1062 }
1063
1064 static inline rbtdb_version_t *
1065 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1066                  unsigned int references, isc_boolean_t writer)
1067 {
1068         isc_result_t result;
1069         rbtdb_version_t *version;
1070
1071         version = isc_mem_get(mctx, sizeof(*version));
1072         if (version == NULL)
1073                 return (NULL);
1074         version->serial = serial;
1075         result = isc_refcount_init(&version->references, references);
1076         if (result != ISC_R_SUCCESS) {
1077                 isc_mem_put(mctx, version, sizeof(*version));
1078                 return (NULL);
1079         }
1080         version->writer = writer;
1081         version->commit_ok = ISC_FALSE;
1082         ISC_LIST_INIT(version->changed_list);
1083         ISC_LIST_INIT(version->resigned_list);
1084         ISC_LINK_INIT(version, link);
1085
1086         return (version);
1087 }
1088
1089 static isc_result_t
1090 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1091         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1092         rbtdb_version_t *version;
1093
1094         REQUIRE(VALID_RBTDB(rbtdb));
1095         REQUIRE(versionp != NULL && *versionp == NULL);
1096         REQUIRE(rbtdb->future_version == NULL);
1097
1098         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1099         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1100         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1101                                    ISC_TRUE);
1102         if (version != NULL) {
1103                 version->commit_ok = ISC_TRUE;
1104                 version->secure = rbtdb->current_version->secure;
1105                 version->havensec3 = rbtdb->current_version->havensec3;
1106                 if (version->havensec3) {
1107                         version->flags = rbtdb->current_version->flags;
1108                         version->iterations =
1109                                 rbtdb->current_version->iterations;
1110                         version->hash = rbtdb->current_version->hash;
1111                         version->salt_length =
1112                                 rbtdb->current_version->salt_length;
1113                         memcpy(version->salt, rbtdb->current_version->salt,
1114                                version->salt_length);
1115                 } else {
1116                         version->flags = 0;
1117                         version->iterations = 0;
1118                         version->hash = 0;
1119                         version->salt_length = 0;
1120                         memset(version->salt, 0, sizeof(version->salt));
1121                 }
1122                 rbtdb->next_serial++;
1123                 rbtdb->future_version = version;
1124         }
1125         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1126
1127         if (version == NULL)
1128                 return (ISC_R_NOMEMORY);
1129
1130         *versionp = version;
1131
1132         return (ISC_R_SUCCESS);
1133 }
1134
1135 static void
1136 attachversion(dns_db_t *db, dns_dbversion_t *source,
1137               dns_dbversion_t **targetp)
1138 {
1139         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1140         rbtdb_version_t *rbtversion = source;
1141         unsigned int refs;
1142
1143         REQUIRE(VALID_RBTDB(rbtdb));
1144
1145         isc_refcount_increment(&rbtversion->references, &refs);
1146         INSIST(refs > 1);
1147
1148         *targetp = rbtversion;
1149 }
1150
1151 static rbtdb_changed_t *
1152 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1153             dns_rbtnode_t *node)
1154 {
1155         rbtdb_changed_t *changed;
1156         unsigned int refs;
1157
1158         /*
1159          * Caller must be holding the node lock if its reference must be
1160          * protected by the lock.
1161          */
1162
1163         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1164
1165         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1166
1167         REQUIRE(version->writer);
1168
1169         if (changed != NULL) {
1170                 dns_rbtnode_refincrement(node, &refs);
1171                 INSIST(refs != 0);
1172                 changed->node = node;
1173                 changed->dirty = ISC_FALSE;
1174                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1175         } else
1176                 version->commit_ok = ISC_FALSE;
1177
1178         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1179
1180         return (changed);
1181 }
1182
1183 static void
1184 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1185                  acachectl_t *array)
1186 {
1187         unsigned int count;
1188         unsigned int i;
1189         unsigned char *raw;     /* RDATASLAB */
1190
1191         /*
1192          * The caller must be holding the corresponding node lock.
1193          */
1194
1195         if (array == NULL)
1196                 return;
1197
1198         raw = (unsigned char *)header + sizeof(*header);
1199         count = raw[0] * 256 + raw[1];
1200
1201         /*
1202          * Sanity check: since an additional cache entry has a reference to
1203          * the original DB node (in the callback arg), there should be no
1204          * acache entries when the node can be freed.
1205          */
1206         for (i = 0; i < count; i++)
1207                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1208
1209         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1210 }
1211
1212 static inline void
1213 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1214
1215         if (dns_name_dynamic(&(*noqname)->name))
1216                 dns_name_free(&(*noqname)->name, mctx);
1217         if ((*noqname)->neg != NULL)
1218                 isc_mem_put(mctx, (*noqname)->neg,
1219                             dns_rdataslab_size((*noqname)->neg, 0));
1220         if ((*noqname)->negsig != NULL)
1221                 isc_mem_put(mctx, (*noqname)->negsig,
1222                             dns_rdataslab_size((*noqname)->negsig, 0));
1223         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1224         *noqname = NULL;
1225 }
1226
1227 static inline void
1228 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1229 {
1230         ISC_LINK_INIT(h, lru_link);
1231         h->heap_index = 0;
1232
1233 #if TRACE_HEADER
1234         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1235                 fprintf(stderr, "initialized header: %p\n", h);
1236 #else
1237         UNUSED(rbtdb);
1238 #endif
1239 }
1240
1241 static inline rdatasetheader_t *
1242 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1243 {
1244         rdatasetheader_t *h;
1245
1246         h = isc_mem_get(mctx, sizeof(*h));
1247         if (h == NULL)
1248                 return (NULL);
1249
1250 #if TRACE_HEADER
1251         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1252                 fprintf(stderr, "allocated header: %p\n", h);
1253 #endif
1254         init_rdataset(rbtdb, h);
1255         return (h);
1256 }
1257
1258 static inline void
1259 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1260 {
1261         unsigned int size;
1262         int idx;
1263
1264         if (EXISTS(rdataset) &&
1265             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1266                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1267         }
1268
1269         idx = rdataset->node->locknum;
1270         if (ISC_LINK_LINKED(rdataset, lru_link))
1271                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, lru_link);
1272         if (rdataset->heap_index != 0)
1273                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1274         rdataset->heap_index = 0;
1275
1276         if (rdataset->noqname != NULL)
1277                 free_noqname(mctx, &rdataset->noqname);
1278         if (rdataset->closest != NULL)
1279                 free_noqname(mctx, &rdataset->closest);
1280
1281         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1282         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1283
1284         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1285                 size = sizeof(*rdataset);
1286         else
1287                 size = dns_rdataslab_size((unsigned char *)rdataset,
1288                                           sizeof(*rdataset));
1289         isc_mem_put(mctx, rdataset, size);
1290 }
1291
1292 static inline void
1293 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1294         rdatasetheader_t *header, *dcurrent;
1295         isc_boolean_t make_dirty = ISC_FALSE;
1296
1297         /*
1298          * Caller must hold the node lock.
1299          */
1300
1301         /*
1302          * We set the IGNORE attribute on rdatasets with serial number
1303          * 'serial'.  When the reference count goes to zero, these rdatasets
1304          * will be cleaned up; until that time, they will be ignored.
1305          */
1306         for (header = node->data; header != NULL; header = header->next) {
1307                 if (header->serial == serial) {
1308                         header->attributes |= RDATASET_ATTR_IGNORE;
1309                         make_dirty = ISC_TRUE;
1310                 }
1311                 for (dcurrent = header->down;
1312                      dcurrent != NULL;
1313                      dcurrent = dcurrent->down) {
1314                         if (dcurrent->serial == serial) {
1315                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1316                                 make_dirty = ISC_TRUE;
1317                         }
1318                 }
1319         }
1320         if (make_dirty)
1321                 node->dirty = 1;
1322 }
1323
1324 static inline void
1325 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1326 {
1327         rdatasetheader_t *d, *down_next;
1328
1329         for (d = top->down; d != NULL; d = down_next) {
1330                 down_next = d->down;
1331                 free_rdataset(rbtdb, mctx, d);
1332         }
1333         top->down = NULL;
1334 }
1335
1336 static inline void
1337 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1338         rdatasetheader_t *current, *top_prev, *top_next;
1339         isc_mem_t *mctx = rbtdb->common.mctx;
1340
1341         /*
1342          * Caller must be holding the node lock.
1343          */
1344
1345         top_prev = NULL;
1346         for (current = node->data; current != NULL; current = top_next) {
1347                 top_next = current->next;
1348                 clean_stale_headers(rbtdb, mctx, current);
1349                 /*
1350                  * If current is nonexistent or stale, we can clean it up.
1351                  */
1352                 if ((current->attributes &
1353                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1354                         if (top_prev != NULL)
1355                                 top_prev->next = current->next;
1356                         else
1357                                 node->data = current->next;
1358                         free_rdataset(rbtdb, mctx, current);
1359                 } else
1360                         top_prev = current;
1361         }
1362         node->dirty = 0;
1363 }
1364
/*
 * Prune the rdataset headers of a zone node.
 *
 * node->data is a list of top-level headers linked through 'next'; each
 * of them heads a chain of further headers linked through 'down'.  For
 * every top-level header this function:
 *   - removes chain entries that repeat their parent's serial number or
 *     carry the IGNORE attribute;
 *   - removes the top-level header itself when it is IGNOREd, promoting
 *     its first 'down' entry (if any) into its place;
 *   - removes the first chain entry whose serial is below 'least_serial'
 *     together with everything beneath it;
 *   - removes the top-level header when it ends up with no chain and is
 *     NONEXISTENT.
 * The node's dirty flag is cleared only when no top-level header is left
 * with a 'down' chain.  'least_serial' must not be 0.
 */
1365 static inline void
1366 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1367                 rbtdb_serial_t least_serial)
1368 {
1369         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1370         rdatasetheader_t *top_prev, *top_next;
1371         isc_mem_t *mctx = rbtdb->common.mctx;
1372         isc_boolean_t still_dirty = ISC_FALSE;
1373
1374         /*
1375          * Caller must be holding the node lock.
1376          */
1377         REQUIRE(least_serial != 0);
1378
             /* top_prev is the last top-level header that was kept. */
1379         top_prev = NULL;
1380         for (current = node->data; current != NULL; current = top_next) {
1381                 top_next = current->next;
1382
1383                 /*
1384                  * First, we clean up any instances of multiple rdatasets
1385                  * with the same serial number, or that have the IGNORE
1386                  * attribute.
1387                  */
1388                 dparent = current;
1389                 for (dcurrent = current->down;
1390                      dcurrent != NULL;
1391                      dcurrent = down_next) {
1392                         down_next = dcurrent->down;
1393                         INSIST(dcurrent->serial <= dparent->serial);
1394                         if (dcurrent->serial == dparent->serial ||
1395                             IGNORE(dcurrent)) {
                                     /*
                                      * Splice dcurrent out of the chain; the
                                      * 'next' member of the following entry
                                      * is pointed back at dparent.
                                      */
1396                                 if (down_next != NULL)
1397                                         down_next->next = dparent;
1398                                 dparent->down = down_next;
1399                                 free_rdataset(rbtdb, mctx, dcurrent);
1400                         } else
1401                                 dparent = dcurrent;
1402                 }
1403
1404                 /*
1405                  * We've now eliminated all IGNORE datasets with the possible
1406                  * exception of current, which we now check.
1407                  */
1408                 if (IGNORE(current)) {
1409                         down_next = current->down;
1410                         if (down_next == NULL) {
1411                                 if (top_prev != NULL)
1412                                         top_prev->next = current->next;
1413                                 else
1414                                         node->data = current->next;
1415                                 free_rdataset(rbtdb, mctx, current);
1416                                 /*
1417                                  * current no longer exists, so we can
1418                                  * just continue with the loop.
1419                                  */
1420                                 continue;
1421                         } else {
1422                                 /*
1423                                  * Pull up current->down, making it the new
1424                                  * current.
1425                                  */
1426                                 if (top_prev != NULL)
1427                                         top_prev->next = down_next;
1428                                 else
1429                                         node->data = down_next;
1430                                 down_next->next = top_next;
1431                                 free_rdataset(rbtdb, mctx, current);
1432                                 current = down_next;
1433                         }
1434                 }
1435
1436                 /*
1437                  * We now try to find the first down node less than the
1438                  * least serial.
1439                  */
1440                 dparent = current;
1441                 for (dcurrent = current->down;
1442                      dcurrent != NULL;
1443                      dcurrent = down_next) {
1444                         down_next = dcurrent->down;
1445                         if (dcurrent->serial < least_serial)
1446                                 break;
1447                         dparent = dcurrent;
1448                 }
1449
1450                 /*
1451                  * If there is a such an rdataset, delete it and any older
1452                  * versions.
1453                  */
1454                 if (dcurrent != NULL) {
1455                         do {
1456                                 down_next = dcurrent->down;
1457                                 INSIST(dcurrent->serial <= least_serial);
1458                                 free_rdataset(rbtdb, mctx, dcurrent);
1459                                 dcurrent = down_next;
1460                         } while (dcurrent != NULL);
                             /* dparent is the last entry kept on this chain. */
1461                         dparent->down = NULL;
1462                 }
1463
1464                 /*
1465                  * Note.  The serial number of 'current' might be less than
1466                  * least_serial too, but we cannot delete it because it is
1467                  * the most recent version, unless it is a NONEXISTENT
1468                  * rdataset.
1469                  */
1470                 if (current->down != NULL) {
1471                         still_dirty = ISC_TRUE;
1472                         top_prev = current;
1473                 } else {
1474                         /*
1475                          * If this is a NONEXISTENT rdataset, we can delete it.
1476                          */
1477                         if (NONEXISTENT(current)) {
1478                                 if (top_prev != NULL)
1479                                         top_prev->next = current->next;
1480                                 else
1481                                         node->data = current->next;
1482                                 free_rdataset(rbtdb, mctx, current);
1483                         } else
1484                                 top_prev = current;
1485                 }
1486         }
1487         if (!still_dirty)
1488                 node->dirty = 0;
1489 }
1490
1491 /*%
1492  * Clean up dead nodes.  These are nodes which have no references, and
1493  * have no data.  They are dead but we could not or chose not to delete
1494  * them when we deleted all the data at that node because we did not want
1495  * to wait for the tree write lock.
1496  *
1497  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1498  */
1499 static void
1500 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1501         dns_rbtnode_t *node;
1502         isc_result_t result;
1503         int count = 10;         /* XXXJT: should be adjustable */
1504
1505         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1506         while (node != NULL && count > 0) {
1507                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1508
1509                 /*
1510                  * Since we're holding a tree write lock, it should be
1511                  * impossible for this node to be referenced by others.
1512                  */
1513                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1514                        node->data == NULL);
1515
1516                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1517                 if (node->nsec3)
1518                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1519                                                     ISC_FALSE);
1520                 else
1521                         result = dns_rbt_deletenode(rbtdb->tree, node,
1522                                                     ISC_FALSE);
1523                 if (result != ISC_R_SUCCESS)
1524                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1525                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1526                                       "cleanup_dead_nodes: "
1527                                       "dns_rbt_deletenode: %s",
1528                                       isc_result_totext(result));
1529                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1530                 count--;
1531         }
1532 }
1533
1534 /*
1535  * Caller must be holding the node lock if its reference must be protected
1536  * by the lock.
1537  */
1538 static inline void
1539 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1540         unsigned int lockrefs, noderefs;
1541         isc_refcount_t *lockref;
1542
1543         dns_rbtnode_refincrement0(node, &noderefs);
1544         if (noderefs == 1) {    /* this is the first reference to the node */
1545                 lockref = &rbtdb->node_locks[node->locknum].references;
1546                 isc_refcount_increment0(lockref, &lockrefs);
1547                 INSIST(lockrefs != 0);
1548         }
1549         INSIST(noderefs != 0);
1550 }
1551
1552 /*
1553  * This function is assumed to be called when a node is newly referenced
1554  * and can be in the deadnode list.  In that case the node must be retrieved
1555  * from the list because it is going to be used.  In addition, if the caller
1556  * happens to hold a write lock on the tree, it's a good chance to purge dead
1557  * nodes.
1558  * Note: while a new reference is gained in multiple places, there are only very
1559  * few cases where the node can be in the deadnode list (only empty nodes can
1560  * have been added to the list).
1561  */
1562 static inline void
1563 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1564                 isc_rwlocktype_t treelocktype)
1565 {
1566         isc_boolean_t need_relock = ISC_FALSE;
1567
1568         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1569         new_reference(rbtdb, node);
1570
1571         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1572                       isc_rwlocktype_read);
1573         if (ISC_LINK_LINKED(node, deadlink))
1574                 need_relock = ISC_TRUE;
1575         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1576                  treelocktype == isc_rwlocktype_write)
1577                 need_relock = ISC_TRUE;
1578         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1579                         isc_rwlocktype_read);
1580         if (need_relock) {
1581                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1582                               isc_rwlocktype_write);
1583                 if (ISC_LINK_LINKED(node, deadlink))
1584                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1585                                         node, deadlink);
1586                 if (treelocktype == isc_rwlocktype_write)
1587                         cleanup_dead_nodes(rbtdb, node->locknum);
1588                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1589                                 isc_rwlocktype_write);
1590         }
1591
1592         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1593 }
1594
1595 /*
1596  * Caller must be holding the node lock; either the "strong", read or write
1597  * lock.  Note that the lock must be held even when node references are
1598  * atomically modified; in that case the decrement operation itself does not
1599  * have to be protected, but we must avoid a race condition where multiple
1600  * threads are decreasing the reference to zero simultaneously and at least
1601  * one of them is going to free the node.
1602  * This function returns ISC_TRUE if and only if the node reference decreases
1603  * to zero.
1604  */
1605 static isc_boolean_t
1606 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1607                     rbtdb_serial_t least_serial,
1608                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1609                     isc_boolean_t pruning)
1610 {
1611         isc_result_t result;
1612         isc_boolean_t write_locked;
1613         rbtdb_nodelock_t *nodelock;
1614         unsigned int refs, nrefs;
1615         int bucket = node->locknum;
1616         isc_boolean_t no_reference;
1617
1618         nodelock = &rbtdb->node_locks[bucket];
1619
1620         /* Handle easy and typical case first. */
1621         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1622                 dns_rbtnode_refdecrement(node, &nrefs);
1623                 INSIST((int)nrefs >= 0);
1624                 if (nrefs == 0) {
1625                         isc_refcount_decrement(&nodelock->references, &refs);
1626                         INSIST((int)refs >= 0);
1627                 }
1628                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1629         }
1630
1631         /* Upgrade the lock? */
1632         if (nlock == isc_rwlocktype_read) {
1633                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1634                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1635         }
1636         dns_rbtnode_refdecrement(node, &nrefs);
1637         INSIST((int)nrefs >= 0);
1638         if (nrefs > 0) {
1639                 /* Restore the lock? */
1640                 if (nlock == isc_rwlocktype_read)
1641                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1642                 return (ISC_FALSE);
1643         }
1644
1645         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1646                 if (IS_CACHE(rbtdb))
1647                         clean_cache_node(rbtdb, node);
1648                 else {
1649                         if (least_serial == 0) {
1650                                 /*
1651                                  * Caller doesn't know the least serial.
1652                                  * Get it.
1653                                  */
1654                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1655                                 least_serial = rbtdb->least_serial;
1656                                 RBTDB_UNLOCK(&rbtdb->lock,
1657                                              isc_rwlocktype_read);
1658                         }
1659                         clean_zone_node(rbtdb, node, least_serial);
1660                 }
1661         }
1662
1663         isc_refcount_decrement(&nodelock->references, &refs);
1664         INSIST((int)refs >= 0);
1665
1666         /*
1667          * XXXDCL should this only be done for cache zones?
1668          */
1669         if (node->data != NULL || node->down != NULL) {
1670                 /* Restore the lock? */
1671                 if (nlock == isc_rwlocktype_read)
1672                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1673                 return (ISC_TRUE);
1674         }
1675
1676         /*
1677          * Attempt to switch to a write lock on the tree.  If this fails,
1678          * we will add this node to a linked list of nodes in this locking
1679          * bucket which we will free later.
1680          */
1681         if (tlock != isc_rwlocktype_write) {
1682                 /*
1683                  * Locking hierarchy notwithstanding, we don't need to free
1684                  * the node lock before acquiring the tree write lock because
1685                  * we only do a trylock.
1686                  */
1687                 if (tlock == isc_rwlocktype_read)
1688                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1689                 else
1690                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1691                                                     isc_rwlocktype_write);
1692                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1693                               result == ISC_R_LOCKBUSY);
1694
1695                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1696         } else
1697                 write_locked = ISC_TRUE;
1698
1699         no_reference = ISC_TRUE;
1700         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1701                 /*
1702                  * We can now delete the node if the reference counter is
1703                  * zero.  This should be typically the case, but a different
1704                  * thread may still gain a (new) reference just before the
1705                  * current thread locks the tree (e.g., in findnode()).
1706                  */
1707
1708                 /*
1709                  * If this node is the only one in the level it's in, deleting
1710                  * this node may recursively make its parent the only node in
1711                  * the parent level; if so, and if no one is currently using
1712                  * the parent node, this is almost the only opportunity to
1713                  * clean it up.  But the recursive cleanup is not that trivial
1714                  * since the child and parent may be in different lock buckets,
1715                  * which would cause a lock order reversal problem.  To avoid
1716                  * the trouble, we'll dispatch a separate event for batch
1717                  * cleaning.  We need to check whether we're deleting the node
1718                  * as a result of pruning to avoid infinite dispatching.
1719                  * Note: pruning happens only when a task has been set for the
1720                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1721                  * it's their responsibility to purge stale leaves (e.g. by
1722                  * periodic walk-through).
1723                  */
1724                 if (!pruning && node->parent != NULL &&
1725                     node->parent->down == node && node->left == NULL &&
1726                     node->right == NULL && rbtdb->task != NULL) {
1727                         isc_event_t *ev;
1728                         dns_db_t *db;
1729
1730                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1731                                                 DNS_EVENT_RBTPRUNE,
1732                                                 prune_tree, node,
1733                                                 sizeof(isc_event_t));
1734                         if (ev != NULL) {
1735                                 new_reference(rbtdb, node);
1736                                 db = NULL;
1737                                 attach((dns_db_t *)rbtdb, &db);
1738                                 ev->ev_sender = db;
1739                                 isc_task_send(rbtdb->task, &ev);
1740                                 no_reference = ISC_FALSE;
1741                         } else {
1742                                 /*
1743                                  * XXX: this is a weird situation.  We could
1744                                  * ignore this error case, but then the stale
1745                                  * node will unlikely be purged except via a
1746                                  * rare condition such as manual cleanup.  So
1747                                  * we queue it in the deadnodes list, hoping
1748                                  * the memory shortage is temporary and the node
1749                                  * will be deleted later.
1750                                  */
1751                                 isc_log_write(dns_lctx,
1752                                               DNS_LOGCATEGORY_DATABASE,
1753                                               DNS_LOGMODULE_CACHE,
1754                                               ISC_LOG_INFO,
1755                                               "decrement_reference: failed to "
1756                                               "allocate pruning event");
1757                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1758                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1759                                                 deadlink);
1760                         }
1761                 } else {
1762                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1763                                 char printname[DNS_NAME_FORMATSIZE];
1764
1765                                 isc_log_write(dns_lctx,
1766                                               DNS_LOGCATEGORY_DATABASE,
1767                                               DNS_LOGMODULE_CACHE,
1768                                               ISC_LOG_DEBUG(1),
1769                                               "decrement_reference: "
1770                                               "delete from rbt: %p %s",
1771                                               node,
1772                                               dns_rbt_formatnodename(node,
1773                                                         printname,
1774                                                         sizeof(printname)));
1775                         }
1776
1777                         INSIST(!ISC_LINK_LINKED(node, deadlink));
1778                         if (node->nsec3)
1779                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1780                                                             ISC_FALSE);
1781                         else
1782                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1783                                                             ISC_FALSE);
1784                         if (result != ISC_R_SUCCESS) {
1785                                 isc_log_write(dns_lctx,
1786                                               DNS_LOGCATEGORY_DATABASE,
1787                                               DNS_LOGMODULE_CACHE,
1788                                               ISC_LOG_WARNING,
1789                                               "decrement_reference: "
1790                                               "dns_rbt_deletenode: %s",
1791                                               isc_result_totext(result));
1792                         }
1793                 }
1794         } else if (dns_rbtnode_refcurrent(node) == 0) {
1795                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1796                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1797         } else
1798                 no_reference = ISC_FALSE;
1799
1800         /* Restore the lock? */
1801         if (nlock == isc_rwlocktype_read)
1802                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1803
1804         /*
1805          * Relock a read lock, or unlock the write lock if no lock was held.
1806          */
1807         if (tlock == isc_rwlocktype_none)
1808                 if (write_locked)
1809                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1810
1811         if (tlock == isc_rwlocktype_read)
1812                 if (write_locked)
1813                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1814
1815         return (no_reference);
1816 }
1817
1818 /*
1819  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1820  * case, the number of iteration is the number of tree levels, which is at
1821  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1822  * should be much smaller (only a few times), and even the worst case would be
1823  * acceptable for a single event.
1824  */
1825 static void
1826 prune_tree(isc_task_t *task, isc_event_t *event) {
1827         dns_rbtdb_t *rbtdb = event->ev_sender;
1828         dns_rbtnode_t *node = event->ev_arg;
1829         dns_rbtnode_t *parent;
1830         unsigned int locknum;
1831
1832         UNUSED(task);
1833
1834         isc_event_free(&event);
1835
1836         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1837         locknum = node->locknum;
1838         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1839         do {
1840                 parent = node->parent;
1841                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1842                                     isc_rwlocktype_write, ISC_TRUE);
1843
1844                 if (parent != NULL && parent->down == NULL) {
1845                         /*
1846                          * node was the only down child of the parent and has
1847                          * just been removed.  We'll then need to examine the
1848                          * parent.  Keep the lock if possible; otherwise,
1849                          * release the old lock and acquire one for the parent.
1850                          */
1851                         if (parent->locknum != locknum) {
1852                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1853                                             isc_rwlocktype_write);
1854                                 locknum = parent->locknum;
1855                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1856                                           isc_rwlocktype_write);
1857                         }
1858
1859                         /*
1860                          * We need to gain a reference to the node before
1861                          * decrementing it in the next iteration.  In addition,
1862                          * if the node is in the dead-nodes list, extract it
1863                          * from the list beforehand as we do in
1864                          * reactivate_node().
1865                          */
1866                         new_reference(rbtdb, parent);
1867                         if (ISC_LINK_LINKED(parent, deadlink)) {
1868                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1869                                                 parent, deadlink);
1870                         }
1871                 } else
1872                         parent = NULL;
1873
1874                 node = parent;
1875         } while (node != NULL);
1876         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1877         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1878
1879         detach((dns_db_t **)&rbtdb);
1880 }
1881
1882 static inline void
1883 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1884                    rbtdb_changedlist_t *cleanup_list)
1885 {
1886         /*
1887          * Caller must be holding the database lock.
1888          */
1889
1890         rbtdb->least_serial = version->serial;
1891         *cleanup_list = version->changed_list;
1892         ISC_LIST_INIT(version->changed_list);
1893 }
1894
1895 static inline void
1896 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1897         rbtdb_changed_t *changed, *next_changed;
1898
1899         /*
1900          * If the changed record is dirty, then
1901          * an update created multiple versions of
1902          * a given rdataset.  We keep this list
1903          * until we're the least open version, at
1904          * which point it's safe to get rid of any
1905          * older versions.
1906          *
1907          * If the changed record isn't dirty, then
1908          * we don't need it anymore since we're
1909          * committing and not rolling back.
1910          *
1911          * The caller must be holding the database lock.
1912          */
1913         for (changed = HEAD(version->changed_list);
1914              changed != NULL;
1915              changed = next_changed) {
1916                 next_changed = NEXT(changed, link);
1917                 if (!changed->dirty) {
1918                         UNLINK(version->changed_list,
1919                                changed, link);
1920                         APPEND(*cleanup_list,
1921                                changed, link);
1922                 }
1923         }
1924 }
1925
1926 static void
1927 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1928         dns_rdataset_t keyset;
1929         dns_rdataset_t nsecset, signsecset;
1930         dns_rdata_t rdata = DNS_RDATA_INIT;
1931         isc_boolean_t haszonekey = ISC_FALSE;
1932         isc_boolean_t hasnsec = ISC_FALSE;
1933         isc_boolean_t hasoptbit = ISC_FALSE;
1934         isc_boolean_t nsec3createflag = ISC_FALSE;
1935         isc_result_t result;
1936
1937         dns_rdataset_init(&keyset);
1938         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1939                                      0, 0, &keyset, NULL);
1940         if (result == ISC_R_SUCCESS) {
1941                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1942                 result = dns_rdataset_first(&keyset);
1943                 while (result == ISC_R_SUCCESS) {
1944                         dns_rdataset_current(&keyset, &keyrdata);
1945                         if (dns_zonekey_iszonekey(&keyrdata)) {
1946                                 haszonekey = ISC_TRUE;
1947                                 break;
1948                         }
1949                         result = dns_rdataset_next(&keyset);
1950                 }
1951                 dns_rdataset_disassociate(&keyset);
1952         }
1953         if (!haszonekey) {
1954                 version->secure = dns_db_insecure;
1955                 version->havensec3 = ISC_FALSE;
1956                 return;
1957         }
1958
1959         dns_rdataset_init(&nsecset);
1960         dns_rdataset_init(&signsecset);
1961         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1962                                      0, 0, &nsecset, &signsecset);
1963         if (result == ISC_R_SUCCESS) {
1964                 if (dns_rdataset_isassociated(&signsecset)) {
1965                         hasnsec = ISC_TRUE;
1966                         result = dns_rdataset_first(&nsecset);
1967                         if (result == ISC_R_SUCCESS) {
1968                                 dns_rdataset_current(&nsecset, &rdata);
1969                                 hasoptbit = dns_nsec_typepresent(&rdata,
1970                                                              dns_rdatatype_opt);
1971                         }
1972                         dns_rdataset_disassociate(&signsecset);
1973                 }
1974                 dns_rdataset_disassociate(&nsecset);
1975         }
1976
1977         setnsec3parameters(db, version, &nsec3createflag);
1978
1979         /*
1980          * Do we have a valid NSEC/NSEC3 chain?
1981          */
1982         if (version->havensec3 || (hasnsec && !hasoptbit))
1983                 version->secure = dns_db_secure;
1984         /*
1985          * Do we have a NSEC/NSEC3 chain under creation?
1986          */
1987         else if (hasoptbit || nsec3createflag)
1988                 version->secure = dns_db_partial;
1989         else
1990                 version->secure = dns_db_insecure;
1991 }
1992
1993 /*%<
1994  * Walk the origin node looking for NSEC3PARAM records.
1995  * Cache the nsec3 parameters.
1996  */
1997 static void
1998 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1999                    isc_boolean_t *nsec3createflag)
2000 {
2001         dns_rbtnode_t *node;
2002         dns_rdata_nsec3param_t nsec3param;
2003         dns_rdata_t rdata = DNS_RDATA_INIT;
2004         isc_region_t region;
2005         isc_result_t result;
2006         rdatasetheader_t *header, *header_next;
2007         unsigned char *raw;             /* RDATASLAB */
2008         unsigned int count, length;
2009         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2010
2011         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2012         version->havensec3 = ISC_FALSE;
2013         node = rbtdb->origin_node;
2014         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2015                   isc_rwlocktype_read);
2016         for (header = node->data;
2017              header != NULL;
2018              header = header_next) {
2019                 header_next = header->next;
2020                 do {
2021                         if (header->serial <= version->serial &&
2022                             !IGNORE(header)) {
2023                                 if (NONEXISTENT(header))
2024                                         header = NULL;
2025                                 break;
2026                         } else
2027                                 header = header->down;
2028                 } while (header != NULL);
2029
2030                 if (header != NULL &&
2031                     header->type == dns_rdatatype_nsec3param) {
2032                         /*
2033                          * Find A NSEC3PARAM with a supported algorithm.
2034                          */
2035                         raw = (unsigned char *)header + sizeof(*header);
2036                         count = raw[0] * 256 + raw[1]; /* count */
2037 #if DNS_RDATASET_FIXED
2038                         raw += count * 4 + 2;
2039 #else
2040                         raw += 2;
2041 #endif
2042                         while (count-- > 0U) {
2043                                 length = raw[0] * 256 + raw[1];
2044 #if DNS_RDATASET_FIXED
2045                                 raw += 4;
2046 #else
2047                                 raw += 2;
2048 #endif
2049                                 region.base = raw;
2050                                 region.length = length;
2051                                 raw += length;
2052                                 dns_rdata_fromregion(&rdata,
2053                                                      rbtdb->common.rdclass,
2054                                                      dns_rdatatype_nsec3param,
2055                                                      &region);
2056                                 result = dns_rdata_tostruct(&rdata,
2057                                                             &nsec3param,
2058                                                             NULL);
2059                                 INSIST(result == ISC_R_SUCCESS);
2060                                 dns_rdata_reset(&rdata);
2061
2062                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2063                                     !dns_nsec3_supportedhash(nsec3param.hash))
2064                                         continue;
2065
2066 #ifdef RFC5155_STRICT
2067                                 if (nsec3param.flags != 0)
2068                                         continue;
2069 #else
2070                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2071                                     != 0)
2072                                         *nsec3createflag = ISC_TRUE;
2073                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2074                                     != 0)
2075                                         continue;
2076 #endif
2077
2078                                 INSIST(nsec3param.salt_length <=
2079                                        sizeof(version->salt));
2080                                 memcpy(version->salt, nsec3param.salt,
2081                                        nsec3param.salt_length);
2082                                 version->hash = nsec3param.hash;
2083                                 version->salt_length = nsec3param.salt_length;
2084                                 version->iterations = nsec3param.iterations;
2085                                 version->flags = nsec3param.flags;
2086                                 version->havensec3 = ISC_TRUE;
2087                                 /*
2088                                  * Look for a better algorithm than the
2089                                  * unknown test algorithm.
2090                                  */
2091                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2092                                         goto unlock;
2093                         }
2094                 }
2095         }
2096  unlock:
2097         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2098                     isc_rwlocktype_read);
2099         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2100 }
2101
2102 static void
2103 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2104         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2105         rbtdb_version_t *version, *cleanup_version, *least_greater;
2106         isc_boolean_t rollback = ISC_FALSE;
2107         rbtdb_changedlist_t cleanup_list;
2108         rdatasetheaderlist_t resigned_list;
2109         rbtdb_changed_t *changed, *next_changed;
2110         rbtdb_serial_t serial, least_serial;
2111         dns_rbtnode_t *rbtnode;
2112         unsigned int refs;
2113         rdatasetheader_t *header;
2114         isc_boolean_t writer;
2115
2116         REQUIRE(VALID_RBTDB(rbtdb));
2117         version = (rbtdb_version_t *)*versionp;
2118
2119         cleanup_version = NULL;
2120         ISC_LIST_INIT(cleanup_list);
2121         ISC_LIST_INIT(resigned_list);
2122
2123         isc_refcount_decrement(&version->references, &refs);
2124         if (refs > 0) {         /* typical and easy case first */
2125                 if (commit) {
2126                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2127                         INSIST(!version->writer);
2128                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2129                 }
2130                 goto end;
2131         }
2132
2133         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2134         serial = version->serial;
2135         writer = version->writer;
2136         if (version->writer) {
2137                 if (commit) {
2138                         unsigned cur_ref;
2139                         rbtdb_version_t *cur_version;
2140
2141                         INSIST(version->commit_ok);
2142                         INSIST(version == rbtdb->future_version);
2143                         /*
2144                          * The current version is going to be replaced.
2145                          * Release the (likely last) reference to it from the
2146                          * DB itself and unlink it from the open list.
2147                          */
2148                         cur_version = rbtdb->current_version;
2149                         isc_refcount_decrement(&cur_version->references,
2150                                                &cur_ref);
2151                         if (cur_ref == 0) {
2152                                 if (cur_version->serial == rbtdb->least_serial)
2153                                         INSIST(EMPTY(cur_version->changed_list));
2154                                 UNLINK(rbtdb->open_versions,
2155                                        cur_version, link);
2156                         }
2157                         if (EMPTY(rbtdb->open_versions)) {
2158                                 /*
2159                                  * We're going to become the least open
2160                                  * version.
2161                                  */
2162                                 make_least_version(rbtdb, version,
2163                                                    &cleanup_list);
2164                         } else {
2165                                 /*
2166                                  * Some other open version is the
2167                                  * least version.  We can't cleanup
2168                                  * records that were changed in this
2169                                  * version because the older versions
2170                                  * may still be in use by an open
2171                                  * version.
2172                                  *
2173                                  * We can, however, discard the
2174                                  * changed records for things that
2175                                  * we've added that didn't exist in
2176                                  * prior versions.
2177                                  */
2178                                 cleanup_nondirty(version, &cleanup_list);
2179                         }
2180                         /*
2181                          * If the (soon to be former) current version
2182                          * isn't being used by anyone, we can clean
2183                          * it up.
2184                          */
2185                         if (cur_ref == 0) {
2186                                 cleanup_version = cur_version;
2187                                 APPENDLIST(version->changed_list,
2188                                            cleanup_version->changed_list,
2189                                            link);
2190                         }
2191                         /*
2192                          * Become the current version.
2193                          */
2194                         version->writer = ISC_FALSE;
2195                         rbtdb->current_version = version;
2196                         rbtdb->current_serial = version->serial;
2197                         rbtdb->future_version = NULL;
2198
2199                         /*
2200                          * Keep the current version in the open list, and
2201                          * gain a reference for the DB itself (see the DB
2202                          * creation function below).  This must be the only
2203                          * case where we need to increment the counter from
2204                          * zero and need to use isc_refcount_increment0().
2205                          */
2206                         isc_refcount_increment0(&version->references,
2207                                                 &cur_ref);
2208                         INSIST(cur_ref == 1);
2209                         PREPEND(rbtdb->open_versions,
2210                                 rbtdb->current_version, link);
2211                         resigned_list = version->resigned_list;
2212                         ISC_LIST_INIT(version->resigned_list);
2213                 } else {
2214                         /*
2215                          * We're rolling back this transaction.
2216                          */
2217                         cleanup_list = version->changed_list;
2218                         ISC_LIST_INIT(version->changed_list);
2219                         resigned_list = version->resigned_list;
2220                         ISC_LIST_INIT(version->resigned_list);
2221                         rollback = ISC_TRUE;
2222                         cleanup_version = version;
2223                         rbtdb->future_version = NULL;
2224                 }
2225         } else {
2226                 if (version != rbtdb->current_version) {
2227                         /*
2228                          * There are no external or internal references
2229                          * to this version and it can be cleaned up.
2230                          */
2231                         cleanup_version = version;
2232
2233                         /*
2234                          * Find the version with the least serial
2235                          * number greater than ours.
2236                          */
2237                         least_greater = PREV(version, link);
2238                         if (least_greater == NULL)
2239                                 least_greater = rbtdb->current_version;
2240
2241                         INSIST(version->serial < least_greater->serial);
2242                         /*
2243                          * Is this the least open version?
2244                          */
2245                         if (version->serial == rbtdb->least_serial) {
2246                                 /*
2247                                  * Yes.  Install the new least open
2248                                  * version.
2249                                  */
2250                                 make_least_version(rbtdb,
2251                                                    least_greater,
2252                                                    &cleanup_list);
2253                         } else {
2254                                 /*
2255                                  * Add any unexecuted cleanups to
2256                                  * those of the least greater version.
2257                                  */
2258                                 APPENDLIST(least_greater->changed_list,
2259                                            version->changed_list,
2260                                            link);
2261                         }
2262                 } else if (version->serial == rbtdb->least_serial)
2263                         INSIST(EMPTY(version->changed_list));
2264                 UNLINK(rbtdb->open_versions, version, link);
2265         }
2266         least_serial = rbtdb->least_serial;
2267         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2268
2269         /*
2270          * Update the zone's secure status.
2271          */
2272         if (writer && commit && !IS_CACHE(rbtdb))
2273                 iszonesecure(db, version, rbtdb->origin_node);
2274
2275         if (cleanup_version != NULL) {
2276                 INSIST(EMPTY(cleanup_version->changed_list));
2277                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2278                             sizeof(*cleanup_version));
2279         }
2280
2281         /*
2282          * Commit/rollback re-signed headers.
2283          */
2284         for (header = HEAD(resigned_list);
2285              header != NULL;
2286              header = HEAD(resigned_list)) {
2287                 ISC_LIST_UNLINK(resigned_list, header, lru_link);
2288                 if (rollback) {
2289                         nodelock_t *lock;
2290                         lock = &rbtdb->node_locks[header->node->locknum].lock;
2291                         NODE_LOCK(lock, isc_rwlocktype_write);
2292                         resign_insert(rbtdb, header->node->locknum, header);
2293                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2294                 }
2295                 decrement_reference(rbtdb, header->node, least_serial,
2296                                     isc_rwlocktype_write, isc_rwlocktype_none,
2297                                     ISC_FALSE);
2298         }
2299
2300         if (!EMPTY(cleanup_list)) {
2301                 /*
2302                  * We acquire a tree write lock here in order to make sure
2303                  * that stale nodes will be removed in decrement_reference().
2304                  * If we didn't have the lock, those nodes could miss the
2305                  * chance to be removed until the server stops.  The write lock
2306                  * is expensive, but this event should be rare enough to justify
2307                  * the cost.
2308                  */
2309                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2310                 for (changed = HEAD(cleanup_list);
2311                      changed != NULL;
2312                      changed = next_changed) {
2313                         nodelock_t *lock;
2314
2315                         next_changed = NEXT(changed, link);
2316                         rbtnode = changed->node;
2317                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2318
2319                         NODE_LOCK(lock, isc_rwlocktype_write);
2320                         /*
2321                          * This is a good opportunity to purge any dead nodes,
2322                          * so use it.
2323                          */
2324                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2325
2326                         if (rollback)
2327                                 rollback_node(rbtnode, serial);
2328                         decrement_reference(rbtdb, rbtnode, least_serial,
2329                                             isc_rwlocktype_write,
2330                                             isc_rwlocktype_write, ISC_FALSE);
2331
2332                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2333
2334                         isc_mem_put(rbtdb->common.mctx, changed,
2335                                     sizeof(*changed));
2336                 }
2337                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2338         }
2339
2340  end:
2341         *versionp = NULL;
2342 }
2343
2344 /*
2345  * Add the necessary magic for the wildcard name 'name'
2346  * to be found in 'rbtdb'.
2347  *
2348  * In order for wildcard matching to work correctly in
2349  * zone_find(), we must ensure that a node for the wildcarding
2350  * level exists in the database, and has its 'find_callback'
2351  * and 'wild' bits set.
2352  *
2353  * E.g. if the wildcard name is "*.sub.example." then we
2354  * must ensure that "sub.example." exists and is marked as
2355  * a wildcard level.
2356  */
2357 static isc_result_t
2358 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2359         isc_result_t result;
2360         dns_name_t foundname;
2361         dns_offsets_t offsets;
2362         unsigned int n;
2363         dns_rbtnode_t *node = NULL;
2364
2365         dns_name_init(&foundname, offsets);
2366         n = dns_name_countlabels(name);
2367         INSIST(n >= 2);
2368         n--;
2369         dns_name_getlabelsequence(name, 1, n, &foundname);
2370         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2371         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2372                 return (result);
2373         node->nsec3 = 0;
2374         node->find_callback = 1;
2375         node->wild = 1;
2376         return (ISC_R_SUCCESS);
2377 }
2378
2379 static isc_result_t
2380 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2381         isc_result_t result;
2382         dns_name_t foundname;
2383         dns_offsets_t offsets;
2384         unsigned int n, l, i;
2385
2386         dns_name_init(&foundname, offsets);
2387         n = dns_name_countlabels(name);
2388         l = dns_name_countlabels(&rbtdb->common.origin);
2389         i = l + 1;
2390         while (i < n) {
2391                 dns_rbtnode_t *node = NULL;     /* dummy */
2392                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2393                 if (dns_name_iswildcard(&foundname)) {
2394                         result = add_wildcard_magic(rbtdb, &foundname);
2395                         if (result != ISC_R_SUCCESS)
2396                                 return (result);
2397                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2398                                                  &node);
2399                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2400                                 return (result);
2401                         node->nsec3 = 0;
2402                 }
2403                 i++;
2404         }
2405         return (ISC_R_SUCCESS);
2406 }
2407
2408 static isc_result_t
2409 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2410          dns_dbnode_t **nodep)
2411 {
2412         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2413         dns_rbtnode_t *node = NULL;
2414         dns_name_t nodename;
2415         isc_result_t result;
2416         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2417
2418         REQUIRE(VALID_RBTDB(rbtdb));
2419
2420         dns_name_init(&nodename, NULL);
2421         RWLOCK(&rbtdb->tree_lock, locktype);
2422         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2423                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2424         if (result != ISC_R_SUCCESS) {
2425                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2426                 if (!create) {
2427                         if (result == DNS_R_PARTIALMATCH)
2428                                 result = ISC_R_NOTFOUND;
2429                         return (result);
2430                 }
2431                 /*
2432                  * It would be nice to try to upgrade the lock instead of
2433                  * unlocking then relocking.
2434                  */
2435                 locktype = isc_rwlocktype_write;
2436                 RWLOCK(&rbtdb->tree_lock, locktype);
2437                 node = NULL;
2438                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2439                 if (result == ISC_R_SUCCESS) {
2440                         dns_rbt_namefromnode(node, &nodename);
2441 #ifdef DNS_RBT_USEHASH
2442                         node->locknum = node->hashval % rbtdb->node_lock_count;
2443 #else
2444                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2445                                 rbtdb->node_lock_count;
2446 #endif
2447                         node->nsec3 = 0;
2448                         add_empty_wildcards(rbtdb, name);
2449
2450                         if (dns_name_iswildcard(name)) {
2451                                 result = add_wildcard_magic(rbtdb, name);
2452                                 if (result != ISC_R_SUCCESS) {
2453                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2454                                         return (result);
2455                                 }
2456                         }
2457                 } else if (result != ISC_R_EXISTS) {
2458                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2459                         return (result);
2460                 }
2461         }
2462         reactivate_node(rbtdb, node, locktype);
2463         RWUNLOCK(&rbtdb->tree_lock, locktype);
2464
2465         *nodep = (dns_dbnode_t *)node;
2466
2467         return (ISC_R_SUCCESS);
2468 }
2469
2470 static isc_result_t
2471 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2472               dns_dbnode_t **nodep)
2473 {
2474         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2475         dns_rbtnode_t *node = NULL;
2476         dns_name_t nodename;
2477         isc_result_t result;
2478         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2479
2480         REQUIRE(VALID_RBTDB(rbtdb));
2481
2482         dns_name_init(&nodename, NULL);
2483         RWLOCK(&rbtdb->tree_lock, locktype);
2484         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2485                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2486         if (result != ISC_R_SUCCESS) {
2487                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2488                 if (!create) {
2489                         if (result == DNS_R_PARTIALMATCH)
2490                                 result = ISC_R_NOTFOUND;
2491                         return (result);
2492                 }
2493                 /*
2494                  * It would be nice to try to upgrade the lock instead of
2495                  * unlocking then relocking.
2496                  */
2497                 locktype = isc_rwlocktype_write;
2498                 RWLOCK(&rbtdb->tree_lock, locktype);
2499                 node = NULL;
2500                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2501                 if (result == ISC_R_SUCCESS) {
2502                         dns_rbt_namefromnode(node, &nodename);
2503 #ifdef DNS_RBT_USEHASH
2504                         node->locknum = node->hashval % rbtdb->node_lock_count;
2505 #else
2506                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2507                                 rbtdb->node_lock_count;
2508 #endif
2509                         node->nsec3 = 1U;
2510                 } else if (result != ISC_R_EXISTS) {
2511                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2512                         return (result);
2513                 }
2514         } else
2515                 INSIST(node->nsec3);
2516         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2517         new_reference(rbtdb, node);
2518         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2519         RWUNLOCK(&rbtdb->tree_lock, locktype);
2520
2521         *nodep = (dns_dbnode_t *)node;
2522
2523         return (ISC_R_SUCCESS);
2524 }
2525
2526 static isc_result_t
2527 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2528         rbtdb_search_t *search = arg;
2529         rdatasetheader_t *header, *header_next;
2530         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2531         rdatasetheader_t *found;
2532         isc_result_t result;
2533         dns_rbtnode_t *onode;
2534
2535         /*
2536          * We only want to remember the topmost zone cut, since it's the one
2537          * that counts, so we'll just continue if we've already found a
2538          * zonecut.
2539          */
2540         if (search->zonecut != NULL)
2541                 return (DNS_R_CONTINUE);
2542
2543         found = NULL;
2544         result = DNS_R_CONTINUE;
2545         onode = search->rbtdb->origin_node;
2546
2547         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2548                   isc_rwlocktype_read);
2549
2550         /*
2551          * Look for an NS or DNAME rdataset active in our version.
2552          */
2553         ns_header = NULL;
2554         dname_header = NULL;
2555         sigdname_header = NULL;
2556         for (header = node->data; header != NULL; header = header_next) {
2557                 header_next = header->next;
2558                 if (header->type == dns_rdatatype_ns ||
2559                     header->type == dns_rdatatype_dname ||
2560                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2561                         do {
2562                                 if (header->serial <= search->serial &&
2563                                     !IGNORE(header)) {
2564                                         /*
2565                                          * Is this a "this rdataset doesn't
2566                                          * exist" record?
2567                                          */
2568                                         if (NONEXISTENT(header))
2569                                                 header = NULL;
2570                                         break;
2571                                 } else
2572                                         header = header->down;
2573                         } while (header != NULL);
2574                         if (header != NULL) {
2575                                 if (header->type == dns_rdatatype_dname)
2576                                         dname_header = header;
2577                                 else if (header->type ==
2578                                            RBTDB_RDATATYPE_SIGDNAME)
2579                                         sigdname_header = header;
2580                                 else if (node != onode ||
2581                                          IS_STUB(search->rbtdb)) {
2582                                         /*
2583                                          * We've found an NS rdataset that
2584                                          * isn't at the origin node.  We check
2585                                          * that they're not at the origin node,
2586                                          * because otherwise we'd erroneously
2587                                          * treat the zone top as if it were
2588                                          * a delegation.
2589                                          */
2590                                         ns_header = header;
2591                                 }
2592                         }
2593                 }
2594         }
2595
2596         /*
2597          * Did we find anything?
2598          */
2599         if (dname_header != NULL) {
2600                 /*
2601                  * Note that DNAME has precedence over NS if both exist.
2602                  */
2603                 found = dname_header;
2604                 search->zonecut_sigrdataset = sigdname_header;
2605         } else if (ns_header != NULL) {
2606                 found = ns_header;
2607                 search->zonecut_sigrdataset = NULL;
2608         }
2609
2610         if (found != NULL) {
2611                 /*
2612                  * We increment the reference count on node to ensure that
2613                  * search->zonecut_rdataset will still be valid later.
2614                  */
2615                 new_reference(search->rbtdb, node);
2616                 search->zonecut = node;
2617                 search->zonecut_rdataset = found;
2618                 search->need_cleanup = ISC_TRUE;
2619                 /*
2620                  * Since we've found a zonecut, anything beneath it is
2621                  * glue and is not subject to wildcard matching, so we
2622                  * may clear search->wild.
2623                  */
2624                 search->wild = ISC_FALSE;
2625                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2626                         /*
2627                          * If the caller does not want to find glue, then
2628                          * this is the best answer and the search should
2629                          * stop now.
2630                          */
2631                         result = DNS_R_PARTIALMATCH;
2632                 } else {
2633                         dns_name_t *zcname;
2634
2635                         /*
2636                          * The search will continue beneath the zone cut.
2637                          * This may or may not be the best match.  In case it
2638                          * is, we need to remember the node name.
2639                          */
2640                         zcname = dns_fixedname_name(&search->zonecut_name);
2641                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2642                                       ISC_R_SUCCESS);
2643                         search->copy_name = ISC_TRUE;
2644                 }
2645         } else {
2646                 /*
2647                  * There is no zonecut at this node which is active in this
2648                  * version.
2649                  *
2650                  * If this is a "wild" node and the caller hasn't disabled
2651                  * wildcard matching, remember that we've seen a wild node
2652                  * in case we need to go searching for wildcard matches
2653                  * later on.
2654                  */
2655                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2656                         search->wild = ISC_TRUE;
2657         }
2658
2659         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2660                     isc_rwlocktype_read);
2661
2662         return (result);
2663 }
2664
2665 static inline void
2666 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2667               rdatasetheader_t *header, isc_stdtime_t now,
2668               dns_rdataset_t *rdataset)
2669 {
2670         unsigned char *raw;     /* RDATASLAB */
2671
2672         /*
2673          * Caller must be holding the node reader lock.
2674          * XXXJT: technically, we need a writer lock, since we'll increment
2675          * the header count below.  However, since the actual counter value
2676          * doesn't matter, we prioritize performance here.  (We may want to
2677          * use atomic increment when available).
2678          */
2679
2680         if (rdataset == NULL)
2681                 return;
2682
2683         new_reference(rbtdb, node);
2684
2685         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2686
2687         rdataset->methods = &rdataset_methods;
2688         rdataset->rdclass = rbtdb->common.rdclass;
2689         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2690         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2691         rdataset->ttl = header->rdh_ttl - now;
2692         rdataset->trust = header->trust;
2693         if (NXDOMAIN(header))
2694                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2695         if (OPTOUT(header))
2696                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2697         rdataset->private1 = rbtdb;
2698         rdataset->private2 = node;
2699         raw = (unsigned char *)header + sizeof(*header);
2700         rdataset->private3 = raw;
2701         rdataset->count = header->count++;
2702         if (rdataset->count == ISC_UINT32_MAX)
2703                 rdataset->count = 0;
2704
2705         /*
2706          * Reset iterator state.
2707          */
2708         rdataset->privateuint4 = 0;
2709         rdataset->private5 = NULL;
2710
2711         /*
2712          * Add noqname proof.
2713          */
2714         rdataset->private6 = header->noqname;
2715         if (rdataset->private6 != NULL)
2716                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2717         rdataset->private7 = header->closest;
2718         if (rdataset->private7 != NULL)
2719                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2720
2721         /*
2722          * Copy out re-signing information.
2723          */
2724         if (RESIGN(header)) {
2725                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2726                 rdataset->resign = header->resign;
2727         } else
2728                 rdataset->resign = 0;
2729 }
2730
2731 static inline isc_result_t
2732 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2733                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2734                  dns_rdataset_t *sigrdataset)
2735 {
2736         isc_result_t result;
2737         dns_name_t *zcname;
2738         rbtdb_rdatatype_t type;
2739         dns_rbtnode_t *node;
2740
2741         /*
2742          * The caller MUST NOT be holding any node locks.
2743          */
2744
2745         node = search->zonecut;
2746         type = search->zonecut_rdataset->type;
2747
2748         /*
2749          * If we have to set foundname, we do it before anything else.
2750          * If we were to set foundname after we had set nodep or bound the
2751          * rdataset, then we'd have to undo that work if dns_name_copy()
2752          * failed.  By setting foundname first, there's nothing to undo if
2753          * we have trouble.
2754          */
2755         if (foundname != NULL && search->copy_name) {
2756                 zcname = dns_fixedname_name(&search->zonecut_name);
2757                 result = dns_name_copy(zcname, foundname, NULL);
2758                 if (result != ISC_R_SUCCESS)
2759                         return (result);
2760         }
2761         if (nodep != NULL) {
2762                 /*
2763                  * Note that we don't have to increment the node's reference
2764                  * count here because we're going to use the reference we
2765                  * already have in the search block.
2766                  */
2767                 *nodep = node;
2768                 search->need_cleanup = ISC_FALSE;
2769         }
2770         if (rdataset != NULL) {
2771                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2772                           isc_rwlocktype_read);
2773                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2774                               search->now, rdataset);
2775                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2776                         bind_rdataset(search->rbtdb, node,
2777                                       search->zonecut_sigrdataset,
2778                                       search->now, sigrdataset);
2779                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2780                             isc_rwlocktype_read);
2781         }
2782
2783         if (type == dns_rdatatype_dname)
2784                 return (DNS_R_DNAME);
2785         return (DNS_R_DELEGATION);
2786 }
2787
2788 static inline isc_boolean_t
2789 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2790            dns_rbtnode_t *node)
2791 {
2792         unsigned char *raw;     /* RDATASLAB */
2793         unsigned int count, size;
2794         dns_name_t ns_name;
2795         isc_boolean_t valid = ISC_FALSE;
2796         dns_offsets_t offsets;
2797         isc_region_t region;
2798         rdatasetheader_t *header;
2799
2800         /*
2801          * No additional locking is required.
2802          */
2803
2804         /*
2805          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2806          * if it occurs at a zone cut, but is not valid below it.
2807          */
2808         if (type == dns_rdatatype_ns) {
2809                 if (node != search->zonecut) {
2810                         return (ISC_FALSE);
2811                 }
2812         } else if (type != dns_rdatatype_a &&
2813                    type != dns_rdatatype_aaaa &&
2814                    type != dns_rdatatype_a6) {
2815                 return (ISC_FALSE);
2816         }
2817
2818         header = search->zonecut_rdataset;
2819         raw = (unsigned char *)header + sizeof(*header);
2820         count = raw[0] * 256 + raw[1];
2821 #if DNS_RDATASET_FIXED
2822         raw += 2 + (4 * count);
2823 #else
2824         raw += 2;
2825 #endif
2826
2827         while (count > 0) {
2828                 count--;
2829                 size = raw[0] * 256 + raw[1];
2830 #if DNS_RDATASET_FIXED
2831                 raw += 4;
2832 #else
2833                 raw += 2;
2834 #endif
2835                 region.base = raw;
2836                 region.length = size;
2837                 raw += size;
2838                 /*
2839                  * XXX Until we have rdata structures, we have no choice but
2840                  * to directly access the rdata format.
2841                  */
2842                 dns_name_init(&ns_name, offsets);
2843                 dns_name_fromregion(&ns_name, &region);
2844                 if (dns_name_compare(&ns_name, name) == 0) {
2845                         valid = ISC_TRUE;
2846                         break;
2847                 }
2848         }
2849
2850         return (valid);
2851 }
2852
2853 static inline isc_boolean_t
2854 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2855             dns_name_t *name)
2856 {
2857         dns_fixedname_t fnext;
2858         dns_fixedname_t forigin;
2859         dns_name_t *next;
2860         dns_name_t *origin;
2861         dns_name_t prefix;
2862         dns_rbtdb_t *rbtdb;
2863         dns_rbtnode_t *node;
2864         isc_result_t result;
2865         isc_boolean_t answer = ISC_FALSE;
2866         rdatasetheader_t *header;
2867
2868         rbtdb = search->rbtdb;
2869
2870         dns_name_init(&prefix, NULL);
2871         dns_fixedname_init(&fnext);
2872         next = dns_fixedname_name(&fnext);
2873         dns_fixedname_init(&forigin);
2874         origin = dns_fixedname_name(&forigin);
2875
2876         result = dns_rbtnodechain_next(chain, NULL, NULL);
2877         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2878                 node = NULL;
2879                 result = dns_rbtnodechain_current(chain, &prefix,
2880                                                   origin, &node);
2881                 if (result != ISC_R_SUCCESS)
2882                         break;
2883                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2884                           isc_rwlocktype_read);
2885                 for (header = node->data;
2886                      header != NULL;
2887                      header = header->next) {
2888                         if (header->serial <= search->serial &&
2889                             !IGNORE(header) && EXISTS(header))
2890                                 break;
2891                 }
2892                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2893                             isc_rwlocktype_read);
2894                 if (header != NULL)
2895                         break;
2896                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2897         }
2898         if (result == ISC_R_SUCCESS)
2899                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2900         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2901                 answer = ISC_TRUE;
2902         return (answer);
2903 }
2904
2905 static inline isc_boolean_t
2906 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2907         dns_fixedname_t fnext;
2908         dns_fixedname_t forigin;
2909         dns_fixedname_t fprev;
2910         dns_name_t *next;
2911         dns_name_t *origin;
2912         dns_name_t *prev;
2913         dns_name_t name;
2914         dns_name_t rname;
2915         dns_name_t tname;
2916         dns_rbtdb_t *rbtdb;
2917         dns_rbtnode_t *node;
2918         dns_rbtnodechain_t chain;
2919         isc_boolean_t check_next = ISC_TRUE;
2920         isc_boolean_t check_prev = ISC_TRUE;
2921         isc_boolean_t answer = ISC_FALSE;
2922         isc_result_t result;
2923         rdatasetheader_t *header;
2924         unsigned int n;
2925
2926         rbtdb = search->rbtdb;
2927
2928         dns_name_init(&name, NULL);
2929         dns_name_init(&tname, NULL);
2930         dns_name_init(&rname, NULL);
2931         dns_fixedname_init(&fnext);
2932         next = dns_fixedname_name(&fnext);
2933         dns_fixedname_init(&fprev);
2934         prev = dns_fixedname_name(&fprev);
2935         dns_fixedname_init(&forigin);
2936         origin = dns_fixedname_name(&forigin);
2937
2938         /*
2939          * Find if qname is at or below a empty node.
2940          * Use our own copy of the chain.
2941          */
2942
2943         chain = search->chain;
2944         do {
2945                 node = NULL;
2946                 result = dns_rbtnodechain_current(&chain, &name,
2947                                                   origin, &node);
2948                 if (result != ISC_R_SUCCESS)
2949                         break;
2950                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2951                           isc_rwlocktype_read);
2952                 for (header = node->data;
2953                      header != NULL;
2954                      header = header->next) {
2955                         if (header->serial <= search->serial &&
2956                             !IGNORE(header) && EXISTS(header))
2957                                 break;
2958                 }
2959                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2960                             isc_rwlocktype_read);
2961                 if (header != NULL)
2962                         break;
2963                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2964         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2965         if (result == ISC_R_SUCCESS)
2966                 result = dns_name_concatenate(&name, origin, prev, NULL);
2967         if (result != ISC_R_SUCCESS)
2968                 check_prev = ISC_FALSE;
2969
2970         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2971         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2972                 node = NULL;
2973                 result = dns_rbtnodechain_current(&chain, &name,
2974                                                   origin, &node);
2975                 if (result != ISC_R_SUCCESS)
2976                         break;
2977                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2978                           isc_rwlocktype_read);
2979                 for (header = node->data;
2980                      header != NULL;
2981                      header = header->next) {
2982                         if (header->serial <= search->serial &&
2983                             !IGNORE(header) && EXISTS(header))
2984                                 break;
2985                 }
2986                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2987                             isc_rwlocktype_read);
2988                 if (header != NULL)
2989                         break;
2990                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2991         }
2992         if (result == ISC_R_SUCCESS)
2993                 result = dns_name_concatenate(&name, origin, next, NULL);
2994         if (result != ISC_R_SUCCESS)
2995                 check_next = ISC_FALSE;
2996
2997         dns_name_clone(qname, &rname);
2998
2999         /*
3000          * Remove the wildcard label to find the terminal name.
3001          */
3002         n = dns_name_countlabels(wname);
3003         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3004
3005         do {
3006                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3007                     (check_next && dns_name_issubdomain(next, &rname))) {
3008                         answer = ISC_TRUE;
3009                         break;
3010                 }
3011                 /*
3012                  * Remove the left hand label.
3013                  */
3014                 n = dns_name_countlabels(&rname);
3015                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3016         } while (!dns_name_equal(&rname, &tname));
3017         return (answer);
3018 }
3019
3020 static inline isc_result_t
3021 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3022               dns_name_t *qname)
3023 {
3024         unsigned int i, j;
3025         dns_rbtnode_t *node, *level_node, *wnode;
3026         rdatasetheader_t *header;
3027         isc_result_t result = ISC_R_NOTFOUND;
3028         dns_name_t name;
3029         dns_name_t *wname;
3030         dns_fixedname_t fwname;
3031         dns_rbtdb_t *rbtdb;
3032         isc_boolean_t done, wild, active;
3033         dns_rbtnodechain_t wchain;
3034
3035         /*
3036          * Caller must be holding the tree lock and MUST NOT be holding
3037          * any node locks.
3038          */
3039
3040         /*
3041          * Examine each ancestor level.  If the level's wild bit
3042          * is set, then construct the corresponding wildcard name and
3043          * search for it.  If the wildcard node exists, and is active in
3044          * this version, we're done.  If not, then we next check to see
3045          * if the ancestor is active in this version.  If so, then there
3046          * can be no possible wildcard match and again we're done.  If not,
3047          * continue the search.
3048          */
3049
3050         rbtdb = search->rbtdb;
3051         i = search->chain.level_matches;
3052         done = ISC_FALSE;
3053         node = *nodep;
3054         do {
3055                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3056                           isc_rwlocktype_read);
3057
3058                 /*
3059                  * First we try to figure out if this node is active in
3060                  * the search's version.  We do this now, even though we
3061                  * may not need the information, because it simplifies the
3062                  * locking and code flow.
3063                  */
3064                 for (header = node->data;
3065                      header != NULL;
3066                      header = header->next) {
3067                         if (header->serial <= search->serial &&
3068                             !IGNORE(header) && EXISTS(header))
3069                                 break;
3070                 }
3071                 if (header != NULL)
3072                         active = ISC_TRUE;
3073                 else
3074                         active = ISC_FALSE;
3075
3076                 if (node->wild)
3077                         wild = ISC_TRUE;
3078                 else
3079                         wild = ISC_FALSE;
3080
3081                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3082                             isc_rwlocktype_read);
3083
3084                 if (wild) {
3085                         /*
3086                          * Construct the wildcard name for this level.
3087                          */
3088                         dns_name_init(&name, NULL);
3089                         dns_rbt_namefromnode(node, &name);
3090                         dns_fixedname_init(&fwname);
3091                         wname = dns_fixedname_name(&fwname);
3092                         result = dns_name_concatenate(dns_wildcardname, &name,
3093                                                       wname, NULL);
3094                         j = i;
3095                         while (result == ISC_R_SUCCESS && j != 0) {
3096                                 j--;
3097                                 level_node = search->chain.levels[j];
3098                                 dns_name_init(&name, NULL);
3099                                 dns_rbt_namefromnode(level_node, &name);
3100                                 result = dns_name_concatenate(wname,
3101                                                               &name,
3102                                                               wname,
3103                                                               NULL);
3104                         }
3105                         if (result != ISC_R_SUCCESS)
3106                                 break;
3107
3108                         wnode = NULL;
3109                         dns_rbtnodechain_init(&wchain, NULL);
3110                         result = dns_rbt_findnode(rbtdb->tree, wname,
3111                                                   NULL, &wnode, &wchain,
3112                                                   DNS_RBTFIND_EMPTYDATA,
3113                                                   NULL, NULL);
3114                         if (result == ISC_R_SUCCESS) {
3115                                 nodelock_t *lock;
3116
3117                                 /*
3118                                  * We have found the wildcard node.  If it
3119                                  * is active in the search's version, we're
3120                                  * done.
3121                                  */
3122                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3123                                 NODE_LOCK(lock, isc_rwlocktype_read);
3124                                 for (header = wnode->data;
3125                                      header != NULL;
3126                                      header = header->next) {
3127                                         if (header->serial <= search->serial &&
3128                                             !IGNORE(header) && EXISTS(header))
3129                                                 break;
3130                                 }
3131                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3132                                 if (header != NULL ||
3133                                     activeempty(search, &wchain, wname)) {
3134                                         if (activeemtpynode(search, qname,
3135                                                             wname)) {
3136                                                 return (ISC_R_NOTFOUND);
3137                                         }
3138                                         /*
3139                                          * The wildcard node is active!
3140                                          *
3141                                          * Note: result is still ISC_R_SUCCESS
3142                                          * so we don't have to set it.
3143                                          */
3144                                         *nodep = wnode;
3145                                         break;
3146                                 }
3147                         } else if (result != ISC_R_NOTFOUND &&
3148                                    result != DNS_R_PARTIALMATCH) {
3149                                 /*
3150                                  * An error has occurred.  Bail out.
3151                                  */
3152                                 break;
3153                         }
3154                 }
3155
3156                 if (active) {
3157                         /*
3158                          * The level node is active.  Any wildcarding
3159                          * present at higher levels has no
3160                          * effect and we're done.
3161                          */
3162                         result = ISC_R_NOTFOUND;
3163                         break;
3164                 }
3165
3166                 if (i > 0) {
3167                         i--;
3168                         node = search->chain.levels[i];
3169                 } else
3170                         done = ISC_TRUE;
3171         } while (!done);
3172
3173         return (result);
3174 }
3175
3176 static isc_boolean_t
3177 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3178 {
3179         dns_rdata_t rdata = DNS_RDATA_INIT;
3180         dns_rdata_nsec3_t nsec3;
3181         unsigned char *raw;                     /* RDATASLAB */
3182         unsigned int rdlen, count;
3183         isc_region_t region;
3184         isc_result_t result;
3185
3186         REQUIRE(header->type == dns_rdatatype_nsec3);
3187
3188         raw = (unsigned char *)header + sizeof(*header);
3189         count = raw[0] * 256 + raw[1]; /* count */
3190 #if DNS_RDATASET_FIXED
3191         raw += count * 4 + 2;
3192 #else
3193         raw += 2;
3194 #endif
3195         while (count-- > 0) {
3196                 rdlen = raw[0] * 256 + raw[1];
3197 #if DNS_RDATASET_FIXED
3198                 raw += 4;
3199 #else
3200                 raw += 2;
3201 #endif
3202                 region.base = raw;
3203                 region.length = rdlen;
3204                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3205                                      dns_rdatatype_nsec3, &region);
3206                 raw += rdlen;
3207                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3208                 INSIST(result == ISC_R_SUCCESS);
3209                 if (nsec3.hash == search->rbtversion->hash &&
3210                     nsec3.iterations == search->rbtversion->iterations &&
3211                     nsec3.salt_length == search->rbtversion->salt_length &&
3212                     memcmp(nsec3.salt, search->rbtversion->salt,
3213                            nsec3.salt_length) == 0)
3214                         return (ISC_TRUE);
3215                 dns_rdata_reset(&rdata);
3216         }
3217         return (ISC_FALSE);
3218 }
3219
3220 static inline isc_result_t
3221 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3222                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3223                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3224                   dns_db_secure_t secure)
3225 {
3226         dns_rbtnode_t *node;
3227         rdatasetheader_t *header, *header_next, *found, *foundsig;
3228         isc_boolean_t empty_node;
3229         isc_result_t result;
3230         dns_fixedname_t fname, forigin;
3231         dns_name_t *name, *origin;
3232         dns_rdatatype_t type;
3233         rbtdb_rdatatype_t sigtype;
3234         isc_boolean_t wraps;
3235         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3236
3237         if (tree == search->rbtdb->nsec3) {
3238                 type = dns_rdatatype_nsec3;
3239                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3240                 wraps = ISC_TRUE;
3241         } else {
3242                 type = dns_rdatatype_nsec;
3243                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3244                 wraps = ISC_FALSE;
3245         }
3246
3247  again:
3248         do {
3249                 node = NULL;
3250                 dns_fixedname_init(&fname);
3251                 name = dns_fixedname_name(&fname);
3252                 dns_fixedname_init(&forigin);
3253                 origin = dns_fixedname_name(&forigin);
3254                 result = dns_rbtnodechain_current(&search->chain, name,
3255                                                   origin, &node);
3256                 if (result != ISC_R_SUCCESS)
3257                         return (result);
3258                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3259                           isc_rwlocktype_read);
3260                 found = NULL;
3261                 foundsig = NULL;
3262                 empty_node = ISC_TRUE;
3263                 for (header = node->data;
3264                      header != NULL;
3265                      header = header_next) {
3266                         header_next = header->next;
3267                         /*
3268                          * Look for an active, extant NSEC or RRSIG NSEC.
3269                          */
3270                         do {
3271                                 if (header->serial <= search->serial &&
3272                                     !IGNORE(header)) {
3273                                         /*
3274                                          * Is this a "this rdataset doesn't
3275                                          * exist" record?
3276                                          */
3277                                         if (NONEXISTENT(header))
3278                                                 header = NULL;
3279                                         break;
3280                                 } else
3281                                         header = header->down;
3282                         } while (header != NULL);
3283                         if (header != NULL) {
3284                                 /*
3285                                  * We now know that there is at least one
3286                                  * active rdataset at this node.
3287                                  */
3288                                 empty_node = ISC_FALSE;
3289                                 if (header->type == type) {
3290                                         found = header;
3291                                         if (foundsig != NULL)
3292                                                 break;
3293                                 } else if (header->type == sigtype) {
3294                                         foundsig = header;
3295                                         if (found != NULL)
3296                                                 break;
3297                                 }
3298                         }
3299                 }
3300                 if (!empty_node) {
3301                         if (found != NULL && search->rbtversion->havensec3 &&
3302                             found->type == dns_rdatatype_nsec3 &&
3303                             !matchparams(found, search)) {
3304                                 empty_node = ISC_TRUE;
3305                                 found = NULL;
3306                                 foundsig = NULL;
3307                                 result = dns_rbtnodechain_prev(&search->chain,
3308                                                                NULL, NULL);
3309                         } else if (found != NULL &&
3310                                    (foundsig != NULL || !need_sig))
3311                         {
3312                                 /*
3313                                  * We've found the right NSEC/NSEC3 record.
3314                                  *
3315                                  * Note: for this to really be the right
3316                                  * NSEC record, it's essential that the NSEC
3317                                  * records of any nodes obscured by a zone
3318                                  * cut have been removed; we assume this is
3319                                  * the case.
3320                                  */
3321                                 result = dns_name_concatenate(name, origin,
3322                                                               foundname, NULL);
3323                                 if (result == ISC_R_SUCCESS) {
3324                                         if (nodep != NULL) {
3325                                                 new_reference(search->rbtdb,
3326                                                               node);
3327                                                 *nodep = node;
3328                                         }
3329                                         bind_rdataset(search->rbtdb, node,
3330                                                       found, search->now,
3331                                                       rdataset);
3332                                         if (foundsig != NULL)
3333                                                 bind_rdataset(search->rbtdb,
3334                                                               node,
3335                                                               foundsig,
3336                                                               search->now,
3337                                                               sigrdataset);
3338                                 }
3339                         } else if (found == NULL && foundsig == NULL) {
3340                                 /*
3341                                  * This node is active, but has no NSEC or
3342                                  * RRSIG NSEC.  That means it's glue or
3343                                  * other obscured zone data that isn't
3344                                  * relevant for our search.  Treat the
3345                                  * node as if it were empty and keep looking.
3346                                  */
3347                                 empty_node = ISC_TRUE;
3348                                 result = dns_rbtnodechain_prev(&search->chain,
3349                                                                NULL, NULL);
3350                         } else {
3351                                 /*
3352                                  * We found an active node, but either the
3353                                  * NSEC or the RRSIG NSEC is missing.  This
3354                                  * shouldn't happen.
3355                                  */
3356                                 result = DNS_R_BADDB;
3357                         }
3358                 } else {
3359                         /*
3360                          * This node isn't active.  We've got to keep
3361                          * looking.
3362                          */
3363                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3364                                                        NULL);
3365                 }
3366                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3367                             isc_rwlocktype_read);
3368         } while (empty_node && result == ISC_R_SUCCESS);
3369
3370         if (result == ISC_R_NOMORE && wraps) {
3371                 result = dns_rbtnodechain_last(&search->chain, tree,
3372                                                NULL, NULL);
3373                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3374                         wraps = ISC_FALSE;
3375                         goto again;
3376                 }
3377         }
3378
3379         /*
3380          * If the result is ISC_R_NOMORE, then we got to the beginning of
3381          * the database and didn't find a NSEC record.  This shouldn't
3382          * happen.
3383          */
3384         if (result == ISC_R_NOMORE)
3385                 result = DNS_R_BADDB;
3386
3387         return (result);
3388 }
3389
3390 static isc_result_t
3391 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3392           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3393           dns_dbnode_t **nodep, dns_name_t *foundname,
3394           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3395 {
3396         dns_rbtnode_t *node = NULL;
3397         isc_result_t result;
3398         rbtdb_search_t search;
3399         isc_boolean_t cname_ok = ISC_TRUE;
3400         isc_boolean_t close_version = ISC_FALSE;
3401         isc_boolean_t maybe_zonecut = ISC_FALSE;
3402         isc_boolean_t at_zonecut = ISC_FALSE;
3403         isc_boolean_t wild;
3404         isc_boolean_t empty_node;
3405         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3406         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3407         rbtdb_rdatatype_t sigtype;
3408         isc_boolean_t active;
3409         dns_rbtnodechain_t chain;
3410         nodelock_t *lock;
3411         dns_rbt_t *tree;
3412
3413         search.rbtdb = (dns_rbtdb_t *)db;
3414
3415         REQUIRE(VALID_RBTDB(search.rbtdb));
3416
3417         /*
3418          * We don't care about 'now'.
3419          */
3420         UNUSED(now);
3421
3422         /*
3423          * If the caller didn't supply a version, attach to the current
3424          * version.
3425          */
3426         if (version == NULL) {
3427                 currentversion(db, &version);
3428                 close_version = ISC_TRUE;
3429         }
3430
3431         search.rbtversion = version;
3432         search.serial = search.rbtversion->serial;
3433         search.options = options;
3434         search.copy_name = ISC_FALSE;
3435         search.need_cleanup = ISC_FALSE;
3436         search.wild = ISC_FALSE;
3437         search.zonecut = NULL;
3438         dns_fixedname_init(&search.zonecut_name);
3439         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3440         search.now = 0;
3441
3442         /*
3443          * 'wild' will be true iff. we've matched a wildcard.
3444          */
3445         wild = ISC_FALSE;
3446
3447         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3448
3449         /*
3450          * Search down from the root of the tree.  If, while going down, we
3451          * encounter a callback node, zone_zonecut_callback() will search the
3452          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3453          */
3454         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3455                                                          search.rbtdb->tree;
3456         result = dns_rbt_findnode(tree, name, foundname, &node,
3457                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3458                                   zone_zonecut_callback, &search);
3459
3460         if (result == DNS_R_PARTIALMATCH) {
3461         partial_match:
3462                 if (search.zonecut != NULL) {
3463                     result = setup_delegation(&search, nodep, foundname,
3464                                               rdataset, sigrdataset);
3465                     goto tree_exit;
3466                 }
3467
3468                 if (search.wild) {
3469                         /*
3470                          * At least one of the levels in the search chain
3471                          * potentially has a wildcard.  For each such level,
3472                          * we must see if there's a matching wildcard active
3473                          * in the current version.
3474                          */
3475                         result = find_wildcard(&search, &node, name);
3476                         if (result == ISC_R_SUCCESS) {
3477                                 result = dns_name_copy(name, foundname, NULL);
3478                                 if (result != ISC_R_SUCCESS)
3479                                         goto tree_exit;
3480                                 wild = ISC_TRUE;
3481                                 goto found;
3482                         }
3483                         else if (result != ISC_R_NOTFOUND)
3484                                 goto tree_exit;
3485                 }
3486
3487                 chain = search.chain;
3488                 active = activeempty(&search, &chain, name);
3489
3490                 /*
3491                  * If we're here, then the name does not exist, is not
3492                  * beneath a zonecut, and there's no matching wildcard.
3493                  */
3494                 if ((search.rbtversion->secure == dns_db_secure &&
3495                      !search.rbtversion->havensec3) ||
3496                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3497                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3498                 {
3499                         result = find_closest_nsec(&search, nodep, foundname,
3500                                                    rdataset, sigrdataset, tree,
3501                                                    search.rbtversion->secure);
3502                         if (result == ISC_R_SUCCESS)
3503                                 result = active ? DNS_R_EMPTYNAME :
3504                                                   DNS_R_NXDOMAIN;
3505                 } else
3506                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3507                 goto tree_exit;
3508         } else if (result != ISC_R_SUCCESS)
3509                 goto tree_exit;
3510
3511  found:
3512         /*
3513          * We have found a node whose name is the desired name, or we
3514          * have matched a wildcard.
3515          */
3516
3517         if (search.zonecut != NULL) {
3518                 /*
3519                  * If we're beneath a zone cut, we don't want to look for
3520                  * CNAMEs because they're not legitimate zone glue.
3521                  */
3522                 cname_ok = ISC_FALSE;
3523         } else {
3524                 /*
3525                  * The node may be a zone cut itself.  If it might be one,
3526                  * make sure we check for it later.
3527                  */
3528                 if (node->find_callback &&
3529                     (node != search.rbtdb->origin_node ||
3530                      IS_STUB(search.rbtdb)) &&
3531                     !dns_rdatatype_atparent(type))
3532                         maybe_zonecut = ISC_TRUE;
3533         }
3534
3535         /*
3536          * Certain DNSSEC types are not subject to CNAME matching
3537          * (RFC4035, section 2.5 and RFC3007).
3538          *
3539          * We don't check for RRSIG, because we don't store RRSIG records
3540          * directly.
3541          */
3542         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3543                 cname_ok = ISC_FALSE;
3544
3545         /*
3546          * We now go looking for rdata...
3547          */
3548
3549         NODE_LOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3550                   isc_rwlocktype_read);
3551
3552         found = NULL;
3553         foundsig = NULL;
3554         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3555         nsecheader = NULL;
3556         nsecsig = NULL;
3557         cnamesig = NULL;
3558         empty_node = ISC_TRUE;
3559         for (header = node->data; header != NULL; header = header_next) {
3560                 header_next = header->next;
3561                 /*
3562                  * Look for an active, extant rdataset.
3563                  */
3564                 do {
3565                         if (header->serial <= search.serial &&
3566                             !IGNORE(header)) {
3567                                 /*
3568                                  * Is this a "this rdataset doesn't
3569                                  * exist" record?
3570                                  */
3571                                 if (NONEXISTENT(header))
3572                                         header = NULL;
3573                                 break;
3574                         } else
3575                                 header = header->down;
3576                 } while (header != NULL);
3577                 if (header != NULL) {
3578                         /*
3579                          * We now know that there is at least one active
3580                          * rdataset at this node.
3581                          */
3582                         empty_node = ISC_FALSE;
3583
3584                         /*
3585                          * Do special zone cut handling, if requested.
3586                          */
3587                         if (maybe_zonecut &&
3588                             header->type == dns_rdatatype_ns) {
3589                                 /*
3590                                  * We increment the reference count on node to
3591                                  * ensure that search->zonecut_rdataset will
3592                                  * still be valid later.
3593                                  */
3594                                 new_reference(search.rbtdb, node);
3595                                 search.zonecut = node;
3596                                 search.zonecut_rdataset = header;
3597                                 search.zonecut_sigrdataset = NULL;
3598                                 search.need_cleanup = ISC_TRUE;
3599                                 maybe_zonecut = ISC_FALSE;
3600                                 at_zonecut = ISC_TRUE;
3601                                 /*
3602                                  * It is not clear if KEY should still be
3603                                  * allowed at the parent side of the zone
3604                                  * cut or not.  It is needed for RFC3007
3605                                  * validated updates.
3606                                  */
3607                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3608                                     && type != dns_rdatatype_nsec
3609                                     && type != dns_rdatatype_key) {
3610                                         /*
3611                                          * Glue is not OK, but any answer we
3612                                          * could return would be glue.  Return
3613                                          * the delegation.
3614                                          */
3615                                         found = NULL;
3616                                         break;
3617                                 }
3618                                 if (found != NULL && foundsig != NULL)
3619                                         break;
3620                         }
3621
3622
3623                         /*
3624                          * If the NSEC3 record doesn't match the chain
3625                          * we are using behave as if it isn't here.
3626                          */
3627                         if (header->type == dns_rdatatype_nsec3 &&
3628                             !matchparams(header, &search))
3629                                 goto partial_match;
3630                         /*
3631                          * If we found a type we were looking for,
3632                          * remember it.
3633                          */
3634                         if (header->type == type ||
3635                             type == dns_rdatatype_any ||
3636                             (header->type == dns_rdatatype_cname &&
3637                              cname_ok)) {
3638                                 /*
3639                                  * We've found the answer!
3640                                  */
3641                                 found = header;
3642                                 if (header->type == dns_rdatatype_cname &&
3643                                     cname_ok) {
3644                                         /*
3645                                          * We may be finding a CNAME instead
3646                                          * of the desired type.
3647                                          *
3648                                          * If we've already got the CNAME RRSIG,
3649                                          * use it, otherwise change sigtype
3650                                          * so that we find it.
3651                                          */
3652                                         if (cnamesig != NULL)
3653                                                 foundsig = cnamesig;
3654                                         else
3655                                                 sigtype =
3656                                                     RBTDB_RDATATYPE_SIGCNAME;
3657                                 }
3658                                 /*
3659                                  * If we've got all we need, end the search.
3660                                  */
3661                                 if (!maybe_zonecut && foundsig != NULL)
3662                                         break;
3663                         } else if (header->type == sigtype) {
3664                                 /*
3665                                  * We've found the RRSIG rdataset for our
3666                                  * target type.  Remember it.
3667                                  */
3668                                 foundsig = header;
3669                                 /*
3670                                  * If we've got all we need, end the search.
3671                                  */
3672                                 if (!maybe_zonecut && found != NULL)
3673                                         break;
3674                         } else if (header->type == dns_rdatatype_nsec &&
3675                                    !search.rbtversion->havensec3) {
3676                                 /*
3677                                  * Remember a NSEC rdataset even if we're
3678                                  * not specifically looking for it, because
3679                                  * we might need it later.
3680                                  */
3681                                 nsecheader = header;
3682                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3683                                    !search.rbtversion->havensec3) {
3684                                 /*
3685                                  * If we need the NSEC rdataset, we'll also
3686                                  * need its signature.
3687                                  */
3688                                 nsecsig = header;
3689                         } else if (cname_ok &&
3690                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3691                                 /*
3692                                  * If we get a CNAME match, we'll also need
3693                                  * its signature.
3694                                  */
3695                                 cnamesig = header;
3696                         }
3697                 }
3698         }
3699
3700         if (empty_node) {
3701                 /*
3702                  * We have an exact match for the name, but there are no
3703                  * active rdatasets in the desired version.  That means that
3704                  * this node doesn't exist in the desired version, and that
3705                  * we really have a partial match.
3706                  */
3707                 if (!wild) {
3708                         lock = &search.rbtdb->node_locks[node->locknum].lock;
3709                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3710                         goto partial_match;
3711                 }
3712         }
3713
3714         /*
3715          * If we didn't find what we were looking for...
3716          */
3717         if (found == NULL) {
3718                 if (search.zonecut != NULL) {
3719                         /*
3720                          * We were trying to find glue at a node beneath a
3721                          * zone cut, but didn't.
3722                          *
3723                          * Return the delegation.
3724                          */
3725                         lock = &search.rbtdb->node_locks[node->locknum].lock;
3726                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3727                         result = setup_delegation(&search, nodep, foundname,
3728                                                   rdataset, sigrdataset);
3729                         goto tree_exit;
3730                 }
3731                 /*
3732                  * The desired type doesn't exist.
3733                  */
3734                 result = DNS_R_NXRRSET;
3735                 if (search.rbtversion->secure == dns_db_secure &&
3736                     !search.rbtversion->havensec3 &&
3737                     (nsecheader == NULL || nsecsig == NULL)) {
3738                         /*
3739                          * The zone is secure but there's no NSEC,
3740                          * or the NSEC has no signature!
3741                          */
3742                         if (!wild) {
3743                                 result = DNS_R_BADDB;
3744                                 goto node_exit;
3745                         }
3746
3747                         lock = &search.rbtdb->node_locks[node->locknum].lock;
3748                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3749                         result = find_closest_nsec(&search, nodep, foundname,
3750                                                    rdataset, sigrdataset,
3751                                                    search.rbtdb->tree,
3752                                                    search.rbtversion->secure);
3753                         if (result == ISC_R_SUCCESS)
3754                                 result = DNS_R_EMPTYWILD;
3755                         goto tree_exit;
3756                 }
3757                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3758                     nsecheader == NULL)
3759                 {
3760                         /*
3761                          * There's no NSEC record, and we were told
3762                          * to find one.
3763                          */
3764                         result = DNS_R_BADDB;
3765                         goto node_exit;
3766                 }
3767                 if (nodep != NULL) {
3768                         new_reference(search.rbtdb, node);
3769                         *nodep = node;
3770                 }
3771                 if ((search.rbtversion->secure == dns_db_secure &&
3772                      !search.rbtversion->havensec3) ||
3773                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3774                 {
3775                         bind_rdataset(search.rbtdb, node, nsecheader,
3776                                       0, rdataset);
3777                         if (nsecsig != NULL)
3778                                 bind_rdataset(search.rbtdb, node,
3779                                               nsecsig, 0, sigrdataset);
3780                 }
3781                 if (wild)
3782                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3783                 goto node_exit;
3784         }
3785
3786         /*
3787          * We found what we were looking for, or we found a CNAME.
3788          */
3789
3790         if (type != found->type &&
3791             type != dns_rdatatype_any &&
3792             found->type == dns_rdatatype_cname) {
3793                 /*
3794                  * We weren't doing an ANY query and we found a CNAME instead
3795                  * of the type we were looking for, so we need to indicate
3796                  * that result to the caller.
3797                  */
3798                 result = DNS_R_CNAME;
3799         } else if (search.zonecut != NULL) {
3800                 /*
3801                  * If we're beneath a zone cut, we must indicate that the
3802                  * result is glue, unless we're actually at the zone cut
3803                  * and the type is NSEC or KEY.
3804                  */
3805                 if (search.zonecut == node) {
3806                         /*
3807                          * It is not clear if KEY should still be
3808                          * allowed at the parent side of the zone
3809                          * cut or not.  It is needed for RFC3007
3810                          * validated updates.
3811                          */
3812                         if (type == dns_rdatatype_nsec ||
3813                             type == dns_rdatatype_nsec3 ||
3814                             type == dns_rdatatype_key)
3815                                 result = ISC_R_SUCCESS;
3816                         else if (type == dns_rdatatype_any)
3817                                 result = DNS_R_ZONECUT;
3818                         else
3819                                 result = DNS_R_GLUE;
3820                 } else
3821                         result = DNS_R_GLUE;
3822                 /*
3823                  * We might have found data that isn't glue, but was occluded
3824                  * by a dynamic update.  If the caller cares about this, they
3825                  * will have told us to validate glue.
3826                  *
3827                  * XXX We should cache the glue validity state!
3828                  */
3829                 if (result == DNS_R_GLUE &&
3830                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3831                     !valid_glue(&search, foundname, type, node)) {
3832                         lock = &search.rbtdb->node_locks[node->locknum].lock;
3833                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3834                         result = setup_delegation(&search, nodep, foundname,
3835                                                   rdataset, sigrdataset);
3836                     goto tree_exit;
3837                 }
3838         } else {
3839                 /*
3840                  * An ordinary successful query!
3841                  */
3842                 result = ISC_R_SUCCESS;
3843         }
3844
3845         if (nodep != NULL) {
3846                 if (!at_zonecut)
3847                         new_reference(search.rbtdb, node);
3848                 else
3849                         search.need_cleanup = ISC_FALSE;
3850                 *nodep = node;
3851         }
3852
3853         if (type != dns_rdatatype_any) {
3854                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3855                 if (foundsig != NULL)
3856                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3857                                       sigrdataset);
3858         }
3859
3860         if (wild)
3861                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3862
3863  node_exit:
3864         NODE_UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3865                     isc_rwlocktype_read);
3866
3867  tree_exit:
3868         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3869
3870         /*
3871          * If we found a zonecut but aren't going to use it, we have to
3872          * let go of it.
3873          */
3874         if (search.need_cleanup) {
3875                 node = search.zonecut;
3876                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3877
3878                 NODE_LOCK(lock, isc_rwlocktype_read);
3879                 decrement_reference(search.rbtdb, node, 0,
3880                                     isc_rwlocktype_read, isc_rwlocktype_none,
3881                                     ISC_FALSE);
3882                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3883         }
3884
3885         if (close_version)
3886                 closeversion(db, &version, ISC_FALSE);
3887
3888         dns_rbtnodechain_reset(&search.chain);
3889
3890         return (result);
3891 }
3892
3893 static isc_result_t
3894 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3895                  isc_stdtime_t now, dns_dbnode_t **nodep,
3896                  dns_name_t *foundname,
3897                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3898 {
3899         UNUSED(db);
3900         UNUSED(name);
3901         UNUSED(options);
3902         UNUSED(now);
3903         UNUSED(nodep);
3904         UNUSED(foundname);
3905         UNUSED(rdataset);
3906         UNUSED(sigrdataset);
3907
3908         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3909
3910         return (ISC_R_NOTIMPLEMENTED);
3911 }
3912
3913 static isc_result_t
3914 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3915         rbtdb_search_t *search = arg;
3916         rdatasetheader_t *header, *header_prev, *header_next;
3917         rdatasetheader_t *dname_header, *sigdname_header;
3918         isc_result_t result;
3919         nodelock_t *lock;
3920         isc_rwlocktype_t locktype;
3921
3922         /* XXX comment */
3923
3924         REQUIRE(search->zonecut == NULL);
3925
3926         /*
3927          * Keep compiler silent.
3928          */
3929         UNUSED(name);
3930
3931         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3932         locktype = isc_rwlocktype_read;
3933         NODE_LOCK(lock, locktype);
3934
3935         /*
3936          * Look for a DNAME or RRSIG DNAME rdataset.
3937          */
3938         dname_header = NULL;
3939         sigdname_header = NULL;
3940         header_prev = NULL;
3941         for (header = node->data; header != NULL; header = header_next) {
3942                 header_next = header->next;
3943                 if (header->rdh_ttl <= search->now) {
3944                         /*
3945                          * This rdataset is stale.  If no one else is
3946                          * using the node, we can clean it up right
3947                          * now, otherwise we mark it as stale, and
3948                          * the node as dirty, so it will get cleaned
3949                          * up later.
3950                          */
3951                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3952                             (locktype == isc_rwlocktype_write ||
3953                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3954                                 /*
3955                                  * We update the node's status only when we
3956                                  * can get write access; otherwise, we leave
3957                                  * others to this work.  Periodical cleaning
3958                                  * will eventually take the job as the last
3959                                  * resort.
3960                                  * We won't downgrade the lock, since other
3961                                  * rdatasets are probably stale, too.
3962                                  */
3963                                 locktype = isc_rwlocktype_write;
3964
3965                                 if (dns_rbtnode_refcurrent(node) == 0) {
3966                                         isc_mem_t *mctx;
3967
3968                                         /*
3969                                          * header->down can be non-NULL if the
3970                                          * refcount has just decremented to 0
3971                                          * but decrement_reference() has not
3972                                          * performed clean_cache_node(), in
3973                                          * which case we need to purge the
3974                                          * stale headers first.
3975                                          */
3976                                         mctx = search->rbtdb->common.mctx;
3977                                         clean_stale_headers(search->rbtdb,
3978                                                             mctx,
3979                                                             header);
3980                                         if (header_prev != NULL)
3981                                                 header_prev->next =
3982                                                         header->next;
3983                                         else
3984                                                 node->data = header->next;
3985                                         free_rdataset(search->rbtdb, mctx,
3986                                                       header);
3987                                 } else {
3988                                         header->attributes |=
3989                                                 RDATASET_ATTR_STALE;
3990                                         node->dirty = 1;
3991                                         header_prev = header;
3992                                 }
3993                         } else
3994                                 header_prev = header;
3995                 } else if (header->type == dns_rdatatype_dname &&
3996                            EXISTS(header)) {
3997                         dname_header = header;
3998                         header_prev = header;
3999                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4000                          EXISTS(header)) {
4001                         sigdname_header = header;
4002                         header_prev = header;
4003                 } else
4004                         header_prev = header;
4005         }
4006
4007         if (dname_header != NULL &&
4008             (!DNS_TRUST_PENDING(dname_header->trust) ||
4009              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4010                 /*
4011                  * We increment the reference count on node to ensure that
4012                  * search->zonecut_rdataset will still be valid later.
4013                  */
4014                 new_reference(search->rbtdb, node);
4015                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4016                 search->zonecut = node;
4017                 search->zonecut_rdataset = dname_header;
4018                 search->zonecut_sigrdataset = sigdname_header;
4019                 search->need_cleanup = ISC_TRUE;
4020                 result = DNS_R_PARTIALMATCH;
4021         } else
4022                 result = DNS_R_CONTINUE;
4023
4024         NODE_UNLOCK(lock, locktype);
4025
4026         return (result);
4027 }
4028
4029 static inline isc_result_t
4030 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4031                      dns_dbnode_t **nodep, dns_name_t *foundname,
4032                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4033 {
4034         unsigned int i;
4035         dns_rbtnode_t *level_node;
4036         rdatasetheader_t *header, *header_prev, *header_next;
4037         rdatasetheader_t *found, *foundsig;
4038         isc_result_t result = ISC_R_NOTFOUND;
4039         dns_name_t name;
4040         dns_rbtdb_t *rbtdb;
4041         isc_boolean_t done;
4042         nodelock_t *lock;
4043         isc_rwlocktype_t locktype;
4044
4045         /*
4046          * Caller must be holding the tree lock.
4047          */
4048
4049         rbtdb = search->rbtdb;
4050         i = search->chain.level_matches;
4051         done = ISC_FALSE;
4052         do {
4053                 locktype = isc_rwlocktype_read;
4054                 lock = &rbtdb->node_locks[node->locknum].lock;
4055                 NODE_LOCK(lock, locktype);
4056
4057                 /*
4058                  * Look for NS and RRSIG NS rdatasets.
4059                  */
4060                 found = NULL;
4061                 foundsig = NULL;
4062                 header_prev = NULL;
4063                 for (header = node->data;
4064                      header != NULL;
4065                      header = header_next) {
4066                         header_next = header->next;
4067                         if (header->rdh_ttl <= search->now) {
4068                                 /*
4069                                  * This rdataset is stale.  If no one else is
4070                                  * using the node, we can clean it up right
4071                                  * now, otherwise we mark it as stale, and
4072                                  * the node as dirty, so it will get cleaned
4073                                  * up later.
4074                                  */
4075                                 if ((header->rdh_ttl <= search->now -
4076                                                     RBTDB_VIRTUAL) &&
4077                                     (locktype == isc_rwlocktype_write ||
4078                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4079                                         /*
4080                                          * We update the node's status only
4081                                          * when we can get write access.
4082                                          */
4083                                         locktype = isc_rwlocktype_write;
4084
4085                                         if (dns_rbtnode_refcurrent(node)
4086                                             == 0) {
4087                                                 isc_mem_t *m;
4088
4089                                                 m = search->rbtdb->common.mctx;
4090                                                 clean_stale_headers(
4091                                                         search->rbtdb,
4092                                                         m, header);
4093                                                 if (header_prev != NULL)
4094                                                         header_prev->next =
4095                                                                 header->next;
4096                                                 else
4097                                                         node->data =
4098                                                                 header->next;
4099                                                 free_rdataset(rbtdb, m,
4100                                                               header);
4101                                         } else {
4102                                                 header->attributes |=
4103                                                         RDATASET_ATTR_STALE;
4104                                                 node->dirty = 1;
4105                                                 header_prev = header;
4106                                         }
4107                                 } else
4108                                         header_prev = header;
4109                         } else if (EXISTS(header)) {
4110                                 /*
4111                                  * We've found an extant rdataset.  See if
4112                                  * we're interested in it.
4113                                  */
4114                                 if (header->type == dns_rdatatype_ns) {
4115                                         found = header;
4116                                         if (foundsig != NULL)
4117                                                 break;
4118                                 } else if (header->type ==
4119                                            RBTDB_RDATATYPE_SIGNS) {
4120                                         foundsig = header;
4121                                         if (found != NULL)
4122                                                 break;
4123                                 }
4124                                 header_prev = header;
4125                         } else
4126                                 header_prev = header;
4127                 }
4128
4129                 if (found != NULL) {
4130                         /*
4131                          * If we have to set foundname, we do it before
4132                          * anything else.  If we were to set foundname after
4133                          * we had set nodep or bound the rdataset, then we'd
4134                          * have to undo that work if dns_name_concatenate()
4135                          * failed.  By setting foundname first, there's
4136                          * nothing to undo if we have trouble.
4137                          */
4138                         if (foundname != NULL) {
4139                                 dns_name_init(&name, NULL);
4140                                 dns_rbt_namefromnode(node, &name);
4141                                 result = dns_name_copy(&name, foundname, NULL);
4142                                 while (result == ISC_R_SUCCESS && i > 0) {
4143                                         i--;
4144                                         level_node = search->chain.levels[i];
4145                                         dns_name_init(&name, NULL);
4146                                         dns_rbt_namefromnode(level_node,
4147                                                              &name);
4148                                         result =
4149                                                 dns_name_concatenate(foundname,
4150                                                                      &name,
4151                                                                      foundname,
4152                                                                      NULL);
4153                                 }
4154                                 if (result != ISC_R_SUCCESS) {
4155                                         *nodep = NULL;
4156                                         goto node_exit;
4157                                 }
4158                         }
4159                         result = DNS_R_DELEGATION;
4160                         if (nodep != NULL) {
4161                                 new_reference(search->rbtdb, node);
4162                                 *nodep = node;
4163                         }
4164                         bind_rdataset(search->rbtdb, node, found, search->now,
4165                                       rdataset);
4166                         if (foundsig != NULL)
4167                                 bind_rdataset(search->rbtdb, node, foundsig,
4168                                               search->now, sigrdataset);
4169                         if (need_headerupdate(found, search->now) ||
4170                             (foundsig != NULL &&
4171                              need_headerupdate(foundsig, search->now))) {
4172                                 if (locktype != isc_rwlocktype_write) {
4173                                         NODE_UNLOCK(lock, locktype);
4174                                         NODE_LOCK(lock, isc_rwlocktype_write);
4175                                         locktype = isc_rwlocktype_write;
4176                                 }
4177                                 if (need_headerupdate(found, search->now))
4178                                         update_header(search->rbtdb, found,
4179                                                       search->now);
4180                                 if (foundsig != NULL &&
4181                                     need_headerupdate(foundsig, search->now)) {
4182                                         update_header(search->rbtdb, foundsig,
4183                                                       search->now);
4184                                 }
4185                         }
4186                 }
4187
4188         node_exit:
4189                 NODE_UNLOCK(lock, locktype);
4190
4191                 if (found == NULL && i > 0) {
4192                         i--;
4193                         node = search->chain.levels[i];
4194                 } else
4195                         done = ISC_TRUE;
4196
4197         } while (!done);
4198
4199         return (result);
4200 }
4201
4202 static isc_result_t
4203 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4204                   isc_stdtime_t now, dns_name_t *foundname,
4205                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4206 {
4207         dns_rbtnode_t *node;
4208         rdatasetheader_t *header, *header_next, *header_prev;
4209         rdatasetheader_t *found, *foundsig;
4210         isc_boolean_t empty_node;
4211         isc_result_t result;
4212         dns_fixedname_t fname, forigin;
4213         dns_name_t *name, *origin;
4214         rbtdb_rdatatype_t matchtype, sigmatchtype;
4215         nodelock_t *lock;
4216         isc_rwlocktype_t locktype;
4217
4218         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4219         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4220                                              dns_rdatatype_nsec);
4221
4222         do {
4223                 node = NULL;
4224                 dns_fixedname_init(&fname);
4225                 name = dns_fixedname_name(&fname);
4226                 dns_fixedname_init(&forigin);
4227                 origin = dns_fixedname_name(&forigin);
4228                 result = dns_rbtnodechain_current(&search->chain, name,
4229                                                   origin, &node);
4230                 if (result != ISC_R_SUCCESS)
4231                         return (result);
4232                 locktype = isc_rwlocktype_read;
4233                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4234                 NODE_LOCK(lock, locktype);
4235                 found = NULL;
4236                 foundsig = NULL;
4237                 empty_node = ISC_TRUE;
4238                 header_prev = NULL;
4239                 for (header = node->data;
4240                      header != NULL;
4241                      header = header_next) {
4242                         header_next = header->next;
4243                         if (header->rdh_ttl <= now) {
4244                                 /*
4245                                  * This rdataset is stale.  If no one else is
4246                                  * using the node, we can clean it up right
4247                                  * now, otherwise we mark it as stale, and the
4248                                  * node as dirty, so it will get cleaned up
4249                                  * later.
4250                                  */
4251                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4252                                     (locktype == isc_rwlocktype_write ||
4253                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4254                                         /*
4255                                          * We update the node's status only
4256                                          * when we can get write access.
4257                                          */
4258                                         locktype = isc_rwlocktype_write;
4259
4260                                         if (dns_rbtnode_refcurrent(node)
4261                                             == 0) {
4262                                                 isc_mem_t *m;
4263
4264                                                 m = search->rbtdb->common.mctx;
4265                                                 clean_stale_headers(
4266                                                         search->rbtdb,
4267                                                         m, header);
4268                                                 if (header_prev != NULL)
4269                                                         header_prev->next =
4270                                                                 header->next;
4271                                                 else
4272                                                         node->data = header->next;
4273                                                 free_rdataset(search->rbtdb, m,
4274                                                               header);
4275                                         } else {
4276                                                 header->attributes |=
4277                                                         RDATASET_ATTR_STALE;
4278                                                 node->dirty = 1;
4279                                                 header_prev = header;
4280                                         }
4281                                 } else
4282                                         header_prev = header;
4283                                 continue;
4284                         }
4285                         if (NONEXISTENT(header) ||
4286                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4287                                 header_prev = header;
4288                                 continue;
4289                         }
4290                         empty_node = ISC_FALSE;
4291                         if (header->type == matchtype)
4292                                 found = header;
4293                         else if (header->type == sigmatchtype)
4294                                 foundsig = header;
4295                         header_prev = header;
4296                 }
4297                 if (found != NULL) {
4298                         result = dns_name_concatenate(name, origin,
4299                                                       foundname, NULL);
4300                         if (result != ISC_R_SUCCESS)
4301                                 goto unlock_node;
4302                         bind_rdataset(search->rbtdb, node, found,
4303                                       now, rdataset);
4304                         if (foundsig != NULL)
4305                                 bind_rdataset(search->rbtdb, node, foundsig,
4306                                               now, sigrdataset);
4307                         new_reference(search->rbtdb, node);
4308                         *nodep = node;
4309                         result = DNS_R_COVERINGNSEC;
4310                 } else if (!empty_node) {
4311                         result = ISC_R_NOTFOUND;
4312                 } else
4313                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4314                                                        NULL);
4315  unlock_node:
4316                 NODE_UNLOCK(lock, locktype);
4317         } while (empty_node && result == ISC_R_SUCCESS);
4318         return (result);
4319 }
4320
4321 static isc_result_t
4322 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4323            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4324            dns_dbnode_t **nodep, dns_name_t *foundname,
4325            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4326 {
4327         dns_rbtnode_t *node = NULL;
4328         isc_result_t result;
4329         rbtdb_search_t search;
4330         isc_boolean_t cname_ok = ISC_TRUE;
4331         isc_boolean_t empty_node;
4332         nodelock_t *lock;
4333         isc_rwlocktype_t locktype;
4334         rdatasetheader_t *header, *header_prev, *header_next;
4335         rdatasetheader_t *found, *nsheader;
4336         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4337         rdatasetheader_t *update, *updatesig;
4338         rbtdb_rdatatype_t sigtype, negtype;
4339
4340         UNUSED(version);
4341
4342         search.rbtdb = (dns_rbtdb_t *)db;
4343
4344         REQUIRE(VALID_RBTDB(search.rbtdb));
4345         REQUIRE(version == NULL);
4346
4347         if (now == 0)
4348                 isc_stdtime_get(&now);
4349
4350         search.rbtversion = NULL;
4351         search.serial = 1;
4352         search.options = options;
4353         search.copy_name = ISC_FALSE;
4354         search.need_cleanup = ISC_FALSE;
4355         search.wild = ISC_FALSE;
4356         search.zonecut = NULL;
4357         dns_fixedname_init(&search.zonecut_name);
4358         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4359         search.now = now;
4360         update = NULL;
4361         updatesig = NULL;
4362
4363         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4364
4365         /*
4366          * Search down from the root of the tree.  If, while going down, we
4367          * encounter a callback node, cache_zonecut_callback() will search the
4368          * rdatasets at the zone cut for a DNAME rdataset.
4369          */
4370         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4371                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4372                                   cache_zonecut_callback, &search);
4373
4374         if (result == DNS_R_PARTIALMATCH) {
4375                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4376                         result = find_coveringnsec(&search, nodep, now,
4377                                                    foundname, rdataset,
4378                                                    sigrdataset);
4379                         if (result == DNS_R_COVERINGNSEC)
4380                                 goto tree_exit;
4381                 }
4382                 if (search.zonecut != NULL) {
4383                     result = setup_delegation(&search, nodep, foundname,
4384                                               rdataset, sigrdataset);
4385                     goto tree_exit;
4386                 } else {
4387                 find_ns:
4388                         result = find_deepest_zonecut(&search, node, nodep,
4389                                                       foundname, rdataset,
4390                                                       sigrdataset);
4391                         goto tree_exit;
4392                 }
4393         } else if (result != ISC_R_SUCCESS)
4394                 goto tree_exit;
4395
4396         /*
4397          * Certain DNSSEC types are not subject to CNAME matching
4398          * (RFC4035, section 2.5 and RFC3007).
4399          *
4400          * We don't check for RRSIG, because we don't store RRSIG records
4401          * directly.
4402          */
4403         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4404                 cname_ok = ISC_FALSE;
4405
4406         /*
4407          * We now go looking for rdata...
4408          */
4409
4410         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4411         locktype = isc_rwlocktype_read;
4412         NODE_LOCK(lock, locktype);
4413
4414         found = NULL;
4415         foundsig = NULL;
4416         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4417         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4418         nsheader = NULL;
4419         nssig = NULL;
4420         cnamesig = NULL;
4421         empty_node = ISC_TRUE;
4422         header_prev = NULL;
4423         for (header = node->data; header != NULL; header = header_next) {
4424                 header_next = header->next;
4425                 if (header->rdh_ttl <= now) {
4426                         /*
4427                          * This rdataset is stale.  If no one else is using the
4428                          * node, we can clean it up right now, otherwise we
4429                          * mark it as stale, and the node as dirty, so it will
4430                          * get cleaned up later.
4431                          */
4432                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4433                             (locktype == isc_rwlocktype_write ||
4434                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4435                                 /*
4436                                  * We update the node's status only when we
4437                                  * can get write access.
4438                                  */
4439                                 locktype = isc_rwlocktype_write;
4440
4441                                 if (dns_rbtnode_refcurrent(node) == 0) {
4442                                         isc_mem_t *mctx;
4443
4444                                         mctx = search.rbtdb->common.mctx;
4445                                         clean_stale_headers(search.rbtdb, mctx,
4446                                                             header);
4447                                         if (header_prev != NULL)
4448                                                 header_prev->next =
4449                                                         header->next;
4450                                         else
4451                                                 node->data = header->next;
4452                                         free_rdataset(search.rbtdb, mctx,
4453                                                       header);
4454                                 } else {
4455                                         header->attributes |=
4456                                                 RDATASET_ATTR_STALE;
4457                                         node->dirty = 1;
4458                                         header_prev = header;
4459                                 }
4460                         } else
4461                                 header_prev = header;
4462                 } else if (EXISTS(header)) {
4463                         /*
4464                          * We now know that there is at least one active
4465                          * non-stale rdataset at this node.
4466                          */
4467                         empty_node = ISC_FALSE;
4468
4469                         /*
4470                          * If we found a type we were looking for, remember
4471                          * it.
4472                          */
4473                         if (header->type == type ||
4474                             (type == dns_rdatatype_any &&
4475                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4476                             (cname_ok && header->type ==
4477                              dns_rdatatype_cname)) {
4478                                 /*
4479                                  * We've found the answer.
4480                                  */
4481                                 found = header;
4482                                 if (header->type == dns_rdatatype_cname &&
4483                                     cname_ok &&
4484                                     cnamesig != NULL) {
4485                                         /*
4486                                          * If we've already got the CNAME RRSIG,
4487                                          * use it, otherwise change sigtype
4488                                          * so that we find it.
4489                                          */
4490                                         if (cnamesig != NULL)
4491                                                 foundsig = cnamesig;
4492                                         else
4493                                                 sigtype =
4494                                                     RBTDB_RDATATYPE_SIGCNAME;
4495                                         foundsig = cnamesig;
4496                                 }
4497                         } else if (header->type == sigtype) {
4498                                 /*
4499                                  * We've found the RRSIG rdataset for our
4500                                  * target type.  Remember it.
4501                                  */
4502                                 foundsig = header;
4503                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4504                                    header->type == negtype) {
4505                                 /*
4506                                  * We've found a negative cache entry.
4507                                  */
4508                                 found = header;
4509                         } else if (header->type == dns_rdatatype_ns) {
4510                                 /*
4511                                  * Remember a NS rdataset even if we're
4512                                  * not specifically looking for it, because
4513                                  * we might need it later.
4514                                  */
4515                                 nsheader = header;
4516                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4517                                 /*
4518                                  * If we need the NS rdataset, we'll also
4519                                  * need its signature.
4520                                  */
4521                                 nssig = header;
4522                         } else if (cname_ok &&
4523                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4524                                 /*
4525                                  * If we get a CNAME match, we'll also need
4526                                  * its signature.
4527                                  */
4528                                 cnamesig = header;
4529                         }
4530                         header_prev = header;
4531                 } else
4532                         header_prev = header;
4533         }
4534
4535         if (empty_node) {
4536                 /*
4537                  * We have an exact match for the name, but there are no
4538                  * extant rdatasets.  That means that this node doesn't
4539                  * meaningfully exist, and that we really have a partial match.
4540                  */
4541                 NODE_UNLOCK(lock, locktype);
4542                 goto find_ns;
4543         }
4544
4545         /*
4546          * If we didn't find what we were looking for...
4547          */
4548         if (found == NULL ||
4549             (found->trust == dns_trust_glue &&
4550              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4551             (DNS_TRUST_PENDING(found->trust) &&
4552              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4553                 /*
4554                  * If there is an NS rdataset at this node, then this is the
4555                  * deepest zone cut.
4556                  */
4557                 if (nsheader != NULL) {
4558                         if (nodep != NULL) {
4559                                 new_reference(search.rbtdb, node);
4560                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4561                                 *nodep = node;
4562                         }
4563                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4564                                       rdataset);
4565                         if (need_headerupdate(nsheader, search.now))
4566                                 update = nsheader;
4567                         if (nssig != NULL) {
4568                                 bind_rdataset(search.rbtdb, node, nssig,
4569                                               search.now, sigrdataset);
4570                                 if (need_headerupdate(nssig, search.now))
4571                                         updatesig = nssig;
4572                         }
4573                         result = DNS_R_DELEGATION;
4574                         goto node_exit;
4575                 }
4576
4577                 /*
4578                  * Go find the deepest zone cut.
4579                  */
4580                 NODE_UNLOCK(lock, locktype);
4581                 goto find_ns;
4582         }
4583
4584         /*
4585          * We found what we were looking for, or we found a CNAME.
4586          */
4587
4588         if (nodep != NULL) {
4589                 new_reference(search.rbtdb, node);
4590                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4591                 *nodep = node;
4592         }
4593
4594         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4595                 /*
4596                  * We found a negative cache entry.
4597                  */
4598                 if (NXDOMAIN(found))
4599                         result = DNS_R_NCACHENXDOMAIN;
4600                 else
4601                         result = DNS_R_NCACHENXRRSET;
4602         } else if (type != found->type &&
4603                    type != dns_rdatatype_any &&
4604                    found->type == dns_rdatatype_cname) {
4605                 /*
4606                  * We weren't doing an ANY query and we found a CNAME instead
4607                  * of the type we were looking for, so we need to indicate
4608                  * that result to the caller.
4609                  */
4610                 result = DNS_R_CNAME;
4611         } else {
4612                 /*
4613                  * An ordinary successful query!
4614                  */
4615                 result = ISC_R_SUCCESS;
4616         }
4617
4618         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4619             result == DNS_R_NCACHENXRRSET) {
4620                 bind_rdataset(search.rbtdb, node, found, search.now,
4621                               rdataset);
4622                 if (need_headerupdate(found, search.now))
4623                         update = found;
4624                 if (foundsig != NULL) {
4625                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4626                                       sigrdataset);
4627                         if (need_headerupdate(foundsig, search.now))
4628                                 updatesig = foundsig;
4629                 }
4630         }
4631
4632  node_exit:
4633         if ((update != NULL || updatesig != NULL) &&
4634             locktype != isc_rwlocktype_write) {
4635                 NODE_UNLOCK(lock, locktype);
4636                 NODE_LOCK(lock, isc_rwlocktype_write);
4637                 locktype = isc_rwlocktype_write;
4638         }
4639         if (update != NULL && need_headerupdate(update, search.now))
4640                 update_header(search.rbtdb, update, search.now);
4641         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4642                 update_header(search.rbtdb, updatesig, search.now);
4643
4644         NODE_UNLOCK(lock, locktype);
4645
4646  tree_exit:
4647         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4648
4649         /*
4650          * If we found a zonecut but aren't going to use it, we have to
4651          * let go of it.
4652          */
4653         if (search.need_cleanup) {
4654                 node = search.zonecut;
4655                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4656
4657                 NODE_LOCK(lock, isc_rwlocktype_read);
4658                 decrement_reference(search.rbtdb, node, 0,
4659                                     isc_rwlocktype_read, isc_rwlocktype_none,
4660                                     ISC_FALSE);
4661                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4662         }
4663
4664         dns_rbtnodechain_reset(&search.chain);
4665
4666         return (result);
4667 }
4668
4669 static isc_result_t
4670 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4671                   isc_stdtime_t now, dns_dbnode_t **nodep,
4672                   dns_name_t *foundname,
4673                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4674 {
4675         dns_rbtnode_t *node = NULL;
4676         nodelock_t *lock;
4677         isc_result_t result;
4678         rbtdb_search_t search;
4679         rdatasetheader_t *header, *header_prev, *header_next;
4680         rdatasetheader_t *found, *foundsig;
4681         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4682         isc_rwlocktype_t locktype;
4683
4684         search.rbtdb = (dns_rbtdb_t *)db;
4685
4686         REQUIRE(VALID_RBTDB(search.rbtdb));
4687
4688         if (now == 0)
4689                 isc_stdtime_get(&now);
4690
4691         search.rbtversion = NULL;
4692         search.serial = 1;
4693         search.options = options;
4694         search.copy_name = ISC_FALSE;
4695         search.need_cleanup = ISC_FALSE;
4696         search.wild = ISC_FALSE;
4697         search.zonecut = NULL;
4698         dns_fixedname_init(&search.zonecut_name);
4699         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4700         search.now = now;
4701
4702         if ((options & DNS_DBFIND_NOEXACT) != 0)
4703                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4704
4705         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4706
4707         /*
4708          * Search down from the root of the tree.
4709          */
4710         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4711                                   &search.chain, rbtoptions, NULL, &search);
4712
4713         if (result == DNS_R_PARTIALMATCH) {
4714         find_ns:
4715                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4716                                               rdataset, sigrdataset);
4717                 goto tree_exit;
4718         } else if (result != ISC_R_SUCCESS)
4719                 goto tree_exit;
4720
4721         /*
4722          * We now go looking for an NS rdataset at the node.
4723          */
4724
4725         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4726         locktype = isc_rwlocktype_read;
4727         NODE_LOCK(lock, locktype);
4728
4729         found = NULL;
4730         foundsig = NULL;
4731         header_prev = NULL;
4732         for (header = node->data; header != NULL; header = header_next) {
4733                 header_next = header->next;
4734                 if (header->rdh_ttl <= now) {
4735                         /*
4736                          * This rdataset is stale.  If no one else is using the
4737                          * node, we can clean it up right now, otherwise we
4738                          * mark it as stale, and the node as dirty, so it will
4739                          * get cleaned up later.
4740                          */
4741                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4742                             (locktype == isc_rwlocktype_write ||
4743                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4744                                 /*
4745                                  * We update the node's status only when we
4746                                  * can get write access.
4747                                  */
4748                                 locktype = isc_rwlocktype_write;
4749
4750                                 if (dns_rbtnode_refcurrent(node) == 0) {
4751                                         isc_mem_t *mctx;
4752
4753                                         mctx = search.rbtdb->common.mctx;
4754                                         clean_stale_headers(search.rbtdb, mctx,
4755                                                             header);
4756                                         if (header_prev != NULL)
4757                                                 header_prev->next =
4758                                                         header->next;
4759                                         else
4760                                                 node->data = header->next;
4761                                         free_rdataset(search.rbtdb, mctx,
4762                                                       header);
4763                                 } else {
4764                                         header->attributes |=
4765                                                 RDATASET_ATTR_STALE;
4766                                         node->dirty = 1;
4767                                         header_prev = header;
4768                                 }
4769                         } else
4770                                 header_prev = header;
4771                 } else if (EXISTS(header)) {
4772                         /*
4773                          * If we found a type we were looking for, remember
4774                          * it.
4775                          */
4776                         if (header->type == dns_rdatatype_ns) {
4777                                 /*
4778                                  * Remember a NS rdataset even if we're
4779                                  * not specifically looking for it, because
4780                                  * we might need it later.
4781                                  */
4782                                 found = header;
4783                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4784                                 /*
4785                                  * If we need the NS rdataset, we'll also
4786                                  * need its signature.
4787                                  */
4788                                 foundsig = header;
4789                         }
4790                         header_prev = header;
4791                 } else
4792                         header_prev = header;
4793         }
4794
4795         if (found == NULL) {
4796                 /*
4797                  * No NS records here.
4798                  */
4799                 NODE_UNLOCK(lock, locktype);
4800                 goto find_ns;
4801         }
4802
4803         if (nodep != NULL) {
4804                 new_reference(search.rbtdb, node);
4805                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4806                 *nodep = node;
4807         }
4808
4809         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4810         if (foundsig != NULL)
4811                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4812                               sigrdataset);
4813
4814         if (need_headerupdate(found, search.now) ||
4815             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4816                 if (locktype != isc_rwlocktype_write) {
4817                         NODE_UNLOCK(lock, locktype);
4818                         NODE_LOCK(lock, isc_rwlocktype_write);
4819                         locktype = isc_rwlocktype_write;
4820                 }
4821                 if (need_headerupdate(found, search.now))
4822                         update_header(search.rbtdb, found, search.now);
4823                 if (foundsig != NULL &&
4824                     need_headerupdate(foundsig, search.now)) {
4825                         update_header(search.rbtdb, foundsig, search.now);
4826                 }
4827         }
4828
4829         NODE_UNLOCK(lock, locktype);
4830
4831  tree_exit:
4832         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4833
4834         INSIST(!search.need_cleanup);
4835
4836         dns_rbtnodechain_reset(&search.chain);
4837
4838         if (result == DNS_R_DELEGATION)
4839                 result = ISC_R_SUCCESS;
4840
4841         return (result);
4842 }
4843
4844 static void
4845 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4846         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4847         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4848         unsigned int refs;
4849
4850         REQUIRE(VALID_RBTDB(rbtdb));
4851         REQUIRE(targetp != NULL && *targetp == NULL);
4852
4853         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4854         dns_rbtnode_refincrement(node, &refs);
4855         INSIST(refs != 0);
4856         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4857
4858         *targetp = source;
4859 }
4860
4861 static void
4862 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4863         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4864         dns_rbtnode_t *node;
4865         isc_boolean_t want_free = ISC_FALSE;
4866         isc_boolean_t inactive = ISC_FALSE;
4867         rbtdb_nodelock_t *nodelock;
4868
4869         REQUIRE(VALID_RBTDB(rbtdb));
4870         REQUIRE(targetp != NULL && *targetp != NULL);
4871
4872         node = (dns_rbtnode_t *)(*targetp);
4873         nodelock = &rbtdb->node_locks[node->locknum];
4874
4875         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4876
4877         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4878                                 isc_rwlocktype_none, ISC_FALSE)) {
4879                 if (isc_refcount_current(&nodelock->references) == 0 &&
4880                     nodelock->exiting) {
4881                         inactive = ISC_TRUE;
4882                 }
4883         }
4884
4885         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4886
4887         *targetp = NULL;
4888
4889         if (inactive) {
4890                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4891                 rbtdb->active--;
4892                 if (rbtdb->active == 0)
4893                         want_free = ISC_TRUE;
4894                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4895                 if (want_free) {
4896                         char buf[DNS_NAME_FORMATSIZE];
4897                         if (dns_name_dynamic(&rbtdb->common.origin))
4898                                 dns_name_format(&rbtdb->common.origin, buf,
4899                                                 sizeof(buf));
4900                         else
4901                                 strcpy(buf, "<UNKNOWN>");
4902                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4903                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4904                                       "calling free_rbtdb(%s)", buf);
4905                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4906                 }
4907         }
4908 }
4909
4910 static isc_result_t
4911 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4912         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4913         dns_rbtnode_t *rbtnode = node;
4914         rdatasetheader_t *header;
4915         isc_boolean_t force_expire = ISC_FALSE;
4916         /*
4917          * These are the category and module used by the cache cleaner.
4918          */
4919         isc_boolean_t log = ISC_FALSE;
4920         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4921         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4922         int level = ISC_LOG_DEBUG(2);
4923         char printname[DNS_NAME_FORMATSIZE];
4924
4925         REQUIRE(VALID_RBTDB(rbtdb));
4926
4927         /*
4928          * Caller must hold a tree lock.
4929          */
4930
4931         if (now == 0)
4932                 isc_stdtime_get(&now);
4933
4934         if (rbtdb->overmem) {
4935                 isc_uint32_t val;
4936
4937                 isc_random_get(&val);
4938                 /*
4939                  * XXXDCL Could stand to have a better policy, like LRU.
4940                  */
4941                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4942
4943                 /*
4944                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
4945                  * rbtdb->overmem can currently only be true for cache
4946                  * databases -- hence all of the "overmem cache" log strings.
4947                  */
4948                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4949                 if (log)
4950                         isc_log_write(dns_lctx, category, module, level,
4951                                       "overmem cache: %s %s",
4952                                       force_expire ? "FORCE" : "check",
4953                                       dns_rbt_formatnodename(rbtnode,
4954                                                            printname,
4955                                                            sizeof(printname)));
4956         }
4957
4958         /*
4959          * We may not need write access, but this code path is not performance
4960          * sensitive, so it should be okay to always lock as a writer.
4961          */
4962         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4963                   isc_rwlocktype_write);
4964
4965         for (header = rbtnode->data; header != NULL; header = header->next)
4966                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4967                         /*
4968                          * We don't check if refcurrent(rbtnode) == 0 and try
4969                          * to free like we do in cache_find(), because
4970                          * refcurrent(rbtnode) must be non-zero.  This is so
4971                          * because 'node' is an argument to the function.
4972                          */
4973                         header->attributes |= RDATASET_ATTR_STALE;
4974                         rbtnode->dirty = 1;
4975                         if (log)
4976                                 isc_log_write(dns_lctx, category, module,
4977                                               level, "overmem cache: stale %s",
4978                                               printname);
4979                 } else if (force_expire) {
4980                         if (! RETAIN(header)) {
4981                                 set_ttl(rbtdb, header, 0);
4982                                 header->attributes |= RDATASET_ATTR_STALE;
4983                                 rbtnode->dirty = 1;
4984                         } else if (log) {
4985                                 isc_log_write(dns_lctx, category, module,
4986                                               level, "overmem cache: "
4987                                               "reprieve by RETAIN() %s",
4988                                               printname);
4989                         }
4990                 } else if (rbtdb->overmem && log)
4991                         isc_log_write(dns_lctx, category, module, level,
4992                                       "overmem cache: saved %s", printname);
4993
4994         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4995                     isc_rwlocktype_write);
4996
4997         return (ISC_R_SUCCESS);
4998 }
4999
5000 static void
5001 overmem(dns_db_t *db, isc_boolean_t overmem) {
5002         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5003
5004         if (IS_CACHE(rbtdb))
5005                 rbtdb->overmem = overmem;
5006 }
5007
5008 static void
5009 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5010         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5011         dns_rbtnode_t *rbtnode = node;
5012         isc_boolean_t first;
5013
5014         REQUIRE(VALID_RBTDB(rbtdb));
5015
5016         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5017                   isc_rwlocktype_read);
5018
5019         fprintf(out, "node %p, %u references, locknum = %u\n",
5020                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5021                 rbtnode->locknum);
5022         if (rbtnode->data != NULL) {
5023                 rdatasetheader_t *current, *top_next;
5024
5025                 for (current = rbtnode->data; current != NULL;
5026                      current = top_next) {
5027                         top_next = current->next;
5028                         first = ISC_TRUE;
5029                         fprintf(out, "\ttype %u", current->type);
5030                         do {
5031                                 if (!first)
5032                                         fprintf(out, "\t");
5033                                 first = ISC_FALSE;
5034                                 fprintf(out,
5035                                         "\tserial = %lu, ttl = %u, "
5036                                         "trust = %u, attributes = %u, "
5037                                         "resign = %u\n",
5038                                         (unsigned long)current->serial,
5039                                         current->rdh_ttl,
5040                                         current->trust,
5041                                         current->attributes,
5042                                         current->resign);
5043                                 current = current->down;
5044                         } while (current != NULL);
5045                 }
5046         } else
5047                 fprintf(out, "(empty)\n");
5048
5049         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5050                     isc_rwlocktype_read);
5051 }
5052
5053 static isc_result_t
5054 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5055 {
5056         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5057         rbtdb_dbiterator_t *rbtdbiter;
5058
5059         REQUIRE(VALID_RBTDB(rbtdb));
5060
5061         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5062         if (rbtdbiter == NULL)
5063                 return (ISC_R_NOMEMORY);
5064
5065         rbtdbiter->common.methods = &dbiterator_methods;
5066         rbtdbiter->common.db = NULL;
5067         dns_db_attach(db, &rbtdbiter->common.db);
5068         rbtdbiter->common.relative_names =
5069                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5070         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5071         rbtdbiter->common.cleaning = ISC_FALSE;
5072         rbtdbiter->paused = ISC_TRUE;
5073         rbtdbiter->tree_locked = isc_rwlocktype_none;
5074         rbtdbiter->result = ISC_R_SUCCESS;
5075         dns_fixedname_init(&rbtdbiter->name);
5076         dns_fixedname_init(&rbtdbiter->origin);
5077         rbtdbiter->node = NULL;
5078         rbtdbiter->delete = 0;
5079         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5080         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5081         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5082         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5083         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5084         if (rbtdbiter->nsec3only)
5085                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5086         else
5087                 rbtdbiter->current = &rbtdbiter->chain;
5088
5089         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5090
5091         return (ISC_R_SUCCESS);
5092 }
5093
5094 static isc_result_t
5095 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5096                   dns_rdatatype_t type, dns_rdatatype_t covers,
5097                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5098                   dns_rdataset_t *sigrdataset)
5099 {
5100         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5101         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5102         rdatasetheader_t *header, *header_next, *found, *foundsig;
5103         rbtdb_serial_t serial;
5104         rbtdb_version_t *rbtversion = version;
5105         isc_boolean_t close_version = ISC_FALSE;
5106         rbtdb_rdatatype_t matchtype, sigmatchtype;
5107
5108         REQUIRE(VALID_RBTDB(rbtdb));
5109         REQUIRE(type != dns_rdatatype_any);
5110
5111         if (rbtversion == NULL) {
5112                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5113                 close_version = ISC_TRUE;
5114         }
5115         serial = rbtversion->serial;
5116         now = 0;
5117
5118         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5119                   isc_rwlocktype_read);
5120
5121         found = NULL;
5122         foundsig = NULL;
5123         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5124         if (covers == 0)
5125                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5126         else
5127                 sigmatchtype = 0;
5128
5129         for (header = rbtnode->data; header != NULL; header = header_next) {
5130                 header_next = header->next;
5131                 do {
5132                         if (header->serial <= serial &&
5133                             !IGNORE(header)) {
5134                                 /*
5135                                  * Is this a "this rdataset doesn't
5136                                  * exist" record?
5137                                  */
5138                                 if (NONEXISTENT(header))
5139                                         header = NULL;
5140                                 break;
5141                         } else
5142                                 header = header->down;
5143                 } while (header != NULL);
5144                 if (header != NULL) {
5145                         /*
5146                          * We have an active, extant rdataset.  If it's a
5147                          * type we're looking for, remember it.
5148                          */
5149                         if (header->type == matchtype) {
5150                                 found = header;
5151                                 if (foundsig != NULL)
5152                                         break;
5153                         } else if (header->type == sigmatchtype) {
5154                                 foundsig = header;
5155                                 if (found != NULL)
5156                                         break;
5157                         }
5158                 }
5159         }
5160         if (found != NULL) {
5161                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5162                 if (foundsig != NULL)
5163                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5164                                       sigrdataset);
5165         }
5166
5167         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5168                     isc_rwlocktype_read);
5169
5170         if (close_version)
5171                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5172                              ISC_FALSE);
5173
5174         if (found == NULL)
5175                 return (ISC_R_NOTFOUND);
5176
5177         return (ISC_R_SUCCESS);
5178 }
5179
5180 static isc_result_t
5181 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5182                    dns_rdatatype_t type, dns_rdatatype_t covers,
5183                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5184                    dns_rdataset_t *sigrdataset)
5185 {
5186         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5187         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5188         rdatasetheader_t *header, *header_next, *found, *foundsig;
5189         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5190         isc_result_t result;
5191         nodelock_t *lock;
5192         isc_rwlocktype_t locktype;
5193
5194         REQUIRE(VALID_RBTDB(rbtdb));
5195         REQUIRE(type != dns_rdatatype_any);
5196
5197         UNUSED(version);
5198
5199         result = ISC_R_SUCCESS;
5200
5201         if (now == 0)
5202                 isc_stdtime_get(&now);
5203
5204         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5205         locktype = isc_rwlocktype_read;
5206         NODE_LOCK(lock, locktype);
5207
5208         found = NULL;
5209         foundsig = NULL;
5210         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5211         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5212         if (covers == 0)
5213                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5214         else
5215                 sigmatchtype = 0;
5216
5217         for (header = rbtnode->data; header != NULL; header = header_next) {
5218                 header_next = header->next;
5219                 if (header->rdh_ttl <= now) {
5220                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5221                             (locktype == isc_rwlocktype_write ||
5222                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5223                                 /*
5224                                  * We update the node's status only when we
5225                                  * can get write access.
5226                                  */
5227                                 locktype = isc_rwlocktype_write;
5228
5229                                 /*
5230                                  * We don't check if refcurrent(rbtnode) == 0
5231                                  * and try to free like we do in cache_find(),
5232                                  * because refcurrent(rbtnode) must be
5233                                  * non-zero.  This is so because 'node' is an
5234                                  * argument to the function.
5235                                  */
5236                                 header->attributes |= RDATASET_ATTR_STALE;
5237                                 rbtnode->dirty = 1;
5238                         }
5239                 } else if (EXISTS(header)) {
5240                         if (header->type == matchtype)
5241                                 found = header;
5242                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5243                                  header->type == negtype)
5244                                 found = header;
5245                         else if (header->type == sigmatchtype)
5246                                 foundsig = header;
5247                 }
5248         }
5249         if (found != NULL) {
5250                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5251                 if (foundsig != NULL)
5252                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5253                                       sigrdataset);
5254         }
5255
5256         NODE_UNLOCK(lock, locktype);
5257
5258         if (found == NULL)
5259                 return (ISC_R_NOTFOUND);
5260
5261         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5262                 /*
5263                  * We found a negative cache entry.
5264                  */
5265                 if (NXDOMAIN(found))
5266                         result = DNS_R_NCACHENXDOMAIN;
5267                 else
5268                         result = DNS_R_NCACHENXRRSET;
5269         }
5270
5271         return (result);
5272 }
5273
5274 static isc_result_t
5275 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5276              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5277 {
5278         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5279         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5280         rbtdb_version_t *rbtversion = version;
5281         rbtdb_rdatasetiter_t *iterator;
5282         unsigned int refs;
5283
5284         REQUIRE(VALID_RBTDB(rbtdb));
5285
5286         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5287         if (iterator == NULL)
5288                 return (ISC_R_NOMEMORY);
5289
5290         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5291                 now = 0;
5292                 if (rbtversion == NULL)
5293                         currentversion(db,
5294                                  (dns_dbversion_t **) (void *)(&rbtversion));
5295                 else {
5296                         unsigned int refs;
5297
5298                         isc_refcount_increment(&rbtversion->references,
5299                                                &refs);
5300                         INSIST(refs > 1);
5301                 }
5302         } else {
5303                 if (now == 0)
5304                         isc_stdtime_get(&now);
5305                 rbtversion = NULL;
5306         }
5307
5308         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5309         iterator->common.methods = &rdatasetiter_methods;
5310         iterator->common.db = db;
5311         iterator->common.node = node;
5312         iterator->common.version = (dns_dbversion_t *)rbtversion;
5313         iterator->common.now = now;
5314
5315         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5316
5317         dns_rbtnode_refincrement(rbtnode, &refs);
5318         INSIST(refs != 0);
5319
5320         iterator->current = NULL;
5321
5322         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5323
5324         *iteratorp = (dns_rdatasetiter_t *)iterator;
5325
5326         return (ISC_R_SUCCESS);
5327 }
5328
5329 static isc_boolean_t
5330 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5331         rdatasetheader_t *header, *header_next;
5332         isc_boolean_t cname, other_data;
5333         dns_rdatatype_t rdtype;
5334
5335         /*
5336          * The caller must hold the node lock.
5337          */
5338
5339         /*
5340          * Look for CNAME and "other data" rdatasets active in our version.
5341          */
5342         cname = ISC_FALSE;
5343         other_data = ISC_FALSE;
5344         for (header = node->data; header != NULL; header = header_next) {
5345                 header_next = header->next;
5346                 if (header->type == dns_rdatatype_cname) {
5347                         /*
5348                          * Look for an active extant CNAME.
5349                          */
5350                         do {
5351                                 if (header->serial <= serial &&
5352                                     !IGNORE(header)) {
5353                                         /*
5354                                          * Is this a "this rdataset doesn't
5355                                          * exist" record?
5356                                          */
5357                                         if (NONEXISTENT(header))
5358                                                 header = NULL;
5359                                         break;
5360                                 } else
5361                                         header = header->down;
5362                         } while (header != NULL);
5363                         if (header != NULL)
5364                                 cname = ISC_TRUE;
5365                 } else {
5366                         /*
5367                          * Look for active extant "other data".
5368                          *
5369                          * "Other data" is any rdataset whose type is not
5370                          * KEY, NSEC, SIG or RRSIG.
5371                          */
5372                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5373                         if (rdtype != dns_rdatatype_key &&
5374                             rdtype != dns_rdatatype_sig &&
5375                             rdtype != dns_rdatatype_nsec &&
5376                             rdtype != dns_rdatatype_rrsig) {
5377                                 /*
5378                                  * Is it active and extant?
5379                                  */
5380                                 do {
5381                                         if (header->serial <= serial &&
5382                                             !IGNORE(header)) {
5383                                                 /*
5384                                                  * Is this a "this rdataset
5385                                                  * doesn't exist" record?
5386                                                  */
5387                                                 if (NONEXISTENT(header))
5388                                                         header = NULL;
5389                                                 break;
5390                                         } else
5391                                                 header = header->down;
5392                                 } while (header != NULL);
5393                                 if (header != NULL)
5394                                         other_data = ISC_TRUE;
5395                         }
5396                 }
5397         }
5398
5399         if (cname && other_data)
5400                 return (ISC_TRUE);
5401
5402         return (ISC_FALSE);
5403 }
5404
5405 static isc_result_t
5406 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5407         isc_result_t result;
5408
5409         INSIST(newheader->heap_index == 0);
5410         INSIST(!ISC_LINK_LINKED(newheader, lru_link));
5411         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5412         return (result);
5413 }
5414
5415 static isc_result_t
5416 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5417     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5418     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5419 {
5420         rbtdb_changed_t *changed = NULL;
5421         rdatasetheader_t *topheader, *topheader_prev, *header;
5422         unsigned char *merged;
5423         isc_result_t result;
5424         isc_boolean_t header_nx;
5425         isc_boolean_t newheader_nx;
5426         isc_boolean_t merge;
5427         dns_rdatatype_t rdtype, covers;
5428         rbtdb_rdatatype_t negtype;
5429         dns_trust_t trust;
5430         int idx;
5431
5432         /*
5433          * Add an rdatasetheader_t to a node.
5434          */
5435
5436         /*
5437          * Caller must be holding the node lock.
5438          */
5439
5440         if ((options & DNS_DBADD_MERGE) != 0) {
5441                 REQUIRE(rbtversion != NULL);
5442                 merge = ISC_TRUE;
5443         } else
5444                 merge = ISC_FALSE;
5445
5446         if ((options & DNS_DBADD_FORCE) != 0)
5447                 trust = dns_trust_ultimate;
5448         else
5449                 trust = newheader->trust;
5450
5451         if (rbtversion != NULL && !loading) {
5452                 /*
5453                  * We always add a changed record, even if no changes end up
5454                  * being made to this node, because it's harmless and
5455                  * simplifies the code.
5456                  */
5457                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5458                 if (changed == NULL) {
5459                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5460                         return (ISC_R_NOMEMORY);
5461                 }
5462         }
5463
5464         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5465         topheader_prev = NULL;
5466
5467         negtype = 0;
5468         if (rbtversion == NULL && !newheader_nx) {
5469                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5470                 if (rdtype == 0) {
5471                         /*
5472                          * We're adding a negative cache entry.
5473                          */
5474                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5475                         if (covers == dns_rdatatype_any) {
5476                                 /*
5477                                  * We're adding an negative cache entry
5478                                  * which covers all types (NXDOMAIN,
5479                                  * NODATA(QTYPE=ANY)).
5480                                  *
5481                                  * We make all other data stale so that the
5482                                  * only rdataset that can be found at this
5483                                  * node is the negative cache entry.
5484                                  */
5485                                 for (topheader = rbtnode->data;
5486                                      topheader != NULL;
5487                                      topheader = topheader->next) {
5488                                         set_ttl(rbtdb, topheader, 0);
5489                                         topheader->attributes |=
5490                                                 RDATASET_ATTR_STALE;
5491                                 }
5492                                 rbtnode->dirty = 1;
5493                                 goto find_header;
5494                         }
5495                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5496                 } else {
5497                         /*
5498                          * We're adding something that isn't a
5499                          * negative cache entry.  Look for an extant
5500                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5501                          * cache entry.
5502                          */
5503                         for (topheader = rbtnode->data;
5504                              topheader != NULL;
5505                              topheader = topheader->next) {
5506                                 if (topheader->type ==
5507                                     RBTDB_RDATATYPE_NCACHEANY)
5508                                         break;
5509                         }
5510                         if (topheader != NULL && EXISTS(topheader) &&
5511                             topheader->rdh_ttl > now) {
5512                                 /*
5513                                  * Found one.
5514                                  */
5515                                 if (trust < topheader->trust) {
5516                                         /*
5517                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5518                                          * is more trusted.
5519                                          */
5520                                         free_rdataset(rbtdb,
5521                                                       rbtdb->common.mctx,
5522                                                       newheader);
5523                                         if (addedrdataset != NULL)
5524                                                 bind_rdataset(rbtdb, rbtnode,
5525                                                               topheader, now,
5526                                                               addedrdataset);
5527                                         return (DNS_R_UNCHANGED);
5528                                 }
5529                                 /*
5530                                  * The new rdataset is better.  Expire the
5531                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5532                                  */
5533                                 set_ttl(rbtdb, topheader, 0);
5534                                 topheader->attributes |= RDATASET_ATTR_STALE;
5535                                 rbtnode->dirty = 1;
5536                                 topheader = NULL;
5537                                 goto find_header;
5538                         }
5539                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5540                 }
5541         }
5542
5543         for (topheader = rbtnode->data;
5544              topheader != NULL;
5545              topheader = topheader->next) {
5546                 if (topheader->type == newheader->type ||
5547                     topheader->type == negtype)
5548                         break;
5549                 topheader_prev = topheader;
5550         }
5551
5552  find_header:
5553         /*
5554          * If header isn't NULL, we've found the right type.  There may be
5555          * IGNORE rdatasets between the top of the chain and the first real
5556          * data.  We skip over them.
5557          */
5558         header = topheader;
5559         while (header != NULL && IGNORE(header))
5560                 header = header->down;
5561         if (header != NULL) {
5562                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5563
5564                 /*
5565                  * Deleting an already non-existent rdataset has no effect.
5566                  */
5567                 if (header_nx && newheader_nx) {
5568                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5569                         return (DNS_R_UNCHANGED);
5570                 }
5571
5572                 /*
5573                  * Trying to add an rdataset with lower trust to a cache DB
5574                  * has no effect, provided that the cache data isn't stale.
5575                  */
5576                 if (rbtversion == NULL && trust < header->trust &&
5577                     (header->rdh_ttl > now || header_nx)) {
5578                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5579                         if (addedrdataset != NULL)
5580                                 bind_rdataset(rbtdb, rbtnode, header, now,
5581                                               addedrdataset);
5582                         return (DNS_R_UNCHANGED);
5583                 }
5584
5585                 /*
5586                  * Don't merge if a nonexistent rdataset is involved.
5587                  */
5588                 if (merge && (header_nx || newheader_nx))
5589                         merge = ISC_FALSE;
5590
5591                 /*
5592                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5593                  * that is the union of 'newheader' and 'header'.
5594                  */
5595                 if (merge) {
5596                         unsigned int flags = 0;
5597                         INSIST(rbtversion->serial >= header->serial);
5598                         merged = NULL;
5599                         result = ISC_R_SUCCESS;
5600
5601                         if ((options & DNS_DBADD_EXACT) != 0)
5602                                 flags |= DNS_RDATASLAB_EXACT;
5603                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5604                              newheader->rdh_ttl != header->rdh_ttl)
5605                                         result = DNS_R_NOTEXACT;
5606                         else if (newheader->rdh_ttl != header->rdh_ttl)
5607                                 flags |= DNS_RDATASLAB_FORCE;
5608                         if (result == ISC_R_SUCCESS)
5609                                 result = dns_rdataslab_merge(
5610                                              (unsigned char *)header,
5611                                              (unsigned char *)newheader,
5612                                              (unsigned int)(sizeof(*newheader)),
5613                                              rbtdb->common.mctx,
5614                                              rbtdb->common.rdclass,
5615                                              (dns_rdatatype_t)header->type,
5616                                              flags, &merged);
5617                         if (result == ISC_R_SUCCESS) {
5618                                 /*
5619                                  * If 'header' has the same serial number as
5620                                  * we do, we could clean it up now if we knew
5621                                  * that our caller had no references to it.
5622                                  * We don't know this, however, so we leave it
5623                                  * alone.  It will get cleaned up when
5624                                  * clean_zone_node() runs.
5625                                  */
5626                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5627                                               newheader);
5628                                 newheader = (rdatasetheader_t *)merged;
5629                                 if (loading && RESIGN(newheader) &&
5630                                     RESIGN(header) &&
5631                                     header->resign < newheader->resign)
5632                                         newheader->resign = header->resign;
5633                         } else {
5634                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5635                                               newheader);
5636                                 return (result);
5637                         }
5638                 }
5639                 /*
5640                  * Don't replace existing NS, A and AAAA RRsets
5641                  * in the cache if they already exist.  This
5642                  * prevents named being locked to old servers.
5643                  * Don't lower trust of existing record if the
5644                  * update is forced.
5645                  */
5646                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5647                     header->type == dns_rdatatype_ns &&
5648                     !header_nx && !newheader_nx &&
5649                     header->trust >= newheader->trust &&
5650                     dns_rdataslab_equalx((unsigned char *)header,
5651                                          (unsigned char *)newheader,
5652                                          (unsigned int)(sizeof(*newheader)),
5653                                          rbtdb->common.rdclass,
5654                                          (dns_rdatatype_t)header->type)) {
5655                         /*
5656                          * Honour the new ttl if it is less than the
5657                          * older one.
5658                          */
5659                         if (header->rdh_ttl > newheader->rdh_ttl)
5660                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5661                         if (header->noqname == NULL &&
5662                             newheader->noqname != NULL) {
5663                                 header->noqname = newheader->noqname;
5664                                 newheader->noqname = NULL;
5665                         }
5666                         if (header->closest == NULL &&
5667                             newheader->closest != NULL) {
5668                                 header->closest = newheader->closest;
5669                                 newheader->closest = NULL;
5670                         }
5671                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5672                         if (addedrdataset != NULL)
5673                                 bind_rdataset(rbtdb, rbtnode, header, now,
5674                                               addedrdataset);
5675                         return (ISC_R_SUCCESS);
5676                 }
5677                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5678                     (header->type == dns_rdatatype_a ||
5679                      header->type == dns_rdatatype_aaaa) &&
5680                     !header_nx && !newheader_nx &&
5681                     header->trust >= newheader->trust &&
5682                     dns_rdataslab_equal((unsigned char *)header,
5683                                         (unsigned char *)newheader,
5684                                         (unsigned int)(sizeof(*newheader)))) {
5685                         /*
5686                          * Honour the new ttl if it is less than the
5687                          * older one.
5688                          */
5689                         if (header->rdh_ttl > newheader->rdh_ttl)
5690                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5691                         if (header->noqname == NULL &&
5692                             newheader->noqname != NULL) {
5693                                 header->noqname = newheader->noqname;
5694                                 newheader->noqname = NULL;
5695                         }
5696                         if (header->closest == NULL &&
5697                             newheader->closest != NULL) {
5698                                 header->closest = newheader->closest;
5699                                 newheader->closest = NULL;
5700                         }
5701                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5702                         if (addedrdataset != NULL)
5703                                 bind_rdataset(rbtdb, rbtnode, header, now,
5704                                               addedrdataset);
5705                         return (ISC_R_SUCCESS);
5706                 }
5707                 INSIST(rbtversion == NULL ||
5708                        rbtversion->serial >= topheader->serial);
5709                 if (topheader_prev != NULL)
5710                         topheader_prev->next = newheader;
5711                 else
5712                         rbtnode->data = newheader;
5713                 newheader->next = topheader->next;
5714                 if (loading) {
5715                         /*
5716                          * There are no other references to 'header' when
5717                          * loading, so we MAY clean up 'header' now.
5718                          * Since we don't generate changed records when
5719                          * loading, we MUST clean up 'header' now.
5720                          */
5721                         newheader->down = NULL;
5722                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5723                 } else {
5724                         newheader->down = topheader;
5725                         topheader->next = newheader;
5726                         rbtnode->dirty = 1;
5727                         if (changed != NULL)
5728                                 changed->dirty = ISC_TRUE;
5729                         if (rbtversion == NULL) {
5730                                 set_ttl(rbtdb, header, 0);
5731                                 header->attributes |= RDATASET_ATTR_STALE;
5732                         }
5733                         idx = newheader->node->locknum;
5734                         if (IS_CACHE(rbtdb)) {
5735                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5736                                                  newheader, lru_link);
5737                                 /*
5738                                  * XXXMLG We don't check the return value
5739                                  * here.  If it fails, we will not do TTL
5740                                  * based expiry on this node.  However, we
5741                                  * will do it on the LRU side, so memory
5742                                  * will not leak... for long.
5743                                  */
5744                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5745                         } else if (RESIGN(newheader))
5746                                 resign_insert(rbtdb, idx, newheader);
5747                 }
5748         } else {
5749                 /*
5750                  * No non-IGNORED rdatasets of the given type exist at
5751                  * this node.
5752                  */
5753
5754                 /*
5755                  * If we're trying to delete the type, don't bother.
5756                  */
5757                 if (newheader_nx) {
5758                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5759                         return (DNS_R_UNCHANGED);
5760                 }
5761
5762                 if (topheader != NULL) {
5763                         /*
5764                          * We have a list of rdatasets of the given type,
5765                          * but they're all marked IGNORE.  We simply insert
5766                          * the new rdataset at the head of the list.
5767                          *
5768                          * Ignored rdatasets cannot occur during loading, so
5769                          * we INSIST on it.
5770                          */
5771                         INSIST(!loading);
5772                         INSIST(rbtversion == NULL ||
5773                                rbtversion->serial >= topheader->serial);
5774                         if (topheader_prev != NULL)
5775                                 topheader_prev->next = newheader;
5776                         else
5777                                 rbtnode->data = newheader;
5778                         newheader->next = topheader->next;
5779                         newheader->down = topheader;
5780                         topheader->next = newheader;
5781                         rbtnode->dirty = 1;
5782                         if (changed != NULL)
5783                                 changed->dirty = ISC_TRUE;
5784                 } else {
5785                         /*
5786                          * No rdatasets of the given type exist at the node.
5787                          */
5788                         newheader->next = rbtnode->data;
5789                         newheader->down = NULL;
5790                         rbtnode->data = newheader;
5791                 }
5792                 idx = newheader->node->locknum;
5793                 if (IS_CACHE(rbtdb)) {
5794                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5795                                          newheader, lru_link);
5796                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5797                 } else if (RESIGN(newheader)) {
5798                         resign_insert(rbtdb, idx, newheader);
5799                 }
5800         }
5801
5802         /*
5803          * Check if the node now contains CNAME and other data.
5804          */
5805         if (rbtversion != NULL &&
5806             cname_and_other_data(rbtnode, rbtversion->serial))
5807                 return (DNS_R_CNAMEANDOTHER);
5808
5809         if (addedrdataset != NULL)
5810                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5811
5812         return (ISC_R_SUCCESS);
5813 }
5814
5815 static inline isc_boolean_t
5816 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5817                 rbtdb_rdatatype_t type)
5818 {
5819         if (IS_CACHE(rbtdb)) {
5820                 if (type == dns_rdatatype_dname)
5821                         return (ISC_TRUE);
5822                 else
5823                         return (ISC_FALSE);
5824         } else if (type == dns_rdatatype_dname ||
5825                    (type == dns_rdatatype_ns &&
5826                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5827                 return (ISC_TRUE);
5828         return (ISC_FALSE);
5829 }
5830
5831 static inline isc_result_t
5832 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5833            dns_rdataset_t *rdataset)
5834 {
5835         struct noqname *noqname;
5836         isc_mem_t *mctx = rbtdb->common.mctx;
5837         dns_name_t name;
5838         dns_rdataset_t neg, negsig;
5839         isc_result_t result;
5840         isc_region_t r;
5841
5842         dns_name_init(&name, NULL);
5843         dns_rdataset_init(&neg);
5844         dns_rdataset_init(&negsig);
5845
5846         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5847         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5848
5849         noqname = isc_mem_get(mctx, sizeof(*noqname));
5850         if (noqname == NULL) {
5851                 result = ISC_R_NOMEMORY;
5852                 goto cleanup;
5853         }
5854         dns_name_init(&noqname->name, NULL);
5855         noqname->neg = NULL;
5856         noqname->negsig = NULL;
5857         noqname->type = neg.type;
5858         result = dns_name_dup(&name, mctx, &noqname->name);
5859         if (result != ISC_R_SUCCESS)
5860                 goto cleanup;
5861         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5862         if (result != ISC_R_SUCCESS)
5863                 goto cleanup;
5864         noqname->neg = r.base;
5865         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5866         if (result != ISC_R_SUCCESS)
5867                 goto cleanup;
5868         noqname->negsig = r.base;
5869         dns_rdataset_disassociate(&neg);
5870         dns_rdataset_disassociate(&negsig);
5871         newheader->noqname = noqname;
5872         return (ISC_R_SUCCESS);
5873
5874 cleanup:
5875         dns_rdataset_disassociate(&neg);
5876         dns_rdataset_disassociate(&negsig);
5877         free_noqname(mctx, &noqname);
5878         return(result);
5879 }
5880
5881 static inline isc_result_t
5882 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5883            dns_rdataset_t *rdataset)
5884 {
5885         struct noqname *closest;
5886         isc_mem_t *mctx = rbtdb->common.mctx;
5887         dns_name_t name;
5888         dns_rdataset_t neg, negsig;
5889         isc_result_t result;
5890         isc_region_t r;
5891
5892         dns_name_init(&name, NULL);
5893         dns_rdataset_init(&neg);
5894         dns_rdataset_init(&negsig);
5895
5896         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5897         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5898
5899         closest = isc_mem_get(mctx, sizeof(*closest));
5900         if (closest == NULL) {
5901                 result = ISC_R_NOMEMORY;
5902                 goto cleanup;
5903         }
5904         dns_name_init(&closest->name, NULL);
5905         closest->neg = NULL;
5906         closest->negsig = NULL;
5907         closest->type = neg.type;
5908         result = dns_name_dup(&name, mctx, &closest->name);
5909         if (result != ISC_R_SUCCESS)
5910                 goto cleanup;
5911         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5912         if (result != ISC_R_SUCCESS)
5913                 goto cleanup;
5914         closest->neg = r.base;
5915         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5916         if (result != ISC_R_SUCCESS)
5917                 goto cleanup;
5918         closest->negsig = r.base;
5919         dns_rdataset_disassociate(&neg);
5920         dns_rdataset_disassociate(&negsig);
5921         newheader->closest = closest;
5922         return (ISC_R_SUCCESS);
5923
5924  cleanup:
5925         dns_rdataset_disassociate(&neg);
5926         dns_rdataset_disassociate(&negsig);
5927         free_noqname(mctx, &closest);
5928         return(result);
5929 }
5930
/*
 * Forward declaration of the zone method table, so that the functions below
 * can compare a database's common.methods pointer against its address.
 */
5931 static dns_dbmethods_t zone_methods;
5932
/*
 * Add 'rdataset' to 'node' of 'db'.
 *
 * The rdataset is converted to a slab whose leading bytes are used as an
 * rdatasetheader_t, the header is filled in from 'rdataset', and the result
 * is handed to add() while the node's lock is write-held.
 *
 * 'version' NULL:      'now' is the time base (the current time is fetched
 *                      when the caller passed 0); the header gets serial 1
 *                      and may pick up the NXDOMAIN / OPTOUT attributes and
 *                      copies of the NOQNAME / closest-encloser proofs.
 * 'version' non-NULL:  'now' is forced to 0; the header takes the version's
 *                      serial and, if requested, the RESIGN attribute and
 *                      resign time.
 *
 * The tree lock is write-held when the type is a delegating type for this
 * database or when the database is a cache in the overmem state; it stays
 * held across add() only in the delegating case.
 *
 * Returns the failing result of dns_rdataslab_fromrdataset(), addnoqname()
 * or addclosest(), otherwise whatever add() returns.  If 'addedrdataset' is
 * non-NULL it is passed through to add().
 */
5933 static isc_result_t
5934 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5935             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5936             dns_rdataset_t *addedrdataset)
5937 {
5938         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5939         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5940         rbtdb_version_t *rbtversion = version;
5941         isc_region_t region;
5942         rdatasetheader_t *newheader;
5943         rdatasetheader_t *header;
5944         isc_result_t result;
5945         isc_boolean_t delegating;
5946         isc_boolean_t tree_locked = ISC_FALSE;
5947
5948         REQUIRE(VALID_RBTDB(rbtdb));
5949
             /*
              * For a database using the zone method table: a node flagged
              * nsec3 may only receive NSEC3 rdatasets or rdatasets covering
              * NSEC3, and any other node may receive neither.
              */
5950         if (rbtdb->common.methods == &zone_methods)
5951                 REQUIRE(((rbtnode->nsec3 &&
5952                           (rdataset->type == dns_rdatatype_nsec3 ||
5953                            rdataset->covers == dns_rdatatype_nsec3)) ||
5954                          (!rbtnode->nsec3 &&
5955                            rdataset->type != dns_rdatatype_nsec3 &&
5956                            rdataset->covers != dns_rdatatype_nsec3)));
5957
             /*
              * Only unversioned adds use a time base; a versioned add runs
              * with now == 0, so the TTL stored below is rdataset->ttl as is.
              */
5958         if (rbtversion == NULL) {
5959                 if (now == 0)
5960                         isc_stdtime_get(&now);
5961         } else
5962                 now = 0;
5963
             /*
              * sizeof(rdatasetheader_t) is passed as the final argument and
              * region.base is then treated as the header itself.
              */
5964         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5965                                             &region,
5966                                             sizeof(rdatasetheader_t));
5967         if (result != ISC_R_SUCCESS)
5968                 return (result);
5969
5970         newheader = (rdatasetheader_t *)region.base;
5971         init_rdataset(rbtdb, newheader);
5972         set_ttl(rbtdb, newheader, rdataset->ttl + now);
5973         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5974                                                 rdataset->covers);
5975         newheader->attributes = 0;
5976         newheader->noqname = NULL;
5977         newheader->closest = NULL;
5978         newheader->count = init_count++;
5979         newheader->trust = rdataset->trust;
5980         newheader->additional_auth = NULL;
5981         newheader->additional_glue = NULL;
5982         newheader->last_used = now;
5983         newheader->node = rbtnode;
5984         if (rbtversion != NULL) {
5985                 newheader->serial = rbtversion->serial;
                     /* Already 0 from the check above; repeated here. */
5986                 now = 0;
5987
5988                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5989                         newheader->attributes |= RDATASET_ATTR_RESIGN;
5990                         newheader->resign = rdataset->resign;
5991                 } else
5992                         newheader->resign = 0;
5993         } else {
                     /*
                      * Unversioned add: fixed serial, no resign time, and
                      * the negative-answer attributes and proofs are carried
                      * over from 'rdataset'.  A failed proof copy frees the
                      * new header and aborts the add.
                      */
5994                 newheader->serial = 1;
5995                 newheader->resign = 0;
5996                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5997                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
5998                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
5999                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6000                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6001                         result = addnoqname(rbtdb, newheader, rdataset);
6002                         if (result != ISC_R_SUCCESS) {
6003                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6004                                               newheader);
6005                                 return (result);
6006                         }
6007                 }
6008                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6009                         result = addclosest(rbtdb, newheader, rdataset);
6010                         if (result != ISC_R_SUCCESS) {
6011                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6012                                               newheader);
6013                                 return (result);
6014                         }
6015                 }
6016         }
6017
6018         /*
6019          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6020          * just DNAME for the cache), then we need to set the callback bit
6021          * on the node.
6022          */
6023         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6024                 delegating = ISC_TRUE;
6025         else
6026                 delegating = ISC_FALSE;
6027
6028         /*
6029          * If we're adding a delegation type or the DB is a cache in an overmem
6030          * state, hold an exclusive lock on the tree.  In the latter case
6031          * the lock does not necessarily have to be acquired but it will help
6032          * purge stale entries more effectively.
6033          */
6034         if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6035                 tree_locked = ISC_TRUE;
6036                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6037         }
6038
             /* Runs before the node lock is taken; tree_locked is true here. */
6039         if (IS_CACHE(rbtdb) && rbtdb->overmem)
6040                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6041
6042         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6043                   isc_rwlocktype_write);
6044
             /*
              * Count the new header in the RRset statistics and mark it
              * with STATCOUNT.
              */
6045         if (rbtdb->rrsetstats != NULL) {
6046                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6047                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6048         }
6049
6050         if (IS_CACHE(rbtdb)) {
6051                 if (tree_locked)
6052                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6053
                     /*
                      * Look at element 1 of this lock bucket's heap
                      * (presumably the entry with the smallest TTL -- confirm
                      * against the heap's compare function) and expire it if
                      * its TTL is at or before now - RBTDB_VIRTUAL.
                      */
6054                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6055                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6056                         expire_header(rbtdb, header, tree_locked);
6057
6058                 /*
6059                  * If we've been holding a write lock on the tree just for
6060                  * cleaning, we can release it now.  However, we still need the
6061                  * node lock.
6062                  */
6063                 if (tree_locked && !delegating) {
6064                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6065                         tree_locked = ISC_FALSE;
6066                 }
6067         }
6068
             /*
              * add() is called with its 'loading' argument ISC_FALSE.  The
              * node's find_callback bit is set only for a successful add of
              * a delegating type.
              */
6069         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6070                      addedrdataset, now);
6071         if (result == ISC_R_SUCCESS && delegating)
6072                 rbtnode->find_callback = 1;
6073
6074         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6075                     isc_rwlocktype_write);
6076
6077         if (tree_locked)
6078                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6079
6080         /*
6081          * Update the zone's secure status.  If version is non-NULL
6082          * this is deferred until closeversion() is called.
6083          */
6084         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6085                 iszonesecure(db, version, rbtdb->origin_node);
6086
6087         return (result);
6088 }
6089
/*
 * Remove the rdata in 'rdataset' from the rdataset of the same type at
 * 'node', within 'version'.
 *
 * 'version' is dereferenced unconditionally (rbtversion->serial), so it
 * must be non-NULL.  The work is done with the node's lock write-held.
 *
 * If an existing rdataset of that type is found:
 *   - dns_rdataslab_subtract() succeeds: the resulting slab becomes a new
 *     header linked in front of the existing chain; ISC_R_SUCCESS is
 *     returned and, if 'newrdataset' is non-NULL, it is bound to the new
 *     header.
 *   - dns_rdataslab_subtract() returns DNS_R_NXRRSET: a header marked
 *     NONEXISTENT is linked in front of the chain instead.  'result' stays
 *     DNS_R_NXRRSET, so that value is returned and 'newrdataset' is not
 *     bound.
 *   - any other result (including DNS_R_NOTEXACT when DNS_DBSUB_EXACT is
 *     set and the TTLs differ) is returned with the node unchanged.
 * If no such rdataset exists, DNS_R_NOTEXACT is returned when
 * DNS_DBSUB_EXACT is set and DNS_R_UNCHANGED otherwise.
 *
 * ISC_R_NOMEMORY is returned when add_changed() or new_rdataset() fails;
 * a failure of dns_rdataslab_fromrdataset() is returned as is.
 */
6090 static isc_result_t
6091 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6092                  dns_rdataset_t *rdataset, unsigned int options,
6093                  dns_rdataset_t *newrdataset)
6094 {
6095         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6096         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6097         rbtdb_version_t *rbtversion = version;
6098         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6099         unsigned char *subresult;
6100         isc_region_t region;
6101         isc_result_t result;
6102         rbtdb_changed_t *changed;
6103
6104         REQUIRE(VALID_RBTDB(rbtdb));
6105
             /*
              * For a database using the zone method table: a node flagged
              * nsec3 may only be given NSEC3 rdatasets or rdatasets covering
              * NSEC3, and any other node may be given neither.
              */
6106         if (rbtdb->common.methods == &zone_methods)
6107                 REQUIRE(((rbtnode->nsec3 &&
6108                           (rdataset->type == dns_rdatatype_nsec3 ||
6109                            rdataset->covers == dns_rdatatype_nsec3)) ||
6110                          (!rbtnode->nsec3 &&
6111                            rdataset->type != dns_rdatatype_nsec3 &&
6112                            rdataset->covers != dns_rdatatype_nsec3)));
6113
             /*
              * Build a temporary header + slab describing the rdata to be
              * removed; it is freed on every path below.
              */
6114         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6115                                             &region,
6116                                             sizeof(rdatasetheader_t));
6117         if (result != ISC_R_SUCCESS)
6118                 return (result);
6119         newheader = (rdatasetheader_t *)region.base;
6120         init_rdataset(rbtdb, newheader);
6121         set_ttl(rbtdb, newheader, rdataset->ttl);
6122         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6123                                                 rdataset->covers);
6124         newheader->attributes = 0;
6125         newheader->serial = rbtversion->serial;
6126         newheader->trust = 0;
6127         newheader->noqname = NULL;
6128         newheader->closest = NULL;
6129         newheader->count = init_count++;
6130         newheader->additional_auth = NULL;
6131         newheader->additional_glue = NULL;
6132         newheader->last_used = 0;
6133         newheader->node = rbtnode;
6134         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6135                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6136                 newheader->resign = rdataset->resign;
6137         } else
6138                 newheader->resign = 0;
6139
6140         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6141                   isc_rwlocktype_write);
6142
             /*
              * Obtain the change record for this node in this version.
              * This early exit unlocks by hand rather than via 'unlock'.
              */
6143         changed = add_changed(rbtdb, rbtversion, rbtnode);
6144         if (changed == NULL) {
6145                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6146                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6147                             isc_rwlocktype_write);
6148                 return (ISC_R_NOMEMORY);
6149         }
6150
             /*
              * Walk the node's list of per-type chains looking for the
              * chain of our type, remembering its predecessor for relinking.
              */
6151         topheader_prev = NULL;
6152         for (topheader = rbtnode->data;
6153              topheader != NULL;
6154              topheader = topheader->next) {
6155                 if (topheader->type == newheader->type)
6156                         break;
6157                 topheader_prev = topheader;
6158         }
6159         /*
6160          * If header isn't NULL, we've found the right type.  There may be
6161          * IGNORE rdatasets between the top of the chain and the first real
6162          * data.  We skip over them.
6163          */
6164         header = topheader;
6165         while (header != NULL && IGNORE(header))
6166                 header = header->down;
6167         if (header != NULL && EXISTS(header)) {
6168                 unsigned int flags = 0;
6169                 subresult = NULL;
6170                 result = ISC_R_SUCCESS;
                     /* In exact mode the TTLs must match as well. */
6171                 if ((options & DNS_DBSUB_EXACT) != 0) {
6172                         flags |= DNS_RDATASLAB_EXACT;
6173                         if (newheader->rdh_ttl != header->rdh_ttl)
6174                                 result = DNS_R_NOTEXACT;
6175                 }
6176                 if (result == ISC_R_SUCCESS)
6177                         result = dns_rdataslab_subtract(
6178                                         (unsigned char *)header,
6179                                         (unsigned char *)newheader,
6180                                         (unsigned int)(sizeof(*newheader)),
6181                                         rbtdb->common.mctx,
6182                                         rbtdb->common.rdclass,
6183                                         (dns_rdatatype_t)header->type,
6184                                         flags, &subresult);
6185                 if (result == ISC_R_SUCCESS) {
                             /*
                              * The temporary header is replaced by the
                              * slab produced by the subtraction.
                              */
6186                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6187                         newheader = (rdatasetheader_t *)subresult;
6188                         init_rdataset(rbtdb, newheader);
6189                         /*
6190                          * We have to set the serial since the rdataslab
6191                          * subtraction routine copies the reserved portion of
6192                          * header, not newheader.
6193                          */
6194                         newheader->serial = rbtversion->serial;
6195                         /*
6196                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6197                          * to additional info.  We need to clear these fields
6198                          * to avoid having duplicated references.
6199                          */
6200                         newheader->additional_auth = NULL;
6201                         newheader->additional_glue = NULL;
6202                 } else if (result == DNS_R_NXRRSET) {
6203                         /*
6204                          * This subtraction would remove all of the rdata;
6205                          * add a nonexistent header instead.
6206                          */
6207                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6208                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6209                         if (newheader == NULL) {
6210                                 result = ISC_R_NOMEMORY;
6211                                 goto unlock;
6212                         }
6213                         set_ttl(rbtdb, newheader, 0);
6214                         newheader->type = topheader->type;
6215                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6216                         newheader->trust = 0;
6217                         newheader->serial = rbtversion->serial;
6218                         newheader->noqname = NULL;
6219                         newheader->closest = NULL;
6220                         newheader->count = 0;
6221                         newheader->additional_auth = NULL;
6222                         newheader->additional_glue = NULL;
6223                         newheader->node = rbtnode;
6224                         newheader->resign = 0;
6225                         newheader->last_used = 0;
6226                 } else {
                             /*
                              * Any other failure: drop the temporary header
                              * and return the result with the node unchanged.
                              */
6227                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6228                         goto unlock;
6229                 }
6230
6231                 /*
6232                  * If we're here, we want to link newheader in front of
6233                  * topheader.
6234                  */
6235                 INSIST(rbtversion->serial >= topheader->serial);
6236                 if (topheader_prev != NULL)
6237                         topheader_prev->next = newheader;
6238                 else
6239                         rbtnode->data = newheader;
6240                 newheader->next = topheader->next;
6241                 newheader->down = topheader;
6242                 topheader->next = newheader;
6243                 rbtnode->dirty = 1;
6244                 changed->dirty = ISC_TRUE;
6245         } else {
6246                 /*
6247                  * The rdataset doesn't exist, so we don't need to do anything
6248                  * to satisfy the deletion request.
6249                  */
6250                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6251                 if ((options & DNS_DBSUB_EXACT) != 0)
6252                         result = DNS_R_NOTEXACT;
6253                 else
6254                         result = DNS_R_UNCHANGED;
6255         }
6256
             /*
              * Only a successful subtraction is reported back through
              * 'newrdataset'; in the DNS_R_NXRRSET case 'result' is not
              * ISC_R_SUCCESS, so nothing is bound.
              */
6257         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6258                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6259
6260  unlock:
6261         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6262                     isc_rwlocktype_write);
6263
6264         /*
6265          * Update the zone's secure status.  If version is non-NULL
6266          * this is deferred until closeversion() is called.
6267          */
             /*
              * NOTE(review): 'version' was already dereferenced above via
              * rbtversion->serial, so version == NULL looks unreachable
              * here -- confirm.
              */
6268         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6269                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6270
6271         return (result);
6272 }
6273
6274 static isc_result_t
6275 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6276                dns_rdatatype_t type, dns_rdatatype_t covers)
6277 {
6278         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6279         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6280         rbtdb_version_t *rbtversion = version;
6281         isc_result_t result;
6282         rdatasetheader_t *newheader;
6283
6284         REQUIRE(VALID_RBTDB(rbtdb));
6285
6286         if (type == dns_rdatatype_any)
6287                 return (ISC_R_NOTIMPLEMENTED);
6288         if (type == dns_rdatatype_rrsig && covers == 0)
6289                 return (ISC_R_NOTIMPLEMENTED);
6290
6291         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6292         if (newheader == NULL)
6293                 return (ISC_R_NOMEMORY);
6294         set_ttl(rbtdb, newheader, 0);
6295         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6296         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6297         newheader->trust = 0;
6298         newheader->noqname = NULL;
6299         newheader->closest = NULL;
6300         newheader->additional_auth = NULL;
6301         newheader->additional_glue = NULL;
6302         if (rbtversion != NULL)
6303                 newheader->serial = rbtversion->serial;
6304         else
6305                 newheader->serial = 0;
6306         newheader->count = 0;
6307         newheader->last_used = 0;
6308         newheader->node = rbtnode;
6309
6310         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6311                   isc_rwlocktype_write);
6312
6313         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6314                      ISC_FALSE, NULL, 0);
6315
6316         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6317                     isc_rwlocktype_write);
6318
6319         /*
6320          * Update the zone's secure status.  If version is non-NULL
6321          * this is deferred until closeversion() is called.
6322          */
6323         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6324                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6325
6326         return (result);
6327 }
6328
6329 static isc_result_t
6330 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6331         rbtdb_load_t *loadctx = arg;
6332         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6333         dns_rbtnode_t *node;
6334         isc_result_t result;
6335         isc_region_t region;
6336         rdatasetheader_t *newheader;
6337
6338         /*
6339          * This routine does no node locking.  See comments in
6340          * 'load' below for more information on loading and
6341          * locking.
6342          */
6343
6344
6345         /*
6346          * SOA records are only allowed at top of zone.
6347          */
6348         if (rdataset->type == dns_rdatatype_soa &&
6349             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6350                 return (DNS_R_NOTZONETOP);
6351
6352         if (rdataset->type != dns_rdatatype_nsec3 &&
6353             rdataset->covers != dns_rdatatype_nsec3)
6354                 add_empty_wildcards(rbtdb, name);
6355
6356         if (dns_name_iswildcard(name)) {
6357                 /*
6358                  * NS record owners cannot legally be wild cards.
6359                  */
6360                 if (rdataset->type == dns_rdatatype_ns)
6361                         return (DNS_R_INVALIDNS);
6362                 /*
6363                  * NSEC3 record owners cannot legally be wild cards.
6364                  */
6365                 if (rdataset->type == dns_rdatatype_nsec3)
6366                         return (DNS_R_INVALIDNSEC3);
6367                 result = add_wildcard_magic(rbtdb, name);
6368                 if (result != ISC_R_SUCCESS)
6369                         return (result);
6370         }
6371
6372         node = NULL;
6373         if (rdataset->type == dns_rdatatype_nsec3 ||
6374             rdataset->covers == dns_rdatatype_nsec3) {
6375                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6376                 if (result == ISC_R_SUCCESS)
6377                         node->nsec3 = 1;
6378         } else {
6379                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6380                 if (result == ISC_R_SUCCESS)
6381                         node->nsec3 = 0;
6382         }
6383         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6384                 return (result);
6385         if (result != ISC_R_EXISTS) {
6386                 dns_name_t foundname;
6387                 dns_name_init(&foundname, NULL);
6388                 dns_rbt_namefromnode(node, &foundname);
6389 #ifdef DNS_RBT_USEHASH
6390                 node->locknum = node->hashval % rbtdb->node_lock_count;
6391 #else
6392                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6393                         rbtdb->node_lock_count;
6394 #endif
6395         }
6396
6397         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6398                                             &region,
6399                                             sizeof(rdatasetheader_t));
6400         if (result != ISC_R_SUCCESS)
6401                 return (result);
6402         newheader = (rdatasetheader_t *)region.base;
6403         init_rdataset(rbtdb, newheader);
6404         set_ttl(rbtdb, newheader,
6405                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6406         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6407                                                 rdataset->covers);
6408         newheader->attributes = 0;
6409         newheader->trust = rdataset->trust;
6410         newheader->serial = 1;
6411         newheader->noqname = NULL;
6412         newheader->closest = NULL;
6413         newheader->count = init_count++;
6414         newheader->additional_auth = NULL;
6415         newheader->additional_glue = NULL;
6416         newheader->last_used = 0;
6417         newheader->node = node;
6418         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6419                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6420                 newheader->resign = rdataset->resign;
6421         } else
6422                 newheader->resign = 0;
6423
6424         result = add(rbtdb, node, rbtdb->current_version, newheader,
6425                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6426         if (result == ISC_R_SUCCESS &&
6427             delegating_type(rbtdb, node, rdataset->type))
6428                 node->find_callback = 1;
6429         else if (result == DNS_R_UNCHANGED)
6430                 result = ISC_R_SUCCESS;
6431
6432         return (result);
6433 }
6434
6435 static isc_result_t
6436 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6437         rbtdb_load_t *loadctx;
6438         dns_rbtdb_t *rbtdb;
6439
6440         rbtdb = (dns_rbtdb_t *)db;
6441
6442         REQUIRE(VALID_RBTDB(rbtdb));
6443
6444         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6445         if (loadctx == NULL)
6446                 return (ISC_R_NOMEMORY);
6447
6448         loadctx->rbtdb = rbtdb;
6449         if (IS_CACHE(rbtdb))
6450                 isc_stdtime_get(&loadctx->now);
6451         else
6452                 loadctx->now = 0;
6453
6454         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6455
6456         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6457                 == 0);
6458         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6459
6460         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6461
6462         *addp = loading_addrdataset;
6463         *dbloadp = loadctx;
6464
6465         return (ISC_R_SUCCESS);
6466 }
6467
6468 static isc_result_t
6469 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6470         rbtdb_load_t *loadctx;
6471         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6472
6473         REQUIRE(VALID_RBTDB(rbtdb));
6474         REQUIRE(dbloadp != NULL);
6475         loadctx = *dbloadp;
6476         REQUIRE(loadctx->rbtdb == rbtdb);
6477
6478         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6479
6480         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6481         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6482
6483         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6484         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6485
6486         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6487
6488         /*
6489          * If there's a KEY rdataset at the zone origin containing a
6490          * zone key, we consider the zone secure.
6491          */
6492         if (! IS_CACHE(rbtdb))
6493                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6494
6495         *dbloadp = NULL;
6496
6497         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6498
6499         return (ISC_R_SUCCESS);
6500 }
6501
6502 static isc_result_t
6503 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6504      dns_masterformat_t masterformat) {
6505         dns_rbtdb_t *rbtdb;
6506
6507         rbtdb = (dns_rbtdb_t *)db;
6508
6509         REQUIRE(VALID_RBTDB(rbtdb));
6510
6511         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6512                                  &dns_master_style_default,
6513                                  filename, masterformat));
6514 }
6515
6516 static void
6517 delete_callback(void *data, void *arg) {
6518         dns_rbtdb_t *rbtdb = arg;
6519         rdatasetheader_t *current, *next;
6520
6521         for (current = data; current != NULL; current = next) {
6522                 next = current->next;
6523                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6524         }
6525 }
6526
6527 static isc_boolean_t
6528 issecure(dns_db_t *db) {
6529         dns_rbtdb_t *rbtdb;
6530         isc_boolean_t secure;
6531
6532         rbtdb = (dns_rbtdb_t *)db;
6533
6534         REQUIRE(VALID_RBTDB(rbtdb));
6535
6536         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6537         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6538         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6539
6540         return (secure);
6541 }
6542
6543 static isc_boolean_t
6544 isdnssec(dns_db_t *db) {
6545         dns_rbtdb_t *rbtdb;
6546         isc_boolean_t dnssec;
6547
6548         rbtdb = (dns_rbtdb_t *)db;
6549
6550         REQUIRE(VALID_RBTDB(rbtdb));
6551
6552         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6553         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6554         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6555
6556         return (dnssec);
6557 }
6558
6559 static unsigned int
6560 nodecount(dns_db_t *db) {
6561         dns_rbtdb_t *rbtdb;
6562         unsigned int count;
6563
6564         rbtdb = (dns_rbtdb_t *)db;
6565
6566         REQUIRE(VALID_RBTDB(rbtdb));
6567
6568         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6569         count = dns_rbt_nodecount(rbtdb->tree);
6570         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6571
6572         return (count);
6573 }
6574
6575 static void
6576 settask(dns_db_t *db, isc_task_t *task) {
6577         dns_rbtdb_t *rbtdb;
6578
6579         rbtdb = (dns_rbtdb_t *)db;
6580
6581         REQUIRE(VALID_RBTDB(rbtdb));
6582
6583         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6584         if (rbtdb->task != NULL)
6585                 isc_task_detach(&rbtdb->task);
6586         if (task != NULL)
6587                 isc_task_attach(task, &rbtdb->task);
6588         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6589 }
6590
6591 static isc_boolean_t
6592 ispersistent(dns_db_t *db) {
6593         UNUSED(db);
6594         return (ISC_FALSE);
6595 }
6596
6597 static isc_result_t
6598 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6599         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6600         dns_rbtnode_t *onode;
6601         isc_result_t result = ISC_R_SUCCESS;
6602
6603         REQUIRE(VALID_RBTDB(rbtdb));
6604         REQUIRE(nodep != NULL && *nodep == NULL);
6605
6606         /* Note that the access to origin_node doesn't require a DB lock */
6607         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6608         if (onode != NULL) {
6609                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6610                 new_reference(rbtdb, onode);
6611                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6612
6613                 *nodep = rbtdb->origin_node;
6614         } else {
6615                 INSIST(IS_CACHE(rbtdb));
6616                 result = ISC_R_NOTFOUND;
6617         }
6618
6619         return (result);
6620 }
6621
6622 static isc_result_t
6623 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6624                    isc_uint8_t *flags, isc_uint16_t *iterations,
6625                    unsigned char *salt, size_t *salt_length)
6626 {
6627         dns_rbtdb_t *rbtdb;
6628         isc_result_t result = ISC_R_NOTFOUND;
6629         rbtdb_version_t *rbtversion = version;
6630
6631         rbtdb = (dns_rbtdb_t *)db;
6632
6633         REQUIRE(VALID_RBTDB(rbtdb));
6634
6635         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6636
6637         if (rbtversion == NULL)
6638                 rbtversion = rbtdb->current_version;
6639
6640         if (rbtversion->havensec3) {
6641                 if (hash != NULL)
6642                         *hash = rbtversion->hash;
6643                 if (salt != NULL && salt_length != 0) {
6644                         REQUIRE(*salt_length > rbtversion->salt_length);
6645                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6646                 }
6647                 if (salt_length != NULL)
6648                         *salt_length = rbtversion->salt_length;
6649                 if (iterations != NULL)
6650                         *iterations = rbtversion->iterations;
6651                 if (flags != NULL)
6652                         *flags = rbtversion->flags;
6653                 result = ISC_R_SUCCESS;
6654         }
6655         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6656
6657         return (result);
6658 }
6659
6660 static isc_result_t
6661 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6662         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6663         isc_stdtime_t oldresign;
6664         isc_result_t result = ISC_R_SUCCESS;
6665         rdatasetheader_t *header;
6666
6667         REQUIRE(VALID_RBTDB(rbtdb));
6668         REQUIRE(!IS_CACHE(rbtdb));
6669         REQUIRE(rdataset != NULL);
6670
6671         header = rdataset->private3;
6672         header--;
6673
6674         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6675                   isc_rwlocktype_write);
6676
6677         oldresign = header->resign;
6678         header->resign = resign;
6679         if (header->heap_index != 0) {
6680                 INSIST(RESIGN(header));
6681                 if (resign == 0) {
6682                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6683                                         header->heap_index);
6684                         header->heap_index = 0;
6685                 } else if (resign < oldresign)
6686                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6687                                            header->heap_index);
6688                 else
6689                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6690                                            header->heap_index);
6691         } else if (resign && header->heap_index == 0) {
6692                 header->attributes |= RDATASET_ATTR_RESIGN;
6693                 result = resign_insert(rbtdb, header->node->locknum, header);
6694         }
6695         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6696                     isc_rwlocktype_write);
6697         return (result);
6698 }
6699
6700 static isc_result_t
6701 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6702                dns_name_t *foundname)
6703 {
6704         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6705         rdatasetheader_t *header = NULL, *this;
6706         unsigned int i;
6707         isc_result_t result = ISC_R_NOTFOUND;
6708
6709         REQUIRE(VALID_RBTDB(rbtdb));
6710
6711         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6712
6713         for (i = 0; i < rbtdb->node_lock_count; i++) {
6714                 this = isc_heap_element(rbtdb->heaps[i], 1);
6715                 if (this == NULL)
6716                         continue;
6717                 if (header == NULL)
6718                         header = this;
6719                 else if (isc_serial_lt(this->resign, header->resign))
6720                         header = this;
6721         }
6722
6723         if (header == NULL)
6724                 goto unlock;
6725
6726         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6727                   isc_rwlocktype_read);
6728
6729         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6730
6731         if (foundname != NULL)
6732                 dns_rbt_fullnamefromnode(header->node, foundname);
6733
6734         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6735                     isc_rwlocktype_read);
6736
6737         result = ISC_R_SUCCESS;
6738
6739  unlock:
6740         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6741
6742         return (result);
6743 }
6744
6745 static void
6746 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6747 {
6748         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6749         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6750         dns_rbtnode_t *node;
6751         rdatasetheader_t *header;
6752
6753         REQUIRE(VALID_RBTDB(rbtdb));
6754         REQUIRE(rdataset != NULL);
6755         REQUIRE(rbtdb->future_version == rbtversion);
6756         REQUIRE(rbtversion->writer);
6757
6758         node = rdataset->private2;
6759         header = rdataset->private3;
6760         header--;
6761
6762         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6763         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6764                   isc_rwlocktype_write);
6765         /*
6766          * Delete from heap and save to re-signed list so that it can
6767          * be restored if we backout of this change.
6768          */
6769         new_reference(rbtdb, node);
6770         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6771         header->heap_index = 0;
6772         ISC_LIST_APPEND(rbtversion->resigned_list, header, lru_link);
6773
6774         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6775                     isc_rwlocktype_write);
6776         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6777 }
6778
6779 static dns_stats_t *
6780 getrrsetstats(dns_db_t *db) {
6781         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6782
6783         REQUIRE(VALID_RBTDB(rbtdb));
6784         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6785
6786         return (rbtdb->rrsetstats);
6787 }
6788
/*
 * Method table installed by the create function for every database type
 * other than dns_dbtype_cache (i.e. zone and stub databases).
 *
 * The initializer is positional, so the order of the entries must follow
 * the member order of dns_dbmethods_t.  Compared with cache_methods this
 * table uses the zone_* lookup functions and supplies getnsec3parameters,
 * findnsec3node, setsigningtime, getsigningtime and resigned, but has no
 * getrrsetstats entry.
 * NOTE(review): NULL entries presumably mark optional methods that are
 * not provided -- confirm against the dns_dbmethods_t declaration.
 */
6789 static dns_dbmethods_t zone_methods = {
6790         attach,
6791         detach,
6792         beginload,
6793         endload,
6794         dump,
6795         currentversion,
6796         newversion,
6797         attachversion,
6798         closeversion,
6799         findnode,
6800         zone_find,
6801         zone_findzonecut,
6802         attachnode,
6803         detachnode,
6804         expirenode,
6805         printnode,
6806         createiterator,
6807         zone_findrdataset,
6808         allrdatasets,
6809         addrdataset,
6810         subtractrdataset,
6811         deleterdataset,
6812         issecure,
6813         nodecount,
6814         ispersistent,
6815         overmem,
6816         settask,
6817         getoriginnode,
6818         NULL,
6819         getnsec3parameters,
6820         findnsec3node,
6821         setsigningtime,
6822         getsigningtime,
6823         resigned,
6824         isdnssec,
6825         NULL
6826 };
6827
/*
 * Method table installed by the create function for dns_dbtype_cache
 * databases.
 *
 * The initializer is positional, so the order of the entries must follow
 * the member order of dns_dbmethods_t.  Compared with zone_methods this
 * table uses the cache_* lookup functions, leaves the NSEC3 and
 * re-signing entries NULL, and supplies getrrsetstats as its last entry.
 * NOTE(review): NULL entries presumably mark optional methods that are
 * not provided -- confirm against the dns_dbmethods_t declaration.
 */
6828 static dns_dbmethods_t cache_methods = {
6829         attach,
6830         detach,
6831         beginload,
6832         endload,
6833         dump,
6834         currentversion,
6835         newversion,
6836         attachversion,
6837         closeversion,
6838         findnode,
6839         cache_find,
6840         cache_findzonecut,
6841         attachnode,
6842         detachnode,
6843         expirenode,
6844         printnode,
6845         createiterator,
6846         cache_findrdataset,
6847         allrdatasets,
6848         addrdataset,
6849         subtractrdataset,
6850         deleterdataset,
6851         issecure,
6852         nodecount,
6853         ispersistent,
6854         overmem,
6855         settask,
6856         getoriginnode,
6857         NULL,
6858         NULL,
6859         NULL,
6860         NULL,
6861         NULL,
6862         NULL,
6863         isdnssec,
6864         getrrsetstats
6865 };
6866
6867 isc_result_t
6868 #ifdef DNS_RBTDB_VERSION64
6869 dns_rbtdb64_create
6870 #else
6871 dns_rbtdb_create
6872 #endif
6873                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6874                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6875                  void *driverarg, dns_db_t **dbp)
6876 {
6877         dns_rbtdb_t *rbtdb;
6878         isc_result_t result;
6879         int i;
6880         dns_name_t name;
6881         isc_boolean_t (*sooner)(void *, void *);
6882
6883         /* Keep the compiler happy. */
6884         UNUSED(argc);
6885         UNUSED(argv);
6886         UNUSED(driverarg);
6887
6888         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6889         if (rbtdb == NULL)
6890                 return (ISC_R_NOMEMORY);
6891
6892         memset(rbtdb, '\0', sizeof(*rbtdb));
6893         dns_name_init(&rbtdb->common.origin, NULL);
6894         rbtdb->common.attributes = 0;
6895         if (type == dns_dbtype_cache) {
6896                 rbtdb->common.methods = &cache_methods;
6897                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6898         } else if (type == dns_dbtype_stub) {
6899                 rbtdb->common.methods = &zone_methods;
6900                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6901         } else
6902                 rbtdb->common.methods = &zone_methods;
6903         rbtdb->common.rdclass = rdclass;
6904         rbtdb->common.mctx = NULL;
6905
6906         result = RBTDB_INITLOCK(&rbtdb->lock);
6907         if (result != ISC_R_SUCCESS)
6908                 goto cleanup_rbtdb;
6909
6910         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6911         if (result != ISC_R_SUCCESS)
6912                 goto cleanup_lock;
6913
6914         /*
6915          * Initialize node_lock_count in a generic way to support future
6916          * extension which allows the user to specify this value on creation.
6917          * Note that when specified for a cache DB it must be larger than 1
6918          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6919          */
6920         if (rbtdb->node_lock_count == 0) {
6921                 if (IS_CACHE(rbtdb))
6922                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6923                 else
6924                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6925         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6926                 result = ISC_R_RANGE;
6927                 goto cleanup_tree_lock;
6928         }
6929         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6930         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6931                                         sizeof(rbtdb_nodelock_t));
6932         if (rbtdb->node_locks == NULL) {
6933                 result = ISC_R_NOMEMORY;
6934                 goto cleanup_tree_lock;
6935         }
6936
6937         rbtdb->rrsetstats = NULL;
6938         if (IS_CACHE(rbtdb)) {
6939                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6940                 if (result != ISC_R_SUCCESS)
6941                         goto cleanup_node_locks;
6942                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6943                                                sizeof(rdatasetheaderlist_t));
6944                 if (rbtdb->rdatasets == NULL) {
6945                         result = ISC_R_NOMEMORY;
6946                         goto cleanup_rrsetstats;
6947                 }
6948                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6949                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
6950         } else
6951                 rbtdb->rdatasets = NULL;
6952
6953         /*
6954          * Create the heaps.
6955          */
6956         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6957                                    sizeof(isc_heap_t *));
6958         if (rbtdb->heaps == NULL) {
6959                 result = ISC_R_NOMEMORY;
6960                 goto cleanup_rdatasets;
6961         }
6962         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6963                 rbtdb->heaps[i] = NULL;
6964         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6965         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6966                 result = isc_heap_create(mctx, sooner, set_index, 0,
6967                                          &rbtdb->heaps[i]);
6968                 if (result != ISC_R_SUCCESS)
6969                         goto cleanup_heaps;
6970         }
6971
6972         /*
6973          * Create deadnode lists.
6974          */
6975         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6976                                        sizeof(rbtnodelist_t));
6977         if (rbtdb->deadnodes == NULL) {
6978                 result = ISC_R_NOMEMORY;
6979                 goto cleanup_heaps;
6980         }
6981         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6982                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6983
6984         rbtdb->active = rbtdb->node_lock_count;
6985
6986         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
6987                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
6988                 if (result == ISC_R_SUCCESS) {
6989                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
6990                         if (result != ISC_R_SUCCESS)
6991                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
6992                 }
6993                 if (result != ISC_R_SUCCESS) {
6994                         while (i-- > 0) {
6995                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
6996                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
6997                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
6998                         }
6999                         goto cleanup_deadnodes;
7000                 }
7001                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7002         }
7003
7004         /*
7005          * Attach to the mctx.  The database will persist so long as there
7006          * are references to it, and attaching to the mctx ensures that our
7007          * mctx won't disappear out from under us.
7008          */
7009         isc_mem_attach(mctx, &rbtdb->common.mctx);
7010
7011         /*
7012          * Must be initialized before free_rbtdb() is called.
7013          */
7014         isc_ondestroy_init(&rbtdb->common.ondest);
7015
7016         /*
7017          * Make a copy of the origin name.
7018          */
7019         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7020         if (result != ISC_R_SUCCESS) {
7021                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7022                 return (result);
7023         }
7024
7025         /*
7026          * Make the Red-Black Trees.
7027          */
7028         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7029         if (result != ISC_R_SUCCESS) {
7030                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7031                 return (result);
7032         }
7033
7034         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7035         if (result != ISC_R_SUCCESS) {
7036                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7037                 return (result);
7038         }
7039
7040         /*
7041          * In order to set the node callback bit correctly in zone databases,
7042          * we need to know if the node has the origin name of the zone.
7043          * In loading_addrdataset() we could simply compare the new name
7044          * to the origin name, but this is expensive.  Also, we don't know the
7045          * node name in addrdataset(), so we need another way of knowing the
7046          * zone's top.
7047          *
7048          * We now explicitly create a node for the zone's origin, and then
7049          * we simply remember the node's address.  This is safe, because
7050          * the top-of-zone node can never be deleted, nor can its address
7051          * change.
7052          */
7053         if (!IS_CACHE(rbtdb)) {
7054                 rbtdb->origin_node = NULL;
7055                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7056                                          &rbtdb->origin_node);
7057                 if (result != ISC_R_SUCCESS) {
7058                         INSIST(result != ISC_R_EXISTS);
7059                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7060                         return (result);
7061                 }
7062                 rbtdb->origin_node->nsec3 = 0;
7063                 /*
7064                  * We need to give the origin node the right locknum.
7065                  */
7066                 dns_name_init(&name, NULL);
7067                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7068 #ifdef DNS_RBT_USEHASH
7069                 rbtdb->origin_node->locknum =
7070                         rbtdb->origin_node->hashval %
7071                         rbtdb->node_lock_count;
7072 #else
7073                 rbtdb->origin_node->locknum =
7074                         dns_name_hash(&name, ISC_TRUE) %
7075                         rbtdb->node_lock_count;
7076 #endif
7077         }
7078
7079         /*
7080          * Misc. Initialization.
7081          */
7082         result = isc_refcount_init(&rbtdb->references, 1);
7083         if (result != ISC_R_SUCCESS) {
7084                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7085                 return (result);
7086         }
7087         rbtdb->attributes = 0;
7088         rbtdb->overmem = ISC_FALSE;
7089         rbtdb->task = NULL;
7090
7091         /*
7092          * Version Initialization.
7093          */
7094         rbtdb->current_serial = 1;
7095         rbtdb->least_serial = 1;
7096         rbtdb->next_serial = 2;
7097         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7098         if (rbtdb->current_version == NULL) {
7099                 isc_refcount_decrement(&rbtdb->references, NULL);
7100                 isc_refcount_destroy(&rbtdb->references);
7101                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7102                 return (ISC_R_NOMEMORY);
7103         }
7104         rbtdb->current_version->secure = dns_db_insecure;
7105         rbtdb->current_version->havensec3 = ISC_FALSE;
7106         rbtdb->current_version->flags = 0;
7107         rbtdb->current_version->iterations = 0;
7108         rbtdb->current_version->hash = 0;
7109         rbtdb->current_version->salt_length = 0;
7110         memset(rbtdb->current_version->salt, 0,
7111                sizeof(rbtdb->current_version->salt));
7112         rbtdb->future_version = NULL;
7113         ISC_LIST_INIT(rbtdb->open_versions);
7114         /*
7115          * Keep the current version in the open list so that list operation
7116          * won't happen in normal lookup operations.
7117          */
7118         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7119
7120         rbtdb->common.magic = DNS_DB_MAGIC;
7121         rbtdb->common.impmagic = RBTDB_MAGIC;
7122
7123         *dbp = (dns_db_t *)rbtdb;
7124
7125         return (ISC_R_SUCCESS);
7126
7127  cleanup_deadnodes:
7128         isc_mem_put(mctx, rbtdb->deadnodes,
7129                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7130
7131  cleanup_heaps:
7132         if (rbtdb->heaps != NULL) {
7133                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7134                         if (rbtdb->heaps[i] != NULL)
7135                                 isc_heap_destroy(&rbtdb->heaps[i]);
7136                 isc_mem_put(mctx, rbtdb->heaps,
7137                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7138         }
7139
7140  cleanup_rdatasets:
7141         if (rbtdb->rdatasets != NULL)
7142                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7143                             sizeof(rdatasetheaderlist_t));
7144  cleanup_rrsetstats:
7145         if (rbtdb->rrsetstats != NULL)
7146                 dns_stats_detach(&rbtdb->rrsetstats);
7147
7148  cleanup_node_locks:
7149         isc_mem_put(mctx, rbtdb->node_locks,
7150                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7151
7152  cleanup_tree_lock:
7153         isc_rwlock_destroy(&rbtdb->tree_lock);
7154
7155  cleanup_lock:
7156         RBTDB_DESTROYLOCK(&rbtdb->lock);
7157
7158  cleanup_rbtdb:
7159         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7160         return (result);
7161 }
7162
7163
7164 /*
7165  * Slabbed Rdataset Methods
7166  */
7167
7168 static void
7169 rdataset_disassociate(dns_rdataset_t *rdataset) {
7170         dns_db_t *db = rdataset->private1;
7171         dns_dbnode_t *node = rdataset->private2;
7172
7173         detachnode(db, &node);
7174 }
7175
7176 static isc_result_t
7177 rdataset_first(dns_rdataset_t *rdataset) {
7178         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7179         unsigned int count;
7180
7181         count = raw[0] * 256 + raw[1];
7182         if (count == 0) {
7183                 rdataset->private5 = NULL;
7184                 return (ISC_R_NOMORE);
7185         }
7186
7187 #if DNS_RDATASET_FIXED
7188         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7189                 raw += 2 + (4 * count);
7190         else
7191 #endif
7192                 raw += 2;
7193
7194         /*
7195          * The privateuint4 field is the number of rdata beyond the
7196          * cursor position, so we decrement the total count by one
7197          * before storing it.
7198          *
7199          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7200          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7201          * to the first entry in the offset table.
7202          */
7203         count--;
7204         rdataset->privateuint4 = count;
7205         rdataset->private5 = raw;
7206
7207         return (ISC_R_SUCCESS);
7208 }
7209
7210 static isc_result_t
7211 rdataset_next(dns_rdataset_t *rdataset) {
7212         unsigned int count;
7213         unsigned int length;
7214         unsigned char *raw;     /* RDATASLAB */
7215
7216         count = rdataset->privateuint4;
7217         if (count == 0)
7218                 return (ISC_R_NOMORE);
7219         count--;
7220         rdataset->privateuint4 = count;
7221
7222         /*
7223          * Skip forward one record (length + 4) or one offset (4).
7224          */
7225         raw = rdataset->private5;
7226 #if DNS_RDATASET_FIXED
7227         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7228 #endif
7229                 length = raw[0] * 256 + raw[1];
7230                 raw += length;
7231 #if DNS_RDATASET_FIXED
7232         }
7233         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7234 #else
7235         rdataset->private5 = raw + 2;           /* length(2) */
7236 #endif
7237
7238         return (ISC_R_SUCCESS);
7239 }
7240
7241 static void
7242 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7243         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7244 #if DNS_RDATASET_FIXED
7245         unsigned int offset;
7246 #endif
7247         unsigned int length;
7248         isc_region_t r;
7249         unsigned int flags = 0;
7250
7251         REQUIRE(raw != NULL);
7252
7253         /*
7254          * Find the start of the record if not already in private5
7255          * then skip the length and order fields.
7256          */
7257 #if DNS_RDATASET_FIXED
7258         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7259                 offset = (raw[0] << 24) + (raw[1] << 16) +
7260                          (raw[2] << 8) + raw[3];
7261                 raw = rdataset->private3;
7262                 raw += offset;
7263         }
7264 #endif
7265         length = raw[0] * 256 + raw[1];
7266 #if DNS_RDATASET_FIXED
7267         raw += 4;
7268 #else
7269         raw += 2;
7270 #endif
7271         if (rdataset->type == dns_rdatatype_rrsig) {
7272                 if (*raw & DNS_RDATASLAB_OFFLINE)
7273                         flags |= DNS_RDATA_OFFLINE;
7274                 length--;
7275                 raw++;
7276         }
7277         r.length = length;
7278         r.base = raw;
7279         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7280         rdata->flags |= flags;
7281 }
7282
7283 static void
7284 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7285         dns_db_t *db = source->private1;
7286         dns_dbnode_t *node = source->private2;
7287         dns_dbnode_t *cloned_node = NULL;
7288
7289         attachnode(db, node, &cloned_node);
7290         *target = *source;
7291
7292         /*
7293          * Reset iterator state.
7294          */
7295         target->privateuint4 = 0;
7296         target->private5 = NULL;
7297 }
7298
7299 static unsigned int
7300 rdataset_count(dns_rdataset_t *rdataset) {
7301         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7302         unsigned int count;
7303
7304         count = raw[0] * 256 + raw[1];
7305
7306         return (count);
7307 }
7308
7309 static isc_result_t
7310 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7311                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7312 {
7313         dns_db_t *db = rdataset->private1;
7314         dns_dbnode_t *node = rdataset->private2;
7315         dns_dbnode_t *cloned_node;
7316         struct noqname *noqname = rdataset->private6;
7317
7318         cloned_node = NULL;
7319         attachnode(db, node, &cloned_node);
7320         nsec->methods = &rdataset_methods;
7321         nsec->rdclass = db->rdclass;
7322         nsec->type = noqname->type;
7323         nsec->covers = 0;
7324         nsec->ttl = rdataset->ttl;
7325         nsec->trust = rdataset->trust;
7326         nsec->private1 = rdataset->private1;
7327         nsec->private2 = rdataset->private2;
7328         nsec->private3 = noqname->neg;
7329         nsec->privateuint4 = 0;
7330         nsec->private5 = NULL;
7331         nsec->private6 = NULL;
7332         nsec->private7 = NULL;
7333
7334         cloned_node = NULL;
7335         attachnode(db, node, &cloned_node);
7336         nsecsig->methods = &rdataset_methods;
7337         nsecsig->rdclass = db->rdclass;
7338         nsecsig->type = dns_rdatatype_rrsig;
7339         nsecsig->covers = noqname->type;
7340         nsecsig->ttl = rdataset->ttl;
7341         nsecsig->trust = rdataset->trust;
7342         nsecsig->private1 = rdataset->private1;
7343         nsecsig->private2 = rdataset->private2;
7344         nsecsig->private3 = noqname->negsig;
7345         nsecsig->privateuint4 = 0;
7346         nsecsig->private5 = NULL;
7347         nsec->private6 = NULL;
7348         nsec->private7 = NULL;
7349
7350         dns_name_clone(&noqname->name, name);
7351
7352         return (ISC_R_SUCCESS);
7353 }
7354
7355 static isc_result_t
7356 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7357                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7358 {
7359         dns_db_t *db = rdataset->private1;
7360         dns_dbnode_t *node = rdataset->private2;
7361         dns_dbnode_t *cloned_node;
7362         struct noqname *closest = rdataset->private7;
7363
7364         cloned_node = NULL;
7365         attachnode(db, node, &cloned_node);
7366         nsec->methods = &rdataset_methods;
7367         nsec->rdclass = db->rdclass;
7368         nsec->type = closest->type;
7369         nsec->covers = 0;
7370         nsec->ttl = rdataset->ttl;
7371         nsec->trust = rdataset->trust;
7372         nsec->private1 = rdataset->private1;
7373         nsec->private2 = rdataset->private2;
7374         nsec->private3 = closest->neg;
7375         nsec->privateuint4 = 0;
7376         nsec->private5 = NULL;
7377         nsec->private6 = NULL;
7378         nsec->private7 = NULL;
7379
7380         cloned_node = NULL;
7381         attachnode(db, node, &cloned_node);
7382         nsecsig->methods = &rdataset_methods;
7383         nsecsig->rdclass = db->rdclass;
7384         nsecsig->type = dns_rdatatype_rrsig;
7385         nsecsig->covers = closest->type;
7386         nsecsig->ttl = rdataset->ttl;
7387         nsecsig->trust = rdataset->trust;
7388         nsecsig->private1 = rdataset->private1;
7389         nsecsig->private2 = rdataset->private2;
7390         nsecsig->private3 = closest->negsig;
7391         nsecsig->privateuint4 = 0;
7392         nsecsig->private5 = NULL;
7393         nsec->private6 = NULL;
7394         nsec->private7 = NULL;
7395
7396         dns_name_clone(&closest->name, name);
7397
7398         return (ISC_R_SUCCESS);
7399 }
7400
7401 /*
7402  * Rdataset Iterator Methods
7403  */
7404
7405 static void
7406 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7407         rbtdb_rdatasetiter_t *rbtiterator;
7408
7409         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7410
7411         if (rbtiterator->common.version != NULL)
7412                 closeversion(rbtiterator->common.db,
7413                              &rbtiterator->common.version, ISC_FALSE);
7414         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7415         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7416                     sizeof(*rbtiterator));
7417
7418         *iteratorp = NULL;
7419 }
7420
7421 static isc_result_t
7422 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7423         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7424         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7425         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7426         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7427         rdatasetheader_t *header, *top_next;
7428         rbtdb_serial_t serial;
7429         isc_stdtime_t now;
7430
7431         if (IS_CACHE(rbtdb)) {
7432                 serial = 1;
7433                 now = rbtiterator->common.now;
7434         } else {
7435                 serial = rbtversion->serial;
7436                 now = 0;
7437         }
7438
7439         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7440                   isc_rwlocktype_read);
7441
7442         for (header = rbtnode->data; header != NULL; header = top_next) {
7443                 top_next = header->next;
7444                 do {
7445                         if (header->serial <= serial && !IGNORE(header)) {
7446                                 /*
7447                                  * Is this a "this rdataset doesn't exist"
7448                                  * record?  Or is it too old in the cache?
7449                                  *
7450                                  * Note: unlike everywhere else, we
7451                                  * check for now > header->rdh_ttl instead
7452                                  * of now >= header->rdh_ttl.  This allows
7453                                  * ANY and RRSIG queries for 0 TTL
7454                                  * rdatasets to work.
7455                                  */
7456                                 if (NONEXISTENT(header) ||
7457                                     (now != 0 && now > header->rdh_ttl))
7458                                         header = NULL;
7459                                 break;
7460                         } else
7461                                 header = header->down;
7462                 } while (header != NULL);
7463                 if (header != NULL)
7464                         break;
7465         }
7466
7467         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7468                     isc_rwlocktype_read);
7469
7470         rbtiterator->current = header;
7471
7472         if (header == NULL)
7473                 return (ISC_R_NOMORE);
7474
7475         return (ISC_R_SUCCESS);
7476 }
7477
7478 static isc_result_t
7479 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7480         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7481         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7482         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7483         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7484         rdatasetheader_t *header, *top_next;
7485         rbtdb_serial_t serial;
7486         isc_stdtime_t now;
7487         rbtdb_rdatatype_t type, negtype;
7488         dns_rdatatype_t rdtype, covers;
7489
7490         header = rbtiterator->current;
7491         if (header == NULL)
7492                 return (ISC_R_NOMORE);
7493
7494         if (IS_CACHE(rbtdb)) {
7495                 serial = 1;
7496                 now = rbtiterator->common.now;
7497         } else {
7498                 serial = rbtversion->serial;
7499                 now = 0;
7500         }
7501
7502         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7503                   isc_rwlocktype_read);
7504
7505         type = header->type;
7506         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7507         if (rdtype == 0) {
7508                 covers = RBTDB_RDATATYPE_EXT(header->type);
7509                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7510         } else
7511                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7512         for (header = header->next; header != NULL; header = top_next) {
7513                 top_next = header->next;
7514                 /*
7515                  * If not walking back up the down list.
7516                  */
7517                 if (header->type != type && header->type != negtype) {
7518                         do {
7519                                 if (header->serial <= serial &&
7520                                     !IGNORE(header)) {
7521                                         /*
7522                                          * Is this a "this rdataset doesn't
7523                                          * exist" record?
7524                                          *
7525                                          * Note: unlike everywhere else, we
7526                                          * check for now > header->ttl instead
7527                                          * of now >= header->ttl.  This allows
7528                                          * ANY and RRSIG queries for 0 TTL
7529                                          * rdatasets to work.
7530                                          */
7531                                         if ((header->attributes &
7532                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7533                                             (now != 0 && now > header->rdh_ttl))
7534                                                 header = NULL;
7535                                         break;
7536                                 } else
7537                                         header = header->down;
7538                         } while (header != NULL);
7539                         if (header != NULL)
7540                                 break;
7541                 }
7542         }
7543
7544         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7545                     isc_rwlocktype_read);
7546
7547         rbtiterator->current = header;
7548
7549         if (header == NULL)
7550                 return (ISC_R_NOMORE);
7551
7552         return (ISC_R_SUCCESS);
7553 }
7554
7555 static void
7556 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7557         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7558         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7559         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7560         rdatasetheader_t *header;
7561
7562         header = rbtiterator->current;
7563         REQUIRE(header != NULL);
7564
7565         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7566                   isc_rwlocktype_read);
7567
7568         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7569                       rdataset);
7570
7571         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7572                     isc_rwlocktype_read);
7573 }
7574
7575
7576 /*
7577  * Database Iterator Methods
7578  */
7579
7580 static inline void
7581 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7582         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7583         dns_rbtnode_t *node = rbtdbiter->node;
7584
7585         if (node == NULL)
7586                 return;
7587
7588         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7589         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7590 }
7591
7592 static inline void
7593 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7594         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7595         dns_rbtnode_t *node = rbtdbiter->node;
7596         nodelock_t *lock;
7597
7598         if (node == NULL)
7599                 return;
7600
7601         lock = &rbtdb->node_locks[node->locknum].lock;
7602         NODE_LOCK(lock, isc_rwlocktype_read);
7603         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7604                             rbtdbiter->tree_locked, ISC_FALSE);
7605         NODE_UNLOCK(lock, isc_rwlocktype_read);
7606
7607         rbtdbiter->node = NULL;
7608 }
7609
7610 static void
7611 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7612         dns_rbtnode_t *node;
7613         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7614         isc_boolean_t was_read_locked = ISC_FALSE;
7615         nodelock_t *lock;
7616         int i;
7617
7618         if (rbtdbiter->delete != 0) {
7619                 /*
7620                  * Note that "%d node of %d in tree" can report things like
7621                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7622                  * That some nodes appear on the deletions list more than
7623                  * once.  Only the last occurence will actually be deleted.
7624                  */
7625                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7626                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7627                               "flush_deletions: %d nodes of %d in tree",
7628                               rbtdbiter->delete,
7629                               dns_rbt_nodecount(rbtdb->tree));
7630
7631                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7632                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7633                         was_read_locked = ISC_TRUE;
7634                 }
7635                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7636                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7637
7638                 for (i = 0; i < rbtdbiter->delete; i++) {
7639                         node = rbtdbiter->deletions[i];
7640                         lock = &rbtdb->node_locks[node->locknum].lock;
7641
7642                         NODE_LOCK(lock, isc_rwlocktype_read);
7643                         decrement_reference(rbtdb, node, 0,
7644                                             isc_rwlocktype_read,
7645                                             rbtdbiter->tree_locked, ISC_FALSE);
7646                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7647                 }
7648
7649                 rbtdbiter->delete = 0;
7650
7651                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7652                 if (was_read_locked) {
7653                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7654                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7655
7656                 } else {
7657                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7658                 }
7659         }
7660 }
7661
7662 static inline void
7663 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7664         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7665
7666         REQUIRE(rbtdbiter->paused);
7667         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7668
7669         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7670         rbtdbiter->tree_locked = isc_rwlocktype_read;
7671
7672         rbtdbiter->paused = ISC_FALSE;
7673 }
7674
7675 static void
7676 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7677         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7678         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7679         dns_db_t *db = NULL;
7680
7681         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7682                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7683                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7684         } else
7685                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7686
7687         dereference_iter_node(rbtdbiter);
7688
7689         flush_deletions(rbtdbiter);
7690
7691         dns_db_attach(rbtdbiter->common.db, &db);
7692         dns_db_detach(&rbtdbiter->common.db);
7693
7694         dns_rbtnodechain_reset(&rbtdbiter->chain);
7695         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7696         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7697         dns_db_detach(&db);
7698
7699         *iteratorp = NULL;
7700 }
7701
7702 static isc_result_t
7703 dbiterator_first(dns_dbiterator_t *iterator) {
7704         isc_result_t result;
7705         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7706         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7707         dns_name_t *name, *origin;
7708
7709         if (rbtdbiter->result != ISC_R_SUCCESS &&
7710             rbtdbiter->result != ISC_R_NOMORE)
7711                 return (rbtdbiter->result);
7712
7713         if (rbtdbiter->paused)
7714                 resume_iteration(rbtdbiter);
7715
7716         dereference_iter_node(rbtdbiter);
7717
7718         name = dns_fixedname_name(&rbtdbiter->name);
7719         origin = dns_fixedname_name(&rbtdbiter->origin);
7720         dns_rbtnodechain_reset(&rbtdbiter->chain);
7721         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7722
7723         if (rbtdbiter->nsec3only) {
7724                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7725                 result = dns_rbtnodechain_first(rbtdbiter->current,
7726                                                 rbtdb->nsec3, name, origin);
7727         } else {
7728                 rbtdbiter->current = &rbtdbiter->chain;
7729                 result = dns_rbtnodechain_first(rbtdbiter->current,
7730                                                 rbtdb->tree, name, origin);
7731                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7732                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7733                         result = dns_rbtnodechain_first(rbtdbiter->current,
7734                                                         rbtdb->nsec3, name,
7735                                                         origin);
7736                 }
7737         }
7738         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7739                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7740                                                   NULL, &rbtdbiter->node);
7741                 if (result == ISC_R_SUCCESS) {
7742                         rbtdbiter->new_origin = ISC_TRUE;
7743                         reference_iter_node(rbtdbiter);
7744                 }
7745         } else {
7746                 INSIST(result == ISC_R_NOTFOUND);
7747                 result = ISC_R_NOMORE; /* The tree is empty. */
7748         }
7749
7750         rbtdbiter->result = result;
7751
7752         return (result);
7753 }
7754
7755 static isc_result_t
7756 dbiterator_last(dns_dbiterator_t *iterator) {
7757         isc_result_t result;
7758         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7759         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7760         dns_name_t *name, *origin;
7761
7762         if (rbtdbiter->result != ISC_R_SUCCESS &&
7763             rbtdbiter->result != ISC_R_NOMORE)
7764                 return (rbtdbiter->result);
7765
7766         if (rbtdbiter->paused)
7767                 resume_iteration(rbtdbiter);
7768
7769         dereference_iter_node(rbtdbiter);
7770
7771         name = dns_fixedname_name(&rbtdbiter->name);
7772         origin = dns_fixedname_name(&rbtdbiter->origin);
7773         dns_rbtnodechain_reset(&rbtdbiter->chain);
7774         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7775
7776         result = ISC_R_NOTFOUND;
7777         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7778                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7779                 result = dns_rbtnodechain_last(rbtdbiter->current,
7780                                                rbtdb->nsec3, name, origin);
7781         }
7782         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7783                 rbtdbiter->current = &rbtdbiter->chain;
7784                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7785                                                name, origin);
7786         }
7787         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7788                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7789                                                   NULL, &rbtdbiter->node);
7790                 if (result == ISC_R_SUCCESS) {
7791                         rbtdbiter->new_origin = ISC_TRUE;
7792                         reference_iter_node(rbtdbiter);
7793                 }
7794         } else {
7795                 INSIST(result == ISC_R_NOTFOUND);
7796                 result = ISC_R_NOMORE; /* The tree is empty. */
7797         }
7798
7799         rbtdbiter->result = result;
7800
7801         return (result);
7802 }
7803
7804 static isc_result_t
7805 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7806         isc_result_t result;
7807         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7808         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7809         dns_name_t *iname, *origin;
7810
7811         if (rbtdbiter->result != ISC_R_SUCCESS &&
7812             rbtdbiter->result != ISC_R_NOTFOUND &&
7813             rbtdbiter->result != ISC_R_NOMORE)
7814                 return (rbtdbiter->result);
7815
7816         if (rbtdbiter->paused)
7817                 resume_iteration(rbtdbiter);
7818
7819         dereference_iter_node(rbtdbiter);
7820
7821         iname = dns_fixedname_name(&rbtdbiter->name);
7822         origin = dns_fixedname_name(&rbtdbiter->origin);
7823         dns_rbtnodechain_reset(&rbtdbiter->chain);
7824         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7825
7826         if (rbtdbiter->nsec3only) {
7827                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7828                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7829                                           &rbtdbiter->node,
7830                                           rbtdbiter->current,
7831                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7832         } else if (rbtdbiter->nonsec3) {
7833                 rbtdbiter->current = &rbtdbiter->chain;
7834                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7835                                           &rbtdbiter->node,
7836                                           rbtdbiter->current,
7837                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7838         } else {
7839                 /*
7840                  * Stay on main chain if not found on either chain.
7841                  */
7842                 rbtdbiter->current = &rbtdbiter->chain;
7843                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7844                                           &rbtdbiter->node,
7845                                           rbtdbiter->current,
7846                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7847                 if (result == DNS_R_PARTIALMATCH) {
7848                         dns_rbtnode_t *node = NULL;
7849                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7850                                                   &node, &rbtdbiter->nsec3chain,
7851                                                   DNS_RBTFIND_EMPTYDATA,
7852                                                   NULL, NULL);
7853                         if (result == ISC_R_SUCCESS) {
7854                                 rbtdbiter->node = node;
7855                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7856                         }
7857                 }
7858         }
7859
7860 #if 1
7861         if (result == ISC_R_SUCCESS) {
7862                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7863                                                   origin, NULL);
7864                 if (result == ISC_R_SUCCESS) {
7865                         rbtdbiter->new_origin = ISC_TRUE;
7866                         reference_iter_node(rbtdbiter);
7867                 }
7868         } else if (result == DNS_R_PARTIALMATCH) {
7869                 result = ISC_R_NOTFOUND;
7870                 rbtdbiter->node = NULL;
7871         }
7872
7873         rbtdbiter->result = result;
7874 #else
7875         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7876                 isc_result_t tresult;
7877                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7878                                                    origin, NULL);
7879                 if (tresult == ISC_R_SUCCESS) {
7880                         rbtdbiter->new_origin = ISC_TRUE;
7881                         reference_iter_node(rbtdbiter);
7882                 } else {
7883                         result = tresult;
7884                         rbtdbiter->node = NULL;
7885                 }
7886         } else
7887                 rbtdbiter->node = NULL;
7888
7889         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7890                             ISC_R_SUCCESS : result;
7891 #endif
7892
7893         return (result);
7894 }
7895
7896 static isc_result_t
7897 dbiterator_prev(dns_dbiterator_t *iterator) {
7898         isc_result_t result;
7899         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7900         dns_name_t *name, *origin;
7901         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7902
7903         REQUIRE(rbtdbiter->node != NULL);
7904
7905         if (rbtdbiter->result != ISC_R_SUCCESS)
7906                 return (rbtdbiter->result);
7907
7908         if (rbtdbiter->paused)
7909                 resume_iteration(rbtdbiter);
7910
7911         name = dns_fixedname_name(&rbtdbiter->name);
7912         origin = dns_fixedname_name(&rbtdbiter->origin);
7913         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
7914         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7915             !rbtdbiter->nonsec3 &&
7916             &rbtdbiter->nsec3chain == rbtdbiter->current) {
7917                 rbtdbiter->current = &rbtdbiter->chain;
7918                 dns_rbtnodechain_reset(rbtdbiter->current);
7919                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7920                                                name, origin);
7921                 if (result == ISC_R_NOTFOUND)
7922                         result = ISC_R_NOMORE;
7923         }
7924
7925         dereference_iter_node(rbtdbiter);
7926
7927         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7928                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7929                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7930                                                   NULL, &rbtdbiter->node);
7931         }
7932
7933         if (result == ISC_R_SUCCESS)
7934                 reference_iter_node(rbtdbiter);
7935
7936         rbtdbiter->result = result;
7937
7938         return (result);
7939 }
7940
7941 static isc_result_t
7942 dbiterator_next(dns_dbiterator_t *iterator) {
7943         isc_result_t result;
7944         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7945         dns_name_t *name, *origin;
7946         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7947
7948         REQUIRE(rbtdbiter->node != NULL);
7949
7950         if (rbtdbiter->result != ISC_R_SUCCESS)
7951                 return (rbtdbiter->result);
7952
7953         if (rbtdbiter->paused)
7954                 resume_iteration(rbtdbiter);
7955
7956         name = dns_fixedname_name(&rbtdbiter->name);
7957         origin = dns_fixedname_name(&rbtdbiter->origin);
7958         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
7959         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7960             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
7961                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7962                 dns_rbtnodechain_reset(rbtdbiter->current);
7963                 result = dns_rbtnodechain_first(rbtdbiter->current,
7964                                                 rbtdb->nsec3, name, origin);
7965                 if (result == ISC_R_NOTFOUND)
7966                         result = ISC_R_NOMORE;
7967         }
7968
7969         dereference_iter_node(rbtdbiter);
7970
7971         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7972                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7973                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7974                                                   NULL, &rbtdbiter->node);
7975         }
7976         if (result == ISC_R_SUCCESS)
7977                 reference_iter_node(rbtdbiter);
7978
7979         rbtdbiter->result = result;
7980
7981         return (result);
7982 }
7983
7984 static isc_result_t
7985 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
7986                    dns_name_t *name)
7987 {
7988         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7989         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7990         dns_rbtnode_t *node = rbtdbiter->node;
7991         isc_result_t result;
7992         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
7993         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
7994
7995         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
7996         REQUIRE(rbtdbiter->node != NULL);
7997
7998         if (rbtdbiter->paused)
7999                 resume_iteration(rbtdbiter);
8000
8001         if (name != NULL) {
8002                 if (rbtdbiter->common.relative_names)
8003                         origin = NULL;
8004                 result = dns_name_concatenate(nodename, origin, name, NULL);
8005                 if (result != ISC_R_SUCCESS)
8006                         return (result);
8007                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8008                         result = DNS_R_NEWORIGIN;
8009         } else
8010                 result = ISC_R_SUCCESS;
8011
8012         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8013         new_reference(rbtdb, node);
8014         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8015
8016         *nodep = rbtdbiter->node;
8017
8018         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8019                 isc_result_t expire_result;
8020
8021                 /*
8022                  * If the deletion array is full, flush it before trying
8023                  * to expire the current node.  The current node can't
8024                  * fully deleted while the iteration cursor is still on it.
8025                  */
8026                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8027                         flush_deletions(rbtdbiter);
8028
8029                 expire_result = expirenode(iterator->db, *nodep, 0);
8030
8031                 /*
8032                  * expirenode() currently always returns success.
8033                  */
8034                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8035                         unsigned int refs;
8036
8037                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8038                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8039                         dns_rbtnode_refincrement(node, &refs);
8040                         INSIST(refs != 0);
8041                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8042                 }
8043         }
8044
8045         return (result);
8046 }
8047
8048 static isc_result_t
8049 dbiterator_pause(dns_dbiterator_t *iterator) {
8050         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8051         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8052
8053         if (rbtdbiter->result != ISC_R_SUCCESS &&
8054             rbtdbiter->result != ISC_R_NOMORE)
8055                 return (rbtdbiter->result);
8056
8057         if (rbtdbiter->paused)
8058                 return (ISC_R_SUCCESS);
8059
8060         rbtdbiter->paused = ISC_TRUE;
8061
8062         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8063                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8064                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8065                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8066         }
8067
8068         flush_deletions(rbtdbiter);
8069
8070         return (ISC_R_SUCCESS);
8071 }
8072
8073 static isc_result_t
8074 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8075         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8076         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8077
8078         if (rbtdbiter->result != ISC_R_SUCCESS)
8079                 return (rbtdbiter->result);
8080
8081         return (dns_name_copy(origin, name, NULL));
8082 }
8083
8084 /*%
8085  * Additional cache routines.
8086  */
8087 static isc_result_t
8088 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8089                        dns_rdatatype_t qtype, dns_acache_t *acache,
8090                        dns_zone_t **zonep, dns_db_t **dbp,
8091                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8092                        dns_name_t *fname, dns_message_t *msg,
8093                        isc_stdtime_t now)
8094 {
8095         dns_rbtdb_t *rbtdb = rdataset->private1;
8096         dns_rbtnode_t *rbtnode = rdataset->private2;
8097         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8098         unsigned int current_count = rdataset->privateuint4;
8099         unsigned int count;
8100         rdatasetheader_t *header;
8101         nodelock_t *nodelock;
8102         unsigned int total_count;
8103         acachectl_t *acarray;
8104         dns_acacheentry_t *entry;
8105         isc_result_t result;
8106
8107         UNUSED(qtype); /* we do not use this value at least for now */
8108         UNUSED(acache);
8109
8110         header = (struct rdatasetheader *)(raw - sizeof(*header));
8111
8112         total_count = raw[0] * 256 + raw[1];
8113         INSIST(total_count > current_count);
8114         count = total_count - current_count - 1;
8115
8116         acarray = NULL;
8117
8118         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8119         NODE_LOCK(nodelock, isc_rwlocktype_read);
8120
8121         switch (type) {
8122         case dns_rdatasetadditional_fromauth:
8123                 acarray = header->additional_auth;
8124                 break;
8125         case dns_rdatasetadditional_fromcache:
8126                 acarray = NULL;
8127                 break;
8128         case dns_rdatasetadditional_fromglue:
8129                 acarray = header->additional_glue;
8130                 break;
8131         default:
8132                 INSIST(0);
8133         }
8134
8135         if (acarray == NULL) {
8136                 if (type != dns_rdatasetadditional_fromcache)
8137                         dns_acache_countquerymiss(acache);
8138                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8139                 return (ISC_R_NOTFOUND);
8140         }
8141
8142         if (acarray[count].entry == NULL) {
8143                 dns_acache_countquerymiss(acache);
8144                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8145                 return (ISC_R_NOTFOUND);
8146         }
8147
8148         entry = NULL;
8149         dns_acache_attachentry(acarray[count].entry, &entry);
8150
8151         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8152
8153         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8154                                      nodep, fname, msg, now);
8155
8156         dns_acache_detachentry(&entry);
8157
8158         return (result);
8159 }
8160
8161 static void
8162 acache_callback(dns_acacheentry_t *entry, void **arg) {
8163         dns_rbtdb_t *rbtdb;
8164         dns_rbtnode_t *rbtnode;
8165         nodelock_t *nodelock;
8166         acachectl_t *acarray = NULL;
8167         acache_cbarg_t *cbarg;
8168         unsigned int count;
8169
8170         REQUIRE(arg != NULL);
8171         cbarg = *arg;
8172
8173         /*
8174          * The caller must hold the entry lock.
8175          */
8176
8177         rbtdb = (dns_rbtdb_t *)cbarg->db;
8178         rbtnode = (dns_rbtnode_t *)cbarg->node;
8179
8180         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8181         NODE_LOCK(nodelock, isc_rwlocktype_write);
8182
8183         switch (cbarg->type) {
8184         case dns_rdatasetadditional_fromauth:
8185                 acarray = cbarg->header->additional_auth;
8186                 break;
8187         case dns_rdatasetadditional_fromglue:
8188                 acarray = cbarg->header->additional_glue;
8189                 break;
8190         default:
8191                 INSIST(0);
8192         }
8193
8194         count = cbarg->count;
8195         if (acarray != NULL && acarray[count].entry == entry) {
8196                 acarray[count].entry = NULL;
8197                 INSIST(acarray[count].cbarg == cbarg);
8198                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8199                 acarray[count].cbarg = NULL;
8200         } else
8201                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8202
8203         dns_acache_detachentry(&entry);
8204
8205         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8206
8207         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8208         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8209
8210         *arg = NULL;
8211 }
8212
8213 static void
8214 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8215                       acache_cbarg_t **cbargp)
8216 {
8217         acache_cbarg_t *cbarg;
8218
8219         REQUIRE(mctx != NULL);
8220         REQUIRE(entry != NULL);
8221         REQUIRE(cbargp != NULL && *cbargp != NULL);
8222
8223         cbarg = *cbargp;
8224
8225         dns_acache_cancelentry(entry);
8226         dns_db_detachnode(cbarg->db, &cbarg->node);
8227         dns_db_detach(&cbarg->db);
8228
8229         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8230
8231         *cbargp = NULL;
8232 }
8233
8234 static isc_result_t
8235 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8236                        dns_rdatatype_t qtype, dns_acache_t *acache,
8237                        dns_zone_t *zone, dns_db_t *db,
8238                        dns_dbversion_t *version, dns_dbnode_t *node,
8239                        dns_name_t *fname)
8240 {
8241         dns_rbtdb_t *rbtdb = rdataset->private1;
8242         dns_rbtnode_t *rbtnode = rdataset->private2;
8243         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8244         unsigned int current_count = rdataset->privateuint4;
8245         rdatasetheader_t *header;
8246         unsigned int total_count, count;
8247         nodelock_t *nodelock;
8248         isc_result_t result;
8249         acachectl_t *acarray;
8250         dns_acacheentry_t *newentry, *oldentry = NULL;
8251         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8252
8253         UNUSED(qtype);
8254
8255         if (type == dns_rdatasetadditional_fromcache)
8256                 return (ISC_R_SUCCESS);
8257
8258         header = (struct rdatasetheader *)(raw - sizeof(*header));
8259
8260         total_count = raw[0] * 256 + raw[1];
8261         INSIST(total_count > current_count);
8262         count = total_count - current_count - 1; /* should be private data */
8263
8264         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8265         if (newcbarg == NULL)
8266                 return (ISC_R_NOMEMORY);
8267         newcbarg->type = type;
8268         newcbarg->count = count;
8269         newcbarg->header = header;
8270         newcbarg->db = NULL;
8271         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8272         newcbarg->node = NULL;
8273         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8274                           &newcbarg->node);
8275         newentry = NULL;
8276         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8277                                         acache_callback, newcbarg, &newentry);
8278         if (result != ISC_R_SUCCESS)
8279                 goto fail;
8280         /* Set cache data in the new entry. */
8281         result = dns_acache_setentry(acache, newentry, zone, db,
8282                                      version, node, fname);
8283         if (result != ISC_R_SUCCESS)
8284                 goto fail;
8285
8286         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8287         NODE_LOCK(nodelock, isc_rwlocktype_write);
8288
8289         acarray = NULL;
8290         switch (type) {
8291         case dns_rdatasetadditional_fromauth:
8292                 acarray = header->additional_auth;
8293                 break;
8294         case dns_rdatasetadditional_fromglue:
8295                 acarray = header->additional_glue;
8296                 break;
8297         default:
8298                 INSIST(0);
8299         }
8300
8301         if (acarray == NULL) {
8302                 unsigned int i;
8303
8304                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8305                                       sizeof(acachectl_t));
8306
8307                 if (acarray == NULL) {
8308                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8309                         goto fail;
8310                 }
8311
8312                 for (i = 0; i < total_count; i++) {
8313                         acarray[i].entry = NULL;
8314                         acarray[i].cbarg = NULL;
8315                 }
8316         }
8317         switch (type) {
8318         case dns_rdatasetadditional_fromauth:
8319                 header->additional_auth = acarray;
8320                 break;
8321         case dns_rdatasetadditional_fromglue:
8322                 header->additional_glue = acarray;
8323                 break;
8324         default:
8325                 INSIST(0);
8326         }
8327
8328         if (acarray[count].entry != NULL) {
8329                 /*
8330                  * Swap the entry.  Delay cleaning-up the old entry since
8331                  * it would require a node lock.
8332                  */
8333                 oldentry = acarray[count].entry;
8334                 INSIST(acarray[count].cbarg != NULL);
8335                 oldcbarg = acarray[count].cbarg;
8336         }
8337         acarray[count].entry = newentry;
8338         acarray[count].cbarg = newcbarg;
8339
8340         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8341
8342         if (oldentry != NULL) {
8343                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8344                 dns_acache_detachentry(&oldentry);
8345         }
8346
8347         return (ISC_R_SUCCESS);
8348
8349  fail:
8350         if (newcbarg != NULL) {
8351                 if (newentry != NULL) {
8352                         acache_cancelentry(rbtdb->common.mctx, newentry,
8353                                            &newcbarg);
8354                         dns_acache_detachentry(&newentry);
8355                 } else {
8356                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8357                         dns_db_detach(&newcbarg->db);
8358                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8359                             sizeof(*newcbarg));
8360                 }
8361         }
8362
8363         return (result);
8364 }
8365
8366 static isc_result_t
8367 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8368                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8369 {
8370         dns_rbtdb_t *rbtdb = rdataset->private1;
8371         dns_rbtnode_t *rbtnode = rdataset->private2;
8372         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8373         unsigned int current_count = rdataset->privateuint4;
8374         rdatasetheader_t *header;
8375         nodelock_t *nodelock;
8376         unsigned int total_count, count;
8377         acachectl_t *acarray;
8378         dns_acacheentry_t *entry;
8379         acache_cbarg_t *cbarg;
8380
8381         UNUSED(qtype);          /* we do not use this value at least for now */
8382         UNUSED(acache);
8383
8384         if (type == dns_rdatasetadditional_fromcache)
8385                 return (ISC_R_SUCCESS);
8386
8387         header = (struct rdatasetheader *)(raw - sizeof(*header));
8388
8389         total_count = raw[0] * 256 + raw[1];
8390         INSIST(total_count > current_count);
8391         count = total_count - current_count - 1;
8392
8393         acarray = NULL;
8394         entry = NULL;
8395
8396         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8397         NODE_LOCK(nodelock, isc_rwlocktype_write);
8398
8399         switch (type) {
8400         case dns_rdatasetadditional_fromauth:
8401                 acarray = header->additional_auth;
8402                 break;
8403         case dns_rdatasetadditional_fromglue:
8404                 acarray = header->additional_glue;
8405                 break;
8406         default:
8407                 INSIST(0);
8408         }
8409
8410         if (acarray == NULL) {
8411                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8412                 return (ISC_R_NOTFOUND);
8413         }
8414
8415         entry = acarray[count].entry;
8416         if (entry == NULL) {
8417                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8418                 return (ISC_R_NOTFOUND);
8419         }
8420
8421         acarray[count].entry = NULL;
8422         cbarg = acarray[count].cbarg;
8423         acarray[count].cbarg = NULL;
8424
8425         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8426
8427         if (entry != NULL) {
8428                 if (cbarg != NULL)
8429                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8430                 dns_acache_detachentry(&entry);
8431         }
8432
8433         return (ISC_R_SUCCESS);
8434 }
8435
8436 /*%
8437  * Routines for LRU-based cache management.
8438  */
8439
8440 /*%
8441  * See if a given cache entry that is being reused needs to be updated
8442  * in the LRU-list.  From the LRU management point of view, this function is
8443  * expected to return true for almost all cases.  When used with threads,
8444  * however, this may cause a non-negligible performance penalty because a
8445  * writer lock will have to be acquired before updating the list.
8446  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8447  * function returns true if the entry has not been updated for some period of
8448  * time.  We differentiate the NS or glue address case and the others since
8449  * experiments have shown that the former tends to be accessed relatively
8450  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8451  * may cause external queries at a higher level zone, involving more
8452  * transactions).
8453  *
8454  * Caller must hold the node (read or write) lock.
8455  */
8456 static inline isc_boolean_t
8457 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8458         if ((header->attributes &
8459              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8460                 return (ISC_FALSE);
8461
8462 #if DNS_RBTDB_LIMITLRUUPDATE
8463         if (header->type == dns_rdatatype_ns ||
8464             (header->trust == dns_trust_glue &&
8465              (header->type == dns_rdatatype_a ||
8466               header->type == dns_rdatatype_aaaa))) {
8467                 /*
8468                  * Glue records are updated if at least 60 seconds have passed
8469                  * since the previous update time.
8470                  */
8471                 return (header->last_used + 60 <= now);
8472         }
8473
8474         /* Other records are updated if 5 minutes have passed. */
8475         return (header->last_used + 300 <= now);
8476 #else
8477         UNUSED(now);
8478
8479         return (ISC_TRUE);
8480 #endif
8481 }
8482
8483 /*%
8484  * Update the timestamp of a given cache entry and move it to the head
8485  * of the corresponding LRU list.
8486  *
8487  * Caller must hold the node (write) lock.
8488  *
8489  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8490  */
8491 static void
8492 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8493               isc_stdtime_t now)
8494 {
8495         INSIST(IS_CACHE(rbtdb));
8496
8497         /* To be checked: can we really assume this? XXXMLG */
8498         INSIST(ISC_LINK_LINKED(header, lru_link));
8499
8500         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum],
8501                         header, lru_link);
8502         header->last_used = now;
8503         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum],
8504                          header, lru_link);
8505 }
8506
8507 /*%
8508  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8509  * under an overmem condition.  To recover from this condition quickly, up to
8510  * 2 entries will be purged.  This process is triggered while adding a new
8511  * entry, and we specifically avoid purging entries in the same LRU bucket as
8512  * the one to which the new entry will belong.  Otherwise, we might purge
8513  * entries of the same name of different RR types while adding RRsets from a
8514  * single response (consider the case where we're adding A and AAAA glue records
8515  * of the same NS name).
8516  */
8517 static void
8518 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8519               isc_stdtime_t now, isc_boolean_t tree_locked)
8520 {
8521         rdatasetheader_t *header, *header_prev;
8522         unsigned int locknum;
8523         int purgecount = 2;
8524
8525         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8526              locknum != locknum_start && purgecount > 0;
8527              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8528                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8529                           isc_rwlocktype_write);
8530
8531                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8532                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8533                         expire_header(rbtdb, header, tree_locked);
8534                         purgecount--;
8535                 }
8536
8537                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8538                      header != NULL && purgecount > 0;
8539                      header = header_prev) {
8540                         header_prev = ISC_LIST_PREV(header, lru_link);
8541                         /*
8542                          * Unlink the entry at this point to avoid checking it
8543                          * again even if it's currently used someone else and
8544                          * cannot be purged at this moment.  This entry won't be
8545                          * referenced any more (so unlinking is safe) since the
8546                          * TTL was reset to 0.
8547                          */
8548                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8549                                         lru_link);
8550                         expire_header(rbtdb, header, tree_locked);
8551                         purgecount--;
8552                 }
8553
8554                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8555                                     isc_rwlocktype_write);
8556         }
8557 }
8558
8559 static void
8560 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8561               isc_boolean_t tree_locked)
8562 {
8563         set_ttl(rbtdb, header, 0);
8564         header->attributes |= RDATASET_ATTR_STALE;
8565         header->node->dirty = 1;
8566
8567         /*
8568          * Caller must hold the node (write) lock.
8569          */
8570
8571         if (dns_rbtnode_refcurrent(header->node) == 0) {
8572                 /*
8573                  * If no one else is using the node, we can clean it up now.
8574                  * We first need to gain a new reference to the node to meet a
8575                  * requirement of decrement_reference().
8576                  */
8577                 new_reference(rbtdb, header->node);
8578                 decrement_reference(rbtdb, header->node, 0,
8579                                     isc_rwlocktype_write,
8580                                     tree_locked ? isc_rwlocktype_write :
8581                                     isc_rwlocktype_none, ISC_FALSE);
8582         }
8583 }