2 * Copyright (C) 2004-2012 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
57 #include <dns/rdata.h>
58 #include <dns/rdataset.h>
59 #include <dns/rdatasetiter.h>
60 #include <dns/rdataslab.h>
61 #include <dns/rdatastruct.h>
62 #include <dns/result.h>
63 #include <dns/stats.h>
66 #include <dns/zonekey.h>
68 #ifdef DNS_RBTDB_VERSION64
74 #ifdef DNS_RBTDB_VERSION64
75 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
77 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
81 * Note that "impmagic" is not the first four bytes of the struct, so
82 * ISC_MAGIC_VALID cannot be used.
84 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
85 (rbtdb)->common.impmagic == RBTDB_MAGIC)
87 #ifdef DNS_RBTDB_VERSION64
88 typedef isc_uint64_t rbtdb_serial_t;
90 * Make casting easier in symbolic debuggers by using different names
91 * for the 64 bit version.
93 #define dns_rbtdb_t dns_rbtdb64_t
94 #define rdatasetheader_t rdatasetheader64_t
95 #define rbtdb_version_t rbtdb_version64_t
97 typedef isc_uint32_t rbtdb_serial_t;
/*
 * Internal rdata type: a 32-bit value whose low 16 bits hold a "base"
 * type and whose high 16 bits hold an "ext" type.
 */
100 typedef isc_uint32_t rbtdb_rdatatype_t;
/* Extract the low (base) and high (ext) 16-bit halves of a packed type. */
102 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
103 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
/*
 * Pack base type 'b' (low 16 bits) and ext type 'e' (high 16 bits) into an
 * rbtdb_rdatatype_t.  'e' is widened to isc_uint32_t before the shift: a
 * 16-bit operand would otherwise be promoted to int, and shifting a value
 * >= 0x8000 left by 16 overflows a 32-bit signed int (undefined behavior).
 */
104 #define RBTDB_RDATATYPE_VALUE(b, e) ((rbtdb_rdatatype_t)((isc_uint32_t)(e) << 16) | (b))
/*
 * Pre-built packed types: RRSIG as the base, with the type named in the
 * macro (NSEC, NSEC3, NS, CNAME, DNAME) as the ext half.
 */
106 #define RBTDB_RDATATYPE_SIGNSEC \
107 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
108 #define RBTDB_RDATATYPE_SIGNSEC3 \
109 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
110 #define RBTDB_RDATATYPE_SIGNS \
111 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
112 #define RBTDB_RDATATYPE_SIGCNAME \
113 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
114 #define RBTDB_RDATATYPE_SIGDNAME \
115 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
/* Base 0 paired with dns_rdatatype_any as the ext half. */
116 #define RBTDB_RDATATYPE_NCACHEANY \
117 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
120 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
121 * Using rwlock is effective with regard to lookup performance only when
122 * it is implemented in an efficient way.
123 * Otherwise, it is generally wise to stick to the simple locking since rwlock
124 * would require more memory or can even make lookups slower due to its own
125 * overhead (when it internally calls mutex locks).
127 #ifdef ISC_RWLOCK_USEATOMIC
128 #define DNS_RBTDB_USERWLOCK 1
130 #define DNS_RBTDB_USERWLOCK 0
133 #if DNS_RBTDB_USERWLOCK
/* Database lock backed by isc_rwlock; 't' is passed on as the lock type. */
134 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
135 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
136 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
137 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
/* Database lock backed by a mutex; the lock type argument 't' is unused. */
139 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
140 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
141 #define RBTDB_LOCK(l, t) LOCK(l)
142 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
146 * Since node locking is sensitive to both performance and memory footprint,
147 * we need some trick here. If we have both high-performance rwlock and
148 * high performance and small-memory reference counters, we use rwlock for
149 * node lock and isc_refcount for node references. In this case, we don't have
150 * to protect the access to the counters by locks.
151 * Otherwise, we simply use ordinary mutex lock for node locking, and use
152 * simple integers as reference counters which is protected by the lock.
153 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
154 * NODE_UNLOCK. In some other cases, however, we need to protect reference
155 * counters first and then protect other parts of a node as read-only data.
156 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
157 * provided for these special cases. When we can use the efficient backend
158 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
159 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
160 * section including the access to the reference counter.
161 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
162 * section is also protected by NODE_STRONGLOCK().
164 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
/* Node locks are rwlocks in this configuration. */
165 typedef isc_rwlock_t nodelock_t;
167 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
168 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
169 #define NODE_LOCK(l, t) RWLOCK((l), (t))
170 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
171 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
/* The "strong" lock is a no-op here; the "weak" lock maps to NODE_LOCK. */
173 #define NODE_STRONGLOCK(l) ((void)0)
174 #define NODE_STRONGUNLOCK(l) ((void)0)
175 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
176 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
177 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
/* Node locks are plain mutexes in this configuration; 't' is unused. */
179 typedef isc_mutex_t nodelock_t;
181 #define NODE_INITLOCK(l) isc_mutex_init(l)
182 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
183 #define NODE_LOCK(l, t) LOCK(l)
184 #define NODE_UNLOCK(l, t) UNLOCK(l)
/* There is nothing to upgrade with a mutex, so this always succeeds. */
185 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
/* The "strong" lock takes the mutex; the "weak" operations are no-ops. */
187 #define NODE_STRONGLOCK(l) LOCK(l)
188 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
189 #define NODE_WEAKLOCK(l, t) ((void)0)
190 #define NODE_WEAKUNLOCK(l, t) ((void)0)
191 #define NODE_WEAKDOWNGRADE(l) ((void)0)
195 * Whether to rate-limit updating the LRU to avoid possible thread contention.
196 * Our performance measurement has shown the cost is marginal, so it's defined
197 * to be 0 by default either with or without threads.
199 #ifndef DNS_RBTDB_LIMITLRUUPDATE
200 #define DNS_RBTDB_LIMITLRUUPDATE 0
204 * Allow clients with a virtual time of up to 5 minutes in the past to see
205 * records that would otherwise have expired.
207 #define RBTDB_VIRTUAL 300
213 dns_rdatatype_t type;
216 typedef struct acachectl acachectl_t;
218 typedef struct rdatasetheader {
220 * Locked by the owning node's lock.
222 rbtdb_serial_t serial;
224 rbtdb_rdatatype_t type;
225 isc_uint16_t attributes;
227 struct noqname *noqname;
228 struct noqname *closest;
230 * We don't use the LIST macros, because the LIST structure has
231 * both head and tail pointers, and is doubly linked.
234 struct rdatasetheader *next;
236 * If this is the top header for an rdataset, 'next' points
237 * to the top header for the next rdataset (i.e., the next type).
238 * Otherwise, it points up to the header whose down pointer points
242 struct rdatasetheader *down;
244 * Points to the header for the next older version of
250 * Monotonically increased every time this rdataset is bound so that
251 * it is used as the base of the starting point in DNS responses
252 * when the "cyclic" rrset-order is required. Since the ordering
253 * should not be so crucial, no lock is set for the counter for
254 * performance reasons.
257 acachectl_t *additional_auth;
258 acachectl_t *additional_glue;
261 isc_stdtime_t last_used;
262 ISC_LINK(struct rdatasetheader) link;
264 unsigned int heap_index;
266 * Used for TTL-based cache cleaning.
268 isc_stdtime_t resign;
271 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
272 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
/*
 * Bit flags kept in the 'attributes' member of a rdatasetheader; each
 * value occupies a distinct bit so they can be combined and tested.
 */
274 #define RDATASET_ATTR_NONEXISTENT 0x0001
275 #define RDATASET_ATTR_STALE 0x0002
276 #define RDATASET_ATTR_IGNORE 0x0004
277 #define RDATASET_ATTR_RETAIN 0x0008
278 #define RDATASET_ATTR_NXDOMAIN 0x0010
279 #define RDATASET_ATTR_RESIGN 0x0020
280 #define RDATASET_ATTR_STATCOUNT 0x0040
281 #define RDATASET_ATTR_OPTOUT 0x0080
282 #define RDATASET_ATTR_NEGATIVE 0x0100
284 typedef struct acache_cbarg {
285 dns_rdatasetadditional_t type;
289 rdatasetheader_t *header;
293 dns_acacheentry_t *entry;
294 acache_cbarg_t *cbarg;
299 * When the cache will pre-expire data (due to memory low or other
300 * situations) before the rdataset's TTL has expired, it MUST
301 * respect the RETAIN bit and not expire the data until its TTL is
305 #undef IGNORE /* WIN32 winbase.h defines this. */
/*
 * Predicates on (header)->attributes.  EXISTS is true when the
 * NONEXISTENT bit is clear; every other macro is true when the
 * correspondingly named RDATASET_ATTR_* bit is set.
 */
307 #define EXISTS(header) \
308 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
309 #define NONEXISTENT(header) \
310 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
311 #define IGNORE(header) \
312 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
313 #define RETAIN(header) \
314 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
315 #define NXDOMAIN(header) \
316 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
317 #define RESIGN(header) \
318 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
319 #define OPTOUT(header) \
320 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
321 #define NEGATIVE(header) \
322 (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
324 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
327 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
328 * There is a tradeoff issue about configuring this value: if this is too
329 * small, it may cause heavier contention between threads; if this is too large,
330 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
331 * The default value should work well for most environments, but this can
332 * also be configurable at compilation time via the
333 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
334 * 1 due to the assumption of overmem_purge().
336 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
338 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
340 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
343 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
344 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
348 /* Protected in the refcount routines. */
349 isc_refcount_t references;
350 /* Locked by lock. */
351 isc_boolean_t exiting;
354 typedef struct rbtdb_changed {
355 dns_rbtnode_t * node;
357 ISC_LINK(struct rbtdb_changed) link;
360 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
368 typedef struct dns_rbtdb dns_rbtdb_t;
370 typedef struct rbtdb_version {
372 rbtdb_serial_t serial;
375 * Protected in the refcount routines.
376 * XXXJT: should we change the lock policy based on the refcount
379 isc_refcount_t references;
380 /* Locked by database lock. */
381 isc_boolean_t writer;
382 isc_boolean_t commit_ok;
383 rbtdb_changedlist_t changed_list;
384 rdatasetheaderlist_t resigned_list;
385 ISC_LINK(struct rbtdb_version) link;
386 dns_db_secure_t secure;
387 isc_boolean_t havensec3;
388 /* NSEC3 parameters */
391 isc_uint16_t iterations;
392 isc_uint8_t salt_length;
393 unsigned char salt[DNS_NSEC3_SALTSIZE];
396 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
401 /* Locks the data in this struct */
402 #if DNS_RBTDB_USERWLOCK
407 /* Locks the tree structure (prevents nodes appearing/disappearing) */
408 isc_rwlock_t tree_lock;
409 /* Locks for individual tree nodes */
410 unsigned int node_lock_count;
411 rbtdb_nodelock_t * node_locks;
412 dns_rbtnode_t * origin_node;
413 dns_stats_t * rrsetstats; /* cache DB only */
414 /* Locked by lock. */
416 isc_refcount_t references;
417 unsigned int attributes;
418 rbtdb_serial_t current_serial;
419 rbtdb_serial_t least_serial;
420 rbtdb_serial_t next_serial;
421 rbtdb_version_t * current_version;
422 rbtdb_version_t * future_version;
423 rbtdb_versionlist_t open_versions;
425 dns_dbnode_t *soanode;
426 dns_dbnode_t *nsnode;
429 * This is a linked list used to implement the LRU cache. There will
430 * be node_lock_count linked lists here. Nodes in bucket 1 will be
431 * placed on the linked list rdatasets[1].
433 rdatasetheaderlist_t *rdatasets;
436 * Temporary storage for stale cache nodes and dynamically deleted
437 * nodes that await being cleaned up.
439 rbtnodelist_t *deadnodes;
442 * Heaps. These are used for TTL based expiry in a cache,
443 * or for zone resigning in a zone DB. hmctx is the memory
444 * context to use for the heap (which differs from the main
445 * database memory context in the case of a cache).
450 /* Locked by tree_lock. */
454 dns_rpz_cidr_t * rpz_cidr;
457 unsigned int quantum;
460 #define RBTDB_ATTR_LOADED 0x01
461 #define RBTDB_ATTR_LOADING 0x02
468 rbtdb_version_t * rbtversion;
469 rbtdb_serial_t serial;
470 unsigned int options;
471 dns_rbtnodechain_t chain;
472 isc_boolean_t copy_name;
473 isc_boolean_t need_cleanup;
475 dns_rbtnode_t * zonecut;
476 rdatasetheader_t * zonecut_rdataset;
477 rdatasetheader_t * zonecut_sigrdataset;
478 dns_fixedname_t zonecut_name;
490 static void rdataset_disassociate(dns_rdataset_t *rdataset);
491 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
492 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
493 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
494 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
495 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
496 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
499 dns_rdataset_t *negsig);
500 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
503 dns_rdataset_t *negsig);
504 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
505 dns_rdatasetadditional_t type,
506 dns_rdatatype_t qtype,
507 dns_acache_t *acache,
510 dns_dbversion_t **versionp,
511 dns_dbnode_t **nodep,
515 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
516 dns_rdatasetadditional_t type,
517 dns_rdatatype_t qtype,
518 dns_acache_t *acache,
521 dns_dbversion_t *version,
524 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
525 dns_rdataset_t *rdataset,
526 dns_rdatasetadditional_t type,
527 dns_rdatatype_t qtype);
528 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
530 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
532 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
533 isc_boolean_t tree_locked);
534 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
535 isc_stdtime_t now, isc_boolean_t tree_locked);
536 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
537 rdatasetheader_t *newheader);
538 static void prune_tree(isc_task_t *task, isc_event_t *event);
539 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
540 static void rdataset_expire(dns_rdataset_t *rdataset);
542 static dns_rdatasetmethods_t rdataset_methods = {
543 rdataset_disassociate,
553 rdataset_getadditional,
554 rdataset_setadditional,
555 rdataset_putadditional,
560 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
561 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
562 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
563 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
564 dns_rdataset_t *rdataset);
566 static dns_rdatasetitermethods_t rdatasetiter_methods = {
567 rdatasetiter_destroy,
/*
 * rbtdb-specific rdataset iterator: the generic dns_rdatasetiter_t part
 * followed by a pointer to the rdatasetheader the iterator is positioned on.
 */
573 typedef struct rbtdb_rdatasetiter {
574 dns_rdatasetiter_t common;
575 rdatasetheader_t * current;
576 } rbtdb_rdatasetiter_t;
578 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
579 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
580 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
581 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
583 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
584 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
585 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
586 dns_dbnode_t **nodep,
588 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
589 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
592 static dns_dbiteratormethods_t dbiterator_methods = {
/* Capacity of the 'deletions' array in rbtdb_dbiterator_t. */
604 #define DELETION_BATCH_MAX 64
607 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
/*
 * rbtdb-specific database iterator: the generic dns_dbiterator_t part
 * followed by the iterator's private state.
 */
609 typedef struct rbtdb_dbiterator {
610 dns_dbiterator_t common;
611 isc_boolean_t paused;
612 isc_boolean_t new_origin;
613 isc_rwlocktype_t tree_locked;
615 dns_fixedname_t name;
616 dns_fixedname_t origin;
/* Two node chains are kept; 'current' is a pointer to a node chain. */
617 dns_rbtnodechain_t chain;
618 dns_rbtnodechain_t nsec3chain;
619 dns_rbtnodechain_t *current;
/* Holds up to DELETION_BATCH_MAX node pointers. */
621 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
623 isc_boolean_t nsec3only;
624 isc_boolean_t nonsec3;
625 } rbtdb_dbiterator_t;
/*
 * True when the DNS_DBATTR_STUB / DNS_DBATTR_CACHE bit is set in the
 * database's common.attributes.
 */
628 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
629 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
631 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
633 static void overmem(dns_db_t *db, isc_boolean_t overmem);
635 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
639 * 'init_count' is used to initialize 'newheader->count' which in turn
640 * is used to determine where in the cycle rrset-order cyclic starts.
641 * We don't lock this as we don't care about simultaneous updates.
644 * Both init_count and header->count can be ISC_UINT32_MAX.
645 * The count on the returned rdataset however can't be as
646 * that indicates that the database does not implement cyclic
649 static unsigned int init_count;
654 * If a routine is going to lock more than one lock in this module, then
655 * the locking must be done in the following order:
659 * Node Lock (Only one from the set may be locked at one time by
664 * Failure to follow this hierarchy can result in deadlock.
670 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * Take an additional reference on the database 'source'.
 * 'source' must be a valid rbtdb (enforced with REQUIRE).
 */
679 attach(dns_db_t *source, dns_db_t **targetp) {
680 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
682 REQUIRE(VALID_RBTDB(rbtdb));
/* Bump the database-level reference counter. */
684 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * Task event callback: calls free_rbtdb() on the database carried in
 * event->ev_arg, with 'log' set to ISC_TRUE and the event passed through.
 */
690 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
691 dns_rbtdb_t *rbtdb = event->ev_arg;
695 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * Adjust the counter in rbtdb->rrsetstats that corresponds to 'header'.
 * The statistics type is built with DNS_RDATASTATSTYPE_VALUE() from a base
 * type taken out of header->type and an attribute derived from the
 * header's NEGATIVE/NXDOMAIN flags.  'rbtdb' must be a cache database.
 */
699 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
700 isc_boolean_t increment)
702 dns_rdatastatstype_t statattributes = 0;
703 dns_rdatastatstype_t base = 0;
704 dns_rdatastatstype_t type;
706 /* At the moment we count statistics only for cache DB */
707 INSIST(IS_CACHE(rbtdb));
/* Choose the statistics attribute and the base type for this header. */
709 if (NEGATIVE(header)) {
710 if (NXDOMAIN(header))
711 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
713 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
714 base = RBTDB_RDATATYPE_EXT(header->type);
717 base = RBTDB_RDATATYPE_BASE(header->type);
/* Combine both parts into the counter key, then apply the update. */
719 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
721 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
723 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * Store 'newttl' in header->rdh_ttl.  The heap for the header's bucket
 * (rbtdb->heaps[header->node->locknum]) is then told about the change via
 * isc_heap_increased() / isc_heap_decreased() at the header's heap_index.
 */
727 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
/* Remember the previous TTL so it can be compared with the new one. */
732 oldttl = header->rdh_ttl;
733 header->rdh_ttl = newttl;
735 if (!IS_CACHE(rbtdb))
739 * It's possible the rbtdb is not a cache. If this is the case,
740 * we will not have a heap, and we move on. If we do, though,
741 * we might need to adjust things.
743 if (header->heap_index == 0 || newttl == oldttl)
/* Heaps are per bucket, indexed by the owning node's lock number. */
745 idx = header->node->locknum;
746 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
748 heap = rbtdb->heaps[idx];
751 isc_heap_increased(heap, header->heap_index);
753 isc_heap_decreased(heap, header->heap_index);
757 * These functions allow the heap code to rank the priority of each
758 * element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * Heap ordering callback: 'v1' and 'v2' are rdatasetheader_t pointers,
 * compared by their rdh_ttl members.
 */
761 ttl_sooner(void *v1, void *v2) {
762 rdatasetheader_t *h1 = v1;
763 rdatasetheader_t *h2 = v2;
765 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * Heap ordering callback: 'v1' and 'v2' are rdatasetheader_t pointers,
 * compared by their resign members.
 */
771 resign_sooner(void *v1, void *v2) {
772 rdatasetheader_t *h1 = v1;
773 rdatasetheader_t *h2 = v2;
775 if (h1->resign < h2->resign)
781 * This function sets the heap index into the header.
/*
 * Heap index callback: 'what' is a rdatasetheader_t pointer; its
 * heap_index member is set to 'index'.
 */
784 set_index(void *what, unsigned int index) {
785 rdatasetheader_t *h = what;
787 h->heap_index = index;
791 * Work out how many nodes can be deleted in the time between two
792 * requests to the nameserver. Smooth the resulting number and use it
793 * as an estimate for the number of nodes to be deleted in the next
/*
 * Derive a new deletion quantum from the previous one ('old') and the time
 * elapsed since 'start'.  The target interval is 1000000 / pps
 * microseconds, where pps is initialised from dns_pps; the result is
 * logged at debug level 1.
 */
797 adjust_quantum(unsigned int old, isc_time_t *start) {
798 unsigned int pps = dns_pps; /* packets per second */
799 unsigned int interval;
808 interval = 1000000 / pps; /* interval in usec */
/* Elapsed time, in microseconds, between 'start' and 'end'. */
811 usecs = isc_time_microdiff(&end, start);
814 * We were unable to measure the amount of time taken.
815 * Double the nodes deleted next time.
/*
 * Scale the old quantum by interval / elapsed time.
 * NOTE(review): the cast narrows 'usecs' before it is used as a divisor;
 * confirm the narrowed value cannot be zero.
 */
822 new = old * interval;
823 new /= (unsigned int)usecs;
/* Smooth the estimate: three parts old value to one part new value. */
830 new = (new + old * 3) / 4;
/*
 * NOTE(review): if 'new' is declared unsigned, "%u" would match it
 * better than "%d" -- confirm against its declaration.
 */
832 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
833 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * Release everything owned by 'rbtdb': the current version, the dead-node
 * lists, the trees (tree, nsec, nsec3), the per-bucket locks, LRU lists
 * and heaps, the statistics and RPZ data, and finally the rbtdb structure
 * itself.  Trees are destroyed with dns_rbt_destroy2() using
 * rbtdb->quantum; when that returns ISC_R_QUOTA a DNS_EVENT_FREESTORAGE
 * event is sent to rbtdb->task.  There must be no future version.
 */
839 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
841 isc_ondestroy_t ondest;
843 char buf[DNS_NAME_FORMATSIZE];
847 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
848 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
850 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
851 REQUIRE(rbtdb->future_version == NULL);
/* Drop, unlink and free the current version, if there is one. */
853 if (rbtdb->current_version != NULL) {
856 isc_refcount_decrement(&rbtdb->current_version->references,
859 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
860 isc_refcount_destroy(&rbtdb->current_version->references);
861 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
862 sizeof(rbtdb_version_t));
866 * We assume the number of remaining dead nodes is reasonably small;
867 * the overhead of unlinking all nodes here should be negligible.
869 for (i = 0; i < rbtdb->node_lock_count; i++) {
872 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
873 while (node != NULL) {
874 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
875 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* The quantum starts at 100 when a task is available, otherwise at 0. */
880 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
884 * pick the next tree to (start to) destroy
886 treep = &rbtdb->tree;
887 if (*treep == NULL) {
888 treep = &rbtdb->nsec;
889 if (*treep == NULL) {
890 treep = &rbtdb->nsec3;
892 * we're finished after clear cutting
/* Record the start time so adjust_quantum() can measure this pass. */
899 isc_time_now(&start);
900 result = dns_rbt_destroy2(treep, rbtdb->quantum);
901 if (result == ISC_R_QUOTA) {
902 INSIST(rbtdb->task != NULL);
903 if (rbtdb->quantum != 0)
904 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
907 event = isc_event_allocate(rbtdb->common.mctx,
909 DNS_EVENT_FREESTORAGE,
912 sizeof(isc_event_t));
915 isc_task_send(rbtdb->task, &event);
918 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
922 isc_event_free(&event);
/* Format the origin name (or a placeholder) for the log message. */
924 if (dns_name_dynamic(&rbtdb->common.origin))
925 dns_name_format(&rbtdb->common.origin, buf,
928 strcpy(buf, "<UNKNOWN>");
929 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
930 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
931 "done free_rbtdb(%s)", buf);
933 if (dns_name_dynamic(&rbtdb->common.origin))
934 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
/* Destroy each bucket's reference counter and node lock. */
935 for (i = 0; i < rbtdb->node_lock_count; i++) {
936 isc_refcount_destroy(&rbtdb->node_locks[i].references);
937 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
941 * Clean up LRU / re-signing order lists.
943 if (rbtdb->rdatasets != NULL) {
944 for (i = 0; i < rbtdb->node_lock_count; i++)
945 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
946 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
947 rbtdb->node_lock_count *
948 sizeof(rdatasetheaderlist_t));
951 * Clean up dead node buckets.
953 if (rbtdb->deadnodes != NULL) {
954 for (i = 0; i < rbtdb->node_lock_count; i++)
955 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
956 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
957 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
960 * Clean up heap objects.
962 if (rbtdb->heaps != NULL) {
963 for (i = 0; i < rbtdb->node_lock_count; i++)
964 isc_heap_destroy(&rbtdb->heaps[i]);
965 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
966 rbtdb->node_lock_count * sizeof(isc_heap_t *));
969 if (rbtdb->rrsetstats != NULL)
970 dns_stats_detach(&rbtdb->rrsetstats);
973 if (rbtdb->rpz_cidr != NULL)
974 dns_rpz_cidr_free(&rbtdb->rpz_cidr);
/* Release the node-lock array, the tree lock and the remaining handles. */
977 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
978 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
979 isc_rwlock_destroy(&rbtdb->tree_lock);
980 isc_refcount_destroy(&rbtdb->references);
981 if (rbtdb->task != NULL)
982 isc_task_detach(&rbtdb->task);
984 RBTDB_DESTROYLOCK(&rbtdb->lock);
/* Clear both magic numbers before the structure's memory is returned. */
985 rbtdb->common.magic = 0;
986 rbtdb->common.impmagic = 0;
/* Copy 'ondest' out first: rbtdb is freed before the notification runs. */
987 ondest = rbtdb->common.ondest;
988 isc_mem_detach(&rbtdb->hmctx);
989 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
990 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * Begin tearing down 'rbtdb': detach the cached soanode/nsnode, mark every
 * node-lock bucket as exiting, subtract the 'inactive' count from
 * rbtdb->active under the database lock and, once the active count has
 * reached zero (want_free), log and call free_rbtdb().
 */
994 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
995 isc_boolean_t want_free = ISC_FALSE;
997 unsigned int inactive = 0;
999 /* XXX check for open versions here */
/* Drop the database's own references to its SOA and NS nodes. */
1001 if (rbtdb->soanode != NULL)
1002 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1003 if (rbtdb->nsnode != NULL)
1004 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1007 * Even though there are no external direct references, there still
1008 * may be nodes in use.
1010 for (i = 0; i < rbtdb->node_lock_count; i++) {
/* The 'exiting' flag is set while holding the bucket's node lock. */
1011 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1012 rbtdb->node_locks[i].exiting = ISC_TRUE;
1013 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1014 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1020 if (inactive != 0) {
/* rbtdb->active is updated under the database lock. */
1021 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1022 rbtdb->active -= inactive;
1023 if (rbtdb->active == 0)
1024 want_free = ISC_TRUE;
1025 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1027 char buf[DNS_NAME_FORMATSIZE];
1028 if (dns_name_dynamic(&rbtdb->common.origin))
1029 dns_name_format(&rbtdb->common.origin, buf,
1032 strcpy(buf, "<UNKNOWN>");
1033 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1034 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1035 "calling free_rbtdb(%s)", buf);
1036 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Release one reference on the database *dbp, which must be a valid
 * rbtdb.  The counter's new value is returned in 'refs'; teardown is
 * performed by maybe_free_rbtdb().
 */
1042 detach(dns_db_t **dbp) {
1043 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1046 REQUIRE(VALID_RBTDB(rbtdb));
1048 isc_refcount_decrement(&rbtdb->references, &refs);
1051 maybe_free_rbtdb(rbtdb);
/*
 * Return the database's current version in *versionp with one additional
 * reference held on it.
 */
1057 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1058 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1059 rbtdb_version_t *version;
1062 REQUIRE(VALID_RBTDB(rbtdb));
/* Read current_version and take the reference under the database lock. */
1064 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1065 version = rbtdb->current_version;
1066 isc_refcount_increment(&version->references, &refs);
1067 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1069 *versionp = (dns_dbversion_t *)version;
/*
 * Allocate a version object from 'mctx' and initialise it with 'serial',
 * a reference counter starting at 'references' and the 'writer' flag.
 * commit_ok starts as ISC_FALSE and the changed/resigned lists start
 * empty.  If the reference counter cannot be initialised, the memory is
 * returned to 'mctx'.
 */
1072 static inline rbtdb_version_t *
1073 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1074 unsigned int references, isc_boolean_t writer)
1076 isc_result_t result;
1077 rbtdb_version_t *version;
1079 version = isc_mem_get(mctx, sizeof(*version));
1080 if (version == NULL)
1082 version->serial = serial;
1083 result = isc_refcount_init(&version->references, references);
1084 if (result != ISC_R_SUCCESS) {
/* Undo the allocation when the counter could not be set up. */
1085 isc_mem_put(mctx, version, sizeof(*version));
1088 version->writer = writer;
1089 version->commit_ok = ISC_FALSE;
1090 ISC_LIST_INIT(version->changed_list);
1091 ISC_LIST_INIT(version->resigned_list);
1092 ISC_LINK_INIT(version, link);
/*
 * Create a new version of the database and install it as
 * rbtdb->future_version.  The version gets serial rbtdb->next_serial
 * (which is then incremented) and inherits the 'secure' and NSEC3
 * settings of the current version.
 *
 * Requires: *versionp is NULL and no future version exists yet.
 * Returns: ISC_R_SUCCESS with *versionp set, or ISC_R_NOMEMORY when the
 * version could not be allocated.
 */
1098 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1099 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1100 rbtdb_version_t *version;
1102 REQUIRE(VALID_RBTDB(rbtdb));
1103 REQUIRE(versionp != NULL && *versionp == NULL);
1104 REQUIRE(rbtdb->future_version == NULL);
1106 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1107 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1108 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1110 if (version != NULL) {
1111 version->rbtdb = rbtdb;
1112 version->commit_ok = ISC_TRUE;
/* Inherit security state and NSEC3 parameters from the current version. */
1113 version->secure = rbtdb->current_version->secure;
1114 version->havensec3 = rbtdb->current_version->havensec3;
1115 if (version->havensec3) {
1116 version->flags = rbtdb->current_version->flags;
1117 version->iterations =
1118 rbtdb->current_version->iterations;
1119 version->hash = rbtdb->current_version->hash;
1120 version->salt_length =
1121 rbtdb->current_version->salt_length;
1122 memcpy(version->salt, rbtdb->current_version->salt,
1123 version->salt_length);
1126 version->iterations = 0;
1128 version->salt_length = 0;
1129 memset(version->salt, 0, sizeof(version->salt));
/* Consume the serial and publish the version as the future version. */
1131 rbtdb->next_serial++;
1132 rbtdb->future_version = version;
1134 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1136 if (version == NULL)
1137 return (ISC_R_NOMEMORY);
1139 *versionp = version;
1141 return (ISC_R_SUCCESS);
/*
 * Take an additional reference on version 'source', which must belong to
 * 'db', and return it through *targetp.
 */
1145 attachversion(dns_db_t *db, dns_dbversion_t *source,
1146 dns_dbversion_t **targetp)
1148 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1149 rbtdb_version_t *rbtversion = source;
1152 REQUIRE(VALID_RBTDB(rbtdb));
1153 INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
1155 isc_refcount_increment(&rbtversion->references, &refs);
1158 *targetp = rbtversion;
/*
 * Allocate a rbtdb_changed_t for 'node' and append it to
 * version->changed_list.  'version' must be a writer version.  On a
 * successful allocation a reference is taken on 'node' and the entry
 * starts with dirty set to ISC_FALSE.
 */
1161 static rbtdb_changed_t *
1162 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1163 dns_rbtnode_t *node)
1165 rbtdb_changed_t *changed;
1169 * Caller must be holding the node lock if its reference must be
1170 * protected by the lock.
/* The allocation is made before the database lock is taken. */
1173 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1175 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1177 REQUIRE(version->writer);
1179 if (changed != NULL) {
1180 dns_rbtnode_refincrement(node, &refs);
1182 changed->node = node;
1183 changed->dirty = ISC_FALSE;
1184 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1186 version->commit_ok = ISC_FALSE;
1188 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Free an acachectl_t array belonging to 'header'.  The element count is
 * read from the first two bytes (most significant byte first) of the data
 * that immediately follows the header.  Every element must already be
 * empty; this is enforced with INSIST.
 */
1194 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1199 unsigned char *raw; /* RDATASLAB */
1202 * The caller must be holding the corresponding node lock.
/* The slab data starts directly after the header structure. */
1208 raw = (unsigned char *)header + sizeof(*header);
1209 count = raw[0] * 256 + raw[1];
1212 * Sanity check: since an additional cache entry has a reference to
1213 * the original DB node (in the callback arg), there should be no
1214 * acache entries when the node can be freed.
1216 for (i = 0; i < count; i++)
1217 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1219 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * Free the structure *noqname: its name when dynamically allocated, the
 * 'neg' and 'negsig' slabs when present (sized with dns_rdataslab_size()),
 * and then the structure itself.
 */
1223 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1225 if (dns_name_dynamic(&(*noqname)->name))
1226 dns_name_free(&(*noqname)->name, mctx);
1227 if ((*noqname)->neg != NULL)
1228 isc_mem_put(mctx, (*noqname)->neg,
1229 dns_rdataslab_size((*noqname)->neg, 0));
1230 if ((*noqname)->negsig != NULL)
1231 isc_mem_put(mctx, (*noqname)->negsig,
1232 dns_rdataslab_size((*noqname)->negsig, 0));
1233 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * Initialise the list link of header 'h'.  The stderr trace applies to
 * cache databases of class IN.
 */
1238 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1240 ISC_LINK_INIT(h, link);
1244 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1245 fprintf(stderr, "initialized header: %p\n", h);
/*
 * Allocate a rdatasetheader_t from 'mctx' and initialise it with
 * init_rdataset().  The stderr trace applies to cache databases of
 * class IN.
 */
1251 static inline rdatasetheader_t *
1252 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1254 rdatasetheader_t *h;
1256 h = isc_mem_get(mctx, sizeof(*h));
1261 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1262 fprintf(stderr, "allocated header: %p\n", h);
1264 init_rdataset(rbtdb, h);
/*
 * Release the header 'rdataset' and what hangs off it: the statistics
 * counter is updated for existing headers flagged STATCOUNT, the header
 * is unlinked from its bucket's LRU list and removed from the bucket's
 * heap, the noqname/closest data and acache arrays are freed, and the
 * header memory is returned to 'mctx'.
 */
1269 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1274 if (EXISTS(rdataset) &&
1275 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1276 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
/* LRU lists and heaps are per bucket, indexed by the node's lock number. */
1279 idx = rdataset->node->locknum;
1280 if (ISC_LINK_LINKED(rdataset, link)) {
1281 INSIST(IS_CACHE(rbtdb));
1282 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1284 if (rdataset->heap_index != 0)
1285 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1286 rdataset->heap_index = 0;
1288 if (rdataset->noqname != NULL)
1289 free_noqname(mctx, &rdataset->noqname);
1290 if (rdataset->closest != NULL)
1291 free_noqname(mctx, &rdataset->closest);
1293 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1294 free_acachearray(mctx, rdataset, rdataset->additional_glue);
/* The size passed to isc_mem_put() depends on the NONEXISTENT flag. */
1296 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1297 size = sizeof(*rdataset);
1299 size = dns_rdataslab_size((unsigned char *)rdataset,
1301 isc_mem_put(mctx, rdataset, size);
/*
 * Mark every rdataset header at 'node' whose serial equals 'serial' with
 * RDATASET_ATTR_IGNORE.  Both the top-level header list (linked by 'next')
 * and each header's chain of older versions (linked by 'down') are scanned.
 * 'make_dirty' records whether any header was marked.
 * NOTE(review): how 'make_dirty' is consumed afterwards is not visible
 * here; presumably it sets the node's dirty flag -- confirm.
 */
1305 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1306 rdatasetheader_t *header, *dcurrent;
1307 isc_boolean_t make_dirty = ISC_FALSE;
1310 * Caller must hold the node lock.
1314 * We set the IGNORE attribute on rdatasets with serial number
1315 * 'serial'. When the reference count goes to zero, these rdatasets
1316 * will be cleaned up; until that time, they will be ignored.
1318 for (header = node->data; header != NULL; header = header->next) {
1319 if (header->serial == serial) {
1320 header->attributes |= RDATASET_ATTR_IGNORE;
1321 make_dirty = ISC_TRUE;
/* Apply the same check to the older versions below this header. */
1323 for (dcurrent = header->down;
1325 dcurrent = dcurrent->down) {
1326 if (dcurrent->serial == serial) {
1327 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1328 make_dirty = ISC_TRUE;
/*
 * Free every header in the 'down' chain below 'top'.  'top' itself is not
 * freed.
 */
1337 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1339 rdatasetheader_t *d, *down_next;
1341 for (d = top->down; d != NULL; d = down_next) {
/* Save the successor first: 'd' is gone after free_rdataset(). */
1342 down_next = d->down;
1343 free_rdataset(rbtdb, mctx, d);
/*
 * Clean up the rdataset headers of cache node 'node'.  For every top-level
 * header the chain of older headers below it is freed, and the header
 * itself is unlinked from the node and freed when it is marked NONEXISTENT
 * or STALE.
 */
1349 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1350 rdatasetheader_t *current, *top_prev, *top_next;
1351 isc_mem_t *mctx = rbtdb->common.mctx;
1354 * Caller must be holding the node lock.
1358 for (current = node->data; current != NULL; current = top_next) {
/* Remember the successor; 'current' may be freed below. */
1359 top_next = current->next;
1360 clean_stale_headers(rbtdb, mctx, current);
1362 * If current is nonexistent or stale, we can clean it up.
1364 if ((current->attributes &
1365 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
/* Unlink from the predecessor, or from the node when there is none. */
1366 if (top_prev != NULL)
1367 top_prev->next = current->next;
1369 node->data = current->next;
1370 free_rdataset(rbtdb, mctx, current);
/*
 * Clean up the rdataset headers of zone node 'node', given the least
 * serial number still in use ('least_serial', which must be non-zero).
 *
 * For each top-level header the function works in stages:
 *   - headers in the 'down' chain that duplicate their parent's serial
 *     are removed;
 *   - a top-level header flagged IGNORE is removed, and the first header
 *     below it (if any) is pulled up to take its place;
 *   - the first header in the 'down' chain with a serial below
 *     'least_serial' is freed together with everything beneath it;
 *   - a NONEXISTENT top-level header is removed.
 * 'still_dirty' is set when a top-level header still has older versions
 * below it after cleaning.
 * NOTE(review): what is finally done with 'still_dirty' is not visible
 * here -- confirm.
 */
1378 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1379 rbtdb_serial_t least_serial)
1381 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1382 rdatasetheader_t *top_prev, *top_next;
1383 isc_mem_t *mctx = rbtdb->common.mctx;
1384 isc_boolean_t still_dirty = ISC_FALSE;
1387 * Caller must be holding the node lock.
1389 REQUIRE(least_serial != 0);
1392 for (current = node->data; current != NULL; current = top_next) {
1393 top_next = current->next;
1396 * First, we clean up any instances of multiple rdatasets
1397 * with the same serial number, or that have the IGNORE
1401 for (dcurrent = current->down;
1403 dcurrent = down_next) {
1404 down_next = dcurrent->down;
/* Serials never increase when walking down the chain. */
1405 INSIST(dcurrent->serial <= dparent->serial);
1406 if (dcurrent->serial == dparent->serial ||
/* Splice 'dcurrent' out of the chain before freeing it. */
1408 if (down_next != NULL)
1409 down_next->next = dparent;
1410 dparent->down = down_next;
1411 free_rdataset(rbtdb, mctx, dcurrent);
1417 * We've now eliminated all IGNORE datasets with the possible
1418 * exception of current, which we now check.
1420 if (IGNORE(current)) {
1421 down_next = current->down;
1422 if (down_next == NULL) {
1423 if (top_prev != NULL)
1424 top_prev->next = current->next;
1426 node->data = current->next;
1427 free_rdataset(rbtdb, mctx, current);
1429 * current no longer exists, so we can
1430 * just continue with the loop.
1435 * Pull up current->down, making it the new
1438 if (top_prev != NULL)
1439 top_prev->next = down_next;
1441 node->data = down_next;
1442 down_next->next = top_next;
1443 free_rdataset(rbtdb, mctx, current);
1444 current = down_next;
1449 * We now try to find the first down node less than the
1453 for (dcurrent = current->down;
1455 dcurrent = down_next) {
1456 down_next = dcurrent->down;
1457 if (dcurrent->serial < least_serial)
1463 * If there is a such an rdataset, delete it and any older
1466 if (dcurrent != NULL) {
1468 down_next = dcurrent->down;
1469 INSIST(dcurrent->serial <= least_serial);
1470 free_rdataset(rbtdb, mctx, dcurrent);
1471 dcurrent = down_next;
1472 } while (dcurrent != NULL);
/* Everything below 'dparent' has been freed; terminate the chain. */
1473 dparent->down = NULL;
1477 * Note. The serial number of 'current' might be less than
1478 * least_serial too, but we cannot delete it because it is
1479 * the most recent version, unless it is a NONEXISTENT
1482 if (current->down != NULL) {
1483 still_dirty = ISC_TRUE;
1487 * If this is a NONEXISTENT rdataset, we can delete it.
1489 if (NONEXISTENT(current)) {
1490 if (top_prev != NULL)
1491 top_prev->next = current->next;
1493 node->data = current->next;
1494 free_rdataset(rbtdb, mctx, current);
/*
 * Remove 'node' from the tree it belongs to, selected by node->nsec:
 *   - DNS_RBT_NSEC_NORMAL:   delete from rbtdb->tree;
 *   - DNS_RBT_NSEC_HAS_NSEC: look up and delete the matching node in
 *     rbtdb->nsec, then delete from rbtdb->tree;
 *   - DNS_RBT_NSEC_NSEC:     delete from rbtdb->nsec;
 *   - DNS_RBT_NSEC_NSEC3:    delete from rbtdb->nsec3.
 * For nodes of the main tree the node's full name is also passed to
 * dns_rpz_cidr_deleteip().  Failures are logged as warnings rather than
 * returned.  The node must not be on a dead-node list.
 */
1504 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1506 dns_rbtnode_t *nsecnode;
1507 dns_fixedname_t fname;
/* Stays ISC_R_UNEXPECTED if no case below assigns a result. */
1509 isc_result_t result = ISC_R_UNEXPECTED;
1511 INSIST(!ISC_LINK_LINKED(node, deadlink));
1513 switch (node->nsec) {
1514 case DNS_RBT_NSEC_NORMAL:
1516 if (rbtdb->rpz_cidr != NULL) {
1517 dns_fixedname_init(&fname);
1518 name = dns_fixedname_name(&fname);
1519 dns_rbt_fullnamefromnode(node, name);
1520 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1523 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1525 case DNS_RBT_NSEC_HAS_NSEC:
/* The full name is needed to find the twin node in the NSEC tree. */
1526 dns_fixedname_init(&fname);
1527 name = dns_fixedname_name(&fname);
1528 dns_rbt_fullnamefromnode(node, name);
1530 * Delete the corresponding node from the auxiliary NSEC
1531 * tree before deleting from the main tree.
1534 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1535 NULL, DNS_RBTFIND_EMPTYDATA,
1537 if (result != ISC_R_SUCCESS) {
1538 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1539 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1541 "dns_rbt_findnode(nsec): %s",
1542 isc_result_totext(result));
1544 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1546 if (result != ISC_R_SUCCESS) {
1547 isc_log_write(dns_lctx,
1548 DNS_LOGCATEGORY_DATABASE,
1549 DNS_LOGMODULE_CACHE,
1551 "delete_nsecnode(): "
1552 "dns_rbt_deletenode(nsecnode): %s",
1553 isc_result_totext(result));
/* The main-tree node is deleted whatever happened to the NSEC twin. */
1556 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
/*
 * NOTE(review): unlike the NORMAL case, no rpz_cidr != NULL test is
 * visible before this call -- confirm the callee tolerates NULL.
 */
1558 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1561 case DNS_RBT_NSEC_NSEC:
1562 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1564 case DNS_RBT_NSEC_NSEC3:
1565 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
/*
 * NOTE(review): the log text below names "delete_nsecnode()" although
 * this function is delete_node() -- confirm whether that is intended.
 */
1568 if (result != ISC_R_SUCCESS) {
1569 isc_log_write(dns_lctx,
1570 DNS_LOGCATEGORY_DATABASE,
1571 DNS_LOGMODULE_CACHE,
1573 "delete_nsecnode(): "
1574 "dns_rbt_deletenode: %s",
1575 isc_result_totext(result));
1580 * Clean up dead nodes. These are nodes which have no references, and
1581 * have no data. They are dead but we could not or chose not to delete
1582 * them when we deleted all the data at that node because we did not want
1583 * to wait for the tree write lock.
1585 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * Take nodes off the dead-node list of lock bucket 'bucketnum' and delete
 * them from the database.  Each node is asserted to have no references
 * and no data before it is deleted.  The loop runs only while 'count' is
 * positive, so the work done per call is bounded.
 * NOTE(review): the statement that decrements 'count' is not visible
 * here -- confirm.
 */
1588 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1589 dns_rbtnode_t *node;
1590 int count = 10; /* XXXJT: should be adjustable */
1592 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1593 while (node != NULL && count > 0) {
1594 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1597 * Since we're holding a tree write lock, it should be
1598 * impossible for this node to be referenced by others.
1600 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1601 node->data == NULL);
1603 delete_node(rbtdb, node);
/* Continue with whatever is now at the head of the list. */
1605 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1611 * Caller must be holding the node lock.
/*
 * Add one reference to 'node'.  When this is the node's first reference,
 * the reference counter of the node's lock bucket is incremented as well.
 * The node must not be on a dead-node list.
 */
1614 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1615 unsigned int lockrefs, noderefs;
1616 isc_refcount_t *lockref;
1618 INSIST(!ISC_LINK_LINKED(node, deadlink));
1619 dns_rbtnode_refincrement0(node, &noderefs);
1620 if (noderefs == 1) { /* this is the first reference to the node */
1621 lockref = &rbtdb->node_locks[node->locknum].references;
1622 isc_refcount_increment0(lockref, &lockrefs);
1623 INSIST(lockrefs != 0);
1625 INSIST(noderefs != 0);
1629 * This function is assumed to be called when a node is newly referenced
1630 * and can be in the deadnode list. In that case the node must be retrieved
1631 * from the list because it is going to be used. In addition, if the caller
1632 * happens to hold a write lock on the tree, it's a good chance to purge dead
1634 * Note: while a new reference is gained in multiple places, there are only very
1635 * few cases where the node can be in the deadnode list (only empty nodes can
1636 * have been added to the list).
/*
 * Gain a reference to 'node', first removing it from its bucket's
 * dead-node list if it is linked there.  'treelocktype' is the type of
 * tree lock held by the caller; when it is a write lock and the bucket's
 * dead-node list is not empty, cleanup_dead_nodes() may be run as well.
 *
 * The node lock is first taken for reading and only re-taken for writing
 * when the dead-node list has to be modified.
 */
1639 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1640 isc_rwlocktype_t treelocktype)
1642 isc_rwlocktype_t locktype = isc_rwlocktype_read;
1643 nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
1644 isc_boolean_t maybe_cleanup = ISC_FALSE;
1648 NODE_STRONGLOCK(nodelock);
1649 NODE_WEAKLOCK(nodelock, locktype);
1652 * Check if we can possibly cleanup the dead node. If so, upgrade
1653 * the node lock below to perform the cleanup.
1655 if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1656 treelocktype == isc_rwlocktype_write) {
1657 maybe_cleanup = ISC_TRUE;
1660 if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
1662 * Upgrade the lock and test if we still need to unlink.
/*
 * The read lock is dropped and a write lock acquired, so the linkage is
 * tested again once the write lock is held.
 */
1664 NODE_WEAKUNLOCK(nodelock, locktype);
1665 locktype = isc_rwlocktype_write;
1667 NODE_WEAKLOCK(nodelock, locktype);
1668 if (ISC_LINK_LINKED(node, deadlink))
1669 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1672 cleanup_dead_nodes(rbtdb, node->locknum);
1675 new_reference(rbtdb, node);
/* Release with whichever lock type is currently held. */
1677 NODE_WEAKUNLOCK(nodelock, locktype);
1678 NODE_STRONGUNLOCK(nodelock);
1682 * Caller must be holding the node lock; either the "strong", read or write
1683 * lock. Note that the lock must be held even when node references are
1684 * atomically modified; in that case the decrement operation itself does not
1685 * have to be protected, but we must avoid a race condition where multiple
1686 * threads are decreasing the reference to zero simultaneously and at least
1687 * one of them is going to free the node.
1688 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * Drop one reference to 'node' and clean the node up where possible.
 *
 * 'least_serial' is the least serial still in use, or 0 when the caller
 * does not know it (it is then read from the database).  'nlock' and
 * 'tlock' are the node and tree lock types held by the caller.  'pruning'
 * is true when the call originates from tree pruning, in which case no
 * further pruning event is dispatched.
 *
 * A node that ends up without data and without a subtree is either
 * deleted from the tree (when a tree write lock is held or can be taken
 * without blocking), handed to a pruning event, or queued on its bucket's
 * dead-node list for later removal.
 */
1691 static isc_boolean_t
1692 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1693 rbtdb_serial_t least_serial,
1694 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1695 isc_boolean_t pruning)
1697 isc_result_t result;
1698 isc_boolean_t write_locked;
1699 rbtdb_nodelock_t *nodelock;
1700 unsigned int refs, nrefs;
1701 int bucket = node->locknum;
1702 isc_boolean_t no_reference = ISC_TRUE;
1704 nodelock = &rbtdb->node_locks[bucket];
1706 /* Handle easy and typical case first. */
/*
 * A clean node that still has data or a subtree needs no cleanup; only
 * the counters are adjusted and the result reports whether the node's
 * count reached zero.
 */
1707 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1708 dns_rbtnode_refdecrement(node, &nrefs);
1709 INSIST((int)nrefs >= 0);
1711 isc_refcount_decrement(&nodelock->references, &refs);
1712 INSIST((int)refs >= 0);
1714 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1717 /* Upgrade the lock? */
1718 if (nlock == isc_rwlocktype_read) {
1719 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1720 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1723 dns_rbtnode_refdecrement(node, &nrefs);
1724 INSIST((int)nrefs >= 0);
1726 /* Restore the lock? */
1727 if (nlock == isc_rwlocktype_read)
1728 NODE_WEAKDOWNGRADE(&nodelock->lock);
/* Cache and zone nodes are cleaned by different routines. */
1733 if (IS_CACHE(rbtdb))
1734 clean_cache_node(rbtdb, node);
1736 if (least_serial == 0) {
1738 * Caller doesn't know the least serial.
1741 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1742 least_serial = rbtdb->least_serial;
1743 RBTDB_UNLOCK(&rbtdb->lock,
1744 isc_rwlocktype_read);
1746 clean_zone_node(rbtdb, node, least_serial);
1751 * Attempt to switch to a write lock on the tree. If this fails,
1752 * we will add this node to a linked list of nodes in this locking
1753 * bucket which we will free later.
1755 if (tlock != isc_rwlocktype_write) {
1757 * Locking hierarchy notwithstanding, we don't need to free
1758 * the node lock before acquiring the tree write lock because
1759 * we only do a trylock.
1761 if (tlock == isc_rwlocktype_read)
1762 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1764 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1765 isc_rwlocktype_write);
/* Only success or "lock busy" are acceptable outcomes here. */
1766 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1767 result == ISC_R_LOCKBUSY);
1769 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1771 write_locked = ISC_TRUE;
1773 isc_refcount_decrement(&nodelock->references, &refs);
1774 INSIST((int)refs >= 0);
1777 * XXXDCL should this only be done for cache zones?
1779 if (node->data != NULL || node->down != NULL)
1784 * We can now delete the node.
1788 * If this node is the only one in the level it's in, deleting
1789 * this node may recursively make its parent the only node in
1790 * the parent level; if so, and if no one is currently using
1791 * the parent node, this is almost the only opportunity to
1792 * clean it up. But the recursive cleanup is not that trivial
1793 * since the child and parent may be in different lock buckets,
1794 * which would cause a lock order reversal problem. To avoid
1795 * the trouble, we'll dispatch a separate event for batch
1796 * cleaning. We need to check whether we're deleting the node
1797 * as a result of pruning to avoid infinite dispatching.
1798 * Note: pruning happens only when a task has been set for the
1799 * rbtdb. If the user of the rbtdb chooses not to set a task,
1800 * it's their responsibility to purge stale leaves (e.g. by
1801 * periodic walk-through).
1803 if (!pruning && node->parent != NULL &&
1804 node->parent->down == node && node->left == NULL &&
1805 node->right == NULL && rbtdb->task != NULL) {
1809 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1812 sizeof(isc_event_t));
/*
 * The node is referenced again and the database attached before the
 * event is sent, so the function reports that a reference remains.
 */
1814 new_reference(rbtdb, node);
1816 attach((dns_db_t *)rbtdb, &db);
1818 isc_task_send(rbtdb->task, &ev);
1819 no_reference = ISC_FALSE;
1822 * XXX: this is a weird situation. We could
1823 * ignore this error case, but then the stale
1824 * node will unlikely be purged except via a
1825 * rare condition such as manual cleanup. So
1826 * we queue it in the deadnodes list, hoping
1827 * the memory shortage is temporary and the node
1828 * will be deleted later.
1830 isc_log_write(dns_lctx,
1831 DNS_LOGCATEGORY_DATABASE,
1832 DNS_LOGMODULE_CACHE,
1834 "decrement_reference: failed to "
1835 "allocate pruning event");
1836 INSIST(node->data == NULL);
1837 INSIST(!ISC_LINK_LINKED(node, deadlink));
1838 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
/* The node name is formatted only if the debug message would be logged. */
1842 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1843 char printname[DNS_NAME_FORMATSIZE];
1845 isc_log_write(dns_lctx,
1846 DNS_LOGCATEGORY_DATABASE,
1847 DNS_LOGMODULE_CACHE,
1849 "decrement_reference: "
1850 "delete from rbt: %p %s",
1852 dns_rbt_formatnodename(node,
1854 sizeof(printname)));
1857 delete_node(rbtdb, node);
/* Queue the empty node on the bucket's dead-node list instead. */
1860 INSIST(node->data == NULL);
1861 INSIST(!ISC_LINK_LINKED(node, deadlink));
1862 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1866 /* Restore the lock? */
1867 if (nlock == isc_rwlocktype_read)
1868 NODE_WEAKDOWNGRADE(&nodelock->lock);
1871 * Relock a read lock, or unlock the write lock if no lock was held.
1873 if (tlock == isc_rwlocktype_none)
1875 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1877 if (tlock == isc_rwlocktype_read)
1879 isc_rwlock_downgrade(&rbtdb->tree_lock);
1881 return (no_reference);
1885 * Prune the tree by recursively cleaning up single leaves. In the worst
1886 * case, the number of iterations is the number of tree levels, which is at
1887 * most the maximum number of domain name labels, i.e., 127. In practice, this
1888 * should be much smaller (only a few times), and even the worst case would be
1889 * acceptable for a single event.
/*
 * Task event handler.  The database is taken from the event's sender and
 * the node to start with from the event's argument.
 *
 * With the tree write lock and the node's bucket write lock held, the
 * reference on the node is dropped through decrement_reference() in
 * pruning mode.  If the node's parent is thereby left without a 'down'
 * subtree, the parent is referenced (switching bucket locks if needed)
 * so it can be examined in turn.  The database is detached at the end.
 */
1892 prune_tree(isc_task_t *task, isc_event_t *event) {
1893 dns_rbtdb_t *rbtdb = event->ev_sender;
1894 dns_rbtnode_t *node = event->ev_arg;
1895 dns_rbtnode_t *parent;
1896 unsigned int locknum;
/* Everything needed has been copied out of the event already. */
1900 isc_event_free(&event);
1902 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1903 locknum = node->locknum;
1904 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/* Read the parent first: the node may be deleted by the call below. */
1906 parent = node->parent;
1907 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1908 isc_rwlocktype_write, ISC_TRUE);
1910 if (parent != NULL && parent->down == NULL) {
1912 * node was the only down child of the parent and has
1913 * just been removed. We'll then need to examine the
1914 * parent. Keep the lock if possible; otherwise,
1915 * release the old lock and acquire one for the parent.
1917 if (parent->locknum != locknum) {
1918 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1919 isc_rwlocktype_write);
1920 locknum = parent->locknum;
1921 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1922 isc_rwlocktype_write);
1926 * We need to gain a reference to the node before
1927 * decrementing it in the next iteration. In addition,
1928 * if the node is in the dead-nodes list, extract it
1929 * from the list beforehand as we do in
1930 * reactivate_node().
1932 if (ISC_LINK_LINKED(parent, deadlink))
1933 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1935 new_reference(rbtdb, parent);
1940 } while (node != NULL);
/* 'locknum' identifies the bucket lock that is held at this point. */
1941 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1942 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1944 detach((dns_db_t **)&rbtdb);
/*
 * Record 'version' as the least open version: its serial becomes the
 * database's least_serial, and its changed list is handed over to
 * '*cleanup_list', leaving the version with an empty list.
 */
1948 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1949 rbtdb_changedlist_t *cleanup_list)
1952 * Caller must be holding the database lock.
1955 rbtdb->least_serial = version->serial;
1956 *cleanup_list = version->changed_list;
1957 ISC_LIST_INIT(version->changed_list);
/*
 * Move every changed record of 'version' that is not marked dirty from
 * the version's changed list onto '*cleanup_list'.  Dirty records stay on
 * the version's list.
 */
1961 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1962 rbtdb_changed_t *changed, *next_changed;
1965 * If the changed record is dirty, then
1966 * an update created multiple versions of
1967 * a given rdataset. We keep this list
1968 * until we're the least open version, at
1969 * which point it's safe to get rid of any
1972 * If the changed record isn't dirty, then
1973 * we don't need it anymore since we're
1974 * committing and not rolling back.
1976 * The caller must be holding the database lock.
1978 for (changed = HEAD(version->changed_list);
1980 changed = next_changed) {
/* Fetch the successor before 'changed' is moved to the other list. */
1981 next_changed = NEXT(changed, link);
1982 if (!changed->dirty) {
1983 UNLINK(version->changed_list,
1985 APPEND(*cleanup_list,
/*
 * Determine the security status of 'version' from the records at the
 * zone's 'origin' node and store it in version->secure.
 *
 * The DNSKEY rdataset is scanned for a zone key, the NSEC rdataset is
 * looked up together with its signatures, and setnsec3parameters() is
 * called to fill in version->havensec3.  The version is marked
 * dns_db_secure when an NSEC3 chain or NSEC data was found, and
 * dns_db_insecure otherwise.
 * NOTE(review): the conditions guarding the early "insecure" assignment
 * and the setting of 'hasnsec' are not visible here -- confirm.
 */
1992 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
2000 dns_rdataset_t keyset;
2001 dns_rdataset_t nsecset, signsecset;
2002 isc_boolean_t haszonekey = ISC_FALSE;
2003 isc_boolean_t hasnsec = ISC_FALSE;
2004 isc_result_t result;
/* Look for a zone key among the DNSKEY records at the origin. */
2006 dns_rdataset_init(&keyset);
2007 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
2008 0, 0, &keyset, NULL);
2009 if (result == ISC_R_SUCCESS) {
2010 result = dns_rdataset_first(&keyset);
2011 while (result == ISC_R_SUCCESS) {
2012 dns_rdata_t keyrdata = DNS_RDATA_INIT;
2013 dns_rdataset_current(&keyset, &keyrdata);
2014 if (dns_zonekey_iszonekey(&keyrdata)) {
2015 haszonekey = ISC_TRUE;
2018 result = dns_rdataset_next(&keyset);
2020 dns_rdataset_disassociate(&keyset);
2023 version->secure = dns_db_insecure;
2024 version->havensec3 = ISC_FALSE;
/* Look up the NSEC rdataset at the origin along with its signatures. */
2028 dns_rdataset_init(&nsecset);
2029 dns_rdataset_init(&signsecset);
2030 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2031 0, 0, &nsecset, &signsecset);
2032 if (result == ISC_R_SUCCESS) {
2033 if (dns_rdataset_isassociated(&signsecset)) {
2035 dns_rdataset_disassociate(&signsecset);
2037 dns_rdataset_disassociate(&nsecset);
2040 setnsec3parameters(db, version);
2043 * Do we have a valid NSEC/NSEC3 chain?
2045 if (version->havensec3 || hasnsec)
2046 version->secure = dns_db_secure;
2048 version->secure = dns_db_insecure;
2053 * Walk the origin node looking for NSEC3PARAM records.
2054 * Cache the nsec3 parameters.
/*
 * Scan the rdataset headers at the database's origin node for an
 * NSEC3PARAM rdataset visible in 'version' and copy the parameters of a
 * usable record (hash, salt, salt length, iterations, flags) into
 * 'version', setting version->havensec3.  version->havensec3 is reset to
 * false before the scan.
 *
 * The tree lock and the origin node's bucket lock are both held for
 * reading during the scan.
 */
2058 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
2059 dns_rbtnode_t *node;
2060 dns_rdata_nsec3param_t nsec3param;
2061 dns_rdata_t rdata = DNS_RDATA_INIT;
2062 isc_region_t region;
2063 isc_result_t result;
2064 rdatasetheader_t *header, *header_next;
2065 unsigned char *raw; /* RDATASLAB */
2066 unsigned int count, length;
2067 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2069 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2070 version->havensec3 = ISC_FALSE;
2071 node = rbtdb->origin_node;
2072 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2073 isc_rwlocktype_read);
2074 for (header = node->data;
2076 header = header_next) {
2077 header_next = header->next;
/*
 * Walk down the version chain until a header whose serial does not
 * exceed this version's serial is reached.
 */
2079 if (header->serial <= version->serial &&
2081 if (NONEXISTENT(header))
2085 header = header->down;
2086 } while (header != NULL);
2088 if (header != NULL &&
2089 (header->type == dns_rdatatype_nsec3param)) {
2091 * Find A NSEC3PARAM with a supported algorithm.
/* The slab follows the header; its first two bytes hold the count. */
2093 raw = (unsigned char *)header + sizeof(*header);
2094 count = raw[0] * 256 + raw[1]; /* count */
2095 #if DNS_RDATASET_FIXED
2096 raw += count * 4 + 2;
2100 while (count-- > 0U) {
/* Each record in the slab starts with a two-byte length. */
2101 length = raw[0] * 256 + raw[1];
2102 #if DNS_RDATASET_FIXED
2108 region.length = length;
2110 dns_rdata_fromregion(&rdata,
2111 rbtdb->common.rdclass,
2112 dns_rdatatype_nsec3param,
2114 result = dns_rdata_tostruct(&rdata,
2117 INSIST(result == ISC_R_SUCCESS);
2118 dns_rdata_reset(&rdata);
/*
 * Two tests on the parsed record follow: one on the hash algorithm and
 * one on non-zero flags.
 */
2120 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2121 !dns_nsec3_supportedhash(nsec3param.hash))
2124 if (nsec3param.flags != 0)
2127 memcpy(version->salt, nsec3param.salt,
2128 nsec3param.salt_length);
2129 version->hash = nsec3param.hash;
2130 version->salt_length = nsec3param.salt_length;
2131 version->iterations = nsec3param.iterations;
2132 version->flags = nsec3param.flags;
2133 version->havensec3 = ISC_TRUE;
2135 * Look for a better algorithm than the
2136 * unknown test algorithm.
2138 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2144 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2145 isc_rwlocktype_read);
2146 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Task event handler that runs cleanup_dead_nodes() on every lock bucket
 * of the database given in the event's argument, holding the tree write
 * lock and each bucket's write lock in turn.
 *
 * The handler can either re-send the event to 'task' or free it, drop a
 * reference on the database and call maybe_free_rbtdb().
 * NOTE(review): presumably the event is re-sent while some bucket still
 * has dead nodes ('again') -- the lines making that choice are not
 * visible here; confirm.
 */
2151 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2152 dns_rbtdb_t *rbtdb = event->ev_arg;
2153 isc_boolean_t again = ISC_FALSE;
2154 unsigned int locknum;
2157 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2158 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2159 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2160 isc_rwlocktype_write);
2161 cleanup_dead_nodes(rbtdb, locknum);
/* Check whether this bucket still has dead nodes queued. */
2162 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2164 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2165 isc_rwlocktype_write);
2167 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2169 isc_task_send(task, &event);
2171 isc_event_free(&event);
2172 isc_refcount_decrement(&rbtdb->references, &refs);
2174 maybe_free_rbtdb(rbtdb);
/*
 * Close the database version referenced by '*versionp'.
 *
 * One reference to the version is released first.  When references
 * remain, the version is only asserted not to be the writer.  Otherwise,
 * under the database write lock:
 *   - a writer version either becomes the new current version or is
 *     rolled back (the latter sets 'rollback' and schedules the version
 *     itself for cleanup);
 *   - a reader version that is no longer the current version is removed
 *     from the open list, handing its pending cleanups on as appropriate.
 *
 * After the lock is released the zone's secure status is refreshed for a
 * committed writer on a non-cache database, the version structure chosen
 * for cleanup is freed, re-signed headers are processed, and each node on
 * the cleanup list has its reference dropped.  If a dead-nodes event was
 * allocated it is sent to the database's task; if not, the tree write
 * lock is held for the duration of the cleanup loop.
 * NOTE(review): the branch that selects commit versus rollback is not
 * visible here; presumably it tests 'commit' -- confirm.
 */
2179 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2180 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2181 rbtdb_version_t *version, *cleanup_version, *least_greater;
2182 isc_boolean_t rollback = ISC_FALSE;
2183 rbtdb_changedlist_t cleanup_list;
2184 rdatasetheaderlist_t resigned_list;
2185 rbtdb_changed_t *changed, *next_changed;
2186 rbtdb_serial_t serial, least_serial;
2187 dns_rbtnode_t *rbtnode;
2189 rdatasetheader_t *header;
2190 isc_boolean_t writer;
2192 REQUIRE(VALID_RBTDB(rbtdb));
2193 version = (rbtdb_version_t *)*versionp;
2194 INSIST(version->rbtdb == rbtdb);
2196 cleanup_version = NULL;
2197 ISC_LIST_INIT(cleanup_list);
2198 ISC_LIST_INIT(resigned_list);
2200 isc_refcount_decrement(&version->references, &refs);
2201 if (refs > 0) { /* typical and easy case first */
2203 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2204 INSIST(!version->writer);
2205 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2210 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
/* Copied now because both values are needed after the lock is dropped. */
2211 serial = version->serial;
2212 writer = version->writer;
2213 if (version->writer) {
2216 rbtdb_version_t *cur_version;
2218 INSIST(version->commit_ok);
2219 INSIST(version == rbtdb->future_version);
2221 * The current version is going to be replaced.
2222 * Release the (likely last) reference to it from the
2223 * DB itself and unlink it from the open list.
2225 cur_version = rbtdb->current_version;
2226 isc_refcount_decrement(&cur_version->references,
2229 if (cur_version->serial == rbtdb->least_serial)
2230 INSIST(EMPTY(cur_version->changed_list));
2231 UNLINK(rbtdb->open_versions,
2234 if (EMPTY(rbtdb->open_versions)) {
2236 * We're going to become the least open
2239 make_least_version(rbtdb, version,
2243 * Some other open version is the
2244 * least version. We can't cleanup
2245 * records that were changed in this
2246 * version because the older versions
2247 * may still be in use by an open
2250 * We can, however, discard the
2251 * changed records for things that
2252 * we've added that didn't exist in
2255 cleanup_nondirty(version, &cleanup_list);
2258 * If the (soon to be former) current version
2259 * isn't being used by anyone, we can clean
2263 cleanup_version = cur_version;
2264 APPENDLIST(version->changed_list,
2265 cleanup_version->changed_list,
2269 * Become the current version.
2271 version->writer = ISC_FALSE;
2272 rbtdb->current_version = version;
2273 rbtdb->current_serial = version->serial;
2274 rbtdb->future_version = NULL;
2277 * Keep the current version in the open list, and
2278 * gain a reference for the DB itself (see the DB
2279 * creation function below). This must be the only
2280 * case where we need to increment the counter from
2281 * zero and need to use isc_refcount_increment0().
2283 isc_refcount_increment0(&version->references,
2285 INSIST(cur_ref == 1);
2286 PREPEND(rbtdb->open_versions,
2287 rbtdb->current_version, link);
/* Take over the version's resigned list; it is processed further below. */
2288 resigned_list = version->resigned_list;
2289 ISC_LIST_INIT(version->resigned_list);
2292 * We're rolling back this transaction.
2294 cleanup_list = version->changed_list;
2295 ISC_LIST_INIT(version->changed_list);
2296 resigned_list = version->resigned_list;
2297 ISC_LIST_INIT(version->resigned_list);
2298 rollback = ISC_TRUE;
2299 cleanup_version = version;
2300 rbtdb->future_version = NULL;
2303 if (version != rbtdb->current_version) {
2305 * There are no external or internal references
2306 * to this version and it can be cleaned up.
2308 cleanup_version = version;
2311 * Find the version with the least serial
2312 * number greater than ours.
2314 least_greater = PREV(version, link);
2315 if (least_greater == NULL)
2316 least_greater = rbtdb->current_version;
2318 INSIST(version->serial < least_greater->serial);
2320 * Is this the least open version?
2322 if (version->serial == rbtdb->least_serial) {
2324 * Yes. Install the new least open
2327 make_least_version(rbtdb,
2332 * Add any unexecuted cleanups to
2333 * those of the least greater version.
2335 APPENDLIST(least_greater->changed_list,
2336 version->changed_list,
2339 } else if (version->serial == rbtdb->least_serial)
2340 INSIST(EMPTY(version->changed_list));
2341 UNLINK(rbtdb->open_versions, version, link);
/* Snapshot the least serial for use after the lock is released. */
2343 least_serial = rbtdb->least_serial;
2344 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2347 * Update the zone's secure status.
2349 if (writer && commit && !IS_CACHE(rbtdb))
2350 iszonesecure(db, version, rbtdb->origin_node);
2352 if (cleanup_version != NULL) {
2353 INSIST(EMPTY(cleanup_version->changed_list));
2354 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2355 sizeof(*cleanup_version));
2359 * Commit/rollback re-signed headers.
2361 for (header = HEAD(resigned_list);
2363 header = HEAD(resigned_list)) {
2366 ISC_LIST_UNLINK(resigned_list, header, link);
2368 lock = &rbtdb->node_locks[header->node->locknum].lock;
2369 NODE_LOCK(lock, isc_rwlocktype_write);
2371 resign_insert(rbtdb, header->node->locknum, header);
2372 decrement_reference(rbtdb, header->node, least_serial,
2373 isc_rwlocktype_write, isc_rwlocktype_none,
2375 NODE_UNLOCK(lock, isc_rwlocktype_write);
2378 if (!EMPTY(cleanup_list)) {
2379 isc_event_t *event = NULL;
2380 isc_rwlocktype_t tlock = isc_rwlocktype_none;
/* An event can only be allocated when the database has a task. */
2382 if (rbtdb->task != NULL)
2383 event = isc_event_allocate(rbtdb->common.mctx, NULL,
2384 DNS_EVENT_RBTDEADNODES,
2385 cleanup_dead_nodes_callback,
2386 rbtdb, sizeof(isc_event_t));
2387 if (event == NULL) {
2389 * We acquire a tree write lock here in order to make
2390 * sure that stale nodes will be removed in
2391 * decrement_reference(). If we didn't have the lock,
2392 * those nodes could miss the chance to be removed
2393 * until the server stops. The write lock is
2394 * expensive, but this event should be rare enough
2395 * to justify the cost.
2397 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2398 tlock = isc_rwlocktype_write;
2401 for (changed = HEAD(cleanup_list);
2403 changed = next_changed) {
/* 'changed' is freed at the end of each pass, so fetch the next first. */
2406 next_changed = NEXT(changed, link);
2407 rbtnode = changed->node;
2408 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2410 NODE_LOCK(lock, isc_rwlocktype_write);
2412 * This is a good opportunity to purge any dead nodes,
2416 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2419 rollback_node(rbtnode, serial);
2420 decrement_reference(rbtdb, rbtnode, least_serial,
2421 isc_rwlocktype_write, tlock,
2424 NODE_UNLOCK(lock, isc_rwlocktype_write);
2426 isc_mem_put(rbtdb->common.mctx, changed,
/* A database reference is taken before the event is sent to the task. */
2429 if (event != NULL) {
2430 isc_refcount_increment(&rbtdb->references, NULL);
2431 isc_task_send(rbtdb->task, &event);
2433 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2441 * Add the necessary magic for the wildcard name 'name'
2442 * to be found in 'rbtdb'.
2444 * In order for wildcard matching to work correctly in
2445 * zone_find(), we must ensure that a node for the wildcarding
2446 * level exists in the database, and has its 'find_callback'
2447 * and 'wild' bits set.
2449 * E.g. if the wildcard name is "*.sub.example." then we
2450 * must ensure that "sub.example." exists and is marked as
/*
 * Ensure that the node one label above 'name' exists in the main tree and
 * is flagged for the find callback.  The node is added if necessary; an
 * already existing node (ISC_R_EXISTS) is accepted.  A newly created node
 * is marked DNS_RBT_NSEC_NORMAL.
 *
 * Returns ISC_R_SUCCESS at the end of the visible path.
 * NOTE(review): presumably any other dns_rbt_addnode() failure is
 * returned to the caller -- that line is not visible here; confirm.
 */
2454 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2455 isc_result_t result;
2456 dns_name_t foundname;
2457 dns_offsets_t offsets;
2459 dns_rbtnode_t *node = NULL;
2461 dns_name_init(&foundname, offsets);
2462 n = dns_name_countlabels(name);
/* Label sequence starting at index 1, i.e. without the first label. */
2465 dns_name_getlabelsequence(name, 1, n, &foundname);
2466 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2467 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2469 if (result == ISC_R_SUCCESS)
2470 node->nsec = DNS_RBT_NSEC_NORMAL;
2471 node->find_callback = 1;
2473 return (ISC_R_SUCCESS);
/*
 * Examine trailing label sequences of 'name' (the last 'i' labels for a
 * range of 'i').  For every sequence that is itself a wildcard name,
 * add_wildcard_magic() is applied and a node for the sequence is added to
 * the main tree; an already existing node is accepted, and a newly
 * created one is marked DNS_RBT_NSEC_NORMAL.
 *
 * Returns ISC_R_SUCCESS at the end of the visible path.
 * NOTE(review): the loop bounds, presumably derived from the label
 * counts of 'name' and of the database origin, are not visible here --
 * confirm.
 */
2477 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2478 isc_result_t result;
2479 dns_name_t foundname;
2480 dns_offsets_t offsets;
2481 unsigned int n, l, i;
2483 dns_name_init(&foundname, offsets);
2484 n = dns_name_countlabels(name);
2485 l = dns_name_countlabels(&rbtdb->common.origin);
2488 dns_rbtnode_t *node = NULL; /* dummy */
/* The last 'i' labels of 'name'. */
2489 dns_name_getlabelsequence(name, n - i, i, &foundname);
2490 if (dns_name_iswildcard(&foundname)) {
2491 result = add_wildcard_magic(rbtdb, &foundname);
2492 if (result != ISC_R_SUCCESS)
2494 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2496 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2498 if (result == ISC_R_SUCCESS)
2499 node->nsec = DNS_RBT_NSEC_NORMAL;
2503 return (ISC_R_SUCCESS);
/*
 * Find the node for 'name' in 'tree', which must be either the main tree
 * or the NSEC3 tree of 'rbtdb', and store it in '*nodep' with a new
 * reference.
 *
 * The lookup is done under a tree read lock.  If it fails, the read lock
 * is released and a DNS_R_PARTIALMATCH result is converted to
 * ISC_R_NOTFOUND.  On the creation path the tree lock is re-acquired for
 * writing and the node is added; a newly added node gets its lock bucket
 * number assigned and, in the main tree, the wildcard bookkeeping for
 * 'name' is performed.  Nodes added to the NSEC3 tree are marked
 * DNS_RBT_NSEC_NSEC3.
 *
 * Returns ISC_R_SUCCESS once '*nodep' has been set.
 * NOTE(review): the test of 'create' that separates the not-found return
 * from the creation path is not visible here -- confirm.
 */
2507 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
2508 isc_boolean_t create, dns_dbnode_t **nodep)
2510 dns_rbtnode_t *node = NULL;
2511 dns_name_t nodename;
2512 isc_result_t result;
2513 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2515 INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
2517 dns_name_init(&nodename, NULL);
2518 RWLOCK(&rbtdb->tree_lock, locktype);
2519 result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
2520 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2521 if (result != ISC_R_SUCCESS) {
2522 RWUNLOCK(&rbtdb->tree_lock, locktype);
2524 if (result == DNS_R_PARTIALMATCH)
2525 result = ISC_R_NOTFOUND;
2529 * It would be nice to try to upgrade the lock instead of
2530 * unlocking then relocking.
2532 locktype = isc_rwlocktype_write;
2533 RWLOCK(&rbtdb->tree_lock, locktype);
2535 result = dns_rbt_addnode(tree, name, &node);
2536 if (result == ISC_R_SUCCESS) {
/* Only main-tree nodes are registered with the RPZ CIDR data. */
2538 if (tree == rbtdb->tree && rbtdb->rpz_cidr != NULL) {
2539 dns_fixedname_t fnamef;
2542 dns_fixedname_init(&fnamef);
2543 fname = dns_fixedname_name(&fnamef);
2544 dns_rbt_fullnamefromnode(node, fname);
2545 dns_rpz_cidr_addip(rbtdb->rpz_cidr, fname);
/* Assign the lock bucket from a hash reduced modulo the bucket count. */
2548 dns_rbt_namefromnode(node, &nodename);
2549 #ifdef DNS_RBT_USEHASH
2550 node->locknum = node->hashval % rbtdb->node_lock_count;
2552 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2553 rbtdb->node_lock_count;
2555 if (tree == rbtdb->tree) {
/* NOTE(review): this call's result is not assigned or checked here. */
2556 add_empty_wildcards(rbtdb, name);
2558 if (dns_name_iswildcard(name)) {
2559 result = add_wildcard_magic(rbtdb, name);
2560 if (result != ISC_R_SUCCESS) {
2561 RWUNLOCK(&rbtdb->tree_lock, locktype);
2566 if (tree == rbtdb->nsec3)
2567 node->nsec = DNS_RBT_NSEC_NSEC3;
2568 } else if (result != ISC_R_EXISTS) {
2569 RWUNLOCK(&rbtdb->tree_lock, locktype);
2574 if (tree == rbtdb->nsec3)
2575 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
/* Take the reference while the tree lock is still held. */
2577 reactivate_node(rbtdb, node, locktype);
2578 RWUNLOCK(&rbtdb->tree_lock, locktype);
2580 *nodep = (dns_dbnode_t *)node;
2582 return (ISC_R_SUCCESS);
/*
 * findnode: find or create the node for 'name' in the main tree.
 *
 * Validates that 'db' is an rbtdb and delegates to findnodeintree() with
 * rbtdb->tree, passing 'create' and 'nodep' through unchanged; the result
 * of findnodeintree() is returned as-is.
 */
2586 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2587 dns_dbnode_t **nodep)
2589 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2591 REQUIRE(VALID_RBTDB(rbtdb));
2593 return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
/*
 * findnsec3node: find or create the node for 'name' in the NSEC3 tree.
 *
 * Validates that 'db' is an rbtdb and delegates to findnodeintree() with
 * rbtdb->nsec3, passing 'create' and 'nodep' through unchanged; the result
 * of findnodeintree() is returned as-is.
 */
2597 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2598 dns_dbnode_t **nodep)
2600 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2602 REQUIRE(VALID_RBTDB(rbtdb));
2604 return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
/*
 * zone_zonecut_callback: per-node callback used while descending the tree
 * during a zone lookup; 'arg' is the rbtdb_search_t for the lookup.
 *
 * Visible behaviour:
 *  - If search->zonecut is already set, returns DNS_R_CONTINUE at once so
 *    that only the topmost zone cut is remembered.
 *  - Otherwise takes the node's read lock and scans node->data for NS,
 *    DNAME and RRSIG(DNAME) headers, following header->down while looking
 *    for a version whose serial is <= search->serial and skipping headers
 *    marked NONEXISTENT.  DNAME and RRSIG(DNAME) hits are recorded in
 *    dname_header / sigdname_header; an NS hit is only considered when the
 *    node is not the origin node or the database is a stub.
 *  - When a header is chosen ('found'), a reference is taken on the node,
 *    search->zonecut / zonecut_rdataset / need_cleanup are set and
 *    search->wild is cleared.  Without DNS_DBFIND_GLUEOK the result becomes
 *    DNS_R_PARTIALMATCH; with it, 'name' is copied into
 *    search->zonecut_name and search->copy_name is set.
 *  - When the node has its 'wild' bit set and DNS_DBFIND_NOWILD is not
 *    requested, search->wild is set.
 *  - The node lock is released before returning.
 *
 * NOTE(review): the assignments to 'ns_header' and 'found', several
 * else/brace lines and the final return are missing from this listing;
 * the NS-versus-DNAME precedence is taken from the surviving comment text
 * and should be confirmed against the complete source.
 */
2608 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2609 rbtdb_search_t *search = arg;
2610 rdatasetheader_t *header, *header_next;
2611 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2612 rdatasetheader_t *found;
2613 isc_result_t result;
2614 dns_rbtnode_t *onode;
2617 * We only want to remember the topmost zone cut, since it's the one
2618 * that counts, so we'll just continue if we've already found a
2621 if (search->zonecut != NULL)
2622 return (DNS_R_CONTINUE);
2625 result = DNS_R_CONTINUE;
2626 onode = search->rbtdb->origin_node;
2628 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2629 isc_rwlocktype_read);
2632 * Look for an NS or DNAME rdataset active in our version.
2635 dname_header = NULL;
2636 sigdname_header = NULL;
2637 for (header = node->data; header != NULL; header = header_next) {
2638 header_next = header->next;
2639 if (header->type == dns_rdatatype_ns ||
2640 header->type == dns_rdatatype_dname ||
2641 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2643 if (header->serial <= search->serial &&
2646 * Is this a "this rdataset doesn't
2649 if (NONEXISTENT(header))
2653 header = header->down;
2654 } while (header != NULL);
2655 if (header != NULL) {
2656 if (header->type == dns_rdatatype_dname)
2657 dname_header = header;
2658 else if (header->type ==
2659 RBTDB_RDATATYPE_SIGDNAME)
2660 sigdname_header = header;
2661 else if (node != onode ||
2662 IS_STUB(search->rbtdb)) {
2664 * We've found an NS rdataset that
2665 * isn't at the origin node. We check
2666 * that they're not at the origin node,
2667 * because otherwise we'd erroneously
2668 * treat the zone top as if it were
2678 * Did we find anything?
2680 if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2681 ns_header != NULL) {
2683 * Note that NS has precedence over DNAME if both exist
2684 * in a zone. Otherwise DNAME take precedence over NS.
2687 search->zonecut_sigrdataset = NULL;
2688 } else if (dname_header != NULL) {
2689 found = dname_header;
2690 search->zonecut_sigrdataset = sigdname_header;
2691 } else if (ns_header != NULL) {
2693 search->zonecut_sigrdataset = NULL;
2696 if (found != NULL) {
2698 * We increment the reference count on node to ensure that
2699 * search->zonecut_rdataset will still be valid later.
2701 new_reference(search->rbtdb, node);
2702 search->zonecut = node;
2703 search->zonecut_rdataset = found;
2704 search->need_cleanup = ISC_TRUE;
2706 * Since we've found a zonecut, anything beneath it is
2707 * glue and is not subject to wildcard matching, so we
2708 * may clear search->wild.
2710 search->wild = ISC_FALSE;
2711 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2713 * If the caller does not want to find glue, then
2714 * this is the best answer and the search should
2717 result = DNS_R_PARTIALMATCH;
2722 * The search will continue beneath the zone cut.
2723 * This may or may not be the best match. In case it
2724 * is, we need to remember the node name.
2726 zcname = dns_fixedname_name(&search->zonecut_name);
2727 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2729 search->copy_name = ISC_TRUE;
2733 * There is no zonecut at this node which is active in this
2736 * If this is a "wild" node and the caller hasn't disabled
2737 * wildcard matching, remember that we've seen a wild node
2738 * in case we need to go searching for wildcard matches
2741 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2742 search->wild = ISC_TRUE;
2745 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2746 isc_rwlocktype_read);
/*
 * bind_rdataset: attach the slab described by 'header' (stored at 'node')
 * to the caller's disassociated 'rdataset'.
 *
 * Visible behaviour:
 *  - A NULL 'rdataset' is checked for first (presumably an early return;
 *    the statement itself is not visible in this listing).
 *  - A reference is taken on 'node'; 'rdataset' must have no methods yet.
 *  - methods, rdclass, type/covers (split from header->type), ttl
 *    (header->rdh_ttl minus 'now') and trust are filled in.
 *  - NEGATIVE / NXDOMAIN headers set the matching attribute bits;
 *    DNS_RDATASETATTR_OPTOUT is also set (its guarding condition is not
 *    visible here).
 *  - private1 = rbtdb, private2 = node, private3 = the raw slab that
 *    immediately follows the header structure.
 *  - rdataset->count receives header->count, which is post-incremented;
 *    a value of ISC_UINT32_MAX is replaced by 0.  Per the original
 *    comment, this increment is performed under only a reader lock.
 *  - Iterator state (privateuint4, private5) is reset.
 *  - header->noqname / header->closest are copied to private6 / private7,
 *    setting the NOQNAME / CLOSEST attributes when non-NULL.
 *  - For RESIGN headers the RESIGN attribute and resign time are copied;
 *    otherwise resign is 0.
 */
2752 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2753 rdatasetheader_t *header, isc_stdtime_t now,
2754 dns_rdataset_t *rdataset)
2756 unsigned char *raw; /* RDATASLAB */
2759 * Caller must be holding the node reader lock.
2760 * XXXJT: technically, we need a writer lock, since we'll increment
2761 * the header count below. However, since the actual counter value
2762 * doesn't matter, we prioritize performance here. (We may want to
2763 * use atomic increment when available).
2766 if (rdataset == NULL)
2769 new_reference(rbtdb, node);
2771 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2773 rdataset->methods = &rdataset_methods;
2774 rdataset->rdclass = rbtdb->common.rdclass;
2775 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2776 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2777 rdataset->ttl = header->rdh_ttl - now;
2778 rdataset->trust = header->trust;
2779 if (NEGATIVE(header))
2780 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2781 if (NXDOMAIN(header))
2782 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2784 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2785 rdataset->private1 = rbtdb;
2786 rdataset->private2 = node;
2787 raw = (unsigned char *)header + sizeof(*header);
2788 rdataset->private3 = raw;
2789 rdataset->count = header->count++;
2790 if (rdataset->count == ISC_UINT32_MAX)
2791 rdataset->count = 0;
2794 * Reset iterator state.
2796 rdataset->privateuint4 = 0;
2797 rdataset->private5 = NULL;
2800 * Add noqname proof.
2802 rdataset->private6 = header->noqname;
2803 if (rdataset->private6 != NULL)
2804 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2805 rdataset->private7 = header->closest;
2806 if (rdataset->private7 != NULL)
2807 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2810 * Copy out re-signing information.
2812 if (RESIGN(header)) {
2813 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2814 rdataset->resign = header->resign;
2816 rdataset->resign = 0;
/*
 * setup_delegation: hand the zone cut recorded in 'search' back to the
 * caller of a find operation.
 *
 * Visible behaviour:
 *  - Uses search->zonecut as the node and the type of
 *    search->zonecut_rdataset to pick the return code.
 *  - If 'foundname' is non-NULL and search->copy_name is set, the saved
 *    zone cut name is copied into 'foundname' first, so that a copy
 *    failure leaves nothing to undo.
 *  - If 'nodep' is non-NULL, the reference already held by the search
 *    block is reused and search->need_cleanup is cleared (the store to
 *    *nodep itself is not visible in this listing).
 *  - If 'rdataset' is non-NULL, the node read lock is taken, the zone cut
 *    rdataset is bound, and the signature rdataset is bound too when both
 *    'sigrdataset' and search->zonecut_sigrdataset are non-NULL.
 *
 * Returns DNS_R_DNAME when the zone cut type is DNAME, otherwise
 * DNS_R_DELEGATION.  The caller must not hold any node locks.
 */
2819 static inline isc_result_t
2820 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2821 dns_name_t *foundname, dns_rdataset_t *rdataset,
2822 dns_rdataset_t *sigrdataset)
2824 isc_result_t result;
2826 rbtdb_rdatatype_t type;
2827 dns_rbtnode_t *node;
2830 * The caller MUST NOT be holding any node locks.
2833 node = search->zonecut;
2834 type = search->zonecut_rdataset->type;
2837 * If we have to set foundname, we do it before anything else.
2838 * If we were to set foundname after we had set nodep or bound the
2839 * rdataset, then we'd have to undo that work if dns_name_copy()
2840 * failed. By setting foundname first, there's nothing to undo if
2843 if (foundname != NULL && search->copy_name) {
2844 zcname = dns_fixedname_name(&search->zonecut_name);
2845 result = dns_name_copy(zcname, foundname, NULL);
2846 if (result != ISC_R_SUCCESS)
2849 if (nodep != NULL) {
2851 * Note that we don't have to increment the node's reference
2852 * count here because we're going to use the reference we
2853 * already have in the search block.
2856 search->need_cleanup = ISC_FALSE;
2858 if (rdataset != NULL) {
2859 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2860 isc_rwlocktype_read);
2861 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2862 search->now, rdataset);
2863 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2864 bind_rdataset(search->rbtdb, node,
2865 search->zonecut_sigrdataset,
2866 search->now, sigrdataset);
2867 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2868 isc_rwlocktype_read);
2871 if (type == dns_rdatatype_dname)
2872 return (DNS_R_DNAME);
2873 return (DNS_R_DELEGATION);
/*
 * valid_glue: decide whether 'name'/'type' found at 'node' can be glue for
 * the zone cut recorded in 'search'.
 *
 * Visible behaviour:
 *  - Type filter: an NS rdataset is only considered when 'node' is the
 *    zone cut node itself; any other type must be A, AAAA or A6.
 *  - The raw slab that follows search->zonecut_rdataset is then walked:
 *    a 2-byte big-endian record count, and for each record a 2-byte
 *    big-endian length.  With DNS_RDATASET_FIXED the 4-bytes-per-record
 *    table after the count is skipped.
 *  - Each record is wrapped in 'region', turned into a dns_name_t and
 *    compared with 'name'; 'valid' starts out ISC_FALSE.
 *
 * The second argument of dns_name_fromregion() is the address of the local
 * 'region'; the listing previously showed it as a mis-encoded character
 * sequence, which is repaired here.
 *
 * NOTE(review): the loop header, the assignment of region.base, the
 * statements executed on a match and the return are not visible in this
 * listing; presumably a matching NS target makes the function return
 * ISC_TRUE -- confirm against the complete source.
 */
2876 static inline isc_boolean_t
2877 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2878 dns_rbtnode_t *node)
2880 unsigned char *raw; /* RDATASLAB */
2881 unsigned int count, size;
2883 isc_boolean_t valid = ISC_FALSE;
2884 dns_offsets_t offsets;
2885 isc_region_t region;
2886 rdatasetheader_t *header;
2889 * No additional locking is required.
2893 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2894 * if it occurs at a zone cut, but is not valid below it.
2896 if (type == dns_rdatatype_ns) {
2897 if (node != search->zonecut) {
2900 } else if (type != dns_rdatatype_a &&
2901 type != dns_rdatatype_aaaa &&
2902 type != dns_rdatatype_a6) {
2906 header = search->zonecut_rdataset;
2907 raw = (unsigned char *)header + sizeof(*header);
2908 count = raw[0] * 256 + raw[1];
2909 #if DNS_RDATASET_FIXED
2910 raw += 2 + (4 * count);
2917 size = raw[0] * 256 + raw[1];
2918 #if DNS_RDATASET_FIXED
2924 region.length = size;
2927 * XXX Until we have rdata structures, we have no choice but
2928 * to directly access the rdata format.
2930 dns_name_init(&ns_name, offsets);
2931 dns_name_fromregion(&ns_name, &region);
2932 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * activeempty: helper used to decide whether a name that has no active
 * data of its own still has an active node beneath it.
 *
 * Visible behaviour:
 *  - Advances 'chain' with dns_rbtnodechain_next() and, for each node
 *    reached, fetches the current node, takes its read lock and scans
 *    node->data for a header with serial <= search->serial that is not
 *    IGNOREd and EXISTS.
 *  - After the walk, 'prefix' and 'origin' are concatenated into 'next'
 *    and dns_name_issubdomain(next, name) is evaluated; 'answer' starts as
 *    ISC_FALSE.
 *
 * Note that 'chain' is advanced in place.
 *
 * NOTE(review): the loop-exit statements, the assignment to 'answer' and
 * the return are not visible in this listing; presumably the function
 * returns ISC_TRUE when the next active node is a subdomain of 'name'.
 */
2941 static inline isc_boolean_t
2942 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2945 dns_fixedname_t fnext;
2946 dns_fixedname_t forigin;
2951 dns_rbtnode_t *node;
2952 isc_result_t result;
2953 isc_boolean_t answer = ISC_FALSE;
2954 rdatasetheader_t *header;
2956 rbtdb = search->rbtdb;
2958 dns_name_init(&prefix, NULL);
2959 dns_fixedname_init(&fnext);
2960 next = dns_fixedname_name(&fnext);
2961 dns_fixedname_init(&forigin);
2962 origin = dns_fixedname_name(&forigin);
2964 result = dns_rbtnodechain_next(chain, NULL, NULL);
2965 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2967 result = dns_rbtnodechain_current(chain, &prefix,
2969 if (result != ISC_R_SUCCESS)
2971 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2972 isc_rwlocktype_read);
2973 for (header = node->data;
2975 header = header->next) {
2976 if (header->serial <= search->serial &&
2977 !IGNORE(header) && EXISTS(header))
2980 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2981 isc_rwlocktype_read);
2984 result = dns_rbtnodechain_next(chain, NULL, NULL);
2986 if (result == ISC_R_SUCCESS)
2987 result = dns_name_concatenate(&prefix, origin, next, NULL);
2988 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode: check whether 'qname' lies at or below an empty
 * non-terminal between it and the parent of the wildcard name 'wname'.
 * (The identifier spelling is kept as-is because callers use it.)
 *
 * Visible behaviour:
 *  - Works on a private copy of search->chain.
 *  - Walks backwards with dns_rbtnodechain_prev(), scanning each node
 *    under its read lock for a header with serial <= search->serial that
 *    is not IGNOREd and EXISTS, then builds 'prev' from name + origin;
 *    check_prev is cleared if that fails.
 *  - Walks forwards with dns_rbtnodechain_next() in the same way to build
 *    'next'; check_next is cleared if that fails.
 *  - 'tname' is 'wname' without its leading (wildcard) label.  Starting
 *    from a clone of 'qname', the leftmost label of 'rname' is removed
 *    repeatedly until it equals 'tname'; at each step 'prev' or 'next'
 *    being a subdomain of 'rname' is tested.
 *
 * NOTE(review): the loop-exit statements, the assignment to 'answer' and
 * the return are not visible in this listing; presumably ISC_TRUE is
 * returned when one of the subdomain tests succeeds.
 */
2993 static inline isc_boolean_t
2994 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2995 dns_fixedname_t fnext;
2996 dns_fixedname_t forigin;
2997 dns_fixedname_t fprev;
3005 dns_rbtnode_t *node;
3006 dns_rbtnodechain_t chain;
3007 isc_boolean_t check_next = ISC_TRUE;
3008 isc_boolean_t check_prev = ISC_TRUE;
3009 isc_boolean_t answer = ISC_FALSE;
3010 isc_result_t result;
3011 rdatasetheader_t *header;
3014 rbtdb = search->rbtdb;
3016 dns_name_init(&name, NULL);
3017 dns_name_init(&tname, NULL);
3018 dns_name_init(&rname, NULL);
3019 dns_fixedname_init(&fnext);
3020 next = dns_fixedname_name(&fnext);
3021 dns_fixedname_init(&fprev);
3022 prev = dns_fixedname_name(&fprev);
3023 dns_fixedname_init(&forigin);
3024 origin = dns_fixedname_name(&forigin);
3027 * Find if qname is at or below a empty node.
3028 * Use our own copy of the chain.
3031 chain = search->chain;
3034 result = dns_rbtnodechain_current(&chain, &name,
3036 if (result != ISC_R_SUCCESS)
3038 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3039 isc_rwlocktype_read);
3040 for (header = node->data;
3042 header = header->next) {
3043 if (header->serial <= search->serial &&
3044 !IGNORE(header) && EXISTS(header))
3047 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3048 isc_rwlocktype_read);
3051 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3052 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3053 if (result == ISC_R_SUCCESS)
3054 result = dns_name_concatenate(&name, origin, prev, NULL);
3055 if (result != ISC_R_SUCCESS)
3056 check_prev = ISC_FALSE;
3058 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3059 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3061 result = dns_rbtnodechain_current(&chain, &name,
3063 if (result != ISC_R_SUCCESS)
3065 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3066 isc_rwlocktype_read);
3067 for (header = node->data;
3069 header = header->next) {
3070 if (header->serial <= search->serial &&
3071 !IGNORE(header) && EXISTS(header))
3074 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3075 isc_rwlocktype_read);
3078 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3080 if (result == ISC_R_SUCCESS)
3081 result = dns_name_concatenate(&name, origin, next, NULL);
3082 if (result != ISC_R_SUCCESS)
3083 check_next = ISC_FALSE;
3085 dns_name_clone(qname, &rname);
3088 * Remove the wildcard label to find the terminal name.
3090 n = dns_name_countlabels(wname);
3091 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3094 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3095 (check_next && dns_name_issubdomain(next, &rname))) {
3100 * Remove the left hand label.
3102 n = dns_name_countlabels(&rname);
3103 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3104 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard: search the ancestor levels recorded in search->chain for
 * a wildcard that matches the query name.
 *
 * Visible behaviour:
 *  - Starts from search->chain.level_matches and examines one ancestor
 *    level per iteration, moving on via search->chain.levels[i].
 *  - Under the level node's read lock, scans its headers for one with
 *    serial <= search->serial that is not IGNOREd and EXISTS, i.e. whether
 *    the level is active in this version.
 *  - For a level being checked for wildcards, builds the wildcard name by
 *    prefixing dns_wildcardname to the node name and appending the names
 *    of the enclosing levels from search->chain.levels[], then looks it up
 *    in rbtdb->tree with DNS_RBTFIND_EMPTYDATA using a private chain.
 *  - If the wildcard node exists and either has an active header or
 *    activeempty() reports activity beneath it, activeemtpynode() is
 *    consulted; when that reports true the function returns
 *    ISC_R_NOTFOUND, otherwise the wildcard node is the answer and the
 *    result stays ISC_R_SUCCESS.
 *  - A lookup result other than ISC_R_NOTFOUND / DNS_R_PARTIALMATCH is
 *    treated as an error.
 *  - If the level node itself is active, wildcards at higher levels
 *    cannot apply and the result is ISC_R_NOTFOUND.
 *
 * The caller must hold the tree lock; the original comment on node locks
 * is truncated in this listing.
 *
 * NOTE(review): the loop framing, the tests of the 'wild' and 'active'
 * flags, the store to *nodep and the final return are not visible here;
 * the description of those parts follows the surviving comments only.
 */
3108 static inline isc_result_t
3109 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3113 dns_rbtnode_t *node, *level_node, *wnode;
3114 rdatasetheader_t *header;
3115 isc_result_t result = ISC_R_NOTFOUND;
3118 dns_fixedname_t fwname;
3120 isc_boolean_t done, wild, active;
3121 dns_rbtnodechain_t wchain;
3124 * Caller must be holding the tree lock and MUST NOT be holding
3129 * Examine each ancestor level. If the level's wild bit
3130 * is set, then construct the corresponding wildcard name and
3131 * search for it. If the wildcard node exists, and is active in
3132 * this version, we're done. If not, then we next check to see
3133 * if the ancestor is active in this version. If so, then there
3134 * can be no possible wildcard match and again we're done. If not,
3135 * continue the search.
3138 rbtdb = search->rbtdb;
3139 i = search->chain.level_matches;
3143 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3144 isc_rwlocktype_read);
3147 * First we try to figure out if this node is active in
3148 * the search's version. We do this now, even though we
3149 * may not need the information, because it simplifies the
3150 * locking and code flow.
3152 for (header = node->data;
3154 header = header->next) {
3155 if (header->serial <= search->serial &&
3156 !IGNORE(header) && EXISTS(header))
3169 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3170 isc_rwlocktype_read);
3174 * Construct the wildcard name for this level.
3176 dns_name_init(&name, NULL);
3177 dns_rbt_namefromnode(node, &name);
3178 dns_fixedname_init(&fwname);
3179 wname = dns_fixedname_name(&fwname);
3180 result = dns_name_concatenate(dns_wildcardname, &name,
3183 while (result == ISC_R_SUCCESS && j != 0) {
3185 level_node = search->chain.levels[j];
3186 dns_name_init(&name, NULL);
3187 dns_rbt_namefromnode(level_node, &name);
3188 result = dns_name_concatenate(wname,
3193 if (result != ISC_R_SUCCESS)
3197 dns_rbtnodechain_init(&wchain, NULL);
3198 result = dns_rbt_findnode(rbtdb->tree, wname,
3199 NULL, &wnode, &wchain,
3200 DNS_RBTFIND_EMPTYDATA,
3202 if (result == ISC_R_SUCCESS) {
3206 * We have found the wildcard node. If it
3207 * is active in the search's version, we're
3210 lock = &rbtdb->node_locks[wnode->locknum].lock;
3211 NODE_LOCK(lock, isc_rwlocktype_read);
3212 for (header = wnode->data;
3214 header = header->next) {
3215 if (header->serial <= search->serial &&
3216 !IGNORE(header) && EXISTS(header))
3219 NODE_UNLOCK(lock, isc_rwlocktype_read);
3220 if (header != NULL ||
3221 activeempty(search, &wchain, wname)) {
3222 if (activeemtpynode(search, qname,
3224 return (ISC_R_NOTFOUND);
3227 * The wildcard node is active!
3229 * Note: result is still ISC_R_SUCCESS
3230 * so we don't have to set it.
3235 } else if (result != ISC_R_NOTFOUND &&
3236 result != DNS_R_PARTIALMATCH) {
3238 * An error has occurred. Bail out.
3246 * The level node is active. Any wildcarding
3247 * present at higher levels has no
3248 * effect and we're done.
3250 result = ISC_R_NOTFOUND;
3256 node = search->chain.levels[i];
/*
 * matchparams: test whether any NSEC3 record stored under 'header' uses
 * the NSEC3 parameters of the version being searched.
 *
 * 'header' must describe an NSEC3 rdataset (enforced with REQUIRE).  The
 * raw slab after the header is walked: a 2-byte big-endian record count,
 * then for each record a 2-byte big-endian length.  With
 * DNS_RDATASET_FIXED the 4-bytes-per-record table after the count is
 * skipped.  Each record is wrapped in 'region', converted to a
 * dns_rdata_nsec3_t and its hash, iterations, salt length and salt bytes
 * are compared with the values in search->rbtversion.  'rdata' is reset
 * before the next record is examined.
 *
 * The last argument of dns_rdata_fromregion() is the address of the local
 * 'region'; the listing previously showed it as a mis-encoded character
 * sequence, which is repaired here.
 *
 * NOTE(review): the assignment of region.base, the pointer advance and
 * both return statements are not visible in this listing; presumably the
 * function returns ISC_TRUE on the first matching record and ISC_FALSE
 * otherwise -- confirm against the complete source.
 */
3264 static isc_boolean_t
3265 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3267 dns_rdata_t rdata = DNS_RDATA_INIT;
3268 dns_rdata_nsec3_t nsec3;
3269 unsigned char *raw; /* RDATASLAB */
3270 unsigned int rdlen, count;
3271 isc_region_t region;
3272 isc_result_t result;
3274 REQUIRE(header->type == dns_rdatatype_nsec3);
3276 raw = (unsigned char *)header + sizeof(*header);
3277 count = raw[0] * 256 + raw[1]; /* count */
3278 #if DNS_RDATASET_FIXED
3279 raw += count * 4 + 2;
3283 while (count-- > 0) {
3284 rdlen = raw[0] * 256 + raw[1];
3285 #if DNS_RDATASET_FIXED
3291 region.length = rdlen;
3292 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3293 dns_rdatatype_nsec3, &region);
3295 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3296 INSIST(result == ISC_R_SUCCESS);
3297 if (nsec3.hash == search->rbtversion->hash &&
3298 nsec3.iterations == search->rbtversion->iterations &&
3299 nsec3.salt_length == search->rbtversion->salt_length &&
3300 memcmp(nsec3.salt, search->rbtversion->salt,
3301 nsec3.salt_length) == 0)
3303 dns_rdata_reset(&rdata);
3309 * Find node of the NSEC/NSEC3 record that is 'name'.
/*
 * previous_closest_nsec: step to the previous candidate node when looking
 * for the closest enclosing NSEC/NSEC3 record.
 *
 * '*nodep' must be NULL on entry (enforced with REQUIRE).
 *
 * Visible behaviour:
 *  - For NSEC3 ('type' == dns_rdatatype_nsec3) the main search chain is
 *    simply moved back with dns_rbtnodechain_prev() and the current
 *    position is read back into 'name' / 'origin'.
 *  - Otherwise the auxiliary tree search->rbtdb->nsec is used:
 *      * On the first trip, '*firstp' is cleared, 'nsecchain' is
 *        initialised and the name built from 'name' + 'origin' is looked
 *        up in the auxiliary tree.  An exact hit steps the auxiliary
 *        chain back one node; ISC_R_NOTFOUND / DNS_R_PARTIALMATCH reads
 *        the current chain position instead, mapping ISC_R_NOTFOUND to
 *        ISC_R_NOMORE.
 *      * On later trips the auxiliary chain is stepped back directly.
 *      * DNS_R_NEWORIGIN from a step back is treated as success.
 *      * The resulting name is concatenated into 'target' and looked up
 *        in the main tree, filling '*nodep' and search->chain.
 *      * A main-tree result other than ISC_R_SUCCESS,
 *        DNS_R_PARTIALMATCH or ISC_R_NOTFOUND is logged and reported as
 *        DNS_R_BADDB.
 *
 * NOTE(review): the test of '*firstp', the enclosing loop and most return
 * statements are not visible in this listing; the retry behaviour for
 * auxiliary-tree nodes awaiting deletion is taken from the surviving
 * comment and should be confirmed against the complete source.
 */
3311 static inline isc_result_t
3312 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3313 dns_name_t *name, dns_name_t *origin,
3314 dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3315 isc_boolean_t *firstp)
3317 dns_fixedname_t ftarget;
3319 dns_rbtnode_t *nsecnode;
3320 isc_result_t result;
3322 REQUIRE(nodep != NULL && *nodep == NULL);
3324 if (type == dns_rdatatype_nsec3) {
3325 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3326 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3328 result = dns_rbtnodechain_current(&search->chain, name, origin,
3333 dns_fixedname_init(&ftarget);
3334 target = dns_fixedname_name(&ftarget);
3339 * Construct the name of the second node to check.
3340 * It is the first node sought in the NSEC tree.
3342 *firstp = ISC_FALSE;
3343 dns_rbtnodechain_init(nsecchain, NULL);
3344 result = dns_name_concatenate(name, origin,
3346 if (result != ISC_R_SUCCESS)
3349 result = dns_rbt_findnode(search->rbtdb->nsec,
3351 &nsecnode, nsecchain,
3352 DNS_RBTFIND_NOOPTIONS,
3354 if (result == ISC_R_SUCCESS) {
3356 * Since this was the first loop, finding the
3357 * name in the NSEC tree implies that the first
3358 * node checked in the main tree had an
3359 * unacceptable NSEC record.
3360 * Try the previous node in the NSEC tree.
3362 result = dns_rbtnodechain_prev(nsecchain,
3364 if (result == DNS_R_NEWORIGIN)
3365 result = ISC_R_SUCCESS;
3366 } else if (result == ISC_R_NOTFOUND ||
3367 result == DNS_R_PARTIALMATCH) {
3368 result = dns_rbtnodechain_current(nsecchain,
3369 name, origin, NULL);
3370 if (result == ISC_R_NOTFOUND)
3371 result = ISC_R_NOMORE;
3375 * This is a second or later trip through the auxiliary
3376 * tree for the name of a third or earlier NSEC node in
3377 * the main tree. Previous trips through the NSEC tree
3378 * must have found nodes in the main tree with NSEC
3379 * records. Perhaps they lacked signature records.
3381 result = dns_rbtnodechain_prev(nsecchain, name, origin);
3382 if (result == DNS_R_NEWORIGIN)
3383 result = ISC_R_SUCCESS;
3385 if (result != ISC_R_SUCCESS)
3389 * Construct the name to seek in the main tree.
3391 result = dns_name_concatenate(name, origin, target, NULL);
3392 if (result != ISC_R_SUCCESS)
3396 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3397 nodep, &search->chain,
3398 DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3399 if (result == ISC_R_SUCCESS)
3403 * There should always be a node in the main tree with the
3404 * same name as the node in the auxiliary NSEC tree, except for
3405 * nodes in the auxiliary tree that are awaiting deletion.
3407 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3408 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3409 DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3410 "previous_closest_nsec(): %s",
3411 isc_result_totext(result));
3412 return (DNS_R_BADDB);
3418 * Find the NSEC/NSEC3 which is at or before the current point on the
3419 * search chain. For NSEC3 records only NSEC3 records that match the
3420 * current NSEC3PARAM record are considered.
/*
 * find_closest_nsec: starting at the current position of search->chain,
 * walk backwards until a node carrying a usable NSEC (or NSEC3) rdataset
 * is found, and bind it for the caller.
 *
 * Visible behaviour:
 *  - 'tree' selects the record type: the NSEC3 tree uses
 *    dns_rdatatype_nsec3 / RBTDB_RDATATYPE_SIGNSEC3, otherwise
 *    dns_rdatatype_nsec / RBTDB_RDATATYPE_SIGNSEC.  A signature is
 *    required when 'secure' == dns_db_secure.
 *  - Each iteration reads the current chain node, takes its read lock and
 *    scans its headers (following header->down for a version with
 *    serial <= search->serial, skipping NONEXISTENT ones), recording the
 *    record of the wanted type and its signature.
 *  - An NSEC3 record whose parameters fail matchparams() while the
 *    version has NSEC3 is treated as if the node were empty.
 *  - When the record is found and either a signature is present or none
 *    is needed, 'name' + 'origin' is concatenated (presumably into
 *    'foundname'), a node reference is taken when 'nodep' is non-NULL and
 *    the rdataset(s) are bound.
 *  - An active node with neither record nor signature is treated as empty
 *    and the walk continues via previous_closest_nsec(); an active node
 *    with only one of the two yields DNS_R_BADDB.
 *  - The node lock is released at the end of each iteration; the loop
 *    repeats while the node was empty and the result is ISC_R_SUCCESS.
 *  - Afterwards 'nsecchain' is invalidated.  If the walk ran out
 *    (ISC_R_NOMORE) and 'wraps' is set, the chain is repositioned at the
 *    last node of 'tree'; a remaining ISC_R_NOMORE is reported as
 *    DNS_R_BADDB.
 *
 * NOTE(review): assignments to 'found', 'foundsig' and 'wraps', several
 * branch/brace lines, the retry jump after wrapping and the final return
 * are not visible in this listing.
 */
3422 static inline isc_result_t
3423 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3424 dns_name_t *foundname, dns_rdataset_t *rdataset,
3425 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3426 dns_db_secure_t secure)
3428 dns_rbtnode_t *node, *prevnode;
3429 rdatasetheader_t *header, *header_next, *found, *foundsig;
3430 dns_rbtnodechain_t nsecchain;
3431 isc_boolean_t empty_node;
3432 isc_result_t result;
3433 dns_fixedname_t fname, forigin;
3434 dns_name_t *name, *origin;
3435 dns_rdatatype_t type;
3436 rbtdb_rdatatype_t sigtype;
3437 isc_boolean_t wraps;
3438 isc_boolean_t first = ISC_TRUE;
3439 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3441 if (tree == search->rbtdb->nsec3) {
3442 type = dns_rdatatype_nsec3;
3443 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3446 type = dns_rdatatype_nsec;
3447 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3452 * Use the auxiliary tree only starting with the second node in the
3453 * hope that the original node will be right much of the time.
3455 dns_fixedname_init(&fname);
3456 name = dns_fixedname_name(&fname);
3457 dns_fixedname_init(&forigin);
3458 origin = dns_fixedname_name(&forigin);
3462 result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3463 if (result != ISC_R_SUCCESS)
3466 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3467 isc_rwlocktype_read);
3470 empty_node = ISC_TRUE;
3471 for (header = node->data;
3473 header = header_next) {
3474 header_next = header->next;
3476 * Look for an active, extant NSEC or RRSIG NSEC.
3479 if (header->serial <= search->serial &&
3482 * Is this a "this rdataset doesn't
3485 if (NONEXISTENT(header))
3489 header = header->down;
3490 } while (header != NULL);
3491 if (header != NULL) {
3493 * We now know that there is at least one
3494 * active rdataset at this node.
3496 empty_node = ISC_FALSE;
3497 if (header->type == type) {
3499 if (foundsig != NULL)
3501 } else if (header->type == sigtype) {
3509 if (found != NULL && search->rbtversion->havensec3 &&
3510 found->type == dns_rdatatype_nsec3 &&
3511 !matchparams(found, search)) {
3512 empty_node = ISC_TRUE;
3515 result = previous_closest_nsec(type, search,
3519 } else if (found != NULL &&
3520 (foundsig != NULL || !need_sig)) {
3522 * We've found the right NSEC/NSEC3 record.
3524 * Note: for this to really be the right
3525 * NSEC record, it's essential that the NSEC
3526 * records of any nodes obscured by a zone
3527 * cut have been removed; we assume this is
3530 result = dns_name_concatenate(name, origin,
3532 if (result == ISC_R_SUCCESS) {
3533 if (nodep != NULL) {
3534 new_reference(search->rbtdb,
3538 bind_rdataset(search->rbtdb, node,
3541 if (foundsig != NULL)
3542 bind_rdataset(search->rbtdb,
3548 } else if (found == NULL && foundsig == NULL) {
3550 * This node is active, but has no NSEC or
3551 * RRSIG NSEC. That means it's glue or
3552 * other obscured zone data that isn't
3553 * relevant for our search. Treat the
3554 * node as if it were empty and keep looking.
3556 empty_node = ISC_TRUE;
3557 result = previous_closest_nsec(type, search,
3564 * We found an active node, but either the
3565 * NSEC or the RRSIG NSEC is missing. This
3568 result = DNS_R_BADDB;
3572 * This node isn't active. We've got to keep
3575 result = previous_closest_nsec(type, search,
3576 name, origin, &prevnode,
3577 &nsecchain, &first);
3579 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3580 isc_rwlocktype_read);
3583 } while (empty_node && result == ISC_R_SUCCESS);
3586 dns_rbtnodechain_invalidate(&nsecchain);
3588 if (result == ISC_R_NOMORE && wraps) {
3589 result = dns_rbtnodechain_last(&search->chain, tree,
3591 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3598 * If the result is ISC_R_NOMORE, then we got to the beginning of
3599 * the database and didn't find a NSEC record. This shouldn't
3602 if (result == ISC_R_NOMORE)
3603 result = DNS_R_BADDB;
3609 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3610 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3611 dns_dbnode_t **nodep, dns_name_t *foundname,
3612 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3614 dns_rbtnode_t *node = NULL;
3615 isc_result_t result;
3616 rbtdb_search_t search;
3617 isc_boolean_t cname_ok = ISC_TRUE;
3618 isc_boolean_t close_version = ISC_FALSE;
3619 isc_boolean_t maybe_zonecut = ISC_FALSE;
3620 isc_boolean_t at_zonecut = ISC_FALSE;
3622 isc_boolean_t empty_node;
3623 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3624 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3625 rbtdb_rdatatype_t sigtype;
3626 isc_boolean_t active;
3627 dns_rbtnodechain_t chain;
3631 search.rbtdb = (dns_rbtdb_t *)db;
3633 REQUIRE(VALID_RBTDB(search.rbtdb));
3634 INSIST(version == NULL ||
3635 ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
3638 * We don't care about 'now'.
3643 * If the caller didn't supply a version, attach to the current
3646 if (version == NULL) {
3647 currentversion(db, &version);
3648 close_version = ISC_TRUE;
3651 search.rbtversion = version;
3652 search.serial = search.rbtversion->serial;
3653 search.options = options;
3654 search.copy_name = ISC_FALSE;
3655 search.need_cleanup = ISC_FALSE;
3656 search.wild = ISC_FALSE;
3657 search.zonecut = NULL;
3658 dns_fixedname_init(&search.zonecut_name);
3659 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3663 * 'wild' will be true iff. we've matched a wildcard.
3667 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3670 * Search down from the root of the tree. If, while going down, we
3671 * encounter a callback node, zone_zonecut_callback() will search the
3672 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3674 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3676 result = dns_rbt_findnode(tree, name, foundname, &node,
3677 &search.chain, DNS_RBTFIND_EMPTYDATA,
3678 zone_zonecut_callback, &search);
3680 if (result == DNS_R_PARTIALMATCH) {
3682 if (search.zonecut != NULL) {
3683 result = setup_delegation(&search, nodep, foundname,
3684 rdataset, sigrdataset);
3690 * At least one of the levels in the search chain
3691 * potentially has a wildcard. For each such level,
3692 * we must see if there's a matching wildcard active
3693 * in the current version.
3695 result = find_wildcard(&search, &node, name);
3696 if (result == ISC_R_SUCCESS) {
3697 result = dns_name_copy(name, foundname, NULL);
3698 if (result != ISC_R_SUCCESS)
3703 else if (result != ISC_R_NOTFOUND)
3707 chain = search.chain;
3708 active = activeempty(&search, &chain, name);
3711 * If we're here, then the name does not exist, is not
3712 * beneath a zonecut, and there's no matching wildcard.
3714 if ((search.rbtversion->secure == dns_db_secure &&
3715 !search.rbtversion->havensec3) ||
3716 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3717 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3719 result = find_closest_nsec(&search, nodep, foundname,
3720 rdataset, sigrdataset, tree,
3721 search.rbtversion->secure);
3722 if (result == ISC_R_SUCCESS)
3723 result = active ? DNS_R_EMPTYNAME :
3726 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3728 } else if (result != ISC_R_SUCCESS)
3733 * We have found a node whose name is the desired name, or we
3734 * have matched a wildcard.
3737 if (search.zonecut != NULL) {
3739 * If we're beneath a zone cut, we don't want to look for
3740 * CNAMEs because they're not legitimate zone glue.
3742 cname_ok = ISC_FALSE;
3745 * The node may be a zone cut itself. If it might be one,
3746 * make sure we check for it later.
3748 * DS records live above the zone cut in ordinary zone so
3749 * we want to ignore any referral.
3751 * Stub zones don't have anything "above" the delegation so
3752 * we always return a referral.
3754 if (node->find_callback &&
3755 ((node != search.rbtdb->origin_node &&
3756 !dns_rdatatype_atparent(type)) ||
3757 IS_STUB(search.rbtdb)))
3758 maybe_zonecut = ISC_TRUE;
3762 * Certain DNSSEC types are not subject to CNAME matching
3763 * (RFC4035, section 2.5 and RFC3007).
3765 * We don't check for RRSIG, because we don't store RRSIG records
3768 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3769 cname_ok = ISC_FALSE;
3772 * We now go looking for rdata...
3775 lock = &search.rbtdb->node_locks[node->locknum].lock;
3776 NODE_LOCK(lock, isc_rwlocktype_read);
3780 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3784 empty_node = ISC_TRUE;
3785 for (header = node->data; header != NULL; header = header_next) {
3786 header_next = header->next;
3788 * Look for an active, extant rdataset.
3791 if (header->serial <= search.serial &&
3794 * Is this a "this rdataset doesn't
3797 if (NONEXISTENT(header))
3801 header = header->down;
3802 } while (header != NULL);
3803 if (header != NULL) {
3805 * We now know that there is at least one active
3806 * rdataset at this node.
3808 empty_node = ISC_FALSE;
3811 * Do special zone cut handling, if requested.
3813 if (maybe_zonecut &&
3814 header->type == dns_rdatatype_ns) {
3816 * We increment the reference count on node to
3817 * ensure that search->zonecut_rdataset will
3818 * still be valid later.
3820 new_reference(search.rbtdb, node);
3821 search.zonecut = node;
3822 search.zonecut_rdataset = header;
3823 search.zonecut_sigrdataset = NULL;
3824 search.need_cleanup = ISC_TRUE;
3825 maybe_zonecut = ISC_FALSE;
3826 at_zonecut = ISC_TRUE;
3828 * It is not clear if KEY should still be
3829 * allowed at the parent side of the zone
3830 * cut or not. It is needed for RFC3007
3831 * validated updates.
3833 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3834 && type != dns_rdatatype_nsec
3835 && type != dns_rdatatype_key) {
3837 * Glue is not OK, but any answer we
3838 * could return would be glue. Return
3844 if (found != NULL && foundsig != NULL)
3850 * If the NSEC3 record doesn't match the chain
3851 * we are using behave as if it isn't here.
3853 if (header->type == dns_rdatatype_nsec3 &&
3854 !matchparams(header, &search)) {
3855 NODE_UNLOCK(lock, isc_rwlocktype_read);
3859 * If we found a type we were looking for,
3862 if (header->type == type ||
3863 type == dns_rdatatype_any ||
3864 (header->type == dns_rdatatype_cname &&
3867 * We've found the answer!
3870 if (header->type == dns_rdatatype_cname &&
3873 * We may be finding a CNAME instead
3874 * of the desired type.
3876 * If we've already got the CNAME RRSIG,
3877 * use it, otherwise change sigtype
3878 * so that we find it.
3880 if (cnamesig != NULL)
3881 foundsig = cnamesig;
3884 RBTDB_RDATATYPE_SIGCNAME;
3887 * If we've got all we need, end the search.
3889 if (!maybe_zonecut && foundsig != NULL)
3891 } else if (header->type == sigtype) {
3893 * We've found the RRSIG rdataset for our
3894 * target type. Remember it.
3898 * If we've got all we need, end the search.
3900 if (!maybe_zonecut && found != NULL)
3902 } else if (header->type == dns_rdatatype_nsec &&
3903 !search.rbtversion->havensec3) {
3905 * Remember a NSEC rdataset even if we're
3906 * not specifically looking for it, because
3907 * we might need it later.
3909 nsecheader = header;
3910 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3911 !search.rbtversion->havensec3) {
3913 * If we need the NSEC rdataset, we'll also
3914 * need its signature.
3917 } else if (cname_ok &&
3918 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3920 * If we get a CNAME match, we'll also need
3930 * We have an exact match for the name, but there are no
3931 * active rdatasets in the desired version. That means that
3932 * this node doesn't exist in the desired version, and that
3933 * we really have a partial match.
3936 NODE_UNLOCK(lock, isc_rwlocktype_read);
3942 * If we didn't find what we were looking for...
3944 if (found == NULL) {
3945 if (search.zonecut != NULL) {
3947 * We were trying to find glue at a node beneath a
3948 * zone cut, but didn't.
3950 * Return the delegation.
3952 NODE_UNLOCK(lock, isc_rwlocktype_read);
3953 result = setup_delegation(&search, nodep, foundname,
3954 rdataset, sigrdataset);
3958 * The desired type doesn't exist.
3960 result = DNS_R_NXRRSET;
3961 if (search.rbtversion->secure == dns_db_secure &&
3962 !search.rbtversion->havensec3 &&
3963 (nsecheader == NULL || nsecsig == NULL)) {
3965 * The zone is secure but there's no NSEC,
3966 * or the NSEC has no signature!
3969 result = DNS_R_BADDB;
3973 NODE_UNLOCK(lock, isc_rwlocktype_read);
3974 result = find_closest_nsec(&search, nodep, foundname,
3975 rdataset, sigrdataset,
3977 search.rbtversion->secure);
3978 if (result == ISC_R_SUCCESS)
3979 result = DNS_R_EMPTYWILD;
3982 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3986 * There's no NSEC record, and we were told
3989 result = DNS_R_BADDB;
3992 if (nodep != NULL) {
3993 new_reference(search.rbtdb, node);
3996 if ((search.rbtversion->secure == dns_db_secure &&
3997 !search.rbtversion->havensec3) ||
3998 (search.options & DNS_DBFIND_FORCENSEC) != 0)
4000 bind_rdataset(search.rbtdb, node, nsecheader,
4002 if (nsecsig != NULL)
4003 bind_rdataset(search.rbtdb, node,
4004 nsecsig, 0, sigrdataset);
4007 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4012 * We found what we were looking for, or we found a CNAME.
4015 if (type != found->type &&
4016 type != dns_rdatatype_any &&
4017 found->type == dns_rdatatype_cname) {
4019 * We weren't doing an ANY query and we found a CNAME instead
4020 * of the type we were looking for, so we need to indicate
4021 * that result to the caller.
4023 result = DNS_R_CNAME;
4024 } else if (search.zonecut != NULL) {
4026 * If we're beneath a zone cut, we must indicate that the
4027 * result is glue, unless we're actually at the zone cut
4028 * and the type is NSEC or KEY.
4030 if (search.zonecut == node) {
4032 * It is not clear if KEY should still be
4033 * allowed at the parent side of the zone
4034 * cut or not. It is needed for RFC3007
4035 * validated updates.
4037 if (type == dns_rdatatype_nsec ||
4038 type == dns_rdatatype_nsec3 ||
4039 type == dns_rdatatype_key)
4040 result = ISC_R_SUCCESS;
4041 else if (type == dns_rdatatype_any)
4042 result = DNS_R_ZONECUT;
4044 result = DNS_R_GLUE;
4046 result = DNS_R_GLUE;
4048 * We might have found data that isn't glue, but was occluded
4049 * by a dynamic update. If the caller cares about this, they
4050 * will have told us to validate glue.
4052 * XXX We should cache the glue validity state!
4054 if (result == DNS_R_GLUE &&
4055 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4056 !valid_glue(&search, foundname, type, node)) {
4057 NODE_UNLOCK(lock, isc_rwlocktype_read);
4058 result = setup_delegation(&search, nodep, foundname,
4059 rdataset, sigrdataset);
4064 * An ordinary successful query!
4066 result = ISC_R_SUCCESS;
4069 if (nodep != NULL) {
4071 new_reference(search.rbtdb, node);
4073 search.need_cleanup = ISC_FALSE;
4077 if (type != dns_rdatatype_any) {
4078 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4079 if (foundsig != NULL)
4080 bind_rdataset(search.rbtdb, node, foundsig, 0,
4085 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4088 NODE_UNLOCK(lock, isc_rwlocktype_read);
4091 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4094 * If we found a zonecut but aren't going to use it, we have to
4097 if (search.need_cleanup) {
4098 node = search.zonecut;
4099 INSIST(node != NULL);
4100 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4102 NODE_LOCK(lock, isc_rwlocktype_read);
4103 decrement_reference(search.rbtdb, node, 0,
4104 isc_rwlocktype_read, isc_rwlocktype_none,
4106 NODE_UNLOCK(lock, isc_rwlocktype_read);
4110 closeversion(db, &version, ISC_FALSE);
4112 dns_rbtnodechain_reset(&search.chain);
/*
 * Placeholder findzonecut handler: it performs no lookup.  The visible
 * body raises FATAL_ERROR with the message "zone_findzonecut() called!"
 * and is then written to yield ISC_R_NOTIMPLEMENTED.  None of the
 * parameters is read in the visible lines (sigrdataset is explicitly
 * marked UNUSED).
 * NOTE(review): whether FATAL_ERROR returns control is not visible here;
 * the trailing return may exist only to satisfy the compiler - confirm.
 */
4118 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4119 isc_stdtime_t now, dns_dbnode_t **nodep,
4120 dns_name_t *foundname,
4121 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4130 UNUSED(sigrdataset);
4132 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4135 return (ISC_R_NOTIMPLEMENTED);
/*
 * Tree-walk callback that cache_find() hands to dns_rbt_findnode().
 *
 * 'arg' is the rbtdb_search_t of the lookup in progress; it must not have
 * a zonecut recorded yet (REQUIRE below).  With the node lock held, the
 * node's rdataset headers are scanned for a DNAME rdataset and its RRSIG.
 * Headers whose rdh_ttl is at or before search->now are handled as stale
 * along the way: if they are older than now - RBTDB_VIRTUAL and write
 * access can be obtained, they are either freed (node unreferenced) or
 * flagged RDATASET_ATTR_STALE.
 *
 * If a DNAME header was found and its trust is not pending (or the search
 * has DNS_DBFIND_PENDINGOK), a reference is taken on the node, the node
 * and headers are stored in search->zonecut*, need_cleanup is set and
 * result becomes DNS_R_PARTIALMATCH.  Otherwise result is DNS_R_CONTINUE.
 * NOTE(review): the return statement is not part of the visible lines;
 * presumably 'result' is what is returned - confirm.
 */
4139 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4140 rbtdb_search_t *search = arg;
4141 rdatasetheader_t *header, *header_prev, *header_next;
4142 rdatasetheader_t *dname_header, *sigdname_header;
4143 isc_result_t result;
4145 isc_rwlocktype_t locktype;
4149 REQUIRE(search->zonecut == NULL);
4152 * Keep compiler silent.
4156 lock = &(search->rbtdb->node_locks[node->locknum].lock);
/*
 * Start with a read lock.  locktype records the mode actually held so
 * that the final NODE_UNLOCK matches it after a possible upgrade.
 */
4157 locktype = isc_rwlocktype_read;
4158 NODE_LOCK(lock, locktype);
4161 * Look for a DNAME or RRSIG DNAME rdataset.
4163 dname_header = NULL;
4164 sigdname_header = NULL;
4166 for (header = node->data; header != NULL; header = header_next) {
4167 header_next = header->next;
4168 if (header->rdh_ttl <= search->now) {
4170 * This rdataset is stale. If no one else is
4171 * using the node, we can clean it up right
4172 * now, otherwise we mark it as stale, and
4173 * the node as dirty, so it will get cleaned
4176 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4177 (locktype == isc_rwlocktype_write ||
4178 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4180 * We update the node's status only when we
4181 * can get write access; otherwise, we leave
4182 * others to this work. Periodical cleaning
4183 * will eventually take the job as the last
4185 * We won't downgrade the lock, since other
4186 * rdatasets are probably stale, too.
4188 locktype = isc_rwlocktype_write;
4190 if (dns_rbtnode_refcurrent(node) == 0) {
4194 * header->down can be non-NULL if the
4195 * refcount has just decremented to 0
4196 * but decrement_reference() has not
4197 * performed clean_cache_node(), in
4198 * which case we need to purge the
4199 * stale headers first.
4201 mctx = search->rbtdb->common.mctx;
4202 clean_stale_headers(search->rbtdb,
4205 if (header_prev != NULL)
4209 node->data = header->next;
4210 free_rdataset(search->rbtdb, mctx,
4213 header->attributes |=
4214 RDATASET_ATTR_STALE;
4216 header_prev = header;
4219 header_prev = header;
4220 } else if (header->type == dns_rdatatype_dname &&
4222 dname_header = header;
4223 header_prev = header;
4224 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4226 sigdname_header = header;
4227 header_prev = header;
4229 header_prev = header;
4232 if (dname_header != NULL &&
4233 (!DNS_TRUST_PENDING(dname_header->trust) ||
4234 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4236 * We increment the reference count on node to ensure that
4237 * search->zonecut_rdataset will still be valid later.
4239 new_reference(search->rbtdb, node);
4240 INSIST(!ISC_LINK_LINKED(node, deadlink));
/*
 * Publish the DNAME node and headers in the search state.  need_cleanup
 * tells the caller that the reference taken above is still outstanding.
 */
4241 search->zonecut = node;
4242 search->zonecut_rdataset = dname_header;
4243 search->zonecut_sigrdataset = sigdname_header;
4244 search->need_cleanup = ISC_TRUE;
4245 result = DNS_R_PARTIALMATCH;
4247 result = DNS_R_CONTINUE;
4249 NODE_UNLOCK(lock, locktype);
/*
 * Look for an NS rdataset (and its RRSIG) at 'node' and, when that node
 * has none and chain levels remain (i > 0), at search->chain.levels[i].
 * The caller must already hold the tree lock.
 *
 * Each node examined is locked for reading; headers whose rdh_ttl is at
 * or before search->now go through the stale-header handling, which may
 * upgrade the lock to write.  When an NS header is found:
 *   - foundname (if non-NULL) is built first, from the node's own name
 *     plus the names of the chain levels, so that a naming failure leaves
 *     nothing to undo;
 *   - result becomes DNS_R_DELEGATION, a node reference is taken if nodep
 *     is non-NULL, and rdataset/sigrdataset are bound using search->now;
 *   - headers for which need_headerupdate() is true are passed to
 *     update_header() under a write lock.
 * result starts out as ISC_R_NOTFOUND.
 * NOTE(review): the loop construct that repeats the search for the next
 * level, and the statement adjusting 'i', are not in the visible lines -
 * confirm against the full file.
 */
4254 static inline isc_result_t
4255 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4256 dns_dbnode_t **nodep, dns_name_t *foundname,
4257 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4260 dns_rbtnode_t *level_node;
4261 rdatasetheader_t *header, *header_prev, *header_next;
4262 rdatasetheader_t *found, *foundsig;
4263 isc_result_t result = ISC_R_NOTFOUND;
4268 isc_rwlocktype_t locktype;
4271 * Caller must be holding the tree lock.
4274 rbtdb = search->rbtdb;
4275 i = search->chain.level_matches;
4278 locktype = isc_rwlocktype_read;
/*
 * The lock is chosen from the current node's locknum, so it is looked up
 * again for every node examined.
 */
4279 lock = &rbtdb->node_locks[node->locknum].lock;
4280 NODE_LOCK(lock, locktype);
4283 * Look for NS and RRSIG NS rdatasets.
4288 for (header = node->data;
4290 header = header_next) {
4291 header_next = header->next;
4292 if (header->rdh_ttl <= search->now) {
4294 * This rdataset is stale. If no one else is
4295 * using the node, we can clean it up right
4296 * now, otherwise we mark it as stale, and
4297 * the node as dirty, so it will get cleaned
4300 if ((header->rdh_ttl <= search->now -
4302 (locktype == isc_rwlocktype_write ||
4303 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4305 * We update the node's status only
4306 * when we can get write access.
4308 locktype = isc_rwlocktype_write;
4310 if (dns_rbtnode_refcurrent(node)
4314 m = search->rbtdb->common.mctx;
4315 clean_stale_headers(
4318 if (header_prev != NULL)
4324 free_rdataset(rbtdb, m,
4327 header->attributes |=
4328 RDATASET_ATTR_STALE;
4330 header_prev = header;
4333 header_prev = header;
4334 } else if (EXISTS(header)) {
4336 * We've found an extant rdataset. See if
4337 * we're interested in it.
4339 if (header->type == dns_rdatatype_ns) {
4341 if (foundsig != NULL)
4343 } else if (header->type ==
4344 RBTDB_RDATATYPE_SIGNS) {
4349 header_prev = header;
4351 header_prev = header;
4354 if (found != NULL) {
4356 * If we have to set foundname, we do it before
4357 * anything else. If we were to set foundname after
4358 * we had set nodep or bound the rdataset, then we'd
4359 * have to undo that work if dns_name_concatenate()
4360 * failed. By setting foundname first, there's
4361 * nothing to undo if we have trouble.
4363 if (foundname != NULL) {
4364 dns_name_init(&name, NULL);
4365 dns_rbt_namefromnode(node, &name);
4366 result = dns_name_copy(&name, foundname, NULL);
4367 while (result == ISC_R_SUCCESS && i > 0) {
4369 level_node = search->chain.levels[i];
4370 dns_name_init(&name, NULL);
4371 dns_rbt_namefromnode(level_node,
4374 dns_name_concatenate(foundname,
4379 if (result != ISC_R_SUCCESS) {
4384 result = DNS_R_DELEGATION;
4385 if (nodep != NULL) {
4386 new_reference(search->rbtdb, node);
4389 bind_rdataset(search->rbtdb, node, found, search->now,
4391 if (foundsig != NULL)
4392 bind_rdataset(search->rbtdb, node, foundsig,
4393 search->now, sigrdataset);
4394 if (need_headerupdate(found, search->now) ||
4395 (foundsig != NULL &&
4396 need_headerupdate(foundsig, search->now))) {
4397 if (locktype != isc_rwlocktype_write) {
/*
 * No in-place upgrade on this path: the read lock is released and the
 * lock is taken again for writing.  need_headerupdate() is re-evaluated
 * below, after the write lock is held.
 */
4398 NODE_UNLOCK(lock, locktype);
4399 NODE_LOCK(lock, isc_rwlocktype_write);
4400 locktype = isc_rwlocktype_write;
4403 if (need_headerupdate(found, search->now))
4404 update_header(search->rbtdb, found,
4406 if (foundsig != NULL &&
4407 need_headerupdate(foundsig, search->now)) {
4408 update_header(search->rbtdb, foundsig,
4415 NODE_UNLOCK(lock, locktype);
4417 if (found == NULL && i > 0) {
4419 node = search->chain.levels[i];
/*
 * Walk backwards along search->chain looking for a node that carries an
 * NSEC rdataset (matchtype) and, optionally, its RRSIG (sigmatchtype).
 *
 * For each node the chain currently points at, the node is locked and
 * its headers are scanned.  Headers with rdh_ttl at or before 'now' go
 * through the stale-header handling; headers that are NONEXISTENT or
 * whose base type is 0 are skipped.  Any other header marks the node as
 * non-empty.
 *
 * Outcome per node:
 *   - NSEC found: the node name and origin are concatenated (destination
 *     not visible here), the rdatasets are bound, a node reference is
 *     taken and result becomes DNS_R_COVERINGNSEC;
 *   - node has data but no NSEC: result becomes ISC_R_NOTFOUND;
 *   - node is empty: step to the previous node with
 *     dns_rbtnodechain_prev() and repeat while that succeeds.
 */
4429 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4430 isc_stdtime_t now, dns_name_t *foundname,
4431 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4433 dns_rbtnode_t *node;
4434 rdatasetheader_t *header, *header_next, *header_prev;
4435 rdatasetheader_t *found, *foundsig;
4436 isc_boolean_t empty_node;
4437 isc_result_t result;
4438 dns_fixedname_t fname, forigin;
4439 dns_name_t *name, *origin;
4440 rbtdb_rdatatype_t matchtype, sigmatchtype;
4442 isc_rwlocktype_t locktype;
4444 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4445 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4446 dns_rdatatype_nsec);
4450 dns_fixedname_init(&fname);
4451 name = dns_fixedname_name(&fname);
4452 dns_fixedname_init(&forigin);
4453 origin = dns_fixedname_name(&forigin);
4454 result = dns_rbtnodechain_current(&search->chain, name,
4456 if (result != ISC_R_SUCCESS)
4458 locktype = isc_rwlocktype_read;
4459 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4460 NODE_LOCK(lock, locktype);
4463 empty_node = ISC_TRUE;
4465 for (header = node->data;
4467 header = header_next) {
4468 header_next = header->next;
4469 if (header->rdh_ttl <= now) {
4471 * This rdataset is stale. If no one else is
4472 * using the node, we can clean it up right
4473 * now, otherwise we mark it as stale, and the
4474 * node as dirty, so it will get cleaned up
4477 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4478 (locktype == isc_rwlocktype_write ||
4479 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4481 * We update the node's status only
4482 * when we can get write access.
4484 locktype = isc_rwlocktype_write;
4486 if (dns_rbtnode_refcurrent(node)
4490 m = search->rbtdb->common.mctx;
4491 clean_stale_headers(
4494 if (header_prev != NULL)
4498 node->data = header->next;
4499 free_rdataset(search->rbtdb, m,
4502 header->attributes |=
4503 RDATASET_ATTR_STALE;
4505 header_prev = header;
4508 header_prev = header;
4511 if (NONEXISTENT(header) ||
4512 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4513 header_prev = header;
4516 empty_node = ISC_FALSE;
4517 if (header->type == matchtype)
4519 else if (header->type == sigmatchtype)
4521 header_prev = header;
4523 if (found != NULL) {
4524 result = dns_name_concatenate(name, origin,
4526 if (result != ISC_R_SUCCESS)
4528 bind_rdataset(search->rbtdb, node, found,
4530 if (foundsig != NULL)
4531 bind_rdataset(search->rbtdb, node, foundsig,
4533 new_reference(search->rbtdb, node);
4535 result = DNS_R_COVERINGNSEC;
4536 } else if (!empty_node) {
/*
 * The node holds live data but no NSEC: the search stops here, because
 * the loop condition below only continues past empty nodes.
 */
4537 result = ISC_R_NOTFOUND;
4539 result = dns_rbtnodechain_prev(&search->chain, NULL,
4542 NODE_UNLOCK(lock, locktype);
4543 } while (empty_node && result == ISC_R_SUCCESS);
4548 * Mark a database for response policy rewriting.
/*
 * Validate 'db' as an rbtdb and hand its rpz_cidr, together with 'st',
 * to dns_rpz_enabled().  No other use of 'st' is visible in this body.
 */
4552 get_rpz_enabled(dns_db_t *db, dns_rpz_st_t *st)
4556 rbtdb = (dns_rbtdb_t *)db;
4557 REQUIRE(VALID_RBTDB(rbtdb));
4558 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/* rpz_cidr is read while the tree lock is held for reading. */
4559 dns_rpz_enabled(rbtdb->rpz_cidr, st);
4560 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4564 * Search the CIDR block tree of a response policy tree of trees for all of
4565 * the IP addresses in an A or AAAA rdataset.
4566 * Among the policies for all IPv4 and IPv6 addresses for a name, choose
4567 * the earliest configured policy,
4568 * QNAME over IP over NSDNAME over NSIP,
4569 * the longest prefix,
4570 * the lexically smallest address.
4571 * The caller must have already checked that any existing policy was not
4572 * configured earlier than this policy zone and does not have a higher
/*
 * Check every address in 'ardataset' against this database's response
 * policy address tree and keep the best match in st->m.
 *
 * The whole operation runs with rbtdb->tree_lock held for reading.  If
 * the database has no rpz_cidr the lock is dropped and ISC_R_UNEXPECTED
 * is returned.  Otherwise, for each A (4-byte) or AAAA (16-byte) record:
 *   1. the address is converted to an isc_netaddr_t and looked up with
 *      dns_rpz_cidr_find(), which also yields 'qname' and 'prefix';
 *   2. the hit is compared with an existing st->m entry from the same
 *      policy zone (type, then prefix, then name order);
 *   3. the node for 'qname' is located with dns_rbt_findnode(); a
 *      failure is logged as "rpz_findips findnode(%s): %s";
 *   4. the policy is derived from the rdataset of ardataset->type at
 *      that node, else from a CNAME there, else DNS_RPZ_POLICY_RECORD
 *      with DNS_R_NXRRSET and DNS_RPZ_TTL_DEFAULT;
 *   5. an rpz->policy other than DNS_RPZ_POLICY_GIVEN overrides it, and
 *      for DNS_RPZ_POLICY_DISABLED the hit is logged;
 *   6. whatever st->m held is released (rdataset, node, db, zone) and
 *      the new match is stored, attaching db and zone, plus the node and
 *      a clone of the rdataset for RECORD/WILDCNAME policies when the
 *      result is not DNS_R_NXRRSET.
 * The visible normal exit unlocks the tree and returns ISC_R_SUCCESS.
 * NOTE(review): the statements taken when a step fails or a hit is
 * discarded are not in the visible lines - confirm against the full file.
 */
4576 rpz_findips(dns_rpz_zone_t *rpz, dns_rpz_type_t rpz_type,
4577 dns_zone_t *zone, dns_db_t *db, dns_dbversion_t *version,
4578 dns_rdataset_t *ardataset, dns_rpz_st_t *st,
4579 dns_name_t *query_qname)
4583 struct in6_addr in6a;
4584 isc_netaddr_t netaddr;
4585 dns_fixedname_t selfnamef, qnamef;
4586 dns_name_t *selfname, *qname;
4587 dns_rbtnode_t *node;
4588 dns_rdataset_t zrdataset;
4589 dns_rpz_cidr_bits_t prefix;
4590 isc_result_t result;
4591 dns_rpz_policy_t rpz_policy;
4594 rbtdb = (dns_rbtdb_t *)db;
4595 REQUIRE(VALID_RBTDB(rbtdb));
4596 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4598 if (rbtdb->rpz_cidr == NULL) {
/* No address tree on this database: release the tree lock and give up. */
4599 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4600 return (ISC_R_UNEXPECTED);
4603 dns_fixedname_init(&selfnamef);
4604 dns_fixedname_init(&qnamef);
4605 selfname = dns_fixedname_name(&selfnamef);
4606 qname = dns_fixedname_name(&qnamef);
4608 for (result = dns_rdataset_first(ardataset);
4609 result == ISC_R_SUCCESS;
4610 result = dns_rdataset_next(ardataset)) {
4611 dns_rdata_t rdata = DNS_RDATA_INIT;
4612 dns_rdataset_current(ardataset, &rdata);
4613 switch (rdata.type) {
4614 case dns_rdatatype_a:
4615 INSIST(rdata.length == 4);
4616 memcpy(&ina.s_addr, rdata.data, 4);
4617 isc_netaddr_fromin(&netaddr, &ina);
4619 case dns_rdatatype_aaaa:
4620 INSIST(rdata.length == 16);
4621 memcpy(in6a.s6_addr, rdata.data, 16);
4622 isc_netaddr_fromin6(&netaddr, &in6a);
4628 result = dns_rpz_cidr_find(rbtdb->rpz_cidr, &netaddr, rpz_type,
4629 selfname, qname, &prefix);
4630 if (result != ISC_R_SUCCESS)
4634 * If we already have a rule, discard this new rule if
4636 * The caller has checked that st->m.rpz->num > rpz->num
4637 * or st->m.rpz->num == rpz->num and st->m.type >= rpz_type
4639 if (st->m.policy != DNS_RPZ_POLICY_MISS &&
4640 st->m.rpz->num == rpz->num &&
4641 (st->m.type < rpz_type ||
4642 (st->m.type == rpz_type &&
4643 (st->m.prefix > prefix ||
4644 (st->m.prefix == prefix &&
4645 0 > dns_name_rdatacompare(st->qname, qname))))))
4649 * We have rpz_st an entry with a prefix at least as long as
4650 * the prefix of the entry we had before. Find the node
4651 * corresponding to CDIR tree entry.
4654 result = dns_rbt_findnode(rbtdb->tree, qname, NULL,
4655 &node, NULL, 0, NULL, NULL);
4656 if (result != ISC_R_SUCCESS) {
4657 char namebuf[DNS_NAME_FORMATSIZE];
4659 dns_name_format(qname, namebuf, sizeof(namebuf));
4660 isc_log_write(dns_lctx, DNS_LOGCATEGORY_RPZ,
4661 DNS_LOGMODULE_RBTDB, DNS_RPZ_ERROR_LEVEL,
4662 "rpz_findips findnode(%s): %s",
4663 namebuf, isc_result_totext(result));
4667 * First look for a simple rewrite of the IP address.
4668 * If that fails, look for a CNAME. If we cannot find
4669 * a CNAME or the CNAME is neither of the special forms
4670 * "*" or ".", treat it like a real CNAME.
4672 dns_rdataset_init(&zrdataset);
4673 result = dns_db_findrdataset(db, node, version, ardataset->type,
4674 0, 0, &zrdataset, NULL);
4675 if (result != ISC_R_SUCCESS)
4676 result = dns_db_findrdataset(db, node, version,
4677 dns_rdatatype_cname,
4678 0, 0, &zrdataset, NULL);
4679 if (result == ISC_R_SUCCESS) {
4680 if (zrdataset.type != dns_rdatatype_cname) {
4681 rpz_policy = DNS_RPZ_POLICY_RECORD;
4683 rpz_policy = dns_rpz_decode_cname(&zrdataset,
4685 if (rpz_policy == DNS_RPZ_POLICY_RECORD ||
4686 rpz_policy == DNS_RPZ_POLICY_WILDCNAME)
4687 result = DNS_R_CNAME;
4689 ttl = zrdataset.ttl;
4691 rpz_policy = DNS_RPZ_POLICY_RECORD;
4692 result = DNS_R_NXRRSET;
4693 ttl = DNS_RPZ_TTL_DEFAULT;
4697 * Use an overriding action specified in the configuration file
4699 if (rpz->policy != DNS_RPZ_POLICY_GIVEN) {
4701 * only log DNS_RPZ_POLICY_DISABLED hits
4703 if (rpz->policy == DNS_RPZ_POLICY_DISABLED) {
4704 if (isc_log_wouldlog(dns_lctx,
4705 DNS_RPZ_INFO_LEVEL)) {
4706 char qname_buf[DNS_NAME_FORMATSIZE];
4707 char rpz_qname_buf[DNS_NAME_FORMATSIZE];
4708 dns_name_format(query_qname, qname_buf,
4710 dns_name_format(qname, rpz_qname_buf,
4711 sizeof(rpz_qname_buf));
4713 isc_log_write(dns_lctx,
4714 DNS_LOGCATEGORY_RPZ,
4715 DNS_LOGMODULE_RBTDB,
4717 "disabled rpz %s %s rewrite"
4719 dns_rpz_type2str(rpz_type),
4720 dns_rpz_policy2str(rpz_policy),
4721 qname_buf, rpz_qname_buf);
4726 rpz_policy = rpz->policy;
4729 if (dns_rdataset_isassociated(st->m.rdataset))
4730 dns_rdataset_disassociate(st->m.rdataset);
4731 if (st->m.node != NULL)
4732 dns_db_detachnode(st->m.db, &st->m.node);
4733 if (st->m.db != NULL)
4734 dns_db_detach(&st->m.db);
4735 if (st->m.zone != NULL)
4736 dns_zone_detach(&st->m.zone);
4738 st->m.type = rpz_type;
4739 st->m.prefix = prefix;
4740 st->m.policy = rpz_policy;
4742 st->m.result = result;
4743 dns_name_copy(qname, st->qname, NULL);
4744 if ((rpz_policy == DNS_RPZ_POLICY_RECORD ||
4745 rpz_policy == DNS_RPZ_POLICY_WILDCNAME) &&
4746 result != DNS_R_NXRRSET) {
4747 dns_rdataset_clone(&zrdataset,st->m.rdataset);
4748 dns_db_attachnode(db, node, &st->m.node);
4750 dns_db_attach(db, &st->m.db);
/*
 * 'version' is stored as a plain pointer: unlike db and zone, no attach
 * call is made for it here.
 */
4751 st->m.version = version;
4752 dns_zone_attach(zone, &st->m.zone);
4753 if (dns_rdataset_isassociated(&zrdataset))
4754 dns_rdataset_disassociate(&zrdataset);
4757 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4758 return (ISC_R_SUCCESS);
/*
 * Look up 'type' at 'name' in a cache database.
 *
 * Requirements visible below: 'db' must be a valid rbtdb and 'version'
 * must be NULL.
 *
 * The tree is searched under a read hold of tree_lock, with
 * cache_zonecut_callback() watching for DNAME rdatasets on the way down.
 *   - On DNS_R_PARTIALMATCH the answer comes from find_coveringnsec()
 *     (only with DNS_DBFIND_COVERINGNSEC), from setup_delegation() when
 *     the callback recorded a zonecut, or from find_deepest_zonecut().
 *   - On an exact match the node's headers are scanned under the node
 *     lock.  Stale headers (rdh_ttl <= now) are cleaned or flagged; among
 *     the rest the scan notes the wanted type, its RRSIG, negative cache
 *     entries, NS and RRSIG NS, and RRSIG CNAME when CNAME matching is
 *     allowed (it is disabled for KEY and NSEC queries).
 *
 * Results assigned on the exact-match path:
 *   DNS_R_DELEGATION      nothing usable was found (missing, or its trust
 *                         level is not permitted by 'options') but the
 *                         node has an NS rdataset;
 *   DNS_R_NCACHENXDOMAIN,
 *   DNS_R_NCACHENXRRSET   a negative cache entry was found;
 *   DNS_R_CNAME           a CNAME was found instead of the wanted type;
 *   ISC_R_SUCCESS         otherwise.
 *
 * Headers for which need_headerupdate() is true are passed to
 * update_header() under a write lock before the node lock is released.
 * If the callback recorded a zonecut that was not used
 * (search.need_cleanup), its node reference is dropped at the end.
 * NOTE(review): the goto labels and several branch targets are not part
 * of the visible lines - confirm control flow against the full file.
 */
4763 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4764 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4765 dns_dbnode_t **nodep, dns_name_t *foundname,
4766 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4768 dns_rbtnode_t *node = NULL;
4769 isc_result_t result;
4770 rbtdb_search_t search;
4771 isc_boolean_t cname_ok = ISC_TRUE;
4772 isc_boolean_t empty_node;
4774 isc_rwlocktype_t locktype;
4775 rdatasetheader_t *header, *header_prev, *header_next;
4776 rdatasetheader_t *found, *nsheader;
4777 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4778 rdatasetheader_t *update, *updatesig;
4779 rbtdb_rdatatype_t sigtype, negtype;
4783 search.rbtdb = (dns_rbtdb_t *)db;
4785 REQUIRE(VALID_RBTDB(search.rbtdb));
/* A non-NULL version is rejected outright by this entry point. */
4786 REQUIRE(version == NULL);
4789 isc_stdtime_get(&now);
4791 search.rbtversion = NULL;
4793 search.options = options;
4794 search.copy_name = ISC_FALSE;
4795 search.need_cleanup = ISC_FALSE;
4796 search.wild = ISC_FALSE;
4797 search.zonecut = NULL;
4798 dns_fixedname_init(&search.zonecut_name);
4799 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4804 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4807 * Search down from the root of the tree. If, while going down, we
4808 * encounter a callback node, cache_zonecut_callback() will search the
4809 * rdatasets at the zone cut for a DNAME rdataset.
4811 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4812 &search.chain, DNS_RBTFIND_EMPTYDATA,
4813 cache_zonecut_callback, &search);
4815 if (result == DNS_R_PARTIALMATCH) {
4816 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4817 result = find_coveringnsec(&search, nodep, now,
4818 foundname, rdataset,
4820 if (result == DNS_R_COVERINGNSEC)
4823 if (search.zonecut != NULL) {
4824 result = setup_delegation(&search, nodep, foundname,
4825 rdataset, sigrdataset);
4829 result = find_deepest_zonecut(&search, node, nodep,
4830 foundname, rdataset,
4834 } else if (result != ISC_R_SUCCESS)
4838 * Certain DNSSEC types are not subject to CNAME matching
4839 * (RFC4035, section 2.5 and RFC3007).
4841 * We don't check for RRSIG, because we don't store RRSIG records
4844 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4845 cname_ok = ISC_FALSE;
4848 * We now go looking for rdata...
4851 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4852 locktype = isc_rwlocktype_read;
4853 NODE_LOCK(lock, locktype);
4857 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
/*
 * negtype pairs base type 0 with 'type'; a header of this type (or
 * RBTDB_RDATATYPE_NCACHEANY) is handled below as a negative cache entry.
 */
4858 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4862 empty_node = ISC_TRUE;
4864 for (header = node->data; header != NULL; header = header_next) {
4865 header_next = header->next;
4866 if (header->rdh_ttl <= now) {
4868 * This rdataset is stale. If no one else is using the
4869 * node, we can clean it up right now, otherwise we
4870 * mark it as stale, and the node as dirty, so it will
4871 * get cleaned up later.
4873 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4874 (locktype == isc_rwlocktype_write ||
4875 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4877 * We update the node's status only when we
4878 * can get write access.
4880 locktype = isc_rwlocktype_write;
4882 if (dns_rbtnode_refcurrent(node) == 0) {
4885 mctx = search.rbtdb->common.mctx;
4886 clean_stale_headers(search.rbtdb, mctx,
4888 if (header_prev != NULL)
4892 node->data = header->next;
4893 free_rdataset(search.rbtdb, mctx,
4896 header->attributes |=
4897 RDATASET_ATTR_STALE;
4899 header_prev = header;
4902 header_prev = header;
4903 } else if (EXISTS(header)) {
4905 * We now know that there is at least one active
4906 * non-stale rdataset at this node.
4908 empty_node = ISC_FALSE;
4911 * If we found a type we were looking for, remember
4914 if (header->type == type ||
4915 (type == dns_rdatatype_any &&
4916 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4917 (cname_ok && header->type ==
4918 dns_rdatatype_cname)) {
4920 * We've found the answer.
4923 if (header->type == dns_rdatatype_cname &&
4927 * If we've already got the
4928 * CNAME RRSIG, use it.
4930 foundsig = cnamesig;
4932 } else if (header->type == sigtype) {
4934 * We've found the RRSIG rdataset for our
4935 * target type. Remember it.
4938 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4939 header->type == negtype) {
4941 * We've found a negative cache entry.
4944 } else if (header->type == dns_rdatatype_ns) {
4946 * Remember a NS rdataset even if we're
4947 * not specifically looking for it, because
4948 * we might need it later.
4951 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4953 * If we need the NS rdataset, we'll also
4954 * need its signature.
4957 } else if (cname_ok &&
4958 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4960 * If we get a CNAME match, we'll also need
4965 header_prev = header;
4967 header_prev = header;
4972 * We have an exact match for the name, but there are no
4973 * extant rdatasets. That means that this node doesn't
4974 * meaningfully exist, and that we really have a partial match.
4976 NODE_UNLOCK(lock, locktype);
4981 * If we didn't find what we were looking for...
4983 if (found == NULL ||
4984 (DNS_TRUST_ADDITIONAL(found->trust) &&
4985 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4986 (found->trust == dns_trust_glue &&
4987 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4988 (DNS_TRUST_PENDING(found->trust) &&
4989 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4991 * If there is an NS rdataset at this node, then this is the
4994 if (nsheader != NULL) {
4995 if (nodep != NULL) {
4996 new_reference(search.rbtdb, node);
4997 INSIST(!ISC_LINK_LINKED(node, deadlink));
5000 bind_rdataset(search.rbtdb, node, nsheader, search.now,
5002 if (need_headerupdate(nsheader, search.now))
5004 if (nssig != NULL) {
5005 bind_rdataset(search.rbtdb, node, nssig,
5006 search.now, sigrdataset);
5007 if (need_headerupdate(nssig, search.now))
5010 result = DNS_R_DELEGATION;
5015 * Go find the deepest zone cut.
5017 NODE_UNLOCK(lock, locktype);
5022 * We found what we were looking for, or we found a CNAME.
5025 if (nodep != NULL) {
5026 new_reference(search.rbtdb, node);
5027 INSIST(!ISC_LINK_LINKED(node, deadlink));
5031 if (NEGATIVE(found)) {
5033 * We found a negative cache entry.
5035 if (NXDOMAIN(found))
5036 result = DNS_R_NCACHENXDOMAIN;
5038 result = DNS_R_NCACHENXRRSET;
5039 } else if (type != found->type &&
5040 type != dns_rdatatype_any &&
5041 found->type == dns_rdatatype_cname) {
5043 * We weren't doing an ANY query and we found a CNAME instead
5044 * of the type we were looking for, so we need to indicate
5045 * that result to the caller.
5047 result = DNS_R_CNAME;
5050 * An ordinary successful query!
5052 result = ISC_R_SUCCESS;
5055 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5056 result == DNS_R_NCACHENXRRSET) {
5057 bind_rdataset(search.rbtdb, node, found, search.now,
5059 if (need_headerupdate(found, search.now))
5061 if (!NEGATIVE(found) && foundsig != NULL) {
5062 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5064 if (need_headerupdate(foundsig, search.now))
5065 updatesig = foundsig;
5070 if ((update != NULL || updatesig != NULL) &&
5071 locktype != isc_rwlocktype_write) {
/*
 * Header updates need the write lock.  The read lock is released and the
 * lock re-taken for writing, and need_headerupdate() is checked again
 * below once the write lock is held.
 */
5072 NODE_UNLOCK(lock, locktype);
5073 NODE_LOCK(lock, isc_rwlocktype_write);
5074 locktype = isc_rwlocktype_write;
5077 if (update != NULL && need_headerupdate(update, search.now))
5078 update_header(search.rbtdb, update, search.now);
5079 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5080 update_header(search.rbtdb, updatesig, search.now);
5082 NODE_UNLOCK(lock, locktype);
5085 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5088 * If we found a zonecut but aren't going to use it, we have to
5091 if (search.need_cleanup) {
5092 node = search.zonecut;
5093 INSIST(node != NULL);
5094 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5096 NODE_LOCK(lock, isc_rwlocktype_read);
5097 decrement_reference(search.rbtdb, node, 0,
5098 isc_rwlocktype_read, isc_rwlocktype_none,
5100 NODE_UNLOCK(lock, isc_rwlocktype_read);
5103 dns_rbtnodechain_reset(&search.chain);
/*
 * Find the NS rdataset (and its RRSIG) that applies to 'name' in a cache
 * database.
 *
 * The tree is searched under a read hold of tree_lock, without a
 * callback.  DNS_DBFIND_NOEXACT in 'options' is translated to
 * DNS_RBTFIND_NOEXACT for the tree search.
 *   - On DNS_R_PARTIALMATCH the work is delegated to
 *     find_deepest_zonecut().
 *   - On an exact match the node's headers are scanned under the node
 *     lock.  Stale headers (rdh_ttl <= now) are cleaned or flagged; NS
 *     and RRSIG NS headers are noted.  With an NS header present, a node
 *     reference is taken when nodep is non-NULL, the rdatasets are bound
 *     using search.now, and headers for which need_headerupdate() is
 *     true are passed to update_header() under a write lock.
 *
 * This path must never leave search.need_cleanup set (INSIST below).  A
 * final DNS_R_DELEGATION result is reported as ISC_R_SUCCESS.
 * NOTE(review): the goto labels, the "no NS here" continuation and the
 * return statement are not part of the visible lines - confirm against
 * the full file.
 */
5109 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
5110 isc_stdtime_t now, dns_dbnode_t **nodep,
5111 dns_name_t *foundname,
5112 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
5114 dns_rbtnode_t *node = NULL;
5116 isc_result_t result;
5117 rbtdb_search_t search;
5118 rdatasetheader_t *header, *header_prev, *header_next;
5119 rdatasetheader_t *found, *foundsig;
5120 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
5121 isc_rwlocktype_t locktype;
5123 search.rbtdb = (dns_rbtdb_t *)db;
5125 REQUIRE(VALID_RBTDB(search.rbtdb));
5128 isc_stdtime_get(&now);
5130 search.rbtversion = NULL;
5132 search.options = options;
5133 search.copy_name = ISC_FALSE;
5134 search.need_cleanup = ISC_FALSE;
5135 search.wild = ISC_FALSE;
5136 search.zonecut = NULL;
5137 dns_fixedname_init(&search.zonecut_name);
5138 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
5141 if ((options & DNS_DBFIND_NOEXACT) != 0)
5142 rbtoptions |= DNS_RBTFIND_NOEXACT;
5144 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5147 * Search down from the root of the tree.
5149 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5150 &search.chain, rbtoptions, NULL, &search);
5152 if (result == DNS_R_PARTIALMATCH) {
5154 result = find_deepest_zonecut(&search, node, nodep, foundname,
5155 rdataset, sigrdataset);
5157 } else if (result != ISC_R_SUCCESS)
5161 * We now go looking for an NS rdataset at the node.
5164 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5165 locktype = isc_rwlocktype_read;
5166 NODE_LOCK(lock, locktype);
5171 for (header = node->data; header != NULL; header = header_next) {
5172 header_next = header->next;
5173 if (header->rdh_ttl <= now) {
5175 * This rdataset is stale. If no one else is using the
5176 * node, we can clean it up right now, otherwise we
5177 * mark it as stale, and the node as dirty, so it will
5178 * get cleaned up later.
5180 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5181 (locktype == isc_rwlocktype_write ||
5182 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5184 * We update the node's status only when we
5185 * can get write access.
5187 locktype = isc_rwlocktype_write;
5189 if (dns_rbtnode_refcurrent(node) == 0) {
5192 mctx = search.rbtdb->common.mctx;
5193 clean_stale_headers(search.rbtdb, mctx,
5195 if (header_prev != NULL)
5199 node->data = header->next;
5200 free_rdataset(search.rbtdb, mctx,
5203 header->attributes |=
5204 RDATASET_ATTR_STALE;
5206 header_prev = header;
5209 header_prev = header;
5210 } else if (EXISTS(header)) {
5212 * If we found a type we were looking for, remember
5215 if (header->type == dns_rdatatype_ns) {
5217 * Remember a NS rdataset even if we're
5218 * not specifically looking for it, because
5219 * we might need it later.
5222 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5224 * If we need the NS rdataset, we'll also
5225 * need its signature.
5229 header_prev = header;
5231 header_prev = header;
5234 if (found == NULL) {
5236 * No NS records here.
5238 NODE_UNLOCK(lock, locktype);
5242 if (nodep != NULL) {
5243 new_reference(search.rbtdb, node);
5244 INSIST(!ISC_LINK_LINKED(node, deadlink));
5248 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
5249 if (foundsig != NULL)
5250 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5253 if (need_headerupdate(found, search.now) ||
5254 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
5255 if (locktype != isc_rwlocktype_write) {
/*
 * Header updates need the write lock.  The read lock is released and the
 * lock re-taken for writing, and need_headerupdate() is checked again
 * below once the write lock is held.
 */
5256 NODE_UNLOCK(lock, locktype);
5257 NODE_LOCK(lock, isc_rwlocktype_write);
5258 locktype = isc_rwlocktype_write;
5261 if (need_headerupdate(found, search.now))
5262 update_header(search.rbtdb, found, search.now);
5263 if (foundsig != NULL &&
5264 need_headerupdate(foundsig, search.now)) {
5265 update_header(search.rbtdb, foundsig, search.now);
5269 NODE_UNLOCK(lock, locktype);
5272 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5274 INSIST(!search.need_cleanup);
5276 dns_rbtnodechain_reset(&search.chain);
5278 if (result == DNS_R_DELEGATION)
5279 result = ISC_R_SUCCESS;
/*
 * attachnode: take one more reference on the node source of database db.
 *
 * Requires db to be a valid rbtdb and targetp to point at a NULL pointer.
 * The node reference count is incremented while the strong lock of the
 * node lock bucket (node_locks[node->locknum]) is held.
 *
 * NOTE(review): the declaration of refs and the store into *targetp are
 * not among the lines shown here; presumably *targetp receives source --
 * confirm against the full source.
 */
5285 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5286 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5287 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5290 REQUIRE(VALID_RBTDB(rbtdb));
5291 REQUIRE(targetp != NULL && *targetp == NULL);
5293 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5294 dns_rbtnode_refincrement(node, &refs);
5296 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * detachnode: release the reference held through *targetp.
 *
 * Requires a valid rbtdb and a non-NULL *targetp. The reference is dropped
 * by decrement_reference() while the read lock of the node lock bucket is
 * held. When that call returns true and the bucket has no references left
 * while it is flagged exiting, inactive is set.
 *
 * The tail of the function takes the database write lock, sets want_free
 * when rbtdb->active is 0, logs "calling free_rbtdb(<origin>)" at debug
 * level 1 and calls free_rbtdb().
 *
 * NOTE(review): the statements that guard the tail on inactive and
 * want_free, and any update of rbtdb->active or *targetp, are not among
 * the lines shown here -- confirm against the full source.
 */
5302 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5303 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5304 dns_rbtnode_t *node;
5305 isc_boolean_t want_free = ISC_FALSE;
5306 isc_boolean_t inactive = ISC_FALSE;
5307 rbtdb_nodelock_t *nodelock;
5309 REQUIRE(VALID_RBTDB(rbtdb));
5310 REQUIRE(targetp != NULL && *targetp != NULL);
5312 node = (dns_rbtnode_t *)(*targetp);
5313 nodelock = &rbtdb->node_locks[node->locknum];
5315 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5317 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5318 isc_rwlocktype_none, ISC_FALSE)) {
5319 if (isc_refcount_current(&nodelock->references) == 0 &&
5320 nodelock->exiting) {
5321 inactive = ISC_TRUE;
5325 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5330 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5332 if (rbtdb->active == 0)
5333 want_free = ISC_TRUE;
5334 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * The origin name is formatted into buf only when dns_name_dynamic()
 * reports true for it; otherwise a fixed placeholder string is logged.
 */
5336 char buf[DNS_NAME_FORMATSIZE];
5337 if (dns_name_dynamic(&rbtdb->common.origin))
5338 dns_name_format(&rbtdb->common.origin, buf,
5341 strcpy(buf, "<UNKNOWN>");
5342 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5343 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5344 "calling free_rbtdb(%s)", buf);
5345 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * expirenode: mark expired rdatasets at node as stale.
 *
 * Under the write lock of the node lock bucket every header in
 * rbtnode->data is visited:
 *   - a header with rdh_ttl <= now - RBTDB_VIRTUAL gets
 *     RDATASET_ATTR_STALE;
 *   - otherwise, when force_expire is set, a header that is not RETAIN()ed
 *     has its TTL set to 0 and is marked stale, while a retained header is
 *     only logged.
 * force_expire can become true only while the memory context reports
 * overmem: it requires a node with no down pointer and a random value that
 * is a multiple of 4.
 *
 * Diagnostic messages go to the database category / cache module at debug
 * level 2 and are emitted only when isc_log_wouldlog() allows it.
 *
 * The visible return path yields ISC_R_SUCCESS.
 *
 * NOTE(review): the call to isc_stdtime_get() is presumably guarded by a
 * test that now is 0; that guard, the declaration of val and some log
 * guards are not among the lines shown here -- confirm.
 */
5351 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5352 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5353 dns_rbtnode_t *rbtnode = node;
5354 rdatasetheader_t *header;
5355 isc_boolean_t force_expire = ISC_FALSE;
5357 * These are the category and module used by the cache cleaner.
5359 isc_boolean_t log = ISC_FALSE;
5360 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5361 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5362 int level = ISC_LOG_DEBUG(2);
5363 char printname[DNS_NAME_FORMATSIZE];
5365 REQUIRE(VALID_RBTDB(rbtdb));
5368 * Caller must hold a tree lock.
5372 isc_stdtime_get(&now);
5374 if (isc_mem_isovermem(rbtdb->common.mctx)) {
5377 isc_random_get(&val);
5379 * XXXDCL Could stand to have a better policy, like LRU.
5381 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5384 * Note that 'log' can be true IFF overmem is also true.
5385 * overmem can currently only be true for cache
5386 * databases -- hence all of the "overmem cache" log strings.
5388 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5390 isc_log_write(dns_lctx, category, module, level,
5391 "overmem cache: %s %s",
5392 force_expire ? "FORCE" : "check",
5393 dns_rbt_formatnodename(rbtnode,
5395 sizeof(printname)));
5399 * We may not need write access, but this code path is not performance
5400 * sensitive, so it should be okay to always lock as a writer.
5402 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5403 isc_rwlocktype_write);
5405 for (header = rbtnode->data; header != NULL; header = header->next)
5406 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5408 * We don't check if refcurrent(rbtnode) == 0 and try
5409 * to free like we do in cache_find(), because
5410 * refcurrent(rbtnode) must be non-zero. This is so
5411 * because 'node' is an argument to the function.
5413 header->attributes |= RDATASET_ATTR_STALE;
5416 isc_log_write(dns_lctx, category, module,
5417 level, "overmem cache: stale %s",
5419 } else if (force_expire) {
5420 if (! RETAIN(header)) {
5421 set_ttl(rbtdb, header, 0);
5422 header->attributes |= RDATASET_ATTR_STALE;
5425 isc_log_write(dns_lctx, category, module,
5426 level, "overmem cache: "
5427 "reprieve by RETAIN() %s",
5430 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5431 isc_log_write(dns_lctx, category, module, level,
5432 "overmem cache: saved %s", printname);
5434 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5435 isc_rwlocktype_write);
5437 return (ISC_R_SUCCESS);
/*
 * overmem: database method that receives the database and an over-memory
 * flag. The body is deliberately empty for this implementation; the inline
 * comment points at adb.c:water() for background.
 */
5441 overmem(dns_db_t *db, isc_boolean_t overmem) {
5442 /* This is an empty callback. See adb.c:water() */
/*
 * printnode: write a human-readable dump of node to the stream out.
 *
 * The read lock of the node lock bucket is held for the whole dump. The
 * first output line carries the node address, its current reference count
 * and its lock number. When the node has data, each top-level header is
 * visited through its next pointer (saved in top_next before the inner
 * walk reuses current), and the chain hanging off its down pointer is
 * printed with serial, ttl, trust and attributes. A node without data
 * prints "(empty)".
 *
 * NOTE(review): several argument lines of the fprintf() calls and the use
 * of the flag first are not among the lines shown here -- confirm the
 * exact output format against the full source.
 */
5451 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5452 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5453 dns_rbtnode_t *rbtnode = node;
5454 isc_boolean_t first;
5456 REQUIRE(VALID_RBTDB(rbtdb));
5458 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5459 isc_rwlocktype_read);
5461 fprintf(out, "node %p, %u references, locknum = %u\n",
5462 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5464 if (rbtnode->data != NULL) {
5465 rdatasetheader_t *current, *top_next;
5467 for (current = rbtnode->data; current != NULL;
5468 current = top_next) {
5469 top_next = current->next;
5471 fprintf(out, "\ttype %u", current->type);
5477 "\tserial = %lu, ttl = %u, "
5478 "trust = %u, attributes = %u, "
5480 (unsigned long)current->serial,
5483 current->attributes,
5485 current = current->down;
5486 } while (current != NULL);
5489 fprintf(out, "(empty)\n");
5491 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5492 isc_rwlocktype_read);
/*
 * createiterator: allocate and initialise a database iterator for db.
 *
 * The iterator is allocated from the database memory context and attaches
 * to db (dns_db_attach), so it holds its own database reference. It starts
 * out paused, with no tree lock held, no current node, an ISC_R_SUCCESS
 * result and an empty deletions array. Option bits select behaviour:
 *   DNS_DB_RELATIVENAMES -> common.relative_names
 *   DNS_DB_NSEC3ONLY     -> nsec3only (iteration starts on nsec3chain)
 *   DNS_DB_NONSEC3       -> nonsec3
 * Both node chains are initialised; current points at nsec3chain when
 * nsec3only is set and at chain on the other visible assignment.
 *
 * Returns ISC_R_NOMEMORY when the allocation fails, otherwise stores the
 * iterator in *iteratorp and returns ISC_R_SUCCESS.
 */
5496 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5498 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5499 rbtdb_dbiterator_t *rbtdbiter;
5501 REQUIRE(VALID_RBTDB(rbtdb));
5503 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5504 if (rbtdbiter == NULL)
5505 return (ISC_R_NOMEMORY);
5507 rbtdbiter->common.methods = &dbiterator_methods;
5508 rbtdbiter->common.db = NULL;
5509 dns_db_attach(db, &rbtdbiter->common.db);
5510 rbtdbiter->common.relative_names =
5511 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5512 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5513 rbtdbiter->common.cleaning = ISC_FALSE;
5514 rbtdbiter->paused = ISC_TRUE;
5515 rbtdbiter->tree_locked = isc_rwlocktype_none;
5516 rbtdbiter->result = ISC_R_SUCCESS;
5517 dns_fixedname_init(&rbtdbiter->name);
5518 dns_fixedname_init(&rbtdbiter->origin);
5519 rbtdbiter->node = NULL;
5520 rbtdbiter->delete = 0;
5521 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5522 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5523 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5524 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5525 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5526 if (rbtdbiter->nsec3only)
5527 rbtdbiter->current = &rbtdbiter->nsec3chain;
5529 rbtdbiter->current = &rbtdbiter->chain;
5531 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5533 return (ISC_R_SUCCESS);
/*
 * zone_findrdataset: look up the rdataset of the given type/covers at node
 * as seen by version, together with its covering RRSIG rdataset.
 *
 * type must not be dns_rdatatype_any and a supplied version must belong to
 * this database. When version is NULL the current version is opened here
 * (close_version is set) and closed again before returning.
 *
 * With the read lock of the node lock bucket held, each top-level header
 * is examined: its down chain is walked for an entry whose serial is not
 * newer than the serial of the version, and entries flagged NONEXISTENT
 * are tested for. A surviving header whose type equals matchtype or
 * sigmatchtype is remembered as found or foundsig respectively, and both
 * are bound into rdataset / sigrdataset when found is non-NULL.
 *
 * Returns ISC_R_NOTFOUND or ISC_R_SUCCESS.
 *
 * NOTE(review): the rest of the serial test, the handling of NONEXISTENT
 * entries, the assignments to found / foundsig and the condition selecting
 * between the two return values are not among the lines shown here --
 * confirm against the full source.
 */
5537 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5538 dns_rdatatype_t type, dns_rdatatype_t covers,
5539 isc_stdtime_t now, dns_rdataset_t *rdataset,
5540 dns_rdataset_t *sigrdataset)
5542 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5543 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5544 rdatasetheader_t *header, *header_next, *found, *foundsig;
5545 rbtdb_serial_t serial;
5546 rbtdb_version_t *rbtversion = version;
5547 isc_boolean_t close_version = ISC_FALSE;
5548 rbtdb_rdatatype_t matchtype, sigmatchtype;
5550 REQUIRE(VALID_RBTDB(rbtdb));
5551 REQUIRE(type != dns_rdatatype_any);
5552 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
5554 if (rbtversion == NULL) {
5555 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5556 close_version = ISC_TRUE;
5558 serial = rbtversion->serial;
5561 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5562 isc_rwlocktype_read);
5566 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5568 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5572 for (header = rbtnode->data; header != NULL; header = header_next) {
5573 header_next = header->next;
5575 if (header->serial <= serial &&
5578 * Is this a "this rdataset doesn't
5581 if (NONEXISTENT(header))
5585 header = header->down;
5586 } while (header != NULL);
5587 if (header != NULL) {
5589 * We have an active, extant rdataset. If it's a
5590 * type we're looking for, remember it.
5592 if (header->type == matchtype) {
5594 if (foundsig != NULL)
5596 } else if (header->type == sigmatchtype) {
5603 if (found != NULL) {
5604 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5605 if (foundsig != NULL)
5606 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5610 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5611 isc_rwlocktype_read);
5614 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5618 return (ISC_R_NOTFOUND);
5620 return (ISC_R_SUCCESS);
/*
 * cache_findrdataset: look up the rdataset of the given type/covers at a
 * cache node, together with its covering RRSIG rdataset.
 *
 * type must not be dns_rdatatype_any. The node lock is taken for reading
 * first. While scanning the headers:
 *   - a header with rdh_ttl <= now is never a candidate; when it is also
 *     older than now - RBTDB_VIRTUAL and write access is already held or
 *     NODE_TRYUPGRADE() succeeds, it is flagged RDATASET_ATTR_STALE;
 *   - an existing header is compared against matchtype, against the
 *     negative types (RBTDB_RDATATYPE_NCACHEANY or negtype) and against
 *     sigmatchtype.
 * The signature is bound only when the answer is not a negative entry.
 * The lock is released with whatever mode locktype ended up as.
 *
 * Returns ISC_R_NOTFOUND, or for a negative entry DNS_R_NCACHENXDOMAIN /
 * DNS_R_NCACHENXRRSET depending on NXDOMAIN(found); result starts out as
 * ISC_R_SUCCESS.
 *
 * NOTE(review): the declaration of lock, the assignments to found and
 * foundsig, the guard around isc_stdtime_get() and the final return are
 * not among the lines shown here -- confirm against the full source.
 */
5624 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5625 dns_rdatatype_t type, dns_rdatatype_t covers,
5626 isc_stdtime_t now, dns_rdataset_t *rdataset,
5627 dns_rdataset_t *sigrdataset)
5629 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5630 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5631 rdatasetheader_t *header, *header_next, *found, *foundsig;
5632 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5633 isc_result_t result;
5635 isc_rwlocktype_t locktype;
5637 REQUIRE(VALID_RBTDB(rbtdb));
5638 REQUIRE(type != dns_rdatatype_any);
5642 result = ISC_R_SUCCESS;
5645 isc_stdtime_get(&now);
5647 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5648 locktype = isc_rwlocktype_read;
5649 NODE_LOCK(lock, locktype);
5653 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5654 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5656 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5660 for (header = rbtnode->data; header != NULL; header = header_next) {
5661 header_next = header->next;
5662 if (header->rdh_ttl <= now) {
5663 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5664 (locktype == isc_rwlocktype_write ||
5665 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5667 * We update the node's status only when we
5668 * can get write access.
5670 locktype = isc_rwlocktype_write;
5673 * We don't check if refcurrent(rbtnode) == 0
5674 * and try to free like we do in cache_find(),
5675 * because refcurrent(rbtnode) must be
5676 * non-zero. This is so because 'node' is an
5677 * argument to the function.
5679 header->attributes |= RDATASET_ATTR_STALE;
5682 } else if (EXISTS(header)) {
5683 if (header->type == matchtype)
5685 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5686 header->type == negtype)
5688 else if (header->type == sigmatchtype)
5692 if (found != NULL) {
5693 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5694 if (!NEGATIVE(found) && foundsig != NULL)
5695 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5699 NODE_UNLOCK(lock, locktype);
5702 return (ISC_R_NOTFOUND);
5704 if (NEGATIVE(found)) {
5706 * We found a negative cache entry.
5708 if (NXDOMAIN(found))
5709 result = DNS_R_NCACHENXDOMAIN;
5711 result = DNS_R_NCACHENXRRSET;
/*
 * allrdatasets: create an iterator over the rdatasets at node.
 *
 * The iterator is allocated from the database memory context. For a
 * database without DNS_DBATTR_CACHE a version is pinned: a supplied
 * version must belong to this database and has its reference count
 * incremented; the NULL case is handled on the branch that passes
 * &rbtversion out by address. The iterator records db, node, version and
 * now, starts with no current header, and takes one node reference under
 * the strong node lock.
 *
 * Returns ISC_R_NOMEMORY when the allocation fails, otherwise stores the
 * iterator in *iteratorp and returns ISC_R_SUCCESS.
 *
 * NOTE(review): the call that receives &rbtversion, the guard around
 * isc_stdtime_get(), the cache-side handling of rbtversion and the
 * declaration of refs are not among the lines shown here -- confirm.
 */
5718 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5719 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5721 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5722 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5723 rbtdb_version_t *rbtversion = version;
5724 rbtdb_rdatasetiter_t *iterator;
5727 REQUIRE(VALID_RBTDB(rbtdb));
5729 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5730 if (iterator == NULL)
5731 return (ISC_R_NOMEMORY);
5733 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5735 if (rbtversion == NULL)
5737 (dns_dbversion_t **) (void *)(&rbtversion));
5741 INSIST(rbtversion->rbtdb == rbtdb);
5743 isc_refcount_increment(&rbtversion->references,
5749 isc_stdtime_get(&now);
5753 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5754 iterator->common.methods = &rdatasetiter_methods;
5755 iterator->common.db = db;
5756 iterator->common.node = node;
5757 iterator->common.version = (dns_dbversion_t *)rbtversion;
5758 iterator->common.now = now;
5760 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5762 dns_rbtnode_refincrement(rbtnode, &refs);
5765 iterator->current = NULL;
5767 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5769 *iteratorp = (dns_rdatasetiter_t *)iterator;
5771 return (ISC_R_SUCCESS);
/*
 * cname_and_other_data: scan the rdatasets of node, as visible at the
 * given serial, for a CNAME and for other data.
 *
 * For each top-level header the down chain is searched for an entry whose
 * serial is not newer than serial, and entries flagged NONEXISTENT are
 * tested for. Headers of type CNAME feed the cname flag. Headers whose
 * base type is anything other than KEY, SIG, NSEC or RRSIG set other_data
 * when such an entry is found.
 *
 * The caller must hold the node lock (see the comment in the body).
 *
 * NOTE(review): the return statements, the initialisation and setting of
 * cname, the rest of the serial test and the handling of NONEXISTENT
 * entries are not among the lines shown here; presumably the result is
 * true exactly when both flags are set -- confirm.
 */
5774 static isc_boolean_t
5775 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5776 rdatasetheader_t *header, *header_next;
5777 isc_boolean_t cname, other_data;
5778 dns_rdatatype_t rdtype;
5781 * The caller must hold the node lock.
5785 * Look for CNAME and "other data" rdatasets active in our version.
5788 other_data = ISC_FALSE;
5789 for (header = node->data; header != NULL; header = header_next) {
5790 header_next = header->next;
5791 if (header->type == dns_rdatatype_cname) {
5793 * Look for an active extant CNAME.
5796 if (header->serial <= serial &&
5799 * Is this a "this rdataset doesn't
5802 if (NONEXISTENT(header))
5806 header = header->down;
5807 } while (header != NULL);
5812 * Look for active extant "other data".
5814 * "Other data" is any rdataset whose type is not
5815 * KEY, NSEC, SIG or RRSIG.
5817 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5818 if (rdtype != dns_rdatatype_key &&
5819 rdtype != dns_rdatatype_sig &&
5820 rdtype != dns_rdatatype_nsec &&
5821 rdtype != dns_rdatatype_rrsig) {
5823 * Is it active and extant?
5826 if (header->serial <= serial &&
5829 * Is this a "this rdataset
5830 * doesn't exist" record?
5832 if (NONEXISTENT(header))
5836 header = header->down;
5837 } while (header != NULL);
5839 other_data = ISC_TRUE;
5844 if (cname && other_data)
/*
 * resign_insert: put newheader on the heap rbtdb->heaps[idx].
 *
 * Only valid for non-cache databases, and only for a header that is not
 * yet on a heap (heap_index == 0) and not linked on a list; all three
 * conditions are enforced with INSIST.
 *
 * NOTE(review): the return statement is not among the lines shown here;
 * presumably the isc_heap_insert() result is returned -- confirm.
 */
5851 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5852 isc_result_t result;
5854 INSIST(!IS_CACHE(rbtdb));
5855 INSIST(newheader->heap_index == 0);
5856 INSIST(!ISC_LINK_LINKED(newheader, link));
5858 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * add: install the rdataset header newheader at rbtnode.
 *
 * Parameters, as used by the visible code:
 *   rbtversion    - version being modified; NULL selects the cache paths.
 *   newheader     - header to add; this function owns it and frees it with
 *                   free_rdataset() on every path that does not link it
 *                   into the node.
 *   options       - DNS_DBADD_MERGE (requires a version), DNS_DBADD_FORCE
 *                   (treat the new data as dns_trust_ultimate),
 *                   DNS_DBADD_EXACT and DNS_DBADD_EXACTTTL (merge checks).
 *   loading       - when false and a version is given, a changed record is
 *                   created for the node.
 *   addedrdataset - optional; bound to the header that ends up
 *                   representing the data (the new or the retained one).
 *   now           - time used for TTL comparisons.
 *
 * Result codes visible here: ISC_R_SUCCESS, ISC_R_NOMEMORY (no changed
 * record), DNS_R_UNCHANGED (nothing to do, or existing data wins),
 * DNS_R_NOTEXACT (exact-TTL merge mismatch, set before the merge call) and
 * DNS_R_CNAMEANDOTHER (versioned database only).
 *
 * The caller must hold the node lock (see the comment in the body).
 *
 * NOTE(review): many control-flow lines (else branches, breaks, some
 * returns and the declarations of trust and idx) are not among the lines
 * shown here; the summary covers only what the visible statements
 * establish -- confirm details against the full source.
 */
5863 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5864 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5865 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5867 rbtdb_changed_t *changed = NULL;
5868 rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5869 unsigned char *merged;
5870 isc_result_t result;
5871 isc_boolean_t header_nx;
5872 isc_boolean_t newheader_nx;
5873 isc_boolean_t merge;
5874 dns_rdatatype_t rdtype, covers;
5875 rbtdb_rdatatype_t negtype, sigtype;
5880 * Add an rdatasetheader_t to a node.
5884 * Caller must be holding the node lock.
5887 if ((options & DNS_DBADD_MERGE) != 0) {
5888 REQUIRE(rbtversion != NULL);
5893 if ((options & DNS_DBADD_FORCE) != 0)
5894 trust = dns_trust_ultimate;
5896 trust = newheader->trust;
5898 if (rbtversion != NULL && !loading) {
5900 * We always add a changed record, even if no changes end up
5901 * being made to this node, because it's harmless and
5902 * simplifies the code.
5904 changed = add_changed(rbtdb, rbtversion, rbtnode);
5905 if (changed == NULL) {
5906 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5907 return (ISC_R_NOMEMORY);
5911 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5912 topheader_prev = NULL;
/*
 * Unversioned (cache) additions of existing data first reconcile the new
 * entry with negative cache entries already present at the node.
 */
5915 if (rbtversion == NULL && !newheader_nx) {
5916 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5917 if (NEGATIVE(newheader)) {
5919 * We're adding a negative cache entry.
5921 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5922 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5924 for (topheader = rbtnode->data;
5926 topheader = topheader->next) {
5928 * If we're adding an negative cache entry
5929 * which covers all types (NXDOMAIN,
5930 * NODATA(QTYPE=ANY)).
5932 * We make all other data stale so that the
5933 * only rdataset that can be found at this
5934 * node is the negative cache entry.
5936 * Otherwise look for any RRSIGs of the
5937 * given type so they can be marked stale
5940 if (covers == dns_rdatatype_any) {
5941 set_ttl(rbtdb, topheader, 0);
5942 topheader->attributes |=
5943 RDATASET_ATTR_STALE;
5945 } else if (topheader->type == sigtype)
5946 sigheader = topheader;
5948 if (covers == dns_rdatatype_any)
5950 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5953 * We're adding something that isn't a
5954 * negative cache entry. Look for an extant
5955 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5958 for (topheader = rbtnode->data;
5960 topheader = topheader->next) {
5961 if (topheader->type ==
5962 RBTDB_RDATATYPE_NCACHEANY)
5965 if (topheader != NULL && EXISTS(topheader) &&
5966 topheader->rdh_ttl > now) {
5970 if (trust < topheader->trust) {
5972 * The NXDOMAIN/NODATA(QTYPE=ANY)
5975 free_rdataset(rbtdb,
5978 if (addedrdataset != NULL)
5979 bind_rdataset(rbtdb, rbtnode,
5982 return (DNS_R_UNCHANGED);
5985 * The new rdataset is better. Expire the
5986 * NXDOMAIN/NODATA(QTYPE=ANY).
5988 set_ttl(rbtdb, topheader, 0);
5989 topheader->attributes |= RDATASET_ATTR_STALE;
5994 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
/*
 * Scan the top-level headers for one of the same type, or of the matching
 * negative type; topheader_prev trails the scan for later relinking.
 */
5998 for (topheader = rbtnode->data;
6000 topheader = topheader->next) {
6001 if (topheader->type == newheader->type ||
6002 topheader->type == negtype)
6004 topheader_prev = topheader;
6009 * If header isn't NULL, we've found the right type. There may be
6010 * IGNORE rdatasets between the top of the chain and the first real
6011 * data. We skip over them.
6014 while (header != NULL && IGNORE(header))
6015 header = header->down;
6016 if (header != NULL) {
6017 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6020 * Deleting an already non-existent rdataset has no effect.
6022 if (header_nx && newheader_nx) {
6023 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6024 return (DNS_R_UNCHANGED);
6028 * Trying to add an rdataset with lower trust to a cache DB
6029 * has no effect, provided that the cache data isn't stale.
6031 if (rbtversion == NULL && trust < header->trust &&
6032 (header->rdh_ttl > now || header_nx)) {
6033 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6034 if (addedrdataset != NULL)
6035 bind_rdataset(rbtdb, rbtnode, header, now,
6037 return (DNS_R_UNCHANGED);
6041 * Don't merge if a nonexistent rdataset is involved.
6043 if (merge && (header_nx || newheader_nx))
6047 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6048 * that is the union of 'newheader' and 'header'.
6051 unsigned int flags = 0;
6052 INSIST(rbtversion->serial >= header->serial);
6054 result = ISC_R_SUCCESS;
6056 if ((options & DNS_DBADD_EXACT) != 0)
6057 flags |= DNS_RDATASLAB_EXACT;
6058 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6059 newheader->rdh_ttl != header->rdh_ttl)
6060 result = DNS_R_NOTEXACT;
6061 else if (newheader->rdh_ttl != header->rdh_ttl)
6062 flags |= DNS_RDATASLAB_FORCE;
6063 if (result == ISC_R_SUCCESS)
6064 result = dns_rdataslab_merge(
6065 (unsigned char *)header,
6066 (unsigned char *)newheader,
6067 (unsigned int)(sizeof(*newheader)),
6069 rbtdb->common.rdclass,
6070 (dns_rdatatype_t)header->type,
6072 if (result == ISC_R_SUCCESS) {
6074 * If 'header' has the same serial number as
6075 * we do, we could clean it up now if we knew
6076 * that our caller had no references to it.
6077 * We don't know this, however, so we leave it
6078 * alone. It will get cleaned up when
6079 * clean_zone_node() runs.
6081 free_rdataset(rbtdb, rbtdb->common.mctx,
6083 newheader = (rdatasetheader_t *)merged;
6084 init_rdataset(rbtdb, newheader);
6085 if (loading && RESIGN(newheader) &&
6087 header->resign < newheader->resign)
6088 newheader->resign = header->resign;
6090 free_rdataset(rbtdb, rbtdb->common.mctx,
6096 * Don't replace existing NS, A and AAAA RRsets
6097 * in the cache if they are already exist. This
6098 * prevents named being locked to old servers.
6099 * Don't lower trust of existing record if the
6102 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6103 header->type == dns_rdatatype_ns &&
6104 !header_nx && !newheader_nx &&
6105 header->trust >= newheader->trust &&
6106 dns_rdataslab_equalx((unsigned char *)header,
6107 (unsigned char *)newheader,
6108 (unsigned int)(sizeof(*newheader)),
6109 rbtdb->common.rdclass,
6110 (dns_rdatatype_t)header->type)) {
6112 * Honour the new ttl if it is less than the
6115 if (header->rdh_ttl > newheader->rdh_ttl)
6116 set_ttl(rbtdb, header, newheader->rdh_ttl);
6117 if (header->noqname == NULL &&
6118 newheader->noqname != NULL) {
6119 header->noqname = newheader->noqname;
6120 newheader->noqname = NULL;
6122 if (header->closest == NULL &&
6123 newheader->closest != NULL) {
6124 header->closest = newheader->closest;
6125 newheader->closest = NULL;
6127 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6128 if (addedrdataset != NULL)
6129 bind_rdataset(rbtdb, rbtnode, header, now,
6131 return (ISC_R_SUCCESS);
6134 * If we have will be replacing a NS RRset force its TTL
6135 * to be no more than the current NS RRset's TTL. This
6136 * ensures the delegations that are withdrawn are honoured.
6138 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6139 header->type == dns_rdatatype_ns &&
6140 !header_nx && !newheader_nx &&
6141 header->trust <= newheader->trust) {
6142 if (newheader->rdh_ttl > header->rdh_ttl) {
6143 newheader->rdh_ttl = header->rdh_ttl;
6146 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6147 (header->type == dns_rdatatype_a ||
6148 header->type == dns_rdatatype_aaaa) &&
6149 !header_nx && !newheader_nx &&
6150 header->trust >= newheader->trust &&
6151 dns_rdataslab_equal((unsigned char *)header,
6152 (unsigned char *)newheader,
6153 (unsigned int)(sizeof(*newheader)))) {
6155 * Honour the new ttl if it is less than the
6158 if (header->rdh_ttl > newheader->rdh_ttl)
6159 set_ttl(rbtdb, header, newheader->rdh_ttl);
6160 if (header->noqname == NULL &&
6161 newheader->noqname != NULL) {
6162 header->noqname = newheader->noqname;
6163 newheader->noqname = NULL;
6165 if (header->closest == NULL &&
6166 newheader->closest != NULL) {
6167 header->closest = newheader->closest;
6168 newheader->closest = NULL;
6170 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6171 if (addedrdataset != NULL)
6172 bind_rdataset(rbtdb, rbtnode, header, now,
6174 return (ISC_R_SUCCESS);
/*
 * Link newheader into the list of top-level headers at the position of
 * topheader (after topheader_prev, or at the head of rbtnode->data).
 */
6176 INSIST(rbtversion == NULL ||
6177 rbtversion->serial >= topheader->serial);
6178 if (topheader_prev != NULL)
6179 topheader_prev->next = newheader;
6181 rbtnode->data = newheader;
6182 newheader->next = topheader->next;
6185 * There are no other references to 'header' when
6186 * loading, so we MAY clean up 'header' now.
6187 * Since we don't generate changed records when
6188 * loading, we MUST clean up 'header' now.
6190 newheader->down = NULL;
6191 free_rdataset(rbtdb, rbtdb->common.mctx, header);
6193 newheader->down = topheader;
6194 topheader->next = newheader;
6196 if (changed != NULL)
6197 changed->dirty = ISC_TRUE;
6198 if (rbtversion == NULL) {
6199 set_ttl(rbtdb, header, 0);
6200 header->attributes |= RDATASET_ATTR_STALE;
6201 if (sigheader != NULL) {
6202 set_ttl(rbtdb, sigheader, 0);
6203 sigheader->attributes |=
6204 RDATASET_ATTR_STALE;
/*
 * Register the new header in the per-lock-bucket structures: the rdatasets
 * list and the heap for a cache, or the heap used by resign_insert() for a
 * header flagged RESIGN in a non-cache database.
 */
6207 idx = newheader->node->locknum;
6208 if (IS_CACHE(rbtdb)) {
6209 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6212 * XXXMLG We don't check the return value
6213 * here. If it fails, we will not do TTL
6214 * based expiry on this node. However, we
6215 * will do it on the LRU side, so memory
6216 * will not leak... for long.
6218 isc_heap_insert(rbtdb->heaps[idx], newheader);
6219 } else if (RESIGN(newheader))
6220 resign_insert(rbtdb, idx, newheader);
6224 * No non-IGNORED rdatasets of the given type exist at
6229 * If we're trying to delete the type, don't bother.
6232 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6233 return (DNS_R_UNCHANGED);
6236 if (topheader != NULL) {
6238 * We have an list of rdatasets of the given type,
6239 * but they're all marked IGNORE. We simply insert
6240 * the new rdataset at the head of the list.
6242 * Ignored rdatasets cannot occur during loading, so
6246 INSIST(rbtversion == NULL ||
6247 rbtversion->serial >= topheader->serial);
6248 if (topheader_prev != NULL)
6249 topheader_prev->next = newheader;
6251 rbtnode->data = newheader;
6252 newheader->next = topheader->next;
6253 newheader->down = topheader;
6254 topheader->next = newheader;
6256 if (changed != NULL)
6257 changed->dirty = ISC_TRUE;
6260 * No rdatasets of the given type exist at the node.
6262 newheader->next = rbtnode->data;
6263 newheader->down = NULL;
6264 rbtnode->data = newheader;
6266 idx = newheader->node->locknum;
6267 if (IS_CACHE(rbtdb)) {
6268 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6270 isc_heap_insert(rbtdb->heaps[idx], newheader);
6271 } else if (RESIGN(newheader)) {
6272 resign_insert(rbtdb, idx, newheader);
6277 * Check if the node now contains CNAME and other data.
6279 if (rbtversion != NULL &&
6280 cname_and_other_data(rbtnode, rbtversion->serial))
6281 return (DNS_R_CNAMEANDOTHER);
6283 if (addedrdataset != NULL)
6284 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6286 return (ISC_R_SUCCESS);
/*
 * delegating_type: classify type as a delegation type at node.
 *
 * The visible tests are:
 *   - cache database: type is DNAME;
 *   - any other database: type is DNAME, or type is NS at a node that is
 *     either not the origin node or belongs to a stub database.
 *
 * NOTE(review): the return statements are not among the lines shown here;
 * presumably the function returns true exactly when one of the tests above
 * holds -- confirm.
 */
6289 static inline isc_boolean_t
6290 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6291 rbtdb_rdatatype_t type)
6293 if (IS_CACHE(rbtdb)) {
6294 if (type == dns_rdatatype_dname)
6298 } else if (type == dns_rdatatype_dname ||
6299 (type == dns_rdatatype_ns &&
6300 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * addnoqname: copy the noqname data attached to rdataset into a newly
 * allocated struct noqname and hang it on newheader->noqname.
 *
 * The name and the two rdatasets (neg, negsig) are fetched with
 * dns_rdataset_getnoqname(); failure of that call is fatal
 * (RUNTIME_CHECK). The name is duplicated and each rdataset is converted
 * to a slab in the database memory context; the record type of neg is
 * stored in noqname->type. On success both temporary rdatasets are
 * disassociated and ISC_R_SUCCESS is returned.
 *
 * On failure the visible tail disassociates both rdatasets and releases
 * the partially built structure with free_noqname(); ISC_R_NOMEMORY is the
 * result set when the structure itself cannot be allocated.
 *
 * NOTE(review): the declarations of name and r, the jumps to the failure
 * tail and its return statement are not among the lines shown here --
 * confirm.
 */
6305 static inline isc_result_t
6306 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6307 dns_rdataset_t *rdataset)
6309 struct noqname *noqname;
6310 isc_mem_t *mctx = rbtdb->common.mctx;
6312 dns_rdataset_t neg, negsig;
6313 isc_result_t result;
6316 dns_name_init(&name, NULL);
6317 dns_rdataset_init(&neg);
6318 dns_rdataset_init(&negsig);
6320 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6321 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6323 noqname = isc_mem_get(mctx, sizeof(*noqname));
6324 if (noqname == NULL) {
6325 result = ISC_R_NOMEMORY;
6328 dns_name_init(&noqname->name, NULL);
6329 noqname->neg = NULL;
6330 noqname->negsig = NULL;
6331 noqname->type = neg.type;
6332 result = dns_name_dup(&name, mctx, &noqname->name);
6333 if (result != ISC_R_SUCCESS)
6335 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6336 if (result != ISC_R_SUCCESS)
6338 noqname->neg = r.base;
6339 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6340 if (result != ISC_R_SUCCESS)
6342 noqname->negsig = r.base;
6343 dns_rdataset_disassociate(&neg);
6344 dns_rdataset_disassociate(&negsig);
6345 newheader->noqname = noqname;
6346 return (ISC_R_SUCCESS);
6349 dns_rdataset_disassociate(&neg);
6350 dns_rdataset_disassociate(&negsig);
6351 free_noqname(mctx, &noqname);
/*
 * addclosest: copy the closest data attached to rdataset into a newly
 * allocated struct noqname and hang it on newheader->closest.
 *
 * This mirrors addnoqname() but fetches its input with
 * dns_rdataset_getclosest(); failure of that call is fatal
 * (RUNTIME_CHECK). The name is duplicated and the neg / negsig rdatasets
 * are converted to slabs in the database memory context; the record type
 * of neg is stored in closest->type. On success both temporary rdatasets
 * are disassociated and ISC_R_SUCCESS is returned.
 *
 * On failure the visible tail disassociates both rdatasets and releases
 * the partially built structure with free_noqname(); ISC_R_NOMEMORY is the
 * result set when the structure itself cannot be allocated.
 *
 * NOTE(review): the declarations of name and r, the jumps to the failure
 * tail and its return statement are not among the lines shown here --
 * confirm.
 */
6355 static inline isc_result_t
6356 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6357 dns_rdataset_t *rdataset)
6359 struct noqname *closest;
6360 isc_mem_t *mctx = rbtdb->common.mctx;
6362 dns_rdataset_t neg, negsig;
6363 isc_result_t result;
6366 dns_name_init(&name, NULL);
6367 dns_rdataset_init(&neg);
6368 dns_rdataset_init(&negsig);
6370 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6371 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6373 closest = isc_mem_get(mctx, sizeof(*closest));
6374 if (closest == NULL) {
6375 result = ISC_R_NOMEMORY;
6378 dns_name_init(&closest->name, NULL);
6379 closest->neg = NULL;
6380 closest->negsig = NULL;
6381 closest->type = neg.type;
6382 result = dns_name_dup(&name, mctx, &closest->name);
6383 if (result != ISC_R_SUCCESS)
6385 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6386 if (result != ISC_R_SUCCESS)
6388 closest->neg = r.base;
6389 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6390 if (result != ISC_R_SUCCESS)
6392 closest->negsig = r.base;
6393 dns_rdataset_disassociate(&neg);
6394 dns_rdataset_disassociate(&negsig);
6395 newheader->closest = closest;
6396 return (ISC_R_SUCCESS);
6399 dns_rdataset_disassociate(&neg);
6400 dns_rdataset_disassociate(&negsig);
6401 free_noqname(mctx, &closest);
/*
 * Forward declaration of the zone method table, placed ahead of
 * addrdataset(), which compares rbtdb->common.methods against its address.
 */
6405 static dns_dbmethods_t zone_methods;
6408 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6409 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6410 dns_rdataset_t *addedrdataset)
6412 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6413 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6414 rbtdb_version_t *rbtversion = version;
6415 isc_region_t region;
6416 rdatasetheader_t *newheader;
6417 rdatasetheader_t *header;
6418 isc_result_t result;
6419 isc_boolean_t delegating;
6420 isc_boolean_t newnsec;
6421 isc_boolean_t tree_locked = ISC_FALSE;
6422 isc_boolean_t cache_is_overmem = ISC_FALSE;
6424 REQUIRE(VALID_RBTDB(rbtdb));
6425 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6427 if (rbtdb->common.methods == &zone_methods)
6428 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6429 (rdataset->type == dns_rdatatype_nsec3 ||
6430 rdataset->covers == dns_rdatatype_nsec3)) ||
6431 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6432 rdataset->type != dns_rdatatype_nsec3 &&
6433 rdataset->covers != dns_rdatatype_nsec3)));
6435 if (rbtversion == NULL) {
6437 isc_stdtime_get(&now);
6441 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6442 ®ion, sizeof(rdatasetheader_t));
6443 if (result != ISC_R_SUCCESS)
6446 newheader = (rdatasetheader_t *)region.base;
6447 init_rdataset(rbtdb, newheader);
6448 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6449 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6451 newheader->attributes = 0;
6452 newheader->noqname = NULL;
6453 newheader->closest = NULL;
6454 newheader->count = init_count++;
6455 newheader->trust = rdataset->trust;
6456 newheader->additional_auth = NULL;
6457 newheader->additional_glue = NULL;
6458 newheader->last_used = now;
6459 newheader->node = rbtnode;
6460 if (rbtversion != NULL) {
6461 newheader->serial = rbtversion->serial;
6464 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6465 newheader->attributes |= RDATASET_ATTR_RESIGN;
6466 newheader->resign = rdataset->resign;
6468 newheader->resign = 0;
6470 newheader->serial = 1;
6471 newheader->resign = 0;
6472 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6473 newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6474 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6475 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6476 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6477 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6478 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6479 result = addnoqname(rbtdb, newheader, rdataset);
6480 if (result != ISC_R_SUCCESS) {
6481 free_rdataset(rbtdb, rbtdb->common.mctx,
6486 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6487 result = addclosest(rbtdb, newheader, rdataset);
6488 if (result != ISC_R_SUCCESS) {
6489 free_rdataset(rbtdb, rbtdb->common.mctx,
6497 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6498 * just DNAME for the cache), then we need to set the callback bit
6501 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6502 delegating = ISC_TRUE;
6504 delegating = ISC_FALSE;
6507 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6509 if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6510 rdataset->type == dns_rdatatype_nsec)
6513 newnsec = ISC_FALSE;
6516 * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6517 * or the DB is a cache in an overmem state, hold an exclusive lock on
6518 * the tree. In the latter case the lock does not necessarily have to
6519 * be acquired but it will help purge stale entries more effectively.
6521 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6522 cache_is_overmem = ISC_TRUE;
6523 if (delegating || newnsec || cache_is_overmem) {
6524 tree_locked = ISC_TRUE;
6525 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6528 if (cache_is_overmem)
6529 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6531 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6532 isc_rwlocktype_write);
6534 if (rbtdb->rrsetstats != NULL) {
6535 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6536 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6539 if (IS_CACHE(rbtdb)) {
6541 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6543 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6544 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6545 expire_header(rbtdb, header, tree_locked);
6548 * If we've been holding a write lock on the tree just for
6549 * cleaning, we can release it now. However, we still need the
6552 if (tree_locked && !delegating && !newnsec) {
6553 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6554 tree_locked = ISC_FALSE;
6558 result = ISC_R_SUCCESS;
6560 dns_fixedname_t fname;
6562 dns_rbtnode_t *nsecnode;
6564 dns_fixedname_init(&fname);
6565 name = dns_fixedname_name(&fname);
6566 dns_rbt_fullnamefromnode(rbtnode, name);
6568 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6569 if (result == ISC_R_SUCCESS) {
6570 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6571 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6572 } else if (result == ISC_R_EXISTS) {
6573 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6574 result = ISC_R_SUCCESS;
6578 if (result == ISC_R_SUCCESS)
6579 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6580 ISC_FALSE, addedrdataset, now);
6581 if (result == ISC_R_SUCCESS && delegating)
6582 rbtnode->find_callback = 1;
6584 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6585 isc_rwlocktype_write);
6588 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6591 * Update the zone's secure status. If version is non-NULL
6592 * this is deferred until closeversion() is called.
6594 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6595 iszonesecure(db, version, rbtdb->origin_node);
/*
 * subtractrdataset: remove the rdata contained in 'rdataset' from the
 * rdataset of the same type stored at 'node', recording the outcome as a
 * new header for the writable version 'version'.
 *
 * 'version' must be non-NULL and belong to this database.  For zone
 * databases the node and the rdataset must agree on whether they are
 * NSEC3 related.
 *
 * Visible results: ISC_R_NOMEMORY when add_changed() or new_rdataset()
 * fails; DNS_R_NOTEXACT when DNS_DBSUB_EXACT is requested and either the
 * TTLs differ or no existing rdataset is found; DNS_R_UNCHANGED when no
 * existing rdataset is found and DNS_DBSUB_EXACT is not set; otherwise
 * whatever dns_rdataslab_fromrdataset() / dns_rdataslab_subtract()
 * report.  On success 'newrdataset', if non-NULL, is bound to the new
 * header.
 *
 * NOTE(review): several short lines of this function (braces, 'else',
 * 'goto', 'return') are not visible in this copy; the comments below only
 * describe what the visible statements establish.
 */
6601 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6602 dns_rdataset_t *rdataset, unsigned int options,
6603 dns_rdataset_t *newrdataset)
6605 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6606 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6607 rbtdb_version_t *rbtversion = version;
6608 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6609 unsigned char *subresult;
6610 isc_region_t region;
6611 isc_result_t result;
6612 rbtdb_changed_t *changed;
6614 REQUIRE(VALID_RBTDB(rbtdb));
6615 REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
6617 if (rbtdb->common.methods == &zone_methods)
6618 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6619 (rdataset->type == dns_rdatatype_nsec3 ||
6620 rdataset->covers == dns_rdatatype_nsec3)) ||
6621 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6622 rdataset->type != dns_rdatatype_nsec3 &&
6623 rdataset->covers != dns_rdatatype_nsec3)));
/*
 * Convert the caller's rdataset into a slab with room reserved in front
 * for an rdatasetheader_t, then fill in that header.  This temporary
 * header only carries the data to subtract.
 */
6625 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6627 sizeof(rdatasetheader_t));
6628 if (result != ISC_R_SUCCESS)
6630 newheader = (rdatasetheader_t *)region.base;
6631 init_rdataset(rbtdb, newheader);
6632 set_ttl(rbtdb, newheader, rdataset->ttl);
6633 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6635 newheader->attributes = 0;
6636 newheader->serial = rbtversion->serial;
6637 newheader->trust = 0;
6638 newheader->noqname = NULL;
6639 newheader->closest = NULL;
6640 newheader->count = init_count++;
6641 newheader->additional_auth = NULL;
6642 newheader->additional_glue = NULL;
6643 newheader->last_used = 0;
6644 newheader->node = rbtnode;
6645 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6646 newheader->attributes |= RDATASET_ATTR_RESIGN;
6647 newheader->resign = rdataset->resign;
6649 newheader->resign = 0;
/* Everything from here to the NODE_UNLOCK near the end runs under the node's write lock. */
6651 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6652 isc_rwlocktype_write);
/* Register the node as changed in this version; failure is treated as out of memory. */
6654 changed = add_changed(rbtdb, rbtversion, rbtnode);
6655 if (changed == NULL) {
6656 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6657 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6658 isc_rwlocktype_write);
6659 return (ISC_R_NOMEMORY);
/* Walk the node's top-level header chain looking for the same type, remembering the predecessor for relinking. */
6662 topheader_prev = NULL;
6663 for (topheader = rbtnode->data;
6665 topheader = topheader->next) {
6666 if (topheader->type == newheader->type)
6668 topheader_prev = topheader;
6671 * If header isn't NULL, we've found the right type. There may be
6672 * IGNORE rdatasets between the top of the chain and the first real
6673 * data. We skip over them.
6676 while (header != NULL && IGNORE(header))
6677 header = header->down;
6678 if (header != NULL && EXISTS(header)) {
6679 unsigned int flags = 0;
6681 result = ISC_R_SUCCESS;
6682 if ((options & DNS_DBSUB_EXACT) != 0) {
6683 flags |= DNS_RDATASLAB_EXACT;
6684 if (newheader->rdh_ttl != header->rdh_ttl)
6685 result = DNS_R_NOTEXACT;
6687 if (result == ISC_R_SUCCESS)
6688 result = dns_rdataslab_subtract(
6689 (unsigned char *)header,
6690 (unsigned char *)newheader,
6691 (unsigned int)(sizeof(*newheader)),
6693 rbtdb->common.rdclass,
6694 (dns_rdatatype_t)header->type,
6696 if (result == ISC_R_SUCCESS) {
6697 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6698 newheader = (rdatasetheader_t *)subresult;
6699 init_rdataset(rbtdb, newheader);
6701 * We have to set the serial since the rdataslab
6702 * subtraction routine copies the reserved portion of
6703 * header, not newheader.
6705 newheader->serial = rbtversion->serial;
6707 * XXXJT: dns_rdataslab_subtract() copied the pointers
6708 * to additional info. We need to clear these fields
6709 * to avoid having duplicated references.
6711 newheader->additional_auth = NULL;
6712 newheader->additional_glue = NULL;
6713 } else if (result == DNS_R_NXRRSET) {
6715 * This subtraction would remove all of the rdata;
6716 * add a nonexistent header instead.
6718 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6719 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6720 if (newheader == NULL) {
6721 result = ISC_R_NOMEMORY;
6724 set_ttl(rbtdb, newheader, 0);
6725 newheader->type = topheader->type;
6726 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6727 newheader->trust = 0;
6728 newheader->serial = rbtversion->serial;
6729 newheader->noqname = NULL;
6730 newheader->closest = NULL;
6731 newheader->count = 0;
6732 newheader->additional_auth = NULL;
6733 newheader->additional_glue = NULL;
6734 newheader->node = rbtnode;
6735 newheader->resign = 0;
6736 newheader->last_used = 0;
6738 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6743 * If we're here, we want to link newheader in front of
6746 INSIST(rbtversion->serial >= topheader->serial);
6747 if (topheader_prev != NULL)
6748 topheader_prev->next = newheader;
6750 rbtnode->data = newheader;
6751 newheader->next = topheader->next;
6752 newheader->down = topheader;
6753 topheader->next = newheader;
6755 changed->dirty = ISC_TRUE;
6758 * The rdataset doesn't exist, so we don't need to do anything
6759 * to satisfy the deletion request.
6761 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6762 if ((options & DNS_DBSUB_EXACT) != 0)
6763 result = DNS_R_NOTEXACT;
6765 result = DNS_R_UNCHANGED;
6768 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6769 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6772 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6773 isc_rwlocktype_write);
6776 * Update the zone's secure status. If version is non-NULL
6777 * this is deferred until closeversion() is called.
/*
 * NOTE(review): 'version' was required to be non-NULL on entry (via
 * rbtversion), so the 'version == NULL' test below cannot be true on any
 * path through this function.
 */
6779 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6780 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6786 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6787 dns_rdatatype_t type, dns_rdatatype_t covers)
6789 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6790 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6791 rbtdb_version_t *rbtversion = version;
6792 isc_result_t result;
6793 rdatasetheader_t *newheader;
6795 REQUIRE(VALID_RBTDB(rbtdb));
6796 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6798 if (type == dns_rdatatype_any)
6799 return (ISC_R_NOTIMPLEMENTED);
6800 if (type == dns_rdatatype_rrsig && covers == 0)
6801 return (ISC_R_NOTIMPLEMENTED);
6803 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6804 if (newheader == NULL)
6805 return (ISC_R_NOMEMORY);
6806 set_ttl(rbtdb, newheader, 0);
6807 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6808 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6809 newheader->trust = 0;
6810 newheader->noqname = NULL;
6811 newheader->closest = NULL;
6812 newheader->additional_auth = NULL;
6813 newheader->additional_glue = NULL;
6814 if (rbtversion != NULL)
6815 newheader->serial = rbtversion->serial;
6817 newheader->serial = 0;
6818 newheader->count = 0;
6819 newheader->last_used = 0;
6820 newheader->node = rbtnode;
6822 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6823 isc_rwlocktype_write);
6825 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6826 ISC_FALSE, NULL, 0);
6828 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6829 isc_rwlocktype_write);
6832 * Update the zone's secure status. If version is non-NULL
6833 * this is deferred until closeversion() is called.
6835 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6836 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6842 * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
6845 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6846 isc_boolean_t hasnsec)
6848 isc_result_t noderesult, nsecresult;
6849 dns_rbtnode_t *nsecnode;
6851 noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
6854 if (noderesult == ISC_R_SUCCESS)
6855 dns_rpz_cidr_addip(rbtdb->rpz_cidr, name);
6859 return (noderesult);
6860 if (noderesult == ISC_R_EXISTS) {
6862 * Add a node to the auxiliary NSEC tree for an old node
6863 * just now getting an NSEC record.
6865 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6866 return (noderesult);
6867 } else if (noderesult != ISC_R_SUCCESS) {
6868 return (noderesult);
6872 * Build the auxiliary tree for NSECs as we go.
6873 * This tree speeds searches for closest NSECs that would otherwise
6874 * need to examine many irrelevant nodes in large TLDs.
6876 * Add nodes to the auxiliary tree after corresponding nodes have
6877 * been added to the main tree.
6880 nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6881 if (nsecresult == ISC_R_SUCCESS) {
6882 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6883 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6884 return (noderesult);
6887 if (nsecresult == ISC_R_EXISTS) {
6889 isc_log_write(dns_lctx,
6890 DNS_LOGCATEGORY_DATABASE,
6891 DNS_LOGMODULE_CACHE,
6893 "addnode: NSEC node already exists");
6895 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6896 return (noderesult);
6899 nsecresult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6900 if (nsecresult != ISC_R_SUCCESS)
6901 isc_log_write(dns_lctx,
6902 DNS_LOGCATEGORY_DATABASE,
6903 DNS_LOGMODULE_CACHE,
6905 "loading_addrdataset: "
6906 "dns_rbt_deletenode: %s after "
6907 "dns_rbt_addnode(NSEC): %s",
6908 isc_result_totext(nsecresult),
6909 isc_result_totext(noderesult));
6910 return (noderesult);
/*
 * loading_addrdataset: the add-rdataset callback handed out by beginload().
 * 'arg' is the rbtdb_load_t load context, 'name' the owner name and
 * 'rdataset' the data to store.
 *
 * Visible checks: an SOA that is not at the zone origin yields
 * DNS_R_NOTZONETOP (zone databases only); an NS rdataset at a wildcard
 * owner yields DNS_R_INVALIDNS; an NSEC3 rdataset at a wildcard owner
 * yields DNS_R_INVALIDNSEC3.
 *
 * NSEC3 data (type or covered type) goes into the nsec3 tree; everything
 * else goes through loadnode(), which is asked to maintain the auxiliary
 * NSEC tree only for NSEC rdatasets.  The data is converted to a slab,
 * given a header with serial 1, and merged into the current version with
 * add(); DNS_R_UNCHANGED from add() is reported as ISC_R_SUCCESS.
 *
 * NOTE(review): the short 'return'/'else' lines of this function are not
 * visible in this copy.
 */
6914 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6915 rbtdb_load_t *loadctx = arg;
6916 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6917 dns_rbtnode_t *node;
6918 isc_result_t result;
6919 isc_region_t region;
6920 rdatasetheader_t *newheader;
6923 * This routine does no node locking. See comments in
6924 * 'load' below for more information on loading and
6930 * SOA records are only allowed at top of zone.
6932 if (rdataset->type == dns_rdatatype_soa &&
6933 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6934 return (DNS_R_NOTZONETOP);
6936 if (rdataset->type != dns_rdatatype_nsec3 &&
6937 rdataset->covers != dns_rdatatype_nsec3)
6938 add_empty_wildcards(rbtdb, name);
6940 if (dns_name_iswildcard(name)) {
6942 * NS record owners cannot legally be wild cards.
6944 if (rdataset->type == dns_rdatatype_ns)
6945 return (DNS_R_INVALIDNS);
6947 * NSEC3 record owners cannot legally be wild cards.
6949 if (rdataset->type == dns_rdatatype_nsec3)
6950 return (DNS_R_INVALIDNSEC3);
6951 result = add_wildcard_magic(rbtdb, name);
6952 if (result != ISC_R_SUCCESS)
/* Pick the tree for this rdataset and find or create its node. */
6957 if (rdataset->type == dns_rdatatype_nsec3 ||
6958 rdataset->covers == dns_rdatatype_nsec3) {
6959 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6960 if (result == ISC_R_SUCCESS)
6961 node->nsec = DNS_RBT_NSEC_NSEC3;
6962 } else if (rdataset->type == dns_rdatatype_nsec) {
6963 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6965 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6967 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
/* A freshly created node still needs its lock bucket number assigned. */
6969 if (result == ISC_R_SUCCESS) {
6970 dns_name_t foundname;
6971 dns_name_init(&foundname, NULL);
6972 dns_rbt_namefromnode(node, &foundname);
6973 #ifdef DNS_RBT_USEHASH
6974 node->locknum = node->hashval % rbtdb->node_lock_count;
6976 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6977 rbtdb->node_lock_count;
/* Convert the rdataset to a slab with space reserved for the header, then fill the header in. */
6981 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6983 sizeof(rdatasetheader_t));
6984 if (result != ISC_R_SUCCESS)
6986 newheader = (rdatasetheader_t *)region.base;
6987 init_rdataset(rbtdb, newheader);
6988 set_ttl(rbtdb, newheader,
6989 rdataset->ttl + loadctx->now); /* XXX overflow check */
6990 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6992 newheader->attributes = 0;
6993 newheader->trust = rdataset->trust;
6994 newheader->serial = 1;
6995 newheader->noqname = NULL;
6996 newheader->closest = NULL;
6997 newheader->count = init_count++;
6998 newheader->additional_auth = NULL;
6999 newheader->additional_glue = NULL;
7000 newheader->last_used = 0;
7001 newheader->node = node;
7002 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
7003 newheader->attributes |= RDATASET_ATTR_RESIGN;
7004 newheader->resign = rdataset->resign;
7006 newheader->resign = 0;
/* Merge into the current version; delegating types additionally mark the node with find_callback. */
7008 result = add(rbtdb, node, rbtdb->current_version, newheader,
7009 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
7010 if (result == ISC_R_SUCCESS &&
7011 delegating_type(rbtdb, node, rdataset->type))
7012 node->find_callback = 1;
7013 else if (result == DNS_R_UNCHANGED)
7014 result = ISC_R_SUCCESS;
7020 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
7021 rbtdb_load_t *loadctx;
7024 rbtdb = (dns_rbtdb_t *)db;
7026 REQUIRE(VALID_RBTDB(rbtdb));
7028 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
7029 if (loadctx == NULL)
7030 return (ISC_R_NOMEMORY);
7032 loadctx->rbtdb = rbtdb;
7033 if (IS_CACHE(rbtdb))
7034 isc_stdtime_get(&loadctx->now);
7038 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7040 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7042 rbtdb->attributes |= RBTDB_ATTR_LOADING;
7044 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7046 *addp = loading_addrdataset;
7049 return (ISC_R_SUCCESS);
7053 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
7054 rbtdb_load_t *loadctx;
7055 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7057 REQUIRE(VALID_RBTDB(rbtdb));
7058 REQUIRE(dbloadp != NULL);
7060 REQUIRE(loadctx->rbtdb == rbtdb);
7062 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7064 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7065 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7067 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7068 rbtdb->attributes |= RBTDB_ATTR_LOADED;
7070 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7073 * If there's a KEY rdataset at the zone origin containing a
7074 * zone key, we consider the zone secure.
7076 if (! IS_CACHE(rbtdb))
7077 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7081 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7083 return (ISC_R_SUCCESS);
/*
 * dump: write 'version' of the database to 'filename' in 'masterformat'.
 * 'version' may be NULL; when it is not, it must belong to this database.
 * The work is delegated to dns_master_dump2() using
 * dns_master_style_default, and its result is returned.
 *
 * NOTE(review): the trailing UNUSED()/ISC_R_NOTIMPLEMENTED statements
 * follow an unconditional return, so they presumably belong to an
 * alternate conditional-compilation branch whose directives are not
 * visible in this copy -- confirm against the full file.
 */
7087 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7088 dns_masterformat_t masterformat) {
7090 rbtdb_version_t *rbtversion = version;
7092 rbtdb = (dns_rbtdb_t *)db;
7094 REQUIRE(VALID_RBTDB(rbtdb));
7095 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7098 return (dns_master_dump2(rbtdb->common.mctx, db, version,
7099 &dns_master_style_default,
7100 filename, masterformat));
7104 UNUSED(masterformat);
7106 return (ISC_R_NOTIMPLEMENTED);
7111 delete_callback(void *data, void *arg) {
7112 dns_rbtdb_t *rbtdb = arg;
7113 rdatasetheader_t *current, *next;
7114 unsigned int locknum;
7117 locknum = current->node->locknum;
7118 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7119 while (current != NULL) {
7120 next = current->next;
7121 free_rdataset(rbtdb, rbtdb->common.mctx, current);
7124 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7127 static isc_boolean_t
7128 issecure(dns_db_t *db) {
7130 isc_boolean_t secure;
7132 rbtdb = (dns_rbtdb_t *)db;
7134 REQUIRE(VALID_RBTDB(rbtdb));
7136 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7137 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7138 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7143 static isc_boolean_t
7144 isdnssec(dns_db_t *db) {
7146 isc_boolean_t dnssec;
7148 rbtdb = (dns_rbtdb_t *)db;
7150 REQUIRE(VALID_RBTDB(rbtdb));
7152 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7153 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7154 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7160 nodecount(dns_db_t *db) {
7164 rbtdb = (dns_rbtdb_t *)db;
7166 REQUIRE(VALID_RBTDB(rbtdb));
7168 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7169 count = dns_rbt_nodecount(rbtdb->tree);
7170 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7176 settask(dns_db_t *db, isc_task_t *task) {
7179 rbtdb = (dns_rbtdb_t *)db;
7181 REQUIRE(VALID_RBTDB(rbtdb));
7183 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7184 if (rbtdb->task != NULL)
7185 isc_task_detach(&rbtdb->task);
7187 isc_task_attach(task, &rbtdb->task);
7188 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7191 static isc_boolean_t
7192 ispersistent(dns_db_t *db) {
7198 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7199 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7200 dns_rbtnode_t *onode;
7201 isc_result_t result = ISC_R_SUCCESS;
7203 REQUIRE(VALID_RBTDB(rbtdb));
7204 REQUIRE(nodep != NULL && *nodep == NULL);
7206 /* Note that the access to origin_node doesn't require a DB lock */
7207 onode = (dns_rbtnode_t *)rbtdb->origin_node;
7208 if (onode != NULL) {
7209 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7210 new_reference(rbtdb, onode);
7211 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7213 *nodep = rbtdb->origin_node;
7215 INSIST(IS_CACHE(rbtdb));
7216 result = ISC_R_NOTFOUND;
7223 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7224 isc_uint8_t *flags, isc_uint16_t *iterations,
7225 unsigned char *salt, size_t *salt_length)
7228 isc_result_t result = ISC_R_NOTFOUND;
7229 rbtdb_version_t *rbtversion = version;
7231 rbtdb = (dns_rbtdb_t *)db;
7233 REQUIRE(VALID_RBTDB(rbtdb));
7234 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7236 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7238 if (rbtversion == NULL)
7239 rbtversion = rbtdb->current_version;
7241 if (rbtversion->havensec3) {
7243 *hash = rbtversion->hash;
7244 if (salt != NULL && salt_length != NULL) {
7245 REQUIRE(*salt_length >= rbtversion->salt_length);
7246 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
7248 if (salt_length != NULL)
7249 *salt_length = rbtversion->salt_length;
7250 if (iterations != NULL)
7251 *iterations = rbtversion->iterations;
7253 *flags = rbtversion->flags;
7254 result = ISC_R_SUCCESS;
7256 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * setsigningtime: set the re-signing time of the header behind 'rdataset'
 * to 'resign' and keep the per-bucket heap consistent with the new value.
 * Only valid for non-cache databases.
 *
 * Under the node's write lock the old resign value is saved and replaced.
 * If the header is already in a heap (heap_index != 0) it must carry the
 * RESIGN attribute; the visible statements either remove it from the heap
 * and clear heap_index, or call isc_heap_increased() when the new time is
 * smaller and isc_heap_decreased() when it is larger.  If the header is
 * not in a heap and 'resign' is non-zero, the RESIGN attribute is set and
 * the header is inserted with resign_insert(), whose result becomes the
 * result stored for this function.
 *
 * NOTE(review): the line that derives the header from rdataset->private3
 * beyond the plain assignment, and the condition guarding the
 * isc_heap_delete() branch, are not visible in this copy -- confirm
 * against the full file.
 */
7262 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7263 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7264 isc_stdtime_t oldresign;
7265 isc_result_t result = ISC_R_SUCCESS;
7266 rdatasetheader_t *header;
7268 REQUIRE(VALID_RBTDB(rbtdb));
7269 REQUIRE(!IS_CACHE(rbtdb));
7270 REQUIRE(rdataset != NULL);
7272 header = rdataset->private3;
7275 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7276 isc_rwlocktype_write);
7278 oldresign = header->resign;
7279 header->resign = resign;
7280 if (header->heap_index != 0) {
7281 INSIST(RESIGN(header));
7283 isc_heap_delete(rbtdb->heaps[header->node->locknum],
7284 header->heap_index);
7285 header->heap_index = 0;
7286 } else if (resign < oldresign)
7287 isc_heap_increased(rbtdb->heaps[header->node->locknum],
7288 header->heap_index);
7289 else if (resign > oldresign)
7290 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7291 header->heap_index);
7292 } else if (resign && header->heap_index == 0) {
7293 header->attributes |= RDATASET_ATTR_RESIGN;
7294 result = resign_insert(rbtdb, header->node->locknum, header);
7296 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7297 isc_rwlocktype_write);
/*
 * getsigningtime: find the header with the earliest re-signing time across
 * all lock buckets and bind it to 'rdataset'; when 'foundname' is non-NULL
 * it receives the full name of the header's node.
 *
 * The tree read lock is held for the whole search.  For each bucket the
 * node lock is taken for reading and the first heap element (index 1) is
 * examined.  Times are compared with isc_serial_lt().  When a bucket's
 * candidate displaces the current best, the lock of the previous best's
 * bucket is released; when it does not, the bucket's own lock is released.
 * After binding, the lock belonging to the chosen header's bucket is
 * released.
 *
 * The result starts out as ISC_R_NOTFOUND and is set to ISC_R_SUCCESS once
 * a header has been bound.
 *
 * NOTE(review): the lines that test for an empty bucket, assign the
 * candidate to 'header', and skip the binding when nothing was found are
 * not visible in this copy.
 */
7302 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7303 dns_name_t *foundname)
7305 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7306 rdatasetheader_t *header = NULL, *this;
7308 isc_result_t result = ISC_R_NOTFOUND;
7309 unsigned int locknum;
7311 REQUIRE(VALID_RBTDB(rbtdb));
7313 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7315 for (i = 0; i < rbtdb->node_lock_count; i++) {
7316 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7317 this = isc_heap_element(rbtdb->heaps[i], 1);
7319 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7320 isc_rwlocktype_read);
7325 else if (isc_serial_lt(this->resign, header->resign)) {
7326 locknum = header->node->locknum;
7327 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7328 isc_rwlocktype_read);
7331 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7332 isc_rwlocktype_read);
7338 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7340 if (foundname != NULL)
7341 dns_rbt_fullnamefromnode(header->node, foundname);
7343 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7344 isc_rwlocktype_read);
7346 result = ISC_R_SUCCESS;
7349 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7355 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7357 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7358 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7359 dns_rbtnode_t *node;
7360 rdatasetheader_t *header;
7362 REQUIRE(VALID_RBTDB(rbtdb));
7363 REQUIRE(rdataset != NULL);
7364 REQUIRE(rdataset->methods == &rdataset_methods);
7365 REQUIRE(rbtdb->future_version == rbtversion);
7366 REQUIRE(rbtversion != NULL);
7367 REQUIRE(rbtversion->writer);
7368 REQUIRE(rbtversion->rbtdb == rbtdb);
7370 node = rdataset->private2;
7371 INSIST(node != NULL);
7372 header = rdataset->private3;
7373 INSIST(header != NULL);
7376 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7377 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7378 isc_rwlocktype_write);
7380 * Delete from heap and save to re-signed list so that it can
7381 * be restored if we backout of this change.
7383 new_reference(rbtdb, node);
7384 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7385 header->heap_index = 0;
7386 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7388 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7389 isc_rwlocktype_write);
7390 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7393 static dns_stats_t *
7394 getrrsetstats(dns_db_t *db) {
7395 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7397 REQUIRE(VALID_RBTDB(rbtdb));
7398 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7400 return (rbtdb->rrsetstats);
/*
 * Method table installed as common.methods by the database constructor for
 * every database type other than dns_dbtype_cache (including stubs).
 * NOTE(review): the table entries are not visible in this copy.
 */
7403 static dns_dbmethods_t zone_methods = {
/*
 * Method table installed as common.methods by the database constructor for
 * dns_dbtype_cache databases.
 * NOTE(review): the table entries are not visible in this copy.
 */
7449 static dns_dbmethods_t cache_methods = {
/*
 * Database constructor: allocate and initialise an rbtdb for 'origin' of
 * the given 'type' and 'rdclass', and return it through 'dbp'.
 *
 * Construction proceeds in this order: the structure itself (zeroed), the
 * method table for the type, the database and tree locks, the per-bucket
 * node lock array, cache-only statistics and rdataset lists, the
 * per-bucket heaps (ordered by ttl_sooner for caches, resign_sooner
 * otherwise), the dead-node lists, and each bucket's lock and reference
 * counter.  After the memory contexts are attached, the origin name is
 * copied, the three trees (main, nsec, nsec3) are created, response-policy
 * support is set up for non-root zones, and non-cache databases get their
 * origin node plus an apex node in the nsec3 tree.  Finally the reference
 * count and the initial version (serial 1) are set up and the magic
 * numbers are stored.
 *
 * Failures before the memory contexts are attached unwind through the
 * cleanup code at the end of the function; later failures call
 * free_rbtdb().  Returns ISC_R_SUCCESS, ISC_R_NOMEMORY, ISC_R_RANGE or the
 * result of the failing initialisation call.
 *
 * NOTE(review): the function-name lines, the cleanup labels and a number
 * of short statements are not visible in this copy.
 */
7491 #ifdef DNS_RBTDB_VERSION64
7496 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7497 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7498 void *driverarg, dns_db_t **dbp)
7501 isc_result_t result;
7504 isc_boolean_t (*sooner)(void *, void *);
7505 isc_mem_t *hmctx = mctx;
7507 /* Keep the compiler happy. */
7510 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7512 return (ISC_R_NOMEMORY);
7515 * If argv[0] exists, it points to a memory context to use for heap
7518 hmctx = (isc_mem_t *) argv[0];
7520 memset(rbtdb, '\0', sizeof(*rbtdb));
7521 dns_name_init(&rbtdb->common.origin, NULL);
7522 rbtdb->common.attributes = 0;
7523 if (type == dns_dbtype_cache) {
7524 rbtdb->common.methods = &cache_methods;
7525 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7526 } else if (type == dns_dbtype_stub) {
7527 rbtdb->common.methods = &zone_methods;
7528 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7530 rbtdb->common.methods = &zone_methods;
7531 rbtdb->common.rdclass = rdclass;
7532 rbtdb->common.mctx = NULL;
7534 result = RBTDB_INITLOCK(&rbtdb->lock);
7535 if (result != ISC_R_SUCCESS)
7538 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7539 if (result != ISC_R_SUCCESS)
7543 * Initialize node_lock_count in a generic way to support future
7544 * extension which allows the user to specify this value on creation.
7545 * Note that when specified for a cache DB it must be larger than 1
7546 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
/* The structure was zeroed by the memset above, so node_lock_count is 0 when this test runs. */
7548 if (rbtdb->node_lock_count == 0) {
7549 if (IS_CACHE(rbtdb))
7550 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7552 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7553 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7554 result = ISC_R_RANGE;
7555 goto cleanup_tree_lock;
7557 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7558 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7559 sizeof(rbtdb_nodelock_t));
7560 if (rbtdb->node_locks == NULL) {
7561 result = ISC_R_NOMEMORY;
7562 goto cleanup_tree_lock;
/* Statistics and the per-bucket rdataset lists exist only for cache databases. */
7565 rbtdb->rrsetstats = NULL;
7566 if (IS_CACHE(rbtdb)) {
7567 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7568 if (result != ISC_R_SUCCESS)
7569 goto cleanup_node_locks;
7570 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7571 sizeof(rdatasetheaderlist_t));
7572 if (rbtdb->rdatasets == NULL) {
7573 result = ISC_R_NOMEMORY;
7574 goto cleanup_rrsetstats;
7576 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7577 ISC_LIST_INIT(rbtdb->rdatasets[i]);
7579 rbtdb->rdatasets = NULL;
/* One heap per lock bucket, allocated from the heap memory context; slots are NULLed first so cleanup can tell which heaps exist. */
7584 rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7585 sizeof(isc_heap_t *));
7586 if (rbtdb->heaps == NULL) {
7587 result = ISC_R_NOMEMORY;
7588 goto cleanup_rdatasets;
7590 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7591 rbtdb->heaps[i] = NULL;
7592 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7593 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7594 result = isc_heap_create(hmctx, sooner, set_index, 0,
7596 if (result != ISC_R_SUCCESS)
7601 * Create deadnode lists.
7603 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7604 sizeof(rbtnodelist_t));
7605 if (rbtdb->deadnodes == NULL) {
7606 result = ISC_R_NOMEMORY;
7609 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7610 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7612 rbtdb->active = rbtdb->node_lock_count;
/*
 * Initialise every bucket's lock and reference counter; on failure the
 * buckets already initialised are torn down again before unwinding.
 * NOTE(review): the counters are initialised to 0 and the teardown calls
 * isc_refcount_decrement() before isc_refcount_destroy(); whether a
 * decrement from 0 is acceptable is not visible here -- confirm.
 */
7614 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7615 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7616 if (result == ISC_R_SUCCESS) {
7617 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7618 if (result != ISC_R_SUCCESS)
7619 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7621 if (result != ISC_R_SUCCESS) {
7623 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7624 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7625 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7627 goto cleanup_deadnodes;
7629 rbtdb->node_locks[i].exiting = ISC_FALSE;
7633 * Attach to the mctx. The database will persist so long as there
7634 * are references to it, and attaching to the mctx ensures that our
7635 * mctx won't disappear out from under us.
7637 isc_mem_attach(mctx, &rbtdb->common.mctx);
7638 isc_mem_attach(hmctx, &rbtdb->hmctx);
7641 * Must be initialized before free_rbtdb() is called.
7643 isc_ondestroy_init(&rbtdb->common.ondest);
7646 * Make a copy of the origin name.
7648 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7649 if (result != ISC_R_SUCCESS) {
7650 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7655 * Make the Red-Black Trees.
7657 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7658 if (result != ISC_R_SUCCESS) {
7659 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7663 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7664 if (result != ISC_R_SUCCESS) {
7665 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7669 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7670 if (result != ISC_R_SUCCESS) {
7671 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7677 * Get ready for response policy IP address searching if at least one
7678 * zone has been configured as a response policy zone and this
7679 * is not a cache zone.
7680 * It would be better to know that this database is for a policy
7681 * zone named for a view, but that would require knowledge from
7682 * above such as an argv[] set from data in the zone.
7684 if (type == dns_dbtype_zone && !dns_name_equal(origin, dns_rootname)) {
7685 result = dns_rpz_new_cidr(mctx, origin, &rbtdb->rpz_cidr);
7686 if (result != ISC_R_SUCCESS) {
7687 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7694 * In order to set the node callback bit correctly in zone databases,
7695 * we need to know if the node has the origin name of the zone.
7696 * In loading_addrdataset() we could simply compare the new name
7697 * to the origin name, but this is expensive. Also, we don't know the
7698 * node name in addrdataset(), so we need another way of knowing the
7701 * We now explicitly create a node for the zone's origin, and then
7702 * we simply remember the node's address. This is safe, because
7703 * the top-of-zone node can never be deleted, nor can its address
7706 if (!IS_CACHE(rbtdb)) {
7707 dns_rbtnode_t *nsec3node;
7709 rbtdb->origin_node = NULL;
7710 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7711 &rbtdb->origin_node);
7712 if (result != ISC_R_SUCCESS) {
7713 INSIST(result != ISC_R_EXISTS);
7714 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7717 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7719 * We need to give the origin node the right locknum.
7721 dns_name_init(&name, NULL);
7722 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7723 #ifdef DNS_RBT_USEHASH
7724 rbtdb->origin_node->locknum =
7725 rbtdb->origin_node->hashval %
7726 rbtdb->node_lock_count;
7728 rbtdb->origin_node->locknum =
7729 dns_name_hash(&name, ISC_TRUE) %
7730 rbtdb->node_lock_count;
7733 * Add an apex node to the NSEC3 tree so that NSEC3 searches
7734 * return partial matches when there is only a single NSEC3
7735 * record in the tree.
7738 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7740 if (result != ISC_R_SUCCESS) {
7741 INSIST(result != ISC_R_EXISTS);
7742 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7745 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
7747 * We need to give the nsec3 origin node the right locknum.
7749 dns_name_init(&name, NULL);
7750 dns_rbt_namefromnode(nsec3node, &name);
7751 #ifdef DNS_RBT_USEHASH
7752 nsec3node->locknum = nsec3node->hashval %
7753 rbtdb->node_lock_count;
7755 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7756 rbtdb->node_lock_count;
7761 * Misc. Initialization.
7763 result = isc_refcount_init(&rbtdb->references, 1);
7764 if (result != ISC_R_SUCCESS) {
7765 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7768 rbtdb->attributes = 0;
7772 * Version Initialization.
7774 rbtdb->current_serial = 1;
7775 rbtdb->least_serial = 1;
7776 rbtdb->next_serial = 2;
7777 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7778 if (rbtdb->current_version == NULL) {
7779 isc_refcount_decrement(&rbtdb->references, NULL);
7780 isc_refcount_destroy(&rbtdb->references);
7781 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7782 return (ISC_R_NOMEMORY);
7784 rbtdb->current_version->rbtdb = rbtdb;
7785 rbtdb->current_version->secure = dns_db_insecure;
7786 rbtdb->current_version->havensec3 = ISC_FALSE;
7787 rbtdb->current_version->flags = 0;
7788 rbtdb->current_version->iterations = 0;
7789 rbtdb->current_version->hash = 0;
7790 rbtdb->current_version->salt_length = 0;
7791 memset(rbtdb->current_version->salt, 0,
7792 sizeof(rbtdb->current_version->salt));
7793 rbtdb->future_version = NULL;
7794 ISC_LIST_INIT(rbtdb->open_versions);
7796 * Keep the current version in the open list so that list operation
7797 * won't happen in normal lookup operations.
7799 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
/* The magic numbers are stored last, so the object only validates once it is fully built. */
7801 rbtdb->common.magic = DNS_DB_MAGIC;
7802 rbtdb->common.impmagic = RBTDB_MAGIC;
7804 *dbp = (dns_db_t *)rbtdb;
7806 return (ISC_R_SUCCESS);
/* Error unwinding: resources are released in the reverse of the order in which they were acquired above. */
7809 isc_mem_put(mctx, rbtdb->deadnodes,
7810 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7813 if (rbtdb->heaps != NULL) {
7814 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7815 if (rbtdb->heaps[i] != NULL)
7816 isc_heap_destroy(&rbtdb->heaps[i]);
7817 isc_mem_put(hmctx, rbtdb->heaps,
7818 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7822 if (rbtdb->rdatasets != NULL)
7823 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7824 sizeof(rdatasetheaderlist_t));
7826 if (rbtdb->rrsetstats != NULL)
7827 dns_stats_detach(&rbtdb->rrsetstats);
7830 isc_mem_put(mctx, rbtdb->node_locks,
7831 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7834 isc_rwlock_destroy(&rbtdb->tree_lock);
7837 RBTDB_DESTROYLOCK(&rbtdb->lock);
7840 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7846 * Slabbed Rdataset Methods
7850 rdataset_disassociate(dns_rdataset_t *rdataset) {
7851 dns_db_t *db = rdataset->private1;
7852 dns_dbnode_t *node = rdataset->private2;
7854 detachnode(db, &node);
/*
 * Position the iteration cursor of a slab-backed rdataset on its first
 * rdata.  Visible exits: ISC_R_NOMORE with private5 cleared, or
 * ISC_R_SUCCESS with privateuint4/private5 holding the cursor state that
 * rdataset_next() reads.
 * NOTE(review): this copy of the function appears to be missing several
 * short lines (declarations, braces, preprocessor lines) -- confirm against
 * the complete source before changing it.
 */
7858 rdataset_first(dns_rdataset_t *rdataset) {
7859 unsigned char *raw = rdataset->private3; /* RDATASLAB */
/* The first two slab bytes hold the rdata count, high byte first. */
7862 count = raw[0] * 256 + raw[1];
7864 rdataset->private5 = NULL;
7865 return (ISC_R_NOMORE);
/*
 * With DNS_RDATASET_FIXED and no load ordering requested, step over the
 * 2-byte count and 4 bytes per rdata.
 */
7868 #if DNS_RDATASET_FIXED
7869 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7870 raw += 2 + (4 * count);
7876 * The privateuint4 field is the number of rdata beyond the
7877 * cursor position, so we decrement the total count by one
7878 * before storing it.
7880 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7881 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7882 * to the first entry in the offset table.
/* Save the cursor: remaining count and current slab position. */
7885 rdataset->privateuint4 = count;
7886 rdataset->private5 = raw;
7888 return (ISC_R_SUCCESS);
/*
 * Advance the cursor of a slab-backed rdataset by one rdata.
 * privateuint4 supplies the remaining count and private5 the current
 * position inside the slab.  Visible exits are ISC_R_NOMORE and
 * ISC_R_SUCCESS.
 * NOTE(review): short lines (a declaration, the exhaustion test, the
 * count/pointer adjustments, preprocessor closers) are not present in this
 * copy -- confirm against the complete source.
 */
7892 rdataset_next(dns_rdataset_t *rdataset) {
7894 unsigned int length;
7895 unsigned char *raw; /* RDATASLAB */
/* Load the remaining-rdata counter kept by the previous first/next call. */
7897 count = rdataset->privateuint4;
7899 return (ISC_R_NOMORE);
7901 rdataset->privateuint4 = count;
7904 * Skip forward one record (length + 4) or one offset (4).
7906 raw = rdataset->private5;
7907 #if DNS_RDATASET_FIXED
7908 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
/* Record length is a 16-bit value at the cursor, high byte first. */
7910 length = raw[0] * 256 + raw[1];
7912 #if DNS_RDATASET_FIXED
/* Store the new cursor, skipping the fixed-size fields named per line. */
7914 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7916 rdataset->private5 = raw + 2; /* length(2) */
7919 return (ISC_R_SUCCESS);
/*
 * Fill in 'rdata' from the record at the current cursor position
 * (rdataset->private5) of a slab-backed rdataset.  The cursor must be
 * set (raw != NULL).  The rdata is built with dns_rdata_fromregion() using
 * the class and type of the rdataset, and DNS_RDATA_OFFLINE is OR-ed into
 * its flags when an RRSIG record carries DNS_RDATASLAB_OFFLINE.
 * NOTE(review): short lines (the region variable, pointer adjustments,
 * preprocessor closers) are not present in this copy -- confirm against the
 * complete source.
 */
7923 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7924 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7925 #if DNS_RDATASET_FIXED
7926 unsigned int offset;
7928 unsigned int length;
7930 unsigned int flags = 0;
7932 REQUIRE(raw != NULL);
7935 * Find the start of the record if not already in private5
7936 * then skip the length and order fields.
7938 #if DNS_RDATASET_FIXED
7939 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
/*
 * In load-order mode the cursor addresses a 4-byte big-endian offset.
 * NOTE(review): raw[0] is promoted to int before the shift by 24, so a
 * value of 0x80 or more overflows a 32-bit signed int; casting each byte
 * to an unsigned type first would avoid that.
 */
7940 offset = (raw[0] << 24) + (raw[1] << 16) +
7941 (raw[2] << 8) + raw[3];
7942 raw = rdataset->private3;
/* Record length: 16-bit value, high byte first. */
7946 length = raw[0] * 256 + raw[1];
7947 #if DNS_RDATASET_FIXED
/* For RRSIG records the byte at raw is tested for the offline bit. */
7952 if (rdataset->type == dns_rdatatype_rrsig) {
7953 if (*raw & DNS_RDATASLAB_OFFLINE)
7954 flags |= DNS_RDATA_OFFLINE;
7960 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7961 rdata->flags |= flags;
/*
 * Prepare 'target' as a clone of the slab-backed rdataset 'source'.
 * An additional reference is taken on the node recorded in
 * source->private2 (database in source->private1), and the iteration
 * state of the target is cleared so the clone starts unpositioned.
 * NOTE(review): at least one short line between the attach and the reset
 * is not present in this copy -- confirm against the complete source.
 */
7965 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7966 dns_db_t *db = source->private1;
7967 dns_dbnode_t *node = source->private2;
7968 dns_dbnode_t *cloned_node = NULL;
/* Take the extra node reference that the clone will hold. */
7970 attachnode(db, node, &cloned_node);
7974 * Reset iterator state.
7976 target->privateuint4 = 0;
7977 target->private5 = NULL;
7981 rdataset_count(dns_rdataset_t *rdataset) {
7982 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7985 count = raw[0] * 256 + raw[1];
7991 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7992 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7994 dns_db_t *db = rdataset->private1;
7995 dns_dbnode_t *node = rdataset->private2;
7996 dns_dbnode_t *cloned_node;
7997 struct noqname *noqname = rdataset->private6;
8000 attachnode(db, node, &cloned_node);
8001 nsec->methods = &rdataset_methods;
8002 nsec->rdclass = db->rdclass;
8003 nsec->type = noqname->type;
8005 nsec->ttl = rdataset->ttl;
8006 nsec->trust = rdataset->trust;
8007 nsec->private1 = rdataset->private1;
8008 nsec->private2 = rdataset->private2;
8009 nsec->private3 = noqname->neg;
8010 nsec->privateuint4 = 0;
8011 nsec->private5 = NULL;
8012 nsec->private6 = NULL;
8013 nsec->private7 = NULL;
8016 attachnode(db, node, &cloned_node);
8017 nsecsig->methods = &rdataset_methods;
8018 nsecsig->rdclass = db->rdclass;
8019 nsecsig->type = dns_rdatatype_rrsig;
8020 nsecsig->covers = noqname->type;
8021 nsecsig->ttl = rdataset->ttl;
8022 nsecsig->trust = rdataset->trust;
8023 nsecsig->private1 = rdataset->private1;
8024 nsecsig->private2 = rdataset->private2;
8025 nsecsig->private3 = noqname->negsig;
8026 nsecsig->privateuint4 = 0;
8027 nsecsig->private5 = NULL;
8028 nsec->private6 = NULL;
8029 nsec->private7 = NULL;
8031 dns_name_clone(&noqname->name, name);
8033 return (ISC_R_SUCCESS);
8037 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
8038 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8040 dns_db_t *db = rdataset->private1;
8041 dns_dbnode_t *node = rdataset->private2;
8042 dns_dbnode_t *cloned_node;
8043 struct noqname *closest = rdataset->private7;
8046 attachnode(db, node, &cloned_node);
8047 nsec->methods = &rdataset_methods;
8048 nsec->rdclass = db->rdclass;
8049 nsec->type = closest->type;
8051 nsec->ttl = rdataset->ttl;
8052 nsec->trust = rdataset->trust;
8053 nsec->private1 = rdataset->private1;
8054 nsec->private2 = rdataset->private2;
8055 nsec->private3 = closest->neg;
8056 nsec->privateuint4 = 0;
8057 nsec->private5 = NULL;
8058 nsec->private6 = NULL;
8059 nsec->private7 = NULL;
8062 attachnode(db, node, &cloned_node);
8063 nsecsig->methods = &rdataset_methods;
8064 nsecsig->rdclass = db->rdclass;
8065 nsecsig->type = dns_rdatatype_rrsig;
8066 nsecsig->covers = closest->type;
8067 nsecsig->ttl = rdataset->ttl;
8068 nsecsig->trust = rdataset->trust;
8069 nsecsig->private1 = rdataset->private1;
8070 nsecsig->private2 = rdataset->private2;
8071 nsecsig->private3 = closest->negsig;
8072 nsecsig->privateuint4 = 0;
8073 nsecsig->private5 = NULL;
8074 nsec->private6 = NULL;
8075 nsec->private7 = NULL;
8077 dns_name_clone(&closest->name, name);
8079 return (ISC_R_SUCCESS);
/*
 * Set the trust level of a slab-backed rdataset.  The same value is
 * written to rdataset->trust and to the trust field reached through
 * 'header', while the lock of the node (private2) is held for writing.
 * NOTE(review): a short line between the declarations and the lock is not
 * present in this copy -- confirm how 'header' is derived from private3
 * against the complete source.
 */
8083 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8084 dns_rbtdb_t *rbtdb = rdataset->private1;
8085 dns_rbtnode_t *rbtnode = rdataset->private2;
8086 rdatasetheader_t *header = rdataset->private3;
/* The node lock bucket is selected by rbtnode->locknum. */
8089 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8090 isc_rwlocktype_write);
8091 header->trust = rdataset->trust = trust;
8092 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8093 isc_rwlocktype_write);
/*
 * Expire a slab-backed rdataset by calling expire_header() on its header
 * while the lock of the node (private2) is held for writing.
 * NOTE(review): a short line between the declarations and the lock is not
 * present in this copy -- confirm how 'header' is derived from private3
 * against the complete source.
 */
8097 rdataset_expire(dns_rdataset_t *rdataset) {
8098 dns_rbtdb_t *rbtdb = rdataset->private1;
8099 dns_rbtnode_t *rbtnode = rdataset->private2;
8100 rdatasetheader_t *header = rdataset->private3;
/* The node lock bucket is selected by rbtnode->locknum. */
8103 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8104 isc_rwlocktype_write);
8105 expire_header(rbtdb, header, ISC_FALSE);
8106 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8107 isc_rwlocktype_write);
8111 * Rdataset Iterator Methods
/*
 * Destroy an rdataset iterator: close the version it holds (if any),
 * detach its node, and return its memory to the memory context of its
 * database.
 * NOTE(review): trailing short lines of this function are not present in
 * this copy -- confirm against the complete source.
 */
8115 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8116 rbtdb_rdatasetiter_t *rbtiterator;
8118 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
/* A version is only held when common.version is non-NULL. */
8120 if (rbtiterator->common.version != NULL)
8121 closeversion(rbtiterator->common.db,
8122 &rbtiterator->common.version, ISC_FALSE);
8123 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
/* The iterator structure itself is freed last. */
8124 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8125 sizeof(*rbtiterator));
/*
 * Position an rdataset iterator on the first usable rdataset header of
 * its node.  The top-level list (node data, linked by next) is walked
 * under the node read lock; within each entry the down list is followed
 * looking for a header whose serial does not exceed the selected serial
 * and that is not marked IGNORE.  The header found (or NULL) is stored in
 * rbtiterator->current.  Visible exits are ISC_R_NOMORE and ISC_R_SUCCESS.
 * NOTE(review): short lines (declarations, else branches, loop control)
 * are not present in this copy -- confirm against the complete source.
 */
8131 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8132 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8133 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8134 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8135 rbtdb_version_t *rbtversion = rbtiterator->common.version;
8136 rdatasetheader_t *header, *top_next;
8137 rbtdb_serial_t serial;
/* Both the iterator time and the version serial are read in this step. */
8140 if (IS_CACHE(rbtdb)) {
8142 now = rbtiterator->common.now;
8144 serial = rbtversion->serial;
8148 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8149 isc_rwlocktype_read);
/* top_next is saved first because header is reassigned while descending. */
8151 for (header = rbtnode->data; header != NULL; header = top_next) {
8152 top_next = header->next;
8154 if (header->serial <= serial && !IGNORE(header)) {
8156 * Is this a "this rdataset doesn't exist"
8157 * record? Or is it too old in the cache?
8159 * Note: unlike everywhere else, we
8160 * check for now > header->rdh_ttl instead
8161 * of now >= header->rdh_ttl. This allows
8162 * ANY and RRSIG queries for 0 TTL
8163 * rdatasets to work.
8165 if (NONEXISTENT(header) ||
8166 (now != 0 && now > header->rdh_ttl))
8170 header = header->down;
8171 } while (header != NULL);
8176 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8177 isc_rwlocktype_read);
/* Remember the position for the next and current methods. */
8179 rbtiterator->current = header;
8182 return (ISC_R_NOMORE);
8184 return (ISC_R_SUCCESS);
/*
 * Advance an rdataset iterator to the next usable rdataset header of its
 * node.  Starting after the current header, the top-level list is walked
 * under the node read lock; entries with the same type as the current
 * header, or with the computed companion type (negtype), are passed over,
 * and within other entries the down list is followed looking for a header
 * whose serial does not exceed the selected serial.  The header found (or
 * NULL) is stored in rbtiterator->current.  Visible exits are
 * ISC_R_NOMORE and ISC_R_SUCCESS.
 * NOTE(review): short lines (declarations, else branches, loop control)
 * are not present in this copy -- confirm against the complete source.
 */
8188 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8189 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8190 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8191 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8192 rbtdb_version_t *rbtversion = rbtiterator->common.version;
8193 rdatasetheader_t *header, *top_next;
8194 rbtdb_serial_t serial;
8196 rbtdb_rdatatype_t type, negtype;
8197 dns_rdatatype_t rdtype, covers;
/* Resume from the header stored by the previous first/next call. */
8199 header = rbtiterator->current;
8201 return (ISC_R_NOMORE);
/* Both the iterator time and the version serial are read in this step. */
8203 if (IS_CACHE(rbtdb)) {
8205 now = rbtiterator->common.now;
8207 serial = rbtversion->serial;
8211 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8212 isc_rwlocktype_read);
/*
 * Work out the companion type to skip: built from the EXT part of the
 * type for a NEGATIVE header, otherwise from the BASE part.
 */
8214 type = header->type;
8215 rdtype = RBTDB_RDATATYPE_BASE(header->type);
8216 if (NEGATIVE(header)) {
8217 covers = RBTDB_RDATATYPE_EXT(header->type);
8218 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8220 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
/* top_next is saved first because header is reassigned while descending. */
8221 for (header = header->next; header != NULL; header = top_next) {
8222 top_next = header->next;
8224 * If not walking back up the down list.
8226 if (header->type != type && header->type != negtype) {
8228 if (header->serial <= serial &&
8231 * Is this a "this rdataset doesn't
8234 * Note: unlike everywhere else, we
8235 * check for now > header->ttl instead
8236 * of now >= header->ttl. This allows
8237 * ANY and RRSIG queries for 0 TTL
8238 * rdatasets to work.
8240 if ((header->attributes &
8241 RDATASET_ATTR_NONEXISTENT) != 0 ||
8242 (now != 0 && now > header->rdh_ttl))
8246 header = header->down;
8247 } while (header != NULL);
8253 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8254 isc_rwlocktype_read);
8256 rbtiterator->current = header;
8259 return (ISC_R_NOMORE);
8261 return (ISC_R_SUCCESS);
8265 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8266 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8267 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8268 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8269 rdatasetheader_t *header;
8271 header = rbtiterator->current;
8272 REQUIRE(header != NULL);
8274 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8275 isc_rwlocktype_read);
8277 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8280 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8281 isc_rwlocktype_read);
8286 * Database Iterator Methods
/*
 * Reactivate the node the database iterator is positioned on by calling
 * reactivate_node() with the tree lock type the iterator holds.  The
 * iterator must hold the tree lock (tree_locked is not none).
 * NOTE(review): short lines between the declarations and the INSIST are
 * not present in this copy -- confirm against the complete source.
 */
8290 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8291 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8292 dns_rbtnode_t *node = rbtdbiter->node;
8297 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8298 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
/*
 * Drop the reference the database iterator holds on its current node.
 * decrement_reference() is called with the node lock held for reading,
 * then rbtdbiter->node is cleared.
 * NOTE(review): short lines (the lock declaration and whatever precedes
 * the lock lookup) are not present in this copy -- confirm against the
 * complete source.
 */
8302 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8303 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8304 dns_rbtnode_t *node = rbtdbiter->node;
/* The node lock bucket is selected by node->locknum. */
8310 lock = &rbtdb->node_locks[node->locknum].lock;
8311 NODE_LOCK(lock, isc_rwlocktype_read);
8312 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8313 rbtdbiter->tree_locked, ISC_FALSE);
8314 NODE_UNLOCK(lock, isc_rwlocktype_read);
8316 rbtdbiter->node = NULL;
/*
 * Release the references held on the nodes queued in
 * rbtdbiter->deletions.  When the queue is not empty the function logs
 * the queue size, switches the tree lock to write mode (releasing a read
 * lock first if one is held), calls decrement_reference() on every queued
 * node under that node read lock, empties the queue, and finally drops
 * the write lock, re-taking the read lock if one was held on entry.
 * NOTE(review): short lines (declarations, one log argument, else
 * branches, braces) are not present in this copy -- confirm against the
 * complete source.
 */
8320 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8321 dns_rbtnode_t *node;
8322 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8323 isc_boolean_t was_read_locked = ISC_FALSE;
/* Nothing is done unless at least one node is queued. */
8327 if (rbtdbiter->delete != 0) {
8329 * Note that "%d node of %d in tree" can report things like
8330 * "flush_deletions: 59 nodes of 41 in tree". This means
8331 * That some nodes appear on the deletions list more than
8332 * once. Only the last occurence will actually be deleted.
8334 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8335 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8336 "flush_deletions: %d nodes of %d in tree",
8338 dns_rbt_nodecount(rbtdb->tree));
/* Give up a held read lock before asking for the write lock. */
8340 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8341 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8342 was_read_locked = ISC_TRUE;
8344 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8345 rbtdbiter->tree_locked = isc_rwlocktype_write;
/* Release the reference recorded for each queued node. */
8347 for (i = 0; i < rbtdbiter->delete; i++) {
8348 node = rbtdbiter->deletions[i];
8349 lock = &rbtdb->node_locks[node->locknum].lock;
8351 NODE_LOCK(lock, isc_rwlocktype_read);
8352 decrement_reference(rbtdb, node, 0,
8353 isc_rwlocktype_read,
8354 rbtdbiter->tree_locked, ISC_FALSE);
8355 NODE_UNLOCK(lock, isc_rwlocktype_read);
8358 rbtdbiter->delete = 0;
/* Restore the lock state the caller had on entry. */
8360 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8361 if (was_read_locked) {
8362 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8363 rbtdbiter->tree_locked = isc_rwlocktype_read;
8366 rbtdbiter->tree_locked = isc_rwlocktype_none;
8372 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8373 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8375 REQUIRE(rbtdbiter->paused);
8376 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8378 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8379 rbtdbiter->tree_locked = isc_rwlocktype_read;
8381 rbtdbiter->paused = ISC_FALSE;
/*
 * Destroy a database iterator.  A held tree read lock is released, the
 * current node is dereferenced, queued deletions are flushed, the
 * database reference of the iterator is dropped (after taking a temporary
 * one in 'db' so the memory context stays reachable), both node chains
 * are reset, and the iterator memory is returned to db->mctx.
 * NOTE(review): short lines (an else, and the trailing statements after
 * the isc_mem_put) are not present in this copy -- confirm against the
 * complete source.
 */
8385 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8386 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8387 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8388 dns_db_t *db = NULL;
/* Only a read lock (or no lock) is expected at this point. */
8390 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8391 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8392 rbtdbiter->tree_locked = isc_rwlocktype_none;
8394 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8396 dereference_iter_node(rbtdbiter);
8398 flush_deletions(rbtdbiter);
/* Keep a temporary reference in db while the iterator lets go of its own. */
8400 dns_db_attach(rbtdbiter->common.db, &db);
8401 dns_db_detach(&rbtdbiter->common.db);
8403 dns_rbtnodechain_reset(&rbtdbiter->chain);
8404 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8405 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * Move a database iterator to the first node.  A sticky error in
 * rbtdbiter->result (anything other than ISC_R_SUCCESS or ISC_R_NOMORE)
 * is returned unchanged.  Otherwise the iterator is resumed if paused,
 * its current node is released, both chains are reset, and the first
 * node is looked up in the NSEC3 tree (nsec3only) or in the main tree,
 * with a further lookup when the main tree yields ISC_R_NOTFOUND and
 * nonsec3 is not set.  ISC_R_NOTFOUND is turned into ISC_R_NOMORE and
 * the outcome is stored in rbtdbiter->result.
 * NOTE(review): short lines (else branches, one call continuation, the
 * final return) are not present in this copy -- confirm against the
 * complete source.
 */
8412 dbiterator_first(dns_dbiterator_t *iterator) {
8413 isc_result_t result;
8414 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8415 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8416 dns_name_t *name, *origin;
/* A previously recorded hard failure is reported again. */
8418 if (rbtdbiter->result != ISC_R_SUCCESS &&
8419 rbtdbiter->result != ISC_R_NOMORE)
8420 return (rbtdbiter->result);
8422 if (rbtdbiter->paused)
8423 resume_iteration(rbtdbiter);
8425 dereference_iter_node(rbtdbiter);
/* Start over: scratch names and both chains are reinitialised. */
8427 name = dns_fixedname_name(&rbtdbiter->name);
8428 origin = dns_fixedname_name(&rbtdbiter->origin);
8429 dns_rbtnodechain_reset(&rbtdbiter->chain);
8430 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8432 if (rbtdbiter->nsec3only) {
8433 rbtdbiter->current = &rbtdbiter->nsec3chain;
8434 result = dns_rbtnodechain_first(rbtdbiter->current,
8435 rbtdb->nsec3, name, origin);
8437 rbtdbiter->current = &rbtdbiter->chain;
8438 result = dns_rbtnodechain_first(rbtdbiter->current,
8439 rbtdb->tree, name, origin);
/* Main tree empty: switch to the NSEC3 chain unless nonsec3 is set. */
8440 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8441 rbtdbiter->current = &rbtdbiter->nsec3chain;
8442 result = dns_rbtnodechain_first(rbtdbiter->current,
/* On success, fetch the node from the chain and take a reference. */
8447 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8448 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8449 NULL, &rbtdbiter->node);
8450 if (result == ISC_R_SUCCESS) {
8451 rbtdbiter->new_origin = ISC_TRUE;
8452 reference_iter_node(rbtdbiter);
8455 INSIST(result == ISC_R_NOTFOUND);
8456 result = ISC_R_NOMORE; /* The tree is empty. */
8459 rbtdbiter->result = result;
/*
 * Move a database iterator to the last node.  A sticky error in
 * rbtdbiter->result (anything other than ISC_R_SUCCESS or ISC_R_NOMORE)
 * is returned unchanged.  Otherwise the iterator is resumed if paused,
 * its current node is released, both chains are reset, and the last node
 * is looked up in the NSEC3 tree and/or the main tree depending on the
 * nsec3only and nonsec3 flags.  ISC_R_NOTFOUND is turned into
 * ISC_R_NOMORE and the outcome is stored in rbtdbiter->result.
 * NOTE(review): short lines (closing braces, one call continuation, the
 * final return) are not present in this copy -- confirm against the
 * complete source.
 */
8465 dbiterator_last(dns_dbiterator_t *iterator) {
8466 isc_result_t result;
8467 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8468 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8469 dns_name_t *name, *origin;
/* A previously recorded hard failure is reported again. */
8471 if (rbtdbiter->result != ISC_R_SUCCESS &&
8472 rbtdbiter->result != ISC_R_NOMORE)
8473 return (rbtdbiter->result);
8475 if (rbtdbiter->paused)
8476 resume_iteration(rbtdbiter);
8478 dereference_iter_node(rbtdbiter);
/* Start over: scratch names and both chains are reinitialised. */
8480 name = dns_fixedname_name(&rbtdbiter->name);
8481 origin = dns_fixedname_name(&rbtdbiter->origin);
8482 dns_rbtnodechain_reset(&rbtdbiter->chain);
8483 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/* NOTFOUND is the starting value that lets the second test run. */
8485 result = ISC_R_NOTFOUND;
8486 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8487 rbtdbiter->current = &rbtdbiter->nsec3chain;
8488 result = dns_rbtnodechain_last(rbtdbiter->current,
8489 rbtdb->nsec3, name, origin);
8491 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8492 rbtdbiter->current = &rbtdbiter->chain;
8493 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
/* On success, fetch the node from the chain and take a reference. */
8496 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8497 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8498 NULL, &rbtdbiter->node);
8499 if (result == ISC_R_SUCCESS) {
8500 rbtdbiter->new_origin = ISC_TRUE;
8501 reference_iter_node(rbtdbiter);
8504 INSIST(result == ISC_R_NOTFOUND);
8505 result = ISC_R_NOMORE; /* The tree is empty. */
8508 rbtdbiter->result = result;
/*
 * Move a database iterator to the node named 'name'.  A sticky error in
 * rbtdbiter->result (anything other than ISC_R_SUCCESS, ISC_R_NOTFOUND or
 * ISC_R_NOMORE) is returned unchanged.  Otherwise the iterator is resumed
 * if paused, its current node is released, both chains are reset, and the
 * name is looked up with dns_rbt_findnode() in the NSEC3 tree
 * (nsec3only), the main tree (nonsec3), or the main tree followed by the
 * NSEC3 tree when the main tree gives only a partial match.
 * NOTE(review): short lines (call continuations, else branches,
 * preprocessor lines, the final return) are not present in this copy.
 * Two consecutive result-handling sequences are visible below, each
 * ending in an assignment to rbtdbiter->result; they look like
 * alternative branches of a conditional that is not visible here --
 * confirm against the complete source.
 */
8514 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8515 isc_result_t result, tresult;
8516 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8517 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8518 dns_name_t *iname, *origin;
/* A previously recorded hard failure is reported again. */
8520 if (rbtdbiter->result != ISC_R_SUCCESS &&
8521 rbtdbiter->result != ISC_R_NOTFOUND &&
8522 rbtdbiter->result != ISC_R_NOMORE)
8523 return (rbtdbiter->result);
8525 if (rbtdbiter->paused)
8526 resume_iteration(rbtdbiter);
8528 dereference_iter_node(rbtdbiter);
/* Start over: scratch names and both chains are reinitialised. */
8530 iname = dns_fixedname_name(&rbtdbiter->name);
8531 origin = dns_fixedname_name(&rbtdbiter->origin);
8532 dns_rbtnodechain_reset(&rbtdbiter->chain);
8533 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8535 if (rbtdbiter->nsec3only) {
8536 rbtdbiter->current = &rbtdbiter->nsec3chain;
8537 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8540 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8541 } else if (rbtdbiter->nonsec3) {
8542 rbtdbiter->current = &rbtdbiter->chain;
8543 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8546 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8549 * Stay on main chain if not found on either chain.
8551 rbtdbiter->current = &rbtdbiter->chain;
8552 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8555 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
/* Partial match in the main tree: try the NSEC3 tree for an exact one. */
8556 if (result == DNS_R_PARTIALMATCH) {
8557 dns_rbtnode_t *node = NULL;
8558 tresult = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8559 &node, &rbtdbiter->nsec3chain,
8560 DNS_RBTFIND_EMPTYDATA,
8562 if (tresult == ISC_R_SUCCESS) {
8563 rbtdbiter->node = node;
8564 rbtdbiter->current = &rbtdbiter->nsec3chain;
/* First visible sequence: a partial match is reported as ISC_R_NOTFOUND. */
8571 if (result == ISC_R_SUCCESS) {
8572 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8574 if (result == ISC_R_SUCCESS) {
8575 rbtdbiter->new_origin = ISC_TRUE;
8576 reference_iter_node(rbtdbiter);
8578 } else if (result == DNS_R_PARTIALMATCH) {
8579 result = ISC_R_NOTFOUND;
8580 rbtdbiter->node = NULL;
8583 rbtdbiter->result = result;
/* Second visible sequence: a partial match is stored as ISC_R_SUCCESS. */
8585 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8586 isc_result_t tresult;
8587 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8589 if (tresult == ISC_R_SUCCESS) {
8590 rbtdbiter->new_origin = ISC_TRUE;
8591 reference_iter_node(rbtdbiter);
8594 rbtdbiter->node = NULL;
8597 rbtdbiter->node = NULL;
8599 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8600 ISC_R_SUCCESS : result;
/*
 * Step a database iterator back to the previous node.  The iterator must
 * be positioned (node != NULL).  A stored result other than ISC_R_SUCCESS
 * is returned unchanged.  When the NSEC3 chain is exhausted and the
 * iterator covers both trees, iteration continues from the last node of
 * the main tree.  The old node is released, the new node (if any) is
 * fetched from the chain and referenced, and the outcome is stored in
 * rbtdbiter->result.
 * NOTE(review): short lines (a call continuation, closing braces, the
 * final return) are not present in this copy -- confirm against the
 * complete source.
 */
8607 dbiterator_prev(dns_dbiterator_t *iterator) {
8608 isc_result_t result;
8609 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8610 dns_name_t *name, *origin;
8611 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8613 REQUIRE(rbtdbiter->node != NULL);
8615 if (rbtdbiter->result != ISC_R_SUCCESS)
8616 return (rbtdbiter->result);
8618 if (rbtdbiter->paused)
8619 resume_iteration(rbtdbiter);
8621 name = dns_fixedname_name(&rbtdbiter->name);
8622 origin = dns_fixedname_name(&rbtdbiter->origin);
8623 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
/* Start of the NSEC3 chain reached: fall back to the end of the main tree. */
8624 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8625 !rbtdbiter->nonsec3 &&
8626 &rbtdbiter->nsec3chain == rbtdbiter->current) {
8627 rbtdbiter->current = &rbtdbiter->chain;
8628 dns_rbtnodechain_reset(rbtdbiter->current);
8629 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8631 if (result == ISC_R_NOTFOUND)
8632 result = ISC_R_NOMORE;
/* Let go of the old node before picking up the new one. */
8635 dereference_iter_node(rbtdbiter);
8637 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8638 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8639 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8640 NULL, &rbtdbiter->node);
8643 if (result == ISC_R_SUCCESS)
8644 reference_iter_node(rbtdbiter);
8646 rbtdbiter->result = result;
/*
 * Step a database iterator forward to the next node.  The iterator must
 * be positioned (node != NULL).  A stored result other than ISC_R_SUCCESS
 * is returned unchanged.  When the main chain is exhausted and the
 * iterator covers both trees, iteration continues from the first node of
 * the NSEC3 tree.  The old node is released, the new node (if any) is
 * fetched from the chain and referenced, and the outcome is stored in
 * rbtdbiter->result.
 * NOTE(review): short lines (closing braces, the final return) are not
 * present in this copy -- confirm against the complete source.
 */
8652 dbiterator_next(dns_dbiterator_t *iterator) {
8653 isc_result_t result;
8654 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8655 dns_name_t *name, *origin;
8656 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8658 REQUIRE(rbtdbiter->node != NULL);
8660 if (rbtdbiter->result != ISC_R_SUCCESS)
8661 return (rbtdbiter->result);
8663 if (rbtdbiter->paused)
8664 resume_iteration(rbtdbiter);
8666 name = dns_fixedname_name(&rbtdbiter->name);
8667 origin = dns_fixedname_name(&rbtdbiter->origin);
8668 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
/* End of the main chain reached: continue with the NSEC3 tree. */
8669 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8670 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8671 rbtdbiter->current = &rbtdbiter->nsec3chain;
8672 dns_rbtnodechain_reset(rbtdbiter->current);
8673 result = dns_rbtnodechain_first(rbtdbiter->current,
8674 rbtdb->nsec3, name, origin);
8675 if (result == ISC_R_NOTFOUND)
8676 result = ISC_R_NOMORE;
/* Let go of the old node before picking up the new one. */
8679 dereference_iter_node(rbtdbiter);
8681 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8682 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8683 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8684 NULL, &rbtdbiter->node);
8686 if (result == ISC_R_SUCCESS)
8687 reference_iter_node(rbtdbiter);
8689 rbtdbiter->result = result;
/*
 * Return the node a database iterator is positioned on through *nodep,
 * taking a new reference on it under the node strong lock.  The iterator
 * must have result ISC_R_SUCCESS and a non-NULL node; it is resumed first
 * if paused.  A node name is produced from the stored node name and
 * origin (dns_name_concatenate is visible on one path).  The result is
 * DNS_R_NEWORIGIN when relative names are in use and the origin changed,
 * otherwise ISC_R_SUCCESS on the visible paths.
 * When iterator->cleaning is set and the result is ISC_R_SUCCESS, the
 * node is expired via expirenode(); if it then has no down pointer it is
 * appended to the deletions array with an extra reference, flushing the
 * array first when it already holds DELETION_BATCH_MAX entries.
 * NOTE(review): short lines (the rest of the parameter list, else
 * branches, declarations, the final return) are not present in this copy
 * -- confirm against the complete source.
 */
8695 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8698 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8699 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8700 dns_rbtnode_t *node = rbtdbiter->node;
8701 isc_result_t result;
8702 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8703 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8705 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8706 REQUIRE(rbtdbiter->node != NULL);
8708 if (rbtdbiter->paused)
8709 resume_iteration(rbtdbiter);
/* Name handling depends on whether relative names were requested. */
8712 if (rbtdbiter->common.relative_names)
8714 result = dns_name_concatenate(nodename, origin, name, NULL);
8715 if (result != ISC_R_SUCCESS)
8717 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8718 result = DNS_R_NEWORIGIN;
8720 result = ISC_R_SUCCESS;
/* Reference handed to the caller through *nodep. */
8722 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8723 new_reference(rbtdb, node);
8724 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8726 *nodep = rbtdbiter->node;
/* Cache-cleaning mode: expire the node and queue it for deferred release. */
8728 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8729 isc_result_t expire_result;
8732 * If the deletion array is full, flush it before trying
8733 * to expire the current node. The current node can't
8734 * fully deleted while the iteration cursor is still on it.
8736 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8737 flush_deletions(rbtdbiter);
8739 expire_result = expirenode(iterator->db, *nodep, 0);
8742 * expirenode() currently always returns success.
8744 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8747 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8748 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8749 dns_rbtnode_refincrement(node, &refs);
8751 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8759 dbiterator_pause(dns_dbiterator_t *iterator) {
8760 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8761 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8763 if (rbtdbiter->result != ISC_R_SUCCESS &&
8764 rbtdbiter->result != ISC_R_NOMORE)
8765 return (rbtdbiter->result);
8767 if (rbtdbiter->paused)
8768 return (ISC_R_SUCCESS);
8770 rbtdbiter->paused = ISC_TRUE;
8772 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8773 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8774 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8775 rbtdbiter->tree_locked = isc_rwlocktype_none;
8778 flush_deletions(rbtdbiter);
8780 return (ISC_R_SUCCESS);
8784 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8785 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8786 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8788 if (rbtdbiter->result != ISC_R_SUCCESS)
8789 return (rbtdbiter->result);
8791 return (dns_name_copy(origin, name, NULL));
8795 * Additional cache routines.
/*
 * Look up the additional-section cache entry recorded for the rdata the
 * rdataset cursor is currently on.  The slot index is derived from the
 * total rdata count of the slab and the remaining count in privateuint4.
 * The per-header array is chosen by 'type' (additional_auth or
 * additional_glue are visible).  If no array or no entry exists the
 * function returns ISC_R_NOTFOUND, counting a query miss on 'acache' on
 * the visible paths.  Otherwise the entry is attached under the node
 * read lock, queried with dns_acache_getentry() after the lock is
 * released, and detached again.
 * An early return of ISC_R_NOTIMPLEMENTED is also visible.
 * NOTE(review): short lines (the rest of the parameter list, preprocessor
 * conditionals, switch/break/default lines, the final return) are not
 * present in this copy -- confirm against the complete source.
 */
8798 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8799 dns_rdatatype_t qtype, dns_acache_t *acache,
8800 dns_zone_t **zonep, dns_db_t **dbp,
8801 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8802 dns_name_t *fname, dns_message_t *msg,
8818 return (ISC_R_NOTIMPLEMENTED);
8820 dns_rbtdb_t *rbtdb = rdataset->private1;
8821 dns_rbtnode_t *rbtnode = rdataset->private2;
8822 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8823 unsigned int current_count = rdataset->privateuint4;
8825 rdatasetheader_t *header;
8826 nodelock_t *nodelock;
8827 unsigned int total_count;
8828 acachectl_t *acarray;
8829 dns_acacheentry_t *entry;
8830 isc_result_t result;
8832 UNUSED(qtype); /* we do not use this value at least for now */
/* The header structure sits immediately before the slab data. */
8835 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* Slot index: position of the current rdata counted from the start. */
8837 total_count = raw[0] * 256 + raw[1];
8838 INSIST(total_count > current_count);
8839 count = total_count - current_count - 1;
8843 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8844 NODE_LOCK(nodelock, isc_rwlocktype_read);
8847 case dns_rdatasetadditional_fromauth:
8848 acarray = header->additional_auth;
8850 case dns_rdatasetadditional_fromcache:
8853 case dns_rdatasetadditional_fromglue:
8854 acarray = header->additional_glue;
/* No array yet: a miss is counted except for the fromcache type. */
8860 if (acarray == NULL) {
8861 if (type != dns_rdatasetadditional_fromcache)
8862 dns_acache_countquerymiss(acache);
8863 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8864 return (ISC_R_NOTFOUND);
8867 if (acarray[count].entry == NULL) {
8868 dns_acache_countquerymiss(acache);
8869 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8870 return (ISC_R_NOTFOUND);
/* Hold a reference on the entry so it can be used after unlocking. */
8874 dns_acache_attachentry(acarray[count].entry, &entry);
8876 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8878 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8879 nodep, fname, msg, now);
8881 dns_acache_detachentry(&entry);
/*
 * Callback run for an additional-cache entry.  Using the callback
 * argument (database, node, header, type, slot index) it takes the node
 * write lock, selects the matching per-header array, and if the slot
 * still refers to 'entry' clears the slot.  The callback argument is
 * freed on both visible paths, the entry is detached, and the node and
 * database references carried by the argument are released after the
 * lock is dropped.
 * NOTE(review): short lines (declarations, the assignment of cbarg from
 * arg, switch/break/default lines, else, trailing statements) are not
 * present in this copy -- confirm against the complete source.
 */
8887 acache_callback(dns_acacheentry_t *entry, void **arg) {
8889 dns_rbtnode_t *rbtnode;
8890 nodelock_t *nodelock;
8891 acachectl_t *acarray = NULL;
8892 acache_cbarg_t *cbarg;
8895 REQUIRE(arg != NULL);
8899 * The caller must hold the entry lock.
8902 rbtdb = (dns_rbtdb_t *)cbarg->db;
8903 rbtnode = (dns_rbtnode_t *)cbarg->node;
8905 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8906 NODE_LOCK(nodelock, isc_rwlocktype_write);
8908 switch (cbarg->type) {
8909 case dns_rdatasetadditional_fromauth:
8910 acarray = cbarg->header->additional_auth;
8912 case dns_rdatasetadditional_fromglue:
8913 acarray = cbarg->header->additional_glue;
/* Clear the slot only when it still refers to this very entry. */
8919 count = cbarg->count;
8920 if (acarray != NULL && acarray[count].entry == entry) {
8921 acarray[count].entry = NULL;
8922 INSIST(acarray[count].cbarg == cbarg);
8923 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8924 acarray[count].cbarg = NULL;
8926 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8928 dns_acache_detachentry(&entry);
8930 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* The references are dropped only after the node lock is released. */
8932 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8933 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an additional-cache entry and dispose of its callback argument:
 * the entry is cancelled with dns_acache_cancelentry(), the node and
 * database references stored in the argument are released, and the
 * argument is returned to 'mctx'.
 * Requires mctx, entry, cbargp and *cbargp to be non-NULL.
 * NOTE(review): short lines (the assignment of cbarg from *cbargp and
 * trailing statements) are not present in this copy -- confirm against
 * the complete source.
 */
8941 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8942 acache_cbarg_t **cbargp)
8944 acache_cbarg_t *cbarg;
8946 REQUIRE(mctx != NULL);
8947 REQUIRE(entry != NULL);
8948 REQUIRE(cbargp != NULL && *cbargp != NULL);
8952 dns_acache_cancelentry(entry);
8953 dns_db_detachnode(cbarg->db, &cbarg->node);
8954 dns_db_detach(&cbarg->db);
8956 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Record an additional-section cache entry for the rdata the rdataset
 * cursor is currently on.  Nothing is stored for the fromcache type
 * (ISC_R_SUCCESS is returned at once).  Otherwise a callback argument is
 * allocated and given its own database and node references, a new acache
 * entry is created and filled in, and under the node write lock the
 * entry is installed in the per-header array selected by 'type'; the
 * array is allocated and cleared on first use.  An entry already present
 * in the slot is remembered and cancelled after the lock is released.
 * ISC_R_NOMEMORY is returned when the callback argument cannot be
 * allocated.  The trailing statements undo the partially built state:
 * the new entry is cancelled if it exists, otherwise the references and
 * memory of the callback argument are released directly.
 * An early return of ISC_R_NOTIMPLEMENTED is also visible.
 * NOTE(review): short lines (the rest of the parameter list, preprocessor
 * conditionals, goto/label lines, switch/break/default lines, the final
 * return) are not present in this copy -- confirm against the complete
 * source.
 */
8963 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8964 dns_rdatatype_t qtype, dns_acache_t *acache,
8965 dns_zone_t *zone, dns_db_t *db,
8966 dns_dbversion_t *version, dns_dbnode_t *node,
8980 return (ISC_R_NOTIMPLEMENTED);
8982 dns_rbtdb_t *rbtdb = rdataset->private1;
8983 dns_rbtnode_t *rbtnode = rdataset->private2;
8984 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8985 unsigned int current_count = rdataset->privateuint4;
8986 rdatasetheader_t *header;
8987 unsigned int total_count, count;
8988 nodelock_t *nodelock;
8989 isc_result_t result;
8990 acachectl_t *acarray;
8991 dns_acacheentry_t *newentry, *oldentry = NULL;
8992 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8996 if (type == dns_rdatasetadditional_fromcache)
8997 return (ISC_R_SUCCESS);
/* The header structure sits immediately before the slab data. */
8999 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* Slot index: position of the current rdata counted from the start. */
9001 total_count = raw[0] * 256 + raw[1];
9002 INSIST(total_count > current_count);
9003 count = total_count - current_count - 1; /* should be private data */
/* Build the callback argument; it owns a database and a node reference. */
9005 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
9006 if (newcbarg == NULL)
9007 return (ISC_R_NOMEMORY);
9008 newcbarg->type = type;
9009 newcbarg->count = count;
9010 newcbarg->header = header;
9011 newcbarg->db = NULL;
9012 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
9013 newcbarg->node = NULL;
9014 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
9017 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
9018 acache_callback, newcbarg, &newentry);
9019 if (result != ISC_R_SUCCESS)
9021 /* Set cache data in the new entry. */
9022 result = dns_acache_setentry(acache, newentry, zone, db,
9023 version, node, fname);
9024 if (result != ISC_R_SUCCESS)
9027 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9028 NODE_LOCK(nodelock, isc_rwlocktype_write);
9032 case dns_rdatasetadditional_fromauth:
9033 acarray = header->additional_auth;
9035 case dns_rdatasetadditional_fromglue:
9036 acarray = header->additional_glue;
/* First use for this header and type: allocate one slot per rdata. */
9042 if (acarray == NULL) {
9045 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
9046 sizeof(acachectl_t));
9048 if (acarray == NULL) {
9049 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9053 for (i = 0; i < total_count; i++) {
9054 acarray[i].entry = NULL;
9055 acarray[i].cbarg = NULL;
9059 case dns_rdatasetadditional_fromauth:
9060 header->additional_auth = acarray;
9062 case dns_rdatasetadditional_fromglue:
9063 header->additional_glue = acarray;
9069 if (acarray[count].entry != NULL) {
9071 * Swap the entry. Delay cleaning-up the old entry since
9072 * it would require a node lock.
9074 oldentry = acarray[count].entry;
9075 INSIST(acarray[count].cbarg != NULL);
9076 oldcbarg = acarray[count].cbarg;
/* Install the new entry and its callback argument in the slot. */
9078 acarray[count].entry = newentry;
9079 acarray[count].cbarg = newcbarg;
9081 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* Deferred cleanup of a replaced entry, now that the lock is released. */
9083 if (oldentry != NULL) {
9084 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9085 dns_acache_detachentry(&oldentry);
9088 return (ISC_R_SUCCESS);
/* Cleanup of a partially built entry and callback argument. */
9091 if (newcbarg != NULL) {
9092 if (newentry != NULL) {
9093 acache_cancelentry(rbtdb->common.mctx, newentry,
9095 dns_acache_detachentry(&newentry);
9097 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9098 dns_db_detach(&newcbarg->db);
9099 isc_mem_put(rbtdb->common.mctx, newcbarg,
9109 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9110 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9118 return (ISC_R_NOTIMPLEMENTED);
9120 dns_rbtdb_t *rbtdb = rdataset->private1;
9121 dns_rbtnode_t *rbtnode = rdataset->private2;
9122 unsigned char *raw = rdataset->private3; /* RDATASLAB */
9123 unsigned int current_count = rdataset->privateuint4;
9124 rdatasetheader_t *header;
9125 nodelock_t *nodelock;
9126 unsigned int total_count, count;
9127 acachectl_t *acarray;
9128 dns_acacheentry_t *entry;
9129 acache_cbarg_t *cbarg;
9131 UNUSED(qtype); /* we do not use this value at least for now */
9134 if (type == dns_rdatasetadditional_fromcache)
9135 return (ISC_R_SUCCESS);
9137 header = (struct rdatasetheader *)(raw - sizeof(*header));
9139 total_count = raw[0] * 256 + raw[1];
9140 INSIST(total_count > current_count);
9141 count = total_count - current_count - 1;
9146 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9147 NODE_LOCK(nodelock, isc_rwlocktype_write);
9150 case dns_rdatasetadditional_fromauth:
9151 acarray = header->additional_auth;
9153 case dns_rdatasetadditional_fromglue:
9154 acarray = header->additional_glue;
9160 if (acarray == NULL) {
9161 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9162 return (ISC_R_NOTFOUND);
9165 entry = acarray[count].entry;
9166 if (entry == NULL) {
9167 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9168 return (ISC_R_NOTFOUND);
9171 acarray[count].entry = NULL;
9172 cbarg = acarray[count].cbarg;
9173 acarray[count].cbarg = NULL;
9175 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9177 if (entry != NULL) {
9179 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9180 dns_acache_detachentry(&entry);
9183 return (ISC_R_SUCCESS);
9188 * Routines for LRU-based cache management.
9192 * See if a given cache entry that is being reused needs to be updated
9193 * in the LRU-list. From the LRU management point of view, this function is
9194 * expected to return true for almost all cases. When used with threads,
9195 * however, this may cause a non-negligible performance penalty because a
9196 * writer lock will have to be acquired before updating the list.
9197 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9198 * function returns true if the entry has not been updated for some period of
9199 * time. We differentiate the NS or glue address case and the others since
9200 * experiments have shown that the former tends to be accessed relatively
9201 * infrequently and the cost of cache miss is higher (e.g., a missing NS records
9202 * may cause external queries at a higher level zone, involving more
9205 * Caller must hold the node (read or write) lock.
9207 static inline isc_boolean_t
9208 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
9209 if ((header->attributes &
9210 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9213 #if DNS_RBTDB_LIMITLRUUPDATE
9214 if (header->type == dns_rdatatype_ns ||
9215 (header->trust == dns_trust_glue &&
9216 (header->type == dns_rdatatype_a ||
9217 header->type == dns_rdatatype_aaaa))) {
9219 * Glue records are updated if at least 60 seconds have passed
9220 * since the previous update time.
9222 return (header->last_used + 60 <= now);
9225 /* Other records are updated if 5 minutes have passed. */
9226 return (header->last_used + 300 <= now);
9235 * Update the timestamp of a given cache entry and move it to the head
9236 * of the corresponding LRU list.
9238 * Caller must hold the node (write) lock.
9240 * Note that the we do NOT touch the heap here, as the TTL has not changed.
9243 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9246 INSIST(IS_CACHE(rbtdb));
9248 /* To be checked: can we really assume this? XXXMLG */
9249 INSIST(ISC_LINK_LINKED(header, link));
9251 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9252 header->last_used = now;
9253 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9257 * Purge some expired and/or stale (i.e. unused for some period) cache entries
9258 * under an overmem condition. To recover from this condition quickly, up to
9259 * 2 entries will be purged. This process is triggered while adding a new
9260 * entry, and we specifically avoid purging entries in the same LRU bucket as
9261 * the one to which the new entry will belong. Otherwise, we might purge
9262 * entries of the same name of different RR types while adding RRsets from a
9263 * single response (consider the case where we're adding A and AAAA glue records
9264 * of the same NS name).
9267 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9268 isc_stdtime_t now, isc_boolean_t tree_locked)
9270 rdatasetheader_t *header, *header_prev;
9271 unsigned int locknum;
9274 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9275 locknum != locknum_start && purgecount > 0;
9276 locknum = (locknum + 1) % rbtdb->node_lock_count) {
9277 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9278 isc_rwlocktype_write);
9280 header = isc_heap_element(rbtdb->heaps[locknum], 1);
9281 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
9282 expire_header(rbtdb, header, tree_locked);
9286 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9287 header != NULL && purgecount > 0;
9288 header = header_prev) {
9289 header_prev = ISC_LIST_PREV(header, link);
9291 * Unlink the entry at this point to avoid checking it
9292 * again even if it's currently used someone else and
9293 * cannot be purged at this moment. This entry won't be
9294 * referenced any more (so unlinking is safe) since the
9295 * TTL was reset to 0.
9297 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9299 expire_header(rbtdb, header, tree_locked);
9303 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9304 isc_rwlocktype_write);
9309 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9310 isc_boolean_t tree_locked)
9312 set_ttl(rbtdb, header, 0);
9313 header->attributes |= RDATASET_ATTR_STALE;
9314 header->node->dirty = 1;
9317 * Caller must hold the node (write) lock.
9320 if (dns_rbtnode_refcurrent(header->node) == 0) {
9322 * If no one else is using the node, we can clean it up now.
9323 * We first need to gain a new reference to the node to meet a
9324 * requirement of decrement_reference().
9326 new_reference(rbtdb, header->node);
9327 decrement_reference(rbtdb, header->node, 0,
9328 isc_rwlocktype_write,
9329 tree_locked ? isc_rwlocktype_write :
9330 isc_rwlocktype_none, ISC_FALSE);