2 * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.270.12.16.8.3 2010/02/26 00:24:39 marka Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
65 #include <dns/zonekey.h>
67 #ifdef DNS_RBTDB_VERSION64
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
80 * Note that "impmagic" is not the first four bytes of the struct, so
81 * ISC_MAGIC_VALID cannot be used.
83 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
84 (rbtdb)->common.impmagic == RBTDB_MAGIC)
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t rbtdb_serial_t;
89 * Make casting easier in symbolic debuggers by using different names
90 * for the 64 bit version.
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
96 typedef isc_uint32_t rbtdb_serial_t;
99 typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * An rbtdb_rdatatype_t carries two values in one word:
 * RBTDB_RDATATYPE_BASE() reads the low 16 bits and RBTDB_RDATATYPE_EXT()
 * reads the bits above them.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
/*
 * Build an rbtdb_rdatatype_t from a base value 'b' and an extension
 * value 'e'.
 *
 * Both operands are converted to rbtdb_rdatatype_t *before* they are
 * shifted or OR-ed.  Casting after the shift would let the shift be
 * performed in the promoted type of 'e' (a signed int for narrow unsigned
 * types), which overflows for values with the top bit set.  'b' is masked
 * to its low 16 bits so it can never spill into the extension half.
 */
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((rbtdb_rdatatype_t)((((rbtdb_rdatatype_t)(e)) << 16) | \
			     (((rbtdb_rdatatype_t)(b)) & 0xFFFF)))
105 #define RBTDB_RDATATYPE_SIGNSEC \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
119 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120 * Using rwlock is effective with regard to lookup performance only when
121 * it is implemented in an efficient way.
122 * Otherwise, it is generally wise to stick to the simple locking since rwlock
123 * would require more memory or can even make lookups slower due to its own
124 * overhead (when it internally calls mutex locks).
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
129 #define DNS_RBTDB_USERWLOCK 0
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
138 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t) LOCK(l)
141 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
145 * Since node locking is sensitive to both performance and memory footprint,
146 * we need some trick here. If we have both high-performance rwlock and
147 * high performance and small-memory reference counters, we use rwlock for
148 * node lock and isc_refcount for node references. In this case, we don't have
149 * to protect the access to the counters by locks.
150 * Otherwise, we simply use ordinary mutex lock for node locking, and use
151 * simple integers as reference counters which is protected by the lock.
152 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153 * NODE_UNLOCK. In some other cases, however, we need to protect reference
154 * counters first and then protect other parts of a node as read-only data.
155 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156 * provided for these special cases. When we can use the efficient backend
157 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159 * section including the access to the reference counter.
160 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161 * section is also protected by NODE_STRONGLOCK().
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
166 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t) RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
172 #define NODE_STRONGLOCK(l) ((void)0)
173 #define NODE_STRONGUNLOCK(l) ((void)0)
174 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
178 typedef isc_mutex_t nodelock_t;
180 #define NODE_INITLOCK(l) isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
182 #define NODE_LOCK(l, t) LOCK(l)
183 #define NODE_UNLOCK(l, t) UNLOCK(l)
184 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
186 #define NODE_STRONGLOCK(l) LOCK(l)
187 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t) ((void)0)
189 #define NODE_WEAKUNLOCK(l, t) ((void)0)
190 #define NODE_WEAKDOWNGRADE(l) ((void)0)
194 * Whether to rate-limit updating the LRU to avoid possible thread contention.
195 * Our performance measurement has shown the cost is marginal, so it's defined
196 * to be 0 by default either with or without threads.
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 * Allow clients with a virtual time of up to 5 minutes in the past to see
204 * records that would otherwise have expired.
206 #define RBTDB_VIRTUAL 300
212 dns_rdatatype_t type;
215 typedef struct acachectl acachectl_t;
217 typedef struct rdatasetheader {
219 * Locked by the owning node's lock.
221 rbtdb_serial_t serial;
223 rbtdb_rdatatype_t type;
224 isc_uint16_t attributes;
226 struct noqname *noqname;
227 struct noqname *closest;
229 * We don't use the LIST macros, because the LIST structure has
230 * both head and tail pointers, and is doubly linked.
233 struct rdatasetheader *next;
235 * If this is the top header for an rdataset, 'next' points
236 * to the top header for the next rdataset (i.e., the next type).
237 * Otherwise, it points up to the header whose down pointer points
241 struct rdatasetheader *down;
243 * Points to the header for the next older version of
249 * Monotonically increased every time this rdataset is bound so that
250 * it is used as the base of the starting point in DNS responses
251 * when the "cyclic" rrset-order is required. Since the ordering
252 * should not be so crucial, no lock is set for the counter for
253 * performance reasons.
256 acachectl_t *additional_auth;
257 acachectl_t *additional_glue;
260 isc_stdtime_t last_used;
261 ISC_LINK(struct rdatasetheader) link;
263 unsigned int heap_index;
265 * Used for TTL-based cache cleaning.
267 isc_stdtime_t resign;
270 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
/*
 * Bit flags stored in the 'attributes' member of an rdatasetheader_t.
 * Each flag occupies its own bit so that several may be set at once.
 */
273 #define RDATASET_ATTR_NONEXISTENT 0x0001
274 #define RDATASET_ATTR_STALE 0x0002
275 #define RDATASET_ATTR_IGNORE 0x0004
276 #define RDATASET_ATTR_RETAIN 0x0008
277 #define RDATASET_ATTR_NXDOMAIN 0x0010
278 #define RDATASET_ATTR_RESIGN 0x0020
279 #define RDATASET_ATTR_STATCOUNT 0x0040
280 #define RDATASET_ATTR_OPTOUT 0x0080
281 #define RDATASET_ATTR_NEGATIVE 0x0100
283 typedef struct acache_cbarg {
284 dns_rdatasetadditional_t type;
288 rdatasetheader_t *header;
292 dns_acacheentry_t *entry;
293 acache_cbarg_t *cbarg;
298 * When the cache will pre-expire data (due to memory low or other
299 * situations) before the rdataset's TTL has expired, it MUST
300 * respect the RETAIN bit and not expire the data until its TTL is
304 #undef IGNORE /* WIN32 winbase.h defines this. */
/*
 * Predicates on a header's 'attributes' bit field.  'header' must be a
 * pointer to an object with an 'attributes' member.  Every macro except
 * EXISTS() is true when the flag it is named after is set; EXISTS() is
 * the logical inverse of NONEXISTENT().
 */
306 #define EXISTS(header) \
307 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
308 #define NONEXISTENT(header) \
309 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
310 #define IGNORE(header) \
311 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
312 #define RETAIN(header) \
313 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
314 #define NXDOMAIN(header) \
315 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
316 #define RESIGN(header) \
317 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
318 #define OPTOUT(header) \
319 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
320 #define NEGATIVE(header) \
321 (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
323 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
326 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
327 * There is a tradeoff issue about configuring this value: if this is too
328 * small, it may cause heavier contention between threads; if this is too large,
329 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
330 * The default value should work well for most environments, but this can
331 * also be configurable at compilation time via the
332 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
333 * 1 due to the assumption of overmem_purge().
335 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
336 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
337 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
342 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
343 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
347 /* Protected in the refcount routines. */
348 isc_refcount_t references;
349 /* Locked by lock. */
350 isc_boolean_t exiting;
353 typedef struct rbtdb_changed {
354 dns_rbtnode_t * node;
356 ISC_LINK(struct rbtdb_changed) link;
359 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
367 typedef struct rbtdb_version {
369 rbtdb_serial_t serial;
371 * Protected in the refcount routines.
372 * XXXJT: should we change the lock policy based on the refcount
375 isc_refcount_t references;
376 /* Locked by database lock. */
377 isc_boolean_t writer;
378 isc_boolean_t commit_ok;
379 rbtdb_changedlist_t changed_list;
380 rdatasetheaderlist_t resigned_list;
381 ISC_LINK(struct rbtdb_version) link;
382 dns_db_secure_t secure;
383 isc_boolean_t havensec3;
384 /* NSEC3 parameters */
387 isc_uint16_t iterations;
388 isc_uint8_t salt_length;
389 unsigned char salt[DNS_NSEC3_SALTSIZE];
392 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
397 #if DNS_RBTDB_USERWLOCK
402 isc_rwlock_t tree_lock;
403 unsigned int node_lock_count;
404 rbtdb_nodelock_t * node_locks;
405 dns_rbtnode_t * origin_node;
406 dns_stats_t * rrsetstats; /* cache DB only */
407 /* Locked by lock. */
409 isc_refcount_t references;
410 unsigned int attributes;
411 rbtdb_serial_t current_serial;
412 rbtdb_serial_t least_serial;
413 rbtdb_serial_t next_serial;
414 rbtdb_version_t * current_version;
415 rbtdb_version_t * future_version;
416 rbtdb_versionlist_t open_versions;
417 isc_boolean_t overmem;
419 dns_dbnode_t *soanode;
420 dns_dbnode_t *nsnode;
423 * This is a linked list used to implement the LRU cache. There will
424 * be node_lock_count linked lists here. Nodes in bucket 1 will be
425 * placed on the linked list rdatasets[1].
427 rdatasetheaderlist_t *rdatasets;
430 * Temporary storage for stale cache nodes and dynamically deleted
431 * nodes that await being cleaned up.
433 rbtnodelist_t *deadnodes;
436 * Heaps. Each of these is used for TTL based expiry.
440 /* Locked by tree_lock. */
445 unsigned int quantum;
448 #define RBTDB_ATTR_LOADED 0x01
449 #define RBTDB_ATTR_LOADING 0x02
456 rbtdb_version_t * rbtversion;
457 rbtdb_serial_t serial;
458 unsigned int options;
459 dns_rbtnodechain_t chain;
460 isc_boolean_t copy_name;
461 isc_boolean_t need_cleanup;
463 dns_rbtnode_t * zonecut;
464 rdatasetheader_t * zonecut_rdataset;
465 rdatasetheader_t * zonecut_sigrdataset;
466 dns_fixedname_t zonecut_name;
478 static void rdataset_disassociate(dns_rdataset_t *rdataset);
479 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
480 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
481 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
482 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
483 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
484 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
487 dns_rdataset_t *negsig);
488 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
491 dns_rdataset_t *negsig);
492 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
493 dns_rdatasetadditional_t type,
494 dns_rdatatype_t qtype,
495 dns_acache_t *acache,
498 dns_dbversion_t **versionp,
499 dns_dbnode_t **nodep,
503 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
504 dns_rdatasetadditional_t type,
505 dns_rdatatype_t qtype,
506 dns_acache_t *acache,
509 dns_dbversion_t *version,
512 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
513 dns_rdataset_t *rdataset,
514 dns_rdatasetadditional_t type,
515 dns_rdatatype_t qtype);
516 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
518 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
520 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
521 isc_boolean_t tree_locked);
522 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
523 isc_stdtime_t now, isc_boolean_t tree_locked);
524 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
525 rdatasetheader_t *newheader);
526 static void prune_tree(isc_task_t *task, isc_event_t *event);
527 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
528 static void rdataset_expire(dns_rdataset_t *rdataset);
530 static dns_rdatasetmethods_t rdataset_methods = {
531 rdataset_disassociate,
541 rdataset_getadditional,
542 rdataset_setadditional,
543 rdataset_putadditional,
548 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
549 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
550 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
551 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
552 dns_rdataset_t *rdataset);
554 static dns_rdatasetitermethods_t rdatasetiter_methods = {
555 rdatasetiter_destroy,
/*
 * Rdataset iterator state for this database implementation: the generic
 * dns_rdatasetiter_t followed by a pointer to an rdatasetheader_t.
 * NOTE(review): 'current' is presumably the header the iterator is
 * positioned on -- confirm against rdatasetiter_first()/rdatasetiter_next().
 */
561 typedef struct rbtdb_rdatasetiter {
562 dns_rdatasetiter_t common;
563 rdatasetheader_t * current;
564 } rbtdb_rdatasetiter_t;
566 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
567 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
568 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
569 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
571 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
572 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
573 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
574 dns_dbnode_t **nodep,
576 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
577 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
580 static dns_dbiteratormethods_t dbiterator_methods = {
592 #define DELETION_BATCH_MAX 64
595 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
597 typedef struct rbtdb_dbiterator {
598 dns_dbiterator_t common;
599 isc_boolean_t paused;
600 isc_boolean_t new_origin;
601 isc_rwlocktype_t tree_locked;
603 dns_fixedname_t name;
604 dns_fixedname_t origin;
605 dns_rbtnodechain_t chain;
606 dns_rbtnodechain_t nsec3chain;
607 dns_rbtnodechain_t *current;
609 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
611 isc_boolean_t nsec3only;
612 isc_boolean_t nonsec3;
613 } rbtdb_dbiterator_t;
616 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
617 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
619 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
621 static void overmem(dns_db_t *db, isc_boolean_t overmem);
622 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
623 isc_boolean_t *nsec3createflag);
626 * 'init_count' is used to initialize 'newheader->count' which in turn
627 * is used to determine where in the cycle rrset-order cyclic starts.
628 * We don't lock this as we don't care about simultaneous updates.
631 * Both init_count and header->count can be ISC_UINT32_MAX.
632 * The count on the returned rdataset however can't be as
633 * that indicates that the database does not implement cyclic
636 static unsigned int init_count;
641 * If a routine is going to lock more than one lock in this module, then
642 * the locking must be done in the following order:
646 * Node Lock (Only one from the set may be locked at one time by
651 * Failure to follow this hierarchy can result in deadlock.
657 * For zone databases the node for the origin of the zone MUST NOT be deleted.
666 attach(dns_db_t *source, dns_db_t **targetp) {
667 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
669 REQUIRE(VALID_RBTDB(rbtdb));
671 isc_refcount_increment(&rbtdb->references, NULL);
677 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
678 dns_rbtdb_t *rbtdb = event->ev_arg;
682 free_rbtdb(rbtdb, ISC_TRUE, event);
686 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
687 isc_boolean_t increment)
689 dns_rdatastatstype_t statattributes = 0;
690 dns_rdatastatstype_t base = 0;
691 dns_rdatastatstype_t type;
693 /* At the moment we count statistics only for cache DB */
694 INSIST(IS_CACHE(rbtdb));
696 if (NXDOMAIN(header))
697 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
698 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
699 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
700 base = RBTDB_RDATATYPE_EXT(header->type);
702 base = RBTDB_RDATATYPE_BASE(header->type);
704 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
706 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
708 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
712 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
717 oldttl = header->rdh_ttl;
718 header->rdh_ttl = newttl;
720 if (!IS_CACHE(rbtdb))
724 * It's possible the rbtdb is not a cache. If this is the case,
725 * we will not have a heap, and we move on. If we do, though,
726 * we might need to adjust things.
728 if (header->heap_index == 0 || newttl == oldttl)
730 idx = header->node->locknum;
731 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
733 heap = rbtdb->heaps[idx];
736 isc_heap_increased(heap, header->heap_index);
738 isc_heap_decreased(heap, header->heap_index);
742 * These functions allow the heap code to rank the priority of each
743 * element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
746 ttl_sooner(void *v1, void *v2) {
747 rdatasetheader_t *h1 = v1;
748 rdatasetheader_t *h2 = v2;
750 if (h1->rdh_ttl < h2->rdh_ttl)
756 resign_sooner(void *v1, void *v2) {
757 rdatasetheader_t *h1 = v1;
758 rdatasetheader_t *h2 = v2;
760 if (h1->resign < h2->resign)
766 * This function sets the heap index into the header.
769 set_index(void *what, unsigned int index) {
770 rdatasetheader_t *h = what;
772 h->heap_index = index;
776 * Work out how many nodes can be deleted in the time between two
777 * requests to the nameserver. Smooth the resulting number and use it
778 * as an estimate for the number of nodes to be deleted in the next
782 adjust_quantum(unsigned int old, isc_time_t *start) {
783 unsigned int pps = dns_pps; /* packets per second */
784 unsigned int interval;
793 interval = 1000000 / pps; /* interval in usec */
796 usecs = isc_time_microdiff(&end, start);
799 * We were unable to measure the amount of time taken.
800 * Double the nodes deleted next time.
807 new = old * interval;
808 new /= (unsigned int)usecs;
815 new = (new + old * 3) / 4;
817 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
818 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
824 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
826 isc_ondestroy_t ondest;
828 char buf[DNS_NAME_FORMATSIZE];
831 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
832 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
834 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
835 REQUIRE(rbtdb->future_version == NULL);
837 if (rbtdb->current_version != NULL) {
840 isc_refcount_decrement(&rbtdb->current_version->references,
843 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
844 isc_refcount_destroy(&rbtdb->current_version->references);
845 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
846 sizeof(rbtdb_version_t));
850 * We assume the number of remaining dead nodes is reasonably small;
851 * the overhead of unlinking all nodes here should be negligible.
853 for (i = 0; i < rbtdb->node_lock_count; i++) {
856 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
857 while (node != NULL) {
858 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
859 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
864 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
866 if (rbtdb->tree != NULL) {
867 isc_time_now(&start);
868 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
869 if (result == ISC_R_QUOTA) {
870 INSIST(rbtdb->task != NULL);
871 if (rbtdb->quantum != 0)
872 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
875 event = isc_event_allocate(rbtdb->common.mctx,
877 DNS_EVENT_FREESTORAGE,
880 sizeof(isc_event_t));
883 isc_task_send(rbtdb->task, &event);
886 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
889 if (rbtdb->nsec3 != NULL) {
890 isc_time_now(&start);
891 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
892 if (result == ISC_R_QUOTA) {
893 INSIST(rbtdb->task != NULL);
894 if (rbtdb->quantum != 0)
895 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
898 event = isc_event_allocate(rbtdb->common.mctx,
900 DNS_EVENT_FREESTORAGE,
903 sizeof(isc_event_t));
906 isc_task_send(rbtdb->task, &event);
909 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
913 isc_event_free(&event);
915 if (dns_name_dynamic(&rbtdb->common.origin))
916 dns_name_format(&rbtdb->common.origin, buf,
919 strcpy(buf, "<UNKNOWN>");
920 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
921 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
922 "done free_rbtdb(%s)", buf);
924 if (dns_name_dynamic(&rbtdb->common.origin))
925 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
926 for (i = 0; i < rbtdb->node_lock_count; i++) {
927 isc_refcount_destroy(&rbtdb->node_locks[i].references);
928 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
932 * Clean up LRU / re-signing order lists.
934 if (rbtdb->rdatasets != NULL) {
935 for (i = 0; i < rbtdb->node_lock_count; i++)
936 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
937 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
938 rbtdb->node_lock_count *
939 sizeof(rdatasetheaderlist_t));
942 * Clean up dead node buckets.
944 if (rbtdb->deadnodes != NULL) {
945 for (i = 0; i < rbtdb->node_lock_count; i++)
946 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
947 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
948 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
951 * Clean up heap objects.
953 if (rbtdb->heaps != NULL) {
954 for (i = 0; i < rbtdb->node_lock_count; i++)
955 isc_heap_destroy(&rbtdb->heaps[i]);
956 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
957 rbtdb->node_lock_count *
958 sizeof(isc_heap_t *));
961 if (rbtdb->rrsetstats != NULL)
962 dns_stats_detach(&rbtdb->rrsetstats);
964 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
965 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
966 isc_rwlock_destroy(&rbtdb->tree_lock);
967 isc_refcount_destroy(&rbtdb->references);
968 if (rbtdb->task != NULL)
969 isc_task_detach(&rbtdb->task);
971 RBTDB_DESTROYLOCK(&rbtdb->lock);
972 rbtdb->common.magic = 0;
973 rbtdb->common.impmagic = 0;
974 ondest = rbtdb->common.ondest;
975 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
976 isc_ondestroy_notify(&ondest, rbtdb);
980 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
981 isc_boolean_t want_free = ISC_FALSE;
983 unsigned int inactive = 0;
985 /* XXX check for open versions here */
987 if (rbtdb->soanode != NULL)
988 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
989 if (rbtdb->nsnode != NULL)
990 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
993 * Even though there are no external direct references, there still
994 * may be nodes in use.
996 for (i = 0; i < rbtdb->node_lock_count; i++) {
997 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
998 rbtdb->node_locks[i].exiting = ISC_TRUE;
999 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1000 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1006 if (inactive != 0) {
1007 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1008 rbtdb->active -= inactive;
1009 if (rbtdb->active == 0)
1010 want_free = ISC_TRUE;
1011 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1013 char buf[DNS_NAME_FORMATSIZE];
1014 if (dns_name_dynamic(&rbtdb->common.origin))
1015 dns_name_format(&rbtdb->common.origin, buf,
1018 strcpy(buf, "<UNKNOWN>");
1019 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1020 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1021 "calling free_rbtdb(%s)", buf);
1022 free_rbtdb(rbtdb, ISC_TRUE, NULL);
1028 detach(dns_db_t **dbp) {
1029 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1032 REQUIRE(VALID_RBTDB(rbtdb));
1034 isc_refcount_decrement(&rbtdb->references, &refs);
1037 maybe_free_rbtdb(rbtdb);
1043 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1044 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1045 rbtdb_version_t *version;
1048 REQUIRE(VALID_RBTDB(rbtdb));
1050 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1051 version = rbtdb->current_version;
1052 isc_refcount_increment(&version->references, &refs);
1053 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1055 *versionp = (dns_dbversion_t *)version;
1058 static inline rbtdb_version_t *
1059 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1060 unsigned int references, isc_boolean_t writer)
1062 isc_result_t result;
1063 rbtdb_version_t *version;
1065 version = isc_mem_get(mctx, sizeof(*version));
1066 if (version == NULL)
1068 version->serial = serial;
1069 result = isc_refcount_init(&version->references, references);
1070 if (result != ISC_R_SUCCESS) {
1071 isc_mem_put(mctx, version, sizeof(*version));
1074 version->writer = writer;
1075 version->commit_ok = ISC_FALSE;
1076 ISC_LIST_INIT(version->changed_list);
1077 ISC_LIST_INIT(version->resigned_list);
1078 ISC_LINK_INIT(version, link);
1084 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1085 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1086 rbtdb_version_t *version;
1088 REQUIRE(VALID_RBTDB(rbtdb));
1089 REQUIRE(versionp != NULL && *versionp == NULL);
1090 REQUIRE(rbtdb->future_version == NULL);
1092 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1093 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1094 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1096 if (version != NULL) {
1097 version->commit_ok = ISC_TRUE;
1098 version->secure = rbtdb->current_version->secure;
1099 version->havensec3 = rbtdb->current_version->havensec3;
1100 if (version->havensec3) {
1101 version->flags = rbtdb->current_version->flags;
1102 version->iterations =
1103 rbtdb->current_version->iterations;
1104 version->hash = rbtdb->current_version->hash;
1105 version->salt_length =
1106 rbtdb->current_version->salt_length;
1107 memcpy(version->salt, rbtdb->current_version->salt,
1108 version->salt_length);
1111 version->iterations = 0;
1113 version->salt_length = 0;
1114 memset(version->salt, 0, sizeof(version->salt));
1116 rbtdb->next_serial++;
1117 rbtdb->future_version = version;
1119 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1121 if (version == NULL)
1122 return (ISC_R_NOMEMORY);
1124 *versionp = version;
1126 return (ISC_R_SUCCESS);
1130 attachversion(dns_db_t *db, dns_dbversion_t *source,
1131 dns_dbversion_t **targetp)
1133 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1134 rbtdb_version_t *rbtversion = source;
1137 REQUIRE(VALID_RBTDB(rbtdb));
1139 isc_refcount_increment(&rbtversion->references, &refs);
1142 *targetp = rbtversion;
1145 static rbtdb_changed_t *
1146 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1147 dns_rbtnode_t *node)
1149 rbtdb_changed_t *changed;
1153 * Caller must be holding the node lock if its reference must be
1154 * protected by the lock.
1157 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1159 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1161 REQUIRE(version->writer);
1163 if (changed != NULL) {
1164 dns_rbtnode_refincrement(node, &refs);
1166 changed->node = node;
1167 changed->dirty = ISC_FALSE;
1168 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1170 version->commit_ok = ISC_FALSE;
1172 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1178 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1183 unsigned char *raw; /* RDATASLAB */
1186 * The caller must be holding the corresponding node lock.
1192 raw = (unsigned char *)header + sizeof(*header);
1193 count = raw[0] * 256 + raw[1];
1196 * Sanity check: since an additional cache entry has a reference to
1197 * the original DB node (in the callback arg), there should be no
1198 * acache entries when the node can be freed.
1200 for (i = 0; i < count; i++)
1201 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1203 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1207 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1209 if (dns_name_dynamic(&(*noqname)->name))
1210 dns_name_free(&(*noqname)->name, mctx);
1211 if ((*noqname)->neg != NULL)
1212 isc_mem_put(mctx, (*noqname)->neg,
1213 dns_rdataslab_size((*noqname)->neg, 0));
1214 if ((*noqname)->negsig != NULL)
1215 isc_mem_put(mctx, (*noqname)->negsig,
1216 dns_rdataslab_size((*noqname)->negsig, 0));
1217 isc_mem_put(mctx, *noqname, sizeof(**noqname));
1222 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1224 ISC_LINK_INIT(h, link);
1228 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1229 fprintf(stderr, "initialized header: %p\n", h);
1235 static inline rdatasetheader_t *
1236 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1238 rdatasetheader_t *h;
1240 h = isc_mem_get(mctx, sizeof(*h));
1245 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1246 fprintf(stderr, "allocated header: %p\n", h);
1248 init_rdataset(rbtdb, h);
/*
 * free_rdataset: tear down one rdataset header and return its memory to
 * mctx.
 *
 * Steps, in order:
 *  - for an existing header flagged RDATASET_ATTR_STATCOUNT, update the
 *    rrset statistics with ISC_FALSE as the last argument;
 *  - if the header is on a list, unlink it from the rdatasets list of
 *    its node lock bucket (this is INSISTed to happen only for caches);
 *  - if heap_index is nonzero, delete the header from the heap of the
 *    same bucket, then clear heap_index;
 *  - free the noqname and closest structures when present;
 *  - free both additional-cache arrays (auth and glue);
 *  - release the header: a NONEXISTENT header occupies only
 *    sizeof(*rdataset), any other header is sized with
 *    dns_rdataslab_size().
 */
1253 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1258 if (EXISTS(rdataset) &&
1259 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1260 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1263 idx = rdataset->node->locknum;
1264 if (ISC_LINK_LINKED(rdataset, link)) {
1265 INSIST(IS_CACHE(rbtdb));
1266 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1268 if (rdataset->heap_index != 0)
1269 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1270 rdataset->heap_index = 0;
1272 if (rdataset->noqname != NULL)
1273 free_noqname(mctx, &rdataset->noqname);
1274 if (rdataset->closest != NULL)
1275 free_noqname(mctx, &rdataset->closest);
1277 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1278 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1280 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1281 size = sizeof(*rdataset);
1283 size = dns_rdataslab_size((unsigned char *)rdataset,
1285 isc_mem_put(mctx, rdataset, size);
/*
 * rollback_node: flag every header at node whose serial equals serial
 * with RDATASET_ATTR_IGNORE.
 *
 * Both dimensions are visited: the top-level chain linked through next
 * and, below each top-level header, the version chain linked through
 * down.  make_dirty records whether at least one header was flagged.
 */
1289 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1290 rdatasetheader_t *header, *dcurrent;
1291 isc_boolean_t make_dirty = ISC_FALSE;
1294 * Caller must hold the node lock.
1298 * We set the IGNORE attribute on rdatasets with serial number
1299 * 'serial'. When the reference count goes to zero, these rdatasets
1300 * will be cleaned up; until that time, they will be ignored.
1302 for (header = node->data; header != NULL; header = header->next) {
1303 if (header->serial == serial) {
1304 header->attributes |= RDATASET_ATTR_IGNORE;
1305 make_dirty = ISC_TRUE;
1307 for (dcurrent = header->down;
1309 dcurrent = dcurrent->down) {
1310 if (dcurrent->serial == serial) {
1311 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1312 make_dirty = ISC_TRUE;
/*
 * clean_stale_headers: free every header on the down chain below top.
 *
 * The successor pointer is saved in down_next before each
 * free_rdataset() call because that call releases the current element.
 * The top header itself is not freed by this loop.
 */
1321 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1323 rdatasetheader_t *d, *down_next;
1325 for (d = top->down; d != NULL; d = down_next) {
1326 down_next = d->down;
1327 free_rdataset(rbtdb, mctx, d);
/*
 * clean_cache_node: purge stale data from a cache node.
 *
 * For each top-level header the down chain is discarded with
 * clean_stale_headers().  A top-level header that is NONEXISTENT or
 * STALE is then unlinked from the next chain (through top_prev->next
 * when top_prev is non-NULL, or through node->data) and freed.
 * top_next is captured before any free so iteration can continue.
 */
1333 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1334 rdatasetheader_t *current, *top_prev, *top_next;
1335 isc_mem_t *mctx = rbtdb->common.mctx;
1338 * Caller must be holding the node lock.
1342 for (current = node->data; current != NULL; current = top_next) {
1343 top_next = current->next;
1344 clean_stale_headers(rbtdb, mctx, current);
1346 * If current is nonexistent or stale, we can clean it up.
1348 if ((current->attributes &
1349 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1350 if (top_prev != NULL)
1351 top_prev->next = current->next;
1353 node->data = current->next;
1354 free_rdataset(rbtdb, mctx, current);
/*
 * clean_zone_node: prune obsolete rdataset versions from a zone node.
 *
 * least_serial must be nonzero (REQUIRE).  For every top-level header
 * reachable from node->data through next:
 *
 *  1. Walk its down chain and free entries that duplicate the serial of
 *     their parent entry (a second condition on the same test continues
 *     on a following line), relinking dparent->down around each freed
 *     entry.  Serials are INSISTed to be non-increasing going down.
 *  2. If the top-level header itself is IGNOREd, free it.  With no down
 *     entry it is simply unlinked from the next chain; otherwise its
 *     first down entry is pulled up to take its place in that chain.
 *  3. Search the down chain for the first entry whose serial is below
 *     least_serial; that entry and everything older beneath it are
 *     freed and dparent->down is cleared.
 *  4. If a down chain still remains, still_dirty is set.  A top-level
 *     header that is NONEXISTENT is unlinked and freed.
 */
1362 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1363 rbtdb_serial_t least_serial)
1365 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1366 rdatasetheader_t *top_prev, *top_next;
1367 isc_mem_t *mctx = rbtdb->common.mctx;
1368 isc_boolean_t still_dirty = ISC_FALSE;
1371 * Caller must be holding the node lock.
1373 REQUIRE(least_serial != 0);
1376 for (current = node->data; current != NULL; current = top_next) {
1377 top_next = current->next;
1380 * First, we clean up any instances of multiple rdatasets
1381 * with the same serial number, or that have the IGNORE
1385 for (dcurrent = current->down;
1387 dcurrent = down_next) {
1388 down_next = dcurrent->down;
1389 INSIST(dcurrent->serial <= dparent->serial);
1390 if (dcurrent->serial == dparent->serial ||
1392 if (down_next != NULL)
1393 down_next->next = dparent;
1394 dparent->down = down_next;
1395 free_rdataset(rbtdb, mctx, dcurrent);
1401 * We've now eliminated all IGNORE datasets with the possible
1402 * exception of current, which we now check.
1404 if (IGNORE(current)) {
1405 down_next = current->down;
1406 if (down_next == NULL) {
1407 if (top_prev != NULL)
1408 top_prev->next = current->next;
1410 node->data = current->next;
1411 free_rdataset(rbtdb, mctx, current);
1413 * current no longer exists, so we can
1414 * just continue with the loop.
1419 * Pull up current->down, making it the new
1422 if (top_prev != NULL)
1423 top_prev->next = down_next;
1425 node->data = down_next;
1426 down_next->next = top_next;
1427 free_rdataset(rbtdb, mctx, current);
1428 current = down_next;
1433 * We now try to find the first down node less than the
1437 for (dcurrent = current->down;
1439 dcurrent = down_next) {
1440 down_next = dcurrent->down;
1441 if (dcurrent->serial < least_serial)
1447 * If there is a such an rdataset, delete it and any older
1450 if (dcurrent != NULL) {
1452 down_next = dcurrent->down;
1453 INSIST(dcurrent->serial <= least_serial);
1454 free_rdataset(rbtdb, mctx, dcurrent);
1455 dcurrent = down_next;
1456 } while (dcurrent != NULL);
1457 dparent->down = NULL;
1461 * Note. The serial number of 'current' might be less than
1462 * least_serial too, but we cannot delete it because it is
1463 * the most recent version, unless it is a NONEXISTENT
1466 if (current->down != NULL) {
1467 still_dirty = ISC_TRUE;
1471 * If this is a NONEXISTENT rdataset, we can delete it.
1473 if (NONEXISTENT(current)) {
1474 if (top_prev != NULL)
1475 top_prev->next = current->next;
1477 node->data = current->next;
1478 free_rdataset(rbtdb, mctx, current);
1488 * Clean up dead nodes. These are nodes which have no references, and
1489 * have no data. They are dead but we could not or chose not to delete
1490 * them when we deleted all the data at that node because we did not want
1491 * to wait for the tree write lock.
1493 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * cleanup_dead_nodes: delete nodes queued on the deadnodes list of lock
 * bucket bucketnum.
 *
 * The loop runs while the list is non-empty and count (initially 10) is
 * positive.  Each node is unlinked from the list, INSISTed to have zero
 * references and no data, and then removed with dns_rbt_deletenode()
 * from either the nsec3 tree or the main tree.  A deletion failure is
 * logged at warning level and does not stop the loop; the next
 * candidate is always re-read from the head of the list.
 */
1496 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1497 dns_rbtnode_t *node;
1498 isc_result_t result;
1499 int count = 10; /* XXXJT: should be adjustable */
1501 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1502 while (node != NULL && count > 0) {
1503 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1506 * Since we're holding a tree write lock, it should be
1507 * impossible for this node to be referenced by others.
1509 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1510 node->data == NULL);
1512 INSIST(!ISC_LINK_LINKED(node, deadlink));
1514 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1517 result = dns_rbt_deletenode(rbtdb->tree, node,
1519 if (result != ISC_R_SUCCESS)
1520 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1521 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1522 "cleanup_dead_nodes: "
1523 "dns_rbt_deletenode: %s",
1524 isc_result_totext(result));
1525 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1531 * Caller must be holding the node lock if its reference must be protected
/*
 * new_reference: add one reference to node.
 *
 * The node counter is incremented with the increment0 variant, so a
 * transition from zero is permitted.  When the resulting count is 1,
 * the references counter of the node lock bucket selected by
 * node->locknum is incremented as well.  Both resulting counts are
 * INSISTed to be nonzero.
 */
1535 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1536 unsigned int lockrefs, noderefs;
1537 isc_refcount_t *lockref;
1539 dns_rbtnode_refincrement0(node, &noderefs);
1540 if (noderefs == 1) { /* this is the first reference to the node */
1541 lockref = &rbtdb->node_locks[node->locknum].references;
1542 isc_refcount_increment0(lockref, &lockrefs);
1543 INSIST(lockrefs != 0);
1545 INSIST(noderefs != 0);
1549 * This function is assumed to be called when a node is newly referenced
1550 * and can be in the deadnode list. In that case the node must be retrieved
1551 * from the list because it is going to be used. In addition, if the caller
1552 * happens to hold a write lock on the tree, it's a good chance to purge dead
1554 * Note: while a new reference is gained in multiple places, there are only very
1555 * few cases where the node can be in the deadnode list (only empty nodes can
1556 * have been added to the list).
/*
 * reactivate_node: take a reference on node and, if needed, pull it off
 * the deadnodes list of its lock bucket.
 *
 * Under the strong node lock the reference is added first.  A weak read
 * lock is then used to decide whether more work is required: either the
 * node is currently on the deadnodes list, or the list is non-empty and
 * the caller reports holding the tree write lock (treelocktype).  Only
 * in that case is the weak write lock taken, the node unlinked when it
 * is still linked, and -- with the tree write lock held --
 * cleanup_dead_nodes() run for the bucket.
 */
1559 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1560 isc_rwlocktype_t treelocktype)
1562 isc_boolean_t need_relock = ISC_FALSE;
1564 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1565 new_reference(rbtdb, node);
1567 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1568 isc_rwlocktype_read);
1569 if (ISC_LINK_LINKED(node, deadlink))
1570 need_relock = ISC_TRUE;
1571 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1572 treelocktype == isc_rwlocktype_write)
1573 need_relock = ISC_TRUE;
1574 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1575 isc_rwlocktype_read);
1577 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1578 isc_rwlocktype_write);
1579 if (ISC_LINK_LINKED(node, deadlink))
1580 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1582 if (treelocktype == isc_rwlocktype_write)
1583 cleanup_dead_nodes(rbtdb, node->locknum);
1584 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1585 isc_rwlocktype_write);
1588 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1592 * Caller must be holding the node lock; either the "strong", read or write
1593 * lock. Note that the lock must be held even when node references are
1594 * atomically modified; in that case the decrement operation itself does not
1595 * have to be protected, but we must avoid a race condition where multiple
1596 * threads are decreasing the reference to zero simultaneously and at least
1597 * one of them is going to free the node.
1598 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * decrement_reference: drop one reference to node and, when it was the
 * last one, clean the node and try to remove it from the tree.
 *
 * Parameters:
 *   least_serial  oldest serial still in use; 0 means the caller does
 *                 not know it, in which case it is read from the
 *                 database under the database read lock.
 *   nlock         node lock type held by the caller; a read lock is
 *                 swapped for a write lock for the slow path and
 *                 downgraded again before returning.
 *   tlock         tree lock type held by the caller (none, read or
 *                 write).
 *   pruning       ISC_TRUE when called while pruning; suppresses
 *                 dispatching another pruning event.
 *
 * Flow:
 *   1. Fast path: a node that is not dirty and still has data or a down
 *      pointer only has its counters decremented.
 *   2. Otherwise the node counter is decremented under the write lock.
 *   3. A dirty node with no references is cleaned with
 *      clean_cache_node() for caches or clean_zone_node() otherwise.
 *   4. The lock bucket counter is decremented.  A node that still has
 *      data or a down pointer is left in the tree.
 *   5. Unless the tree write lock is already held, it is acquired with
 *      a non-blocking tryupgrade or trylock only.
 *   6. With the tree write lock and zero references: a childless node
 *      that is the down pointer of its parent is handed to a pruning
 *      event when a task is set and pruning is false (a new reference
 *      is taken for the event); the deadnodes list is the fallback when
 *      the event cannot be allocated.  Another visible arm deletes the
 *      node from the nsec3 tree or the main tree.
 *   7. Without the tree write lock, an unreferenced node is appended to
 *      the deadnodes list of its bucket for later cleanup.
 *   8. Node and tree locks are restored to what the caller held.
 *
 * The value returned is no_reference.
 */
1601 static isc_boolean_t
1602 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1603 rbtdb_serial_t least_serial,
1604 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1605 isc_boolean_t pruning)
1607 isc_result_t result;
1608 isc_boolean_t write_locked;
1609 rbtdb_nodelock_t *nodelock;
1610 unsigned int refs, nrefs;
1611 int bucket = node->locknum;
1612 isc_boolean_t no_reference;
1614 nodelock = &rbtdb->node_locks[bucket];
1616 /* Handle easy and typical case first. */
1617 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1618 dns_rbtnode_refdecrement(node, &nrefs);
1619 INSIST((int)nrefs >= 0);
1621 isc_refcount_decrement(&nodelock->references, &refs);
1622 INSIST((int)refs >= 0);
1624 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1627 /* Upgrade the lock? */
1628 if (nlock == isc_rwlocktype_read) {
1629 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1630 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1632 dns_rbtnode_refdecrement(node, &nrefs);
1633 INSIST((int)nrefs >= 0);
1635 /* Restore the lock? */
1636 if (nlock == isc_rwlocktype_read)
1637 NODE_WEAKDOWNGRADE(&nodelock->lock);
1641 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1642 if (IS_CACHE(rbtdb))
1643 clean_cache_node(rbtdb, node);
1645 if (least_serial == 0) {
1647 * Caller doesn't know the least serial.
1650 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1651 least_serial = rbtdb->least_serial;
1652 RBTDB_UNLOCK(&rbtdb->lock,
1653 isc_rwlocktype_read);
1655 clean_zone_node(rbtdb, node, least_serial);
1659 isc_refcount_decrement(&nodelock->references, &refs);
1660 INSIST((int)refs >= 0);
1663 * XXXDCL should this only be done for cache zones?
1665 if (node->data != NULL || node->down != NULL) {
1666 /* Restore the lock? */
1667 if (nlock == isc_rwlocktype_read)
1668 NODE_WEAKDOWNGRADE(&nodelock->lock);
1673 * Attempt to switch to a write lock on the tree. If this fails,
1674 * we will add this node to a linked list of nodes in this locking
1675 * bucket which we will free later.
1677 if (tlock != isc_rwlocktype_write) {
1679 * Locking hierarchy notwithstanding, we don't need to free
1680 * the node lock before acquiring the tree write lock because
1681 * we only do a trylock.
1683 if (tlock == isc_rwlocktype_read)
1684 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1686 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1687 isc_rwlocktype_write);
1688 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1689 result == ISC_R_LOCKBUSY);
1691 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1693 write_locked = ISC_TRUE;
1695 no_reference = ISC_TRUE;
1696 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1698 * We can now delete the node if the reference counter is
1699 * zero. This should be typically the case, but a different
1700 * thread may still gain a (new) reference just before the
1701 * current thread locks the tree (e.g., in findnode()).
1705 * If this node is the only one in the level it's in, deleting
1706 * this node may recursively make its parent the only node in
1707 * the parent level; if so, and if no one is currently using
1708 * the parent node, this is almost the only opportunity to
1709 * clean it up. But the recursive cleanup is not that trivial
1710 * since the child and parent may be in different lock buckets,
1711 * which would cause a lock order reversal problem. To avoid
1712 * the trouble, we'll dispatch a separate event for batch
1713 * cleaning. We need to check whether we're deleting the node
1714 * as a result of pruning to avoid infinite dispatching.
1715 * Note: pruning happens only when a task has been set for the
1716 * rbtdb. If the user of the rbtdb chooses not to set a task,
1717 * it's their responsibility to purge stale leaves (e.g. by
1718 * periodic walk-through).
1720 if (!pruning && node->parent != NULL &&
1721 node->parent->down == node && node->left == NULL &&
1722 node->right == NULL && rbtdb->task != NULL) {
1726 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1729 sizeof(isc_event_t));
1731 new_reference(rbtdb, node);
1733 attach((dns_db_t *)rbtdb, &db);
1735 isc_task_send(rbtdb->task, &ev);
1736 no_reference = ISC_FALSE;
1739 * XXX: this is a weird situation. We could
1740 * ignore this error case, but then the stale
1741 * node will unlikely be purged except via a
1742 * rare condition such as manual cleanup. So
1743 * we queue it in the deadnodes list, hoping
1744 * the memory shortage is temporary and the node
1745 * will be deleted later.
1747 isc_log_write(dns_lctx,
1748 DNS_LOGCATEGORY_DATABASE,
1749 DNS_LOGMODULE_CACHE,
1751 "decrement_reference: failed to "
1752 "allocate pruning event");
1753 INSIST(!ISC_LINK_LINKED(node, deadlink));
1754 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1758 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1759 char printname[DNS_NAME_FORMATSIZE];
1761 isc_log_write(dns_lctx,
1762 DNS_LOGCATEGORY_DATABASE,
1763 DNS_LOGMODULE_CACHE,
1765 "decrement_reference: "
1766 "delete from rbt: %p %s",
1768 dns_rbt_formatnodename(node,
1770 sizeof(printname)));
1773 INSIST(!ISC_LINK_LINKED(node, deadlink));
1775 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1778 result = dns_rbt_deletenode(rbtdb->tree, node,
1780 if (result != ISC_R_SUCCESS) {
1781 isc_log_write(dns_lctx,
1782 DNS_LOGCATEGORY_DATABASE,
1783 DNS_LOGMODULE_CACHE,
1785 "decrement_reference: "
1786 "dns_rbt_deletenode: %s",
1787 isc_result_totext(result));
1790 } else if (dns_rbtnode_refcurrent(node) == 0) {
1791 INSIST(!ISC_LINK_LINKED(node, deadlink));
1792 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1794 no_reference = ISC_FALSE;
1796 /* Restore the lock? */
1797 if (nlock == isc_rwlocktype_read)
1798 NODE_WEAKDOWNGRADE(&nodelock->lock);
1801 * Relock a read lock, or unlock the write lock if no lock was held.
1803 if (tlock == isc_rwlocktype_none)
1805 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1807 if (tlock == isc_rwlocktype_read)
1809 isc_rwlock_downgrade(&rbtdb->tree_lock);
1811 return (no_reference);
1815 * Prune the tree by recursively cleaning-up single leaves. In the worst
1816 * case, the number of iterations is the number of tree levels, which is at
1817 * most the maximum number of domain name labels, i.e., 127. In practice, this
1818 * should be much smaller (only a few times), and even the worst case would be
1819 * acceptable for a single event.
/*
 * prune_tree: task event handler that removes a leaf node and then
 * climbs towards the root removing parents that have become empty.
 *
 * The database is taken from event->ev_sender and the starting node
 * from event->ev_arg; the event is freed before any work is done.  The
 * tree write lock is held for the whole run, together with the write
 * lock of one node lock bucket at a time.
 *
 * On each pass the reference held on node is dropped through
 * decrement_reference() with pruning set to ISC_TRUE.  If the parent
 * exists and no longer has a down pointer, the node lock is switched to
 * the bucket of the parent when it differs, a reference is taken on the
 * parent, and the parent is unlinked from the deadnodes list if it was
 * queued there.  The loop ends when node becomes NULL, after which the
 * locks are released and the database is detached.
 */
1822 prune_tree(isc_task_t *task, isc_event_t *event) {
1823 dns_rbtdb_t *rbtdb = event->ev_sender;
1824 dns_rbtnode_t *node = event->ev_arg;
1825 dns_rbtnode_t *parent;
1826 unsigned int locknum;
1830 isc_event_free(&event);
1832 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1833 locknum = node->locknum;
1834 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1836 parent = node->parent;
1837 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1838 isc_rwlocktype_write, ISC_TRUE);
1840 if (parent != NULL && parent->down == NULL) {
1842 * node was the only down child of the parent and has
1843 * just been removed. We'll then need to examine the
1844 * parent. Keep the lock if possible; otherwise,
1845 * release the old lock and acquire one for the parent.
1847 if (parent->locknum != locknum) {
1848 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1849 isc_rwlocktype_write);
1850 locknum = parent->locknum;
1851 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1852 isc_rwlocktype_write);
1856 * We need to gain a reference to the node before
1857 * decrementing it in the next iteration. In addition,
1858 * if the node is in the dead-nodes list, extract it
1859 * from the list beforehand as we do in
1860 * reactivate_node().
1862 new_reference(rbtdb, parent);
1863 if (ISC_LINK_LINKED(parent, deadlink)) {
1864 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1871 } while (node != NULL);
1872 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1873 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1875 detach((dns_db_t **)&rbtdb);
/*
 * make_least_version: record version as the least open version.
 *
 * The least_serial of the database is set to the serial of version, the
 * changed list of version is copied to *cleanup_list, and the list head
 * in version is then re-initialized to empty.
 */
1879 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1880 rbtdb_changedlist_t *cleanup_list)
1883 * Caller must be holding the database lock.
1886 rbtdb->least_serial = version->serial;
1887 *cleanup_list = version->changed_list;
1888 ISC_LIST_INIT(version->changed_list);
/*
 * cleanup_nondirty: move every changed record that is not dirty from
 * the changed list of version to *cleanup_list.
 *
 * The successor is captured in next_changed before a record is
 * unlinked, so the walk is safe while the list is being modified.
 * Dirty records stay on the changed list of version.
 */
1892 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1893 rbtdb_changed_t *changed, *next_changed;
1896 * If the changed record is dirty, then
1897 * an update created multiple versions of
1898 * a given rdataset. We keep this list
1899 * until we're the least open version, at
1900 * which point it's safe to get rid of any
1903 * If the changed record isn't dirty, then
1904 * we don't need it anymore since we're
1905 * committing and not rolling back.
1907 * The caller must be holding the database lock.
1909 for (changed = HEAD(version->changed_list);
1911 changed = next_changed) {
1912 next_changed = NEXT(changed, link);
1913 if (!changed->dirty) {
1914 UNLINK(version->changed_list,
1916 APPEND(*cleanup_list,
/*
 * iszonesecure: work out the DNSSEC status of version and store it in
 * version->secure.
 *
 * 1. Look up the DNSKEY rdataset at origin and scan it for a record
 *    accepted by dns_zonekey_iszonekey(); haszonekey records a hit.
 *    NOTE(review): the assignments of dns_db_insecure and a false
 *    havensec3 that follow are presumably the exit taken when no zone
 *    key exists -- confirm.
 * 2. Look up the NSEC rdataset at origin together with its signatures.
 *    When signatures are associated, the first NSEC record is tested
 *    with dns_nsec_typepresent() and the outcome kept in hasoptbit.
 * 3. Call setnsec3parameters() to refresh the cached NSEC3 parameters
 *    and to learn whether an NSEC3 chain is being created.
 * 4. Classify: dns_db_secure when havensec3 is set or when an NSEC
 *    chain exists without the opt bit; dns_db_partial when the opt bit
 *    is set or an NSEC3 chain is under creation; dns_db_insecure
 *    otherwise.
 */
1923 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1924 dns_rdataset_t keyset;
1925 dns_rdataset_t nsecset, signsecset;
1926 dns_rdata_t rdata = DNS_RDATA_INIT;
1927 isc_boolean_t haszonekey = ISC_FALSE;
1928 isc_boolean_t hasnsec = ISC_FALSE;
1929 isc_boolean_t hasoptbit = ISC_FALSE;
1930 isc_boolean_t nsec3createflag = ISC_FALSE;
1931 isc_result_t result;
1933 dns_rdataset_init(&keyset);
1934 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1935 0, 0, &keyset, NULL);
1936 if (result == ISC_R_SUCCESS) {
1937 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1938 result = dns_rdataset_first(&keyset);
1939 while (result == ISC_R_SUCCESS) {
1940 dns_rdataset_current(&keyset, &keyrdata);
1941 if (dns_zonekey_iszonekey(&keyrdata)) {
1942 haszonekey = ISC_TRUE;
1945 result = dns_rdataset_next(&keyset);
1947 dns_rdataset_disassociate(&keyset);
1950 version->secure = dns_db_insecure;
1951 version->havensec3 = ISC_FALSE;
1955 dns_rdataset_init(&nsecset);
1956 dns_rdataset_init(&signsecset);
1957 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1958 0, 0, &nsecset, &signsecset);
1959 if (result == ISC_R_SUCCESS) {
1960 if (dns_rdataset_isassociated(&signsecset)) {
1962 result = dns_rdataset_first(&nsecset);
1963 if (result == ISC_R_SUCCESS) {
1964 dns_rdataset_current(&nsecset, &rdata);
1965 hasoptbit = dns_nsec_typepresent(&rdata,
1968 dns_rdataset_disassociate(&signsecset);
1970 dns_rdataset_disassociate(&nsecset);
1973 setnsec3parameters(db, version, &nsec3createflag);
1976 * Do we have a valid NSEC/NSEC3 chain?
1978 if (version->havensec3 || (hasnsec && !hasoptbit))
1979 version->secure = dns_db_secure;
1981 * Do we have a NSEC/NSEC3 chain under creation?
1983 else if (hasoptbit || nsec3createflag)
1984 version->secure = dns_db_partial;
1986 version->secure = dns_db_insecure;
1990 * Walk the origin node looking for NSEC3PARAM records.
1991 * Cache the nsec3 parameters.
/*
 * setnsec3parameters: scan the origin node for NSEC3PARAM records and
 * cache usable parameters in version.
 *
 * Locks: the tree read lock and the read lock of the origin node bucket
 * are held for the whole scan.  version->havensec3 is cleared first.
 *
 * For each top-level header at the origin node the down chain is
 * searched for the entry visible at version->serial.  When that entry
 * is of type NSEC3PARAM its slab is decoded by hand: a two-byte record
 * count (high byte first), then for each record a two-byte length
 * followed by the rdata.  With DNS_RDATASET_FIXED the slab carries
 * extra per-record bytes that are skipped.
 *
 * Each record is converted with dns_rdata_tostruct().  Records whose
 * hash is neither DNS_NSEC3_UNKNOWNALG nor supported are skipped.
 * *nsec3createflag is set when a record carries DNS_NSEC3FLAG_CREATE.
 * For an accepted record, salt, hash, salt_length, iterations and flags
 * are copied into version and havensec3 is set; the search goes on
 * while the accepted hash is DNS_NSEC3_UNKNOWNALG so that a better
 * algorithm can replace it.
 */
1994 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1995 isc_boolean_t *nsec3createflag)
1997 dns_rbtnode_t *node;
1998 dns_rdata_nsec3param_t nsec3param;
1999 dns_rdata_t rdata = DNS_RDATA_INIT;
2000 isc_region_t region;
2001 isc_result_t result;
2002 rdatasetheader_t *header, *header_next;
2003 unsigned char *raw; /* RDATASLAB */
2004 unsigned int count, length;
2005 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2007 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2008 version->havensec3 = ISC_FALSE;
2009 node = rbtdb->origin_node;
2010 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2011 isc_rwlocktype_read);
2012 for (header = node->data;
2014 header = header_next) {
2015 header_next = header->next;
2017 if (header->serial <= version->serial &&
2019 if (NONEXISTENT(header))
2023 header = header->down;
2024 } while (header != NULL);
2026 if (header != NULL &&
2027 header->type == dns_rdatatype_nsec3param) {
2029 * Find A NSEC3PARAM with a supported algorithm.
2031 raw = (unsigned char *)header + sizeof(*header);
2032 count = raw[0] * 256 + raw[1]; /* count */
2033 #if DNS_RDATASET_FIXED
2034 raw += count * 4 + 2;
2038 while (count-- > 0U) {
2039 length = raw[0] * 256 + raw[1];
2040 #if DNS_RDATASET_FIXED
2046 region.length = length;
2048 dns_rdata_fromregion(&rdata,
2049 rbtdb->common.rdclass,
2050 dns_rdatatype_nsec3param,
2052 result = dns_rdata_tostruct(&rdata,
2055 INSIST(result == ISC_R_SUCCESS);
2056 dns_rdata_reset(&rdata);
2058 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2059 !dns_nsec3_supportedhash(nsec3param.hash))
2062 #ifdef RFC5155_STRICT
2063 if (nsec3param.flags != 0)
2066 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2068 *nsec3createflag = ISC_TRUE;
2069 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2074 memcpy(version->salt, nsec3param.salt,
2075 nsec3param.salt_length);
2076 version->hash = nsec3param.hash;
2077 version->salt_length = nsec3param.salt_length;
2078 version->iterations = nsec3param.iterations;
2079 version->flags = nsec3param.flags;
2080 version->havensec3 = ISC_TRUE;
2082 * Look for a better algorithm than the
2083 * unknown test algorithm.
2085 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2091 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2092 isc_rwlocktype_read);
2093 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * closeversion: release a reference to the version in *versionp and,
 * when it was the last one, commit or roll back and clean up.
 *
 * Phase 1 - reference drop.  The version counter is decremented.  If
 * references remain, the version is only INSISTed not to be the writer
 * (checked under the database read lock).
 *
 * Phase 2 - bookkeeping under the database write lock.
 *   Writer version, commit: the current version gives up its reference
 *     and is unlinked from open_versions; this version either becomes
 *     the least open version (make_least_version) or hands over only
 *     its non-dirty changed records (cleanup_nondirty); it is then
 *     installed as current_version, gains the reference owned by the
 *     database, and is put at the head of open_versions.
 *   Writer version, rollback: the changed and resigned lists are taken
 *     over for cleanup, rollback is set, and the version is scheduled
 *     to be freed.
 *   Reader version other than current_version: the version is scheduled
 *     to be freed; if it was the least open version the next greater
 *     one takes that role, otherwise its pending changed records are
 *     appended to those of the next greater version.
 *   least_serial is sampled before the lock is dropped.
 *
 * Phase 3 - after the database lock is released.
 *   - For a committed writer on a non-cache database, iszonesecure()
 *     refreshes the secure status.
 *   - cleanup_version, if any, is freed; its changed list must be empty.
 *   - Each header on resigned_list is processed under the write lock of
 *     its node: resign_insert() is called and a node reference is
 *     dropped with decrement_reference().
 *   - If cleanup_list is non-empty, the tree write lock is taken and,
 *     for each record, the node lock is taken, cleanup_dead_nodes() and
 *     rollback_node() are invoked, the node reference is dropped, and
 *     the record is freed.
 *     NOTE(review): rollback_node() is presumably applied only when
 *     rollback is set -- confirm.
 */
2097 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2098 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2099 rbtdb_version_t *version, *cleanup_version, *least_greater;
2100 isc_boolean_t rollback = ISC_FALSE;
2101 rbtdb_changedlist_t cleanup_list;
2102 rdatasetheaderlist_t resigned_list;
2103 rbtdb_changed_t *changed, *next_changed;
2104 rbtdb_serial_t serial, least_serial;
2105 dns_rbtnode_t *rbtnode;
2107 rdatasetheader_t *header;
2108 isc_boolean_t writer;
2110 REQUIRE(VALID_RBTDB(rbtdb));
2111 version = (rbtdb_version_t *)*versionp;
2113 cleanup_version = NULL;
2114 ISC_LIST_INIT(cleanup_list);
2115 ISC_LIST_INIT(resigned_list);
2117 isc_refcount_decrement(&version->references, &refs);
2118 if (refs > 0) { /* typical and easy case first */
2120 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2121 INSIST(!version->writer);
2122 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2127 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2128 serial = version->serial;
2129 writer = version->writer;
2130 if (version->writer) {
2133 rbtdb_version_t *cur_version;
2135 INSIST(version->commit_ok);
2136 INSIST(version == rbtdb->future_version);
2138 * The current version is going to be replaced.
2139 * Release the (likely last) reference to it from the
2140 * DB itself and unlink it from the open list.
2142 cur_version = rbtdb->current_version;
2143 isc_refcount_decrement(&cur_version->references,
2146 if (cur_version->serial == rbtdb->least_serial)
2147 INSIST(EMPTY(cur_version->changed_list));
2148 UNLINK(rbtdb->open_versions,
2151 if (EMPTY(rbtdb->open_versions)) {
2153 * We're going to become the least open
2156 make_least_version(rbtdb, version,
2160 * Some other open version is the
2161 * least version. We can't cleanup
2162 * records that were changed in this
2163 * version because the older versions
2164 * may still be in use by an open
2167 * We can, however, discard the
2168 * changed records for things that
2169 * we've added that didn't exist in
2172 cleanup_nondirty(version, &cleanup_list);
2175 * If the (soon to be former) current version
2176 * isn't being used by anyone, we can clean
2180 cleanup_version = cur_version;
2181 APPENDLIST(version->changed_list,
2182 cleanup_version->changed_list,
2186 * Become the current version.
2188 version->writer = ISC_FALSE;
2189 rbtdb->current_version = version;
2190 rbtdb->current_serial = version->serial;
2191 rbtdb->future_version = NULL;
2194 * Keep the current version in the open list, and
2195 * gain a reference for the DB itself (see the DB
2196 * creation function below). This must be the only
2197 * case where we need to increment the counter from
2198 * zero and need to use isc_refcount_increment0().
2200 isc_refcount_increment0(&version->references,
2202 INSIST(cur_ref == 1);
2203 PREPEND(rbtdb->open_versions,
2204 rbtdb->current_version, link);
2205 resigned_list = version->resigned_list;
2206 ISC_LIST_INIT(version->resigned_list);
2209 * We're rolling back this transaction.
2211 cleanup_list = version->changed_list;
2212 ISC_LIST_INIT(version->changed_list);
2213 resigned_list = version->resigned_list;
2214 ISC_LIST_INIT(version->resigned_list);
2215 rollback = ISC_TRUE;
2216 cleanup_version = version;
2217 rbtdb->future_version = NULL;
2220 if (version != rbtdb->current_version) {
2222 * There are no external or internal references
2223 * to this version and it can be cleaned up.
2225 cleanup_version = version;
2228 * Find the version with the least serial
2229 * number greater than ours.
2231 least_greater = PREV(version, link);
2232 if (least_greater == NULL)
2233 least_greater = rbtdb->current_version;
2235 INSIST(version->serial < least_greater->serial);
2237 * Is this the least open version?
2239 if (version->serial == rbtdb->least_serial) {
2241 * Yes. Install the new least open
2244 make_least_version(rbtdb,
2249 * Add any unexecuted cleanups to
2250 * those of the least greater version.
2252 APPENDLIST(least_greater->changed_list,
2253 version->changed_list,
2256 } else if (version->serial == rbtdb->least_serial)
2257 INSIST(EMPTY(version->changed_list));
2258 UNLINK(rbtdb->open_versions, version, link);
2260 least_serial = rbtdb->least_serial;
2261 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2264 * Update the zone's secure status.
2266 if (writer && commit && !IS_CACHE(rbtdb))
2267 iszonesecure(db, version, rbtdb->origin_node);
2269 if (cleanup_version != NULL) {
2270 INSIST(EMPTY(cleanup_version->changed_list));
2271 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2272 sizeof(*cleanup_version));
2276 * Commit/rollback re-signed headers.
2278 for (header = HEAD(resigned_list);
2280 header = HEAD(resigned_list)) {
2283 ISC_LIST_UNLINK(resigned_list, header, link);
2285 lock = &rbtdb->node_locks[header->node->locknum].lock;
2286 NODE_LOCK(lock, isc_rwlocktype_write);
2288 resign_insert(rbtdb, header->node->locknum, header);
2289 decrement_reference(rbtdb, header->node, least_serial,
2290 isc_rwlocktype_write, isc_rwlocktype_none,
2292 NODE_UNLOCK(lock, isc_rwlocktype_write);
2295 if (!EMPTY(cleanup_list)) {
2297 * We acquire a tree write lock here in order to make sure
2298 * that stale nodes will be removed in decrement_reference().
2299 * If we didn't have the lock, those nodes could miss the
2300 * chance to be removed until the server stops. The write lock
2301 * is expensive, but this event should be rare enough to justify
2304 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2305 for (changed = HEAD(cleanup_list);
2307 changed = next_changed) {
2310 next_changed = NEXT(changed, link);
2311 rbtnode = changed->node;
2312 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2314 NODE_LOCK(lock, isc_rwlocktype_write);
2316 * This is a good opportunity to purge any dead nodes,
2319 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2322 rollback_node(rbtnode, serial);
2323 decrement_reference(rbtdb, rbtnode, least_serial,
2324 isc_rwlocktype_write,
2325 isc_rwlocktype_write, ISC_FALSE);
2327 NODE_UNLOCK(lock, isc_rwlocktype_write);
2329 isc_mem_put(rbtdb->common.mctx, changed,
2332 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2340 * Add the necessary magic for the wildcard name 'name'
2341 * to be found in 'rbtdb'.
2343 * In order for wildcard matching to work correctly in
2344 * zone_find(), we must ensure that a node for the wildcarding
2345 * level exists in the database, and has its 'find_callback'
2346 * and 'wild' bits set.
2348 * E.g. if the wildcard name is "*.sub.example." then we
2349 * must ensure that "sub.example." exists and is marked as
/*
 * add_wildcard_magic: make sure the parent of the wildcard name exists
 * in the main tree and is flagged for callback during lookups.
 *
 * The leftmost label of name is dropped by taking the label sequence
 * that starts at index 1, and the resulting name is added with
 * dns_rbt_addnode().  ISC_R_EXISTS is treated like success; find_callback
 * is then set on the node and ISC_R_SUCCESS is returned.
 */
2353 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2354 isc_result_t result;
2355 dns_name_t foundname;
2356 dns_offsets_t offsets;
2358 dns_rbtnode_t *node = NULL;
2360 dns_name_init(&foundname, offsets);
2361 n = dns_name_countlabels(name);
2364 dns_name_getlabelsequence(name, 1, n, &foundname);
2365 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2366 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2369 node->find_callback = 1;
2371 return (ISC_R_SUCCESS);
/*
 * add_empty_wildcards: handle wildcard labels that occur in the middle
 * of name.
 *
 * n is the label count of name and l the label count of the database
 * origin.  For each examined suffix made of the last i labels of name
 * that is itself a wildcard name, add_wildcard_magic() is applied and a
 * node for the suffix is added to the main tree; ISC_R_EXISTS from the
 * add is tolerated.  ISC_R_SUCCESS is returned when the walk completes.
 * NOTE(review): the bounds of i are presumably derived from l and n --
 * confirm against the loop header.
 */
2375 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2376 isc_result_t result;
2377 dns_name_t foundname;
2378 dns_offsets_t offsets;
2379 unsigned int n, l, i;
2381 dns_name_init(&foundname, offsets);
2382 n = dns_name_countlabels(name);
2383 l = dns_name_countlabels(&rbtdb->common.origin);
2386 dns_rbtnode_t *node = NULL; /* dummy */
2387 dns_name_getlabelsequence(name, n - i, i, &foundname);
2388 if (dns_name_iswildcard(&foundname)) {
2389 result = add_wildcard_magic(rbtdb, &foundname);
2390 if (result != ISC_R_SUCCESS)
2392 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2394 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2400 return (ISC_R_SUCCESS);
/*
 * findnode: look up name in the main tree, optionally adding it, and
 * hand back a referenced node through *nodep.
 *
 * The lookup runs under the tree read lock with DNS_RBTFIND_EMPTYDATA.
 * When it does not succeed the read lock is dropped; a
 * DNS_R_PARTIALMATCH result is reported as ISC_R_NOTFOUND.  On the add
 * path the tree lock is re-acquired as a write lock (there is no
 * in-place upgrade) and dns_rbt_addnode() is called:
 *   - for a new node, locknum is derived from the name hash modulo
 *     node_lock_count, add_empty_wildcards() is run (its result is not
 *     checked here), and for a wildcard name add_wildcard_magic() is
 *     run, with a failure releasing the lock;
 *   - any result other than ISC_R_EXISTS releases the lock.
 * Finally reactivate_node() takes the reference with the lock type
 * actually held, the tree lock is released, and ISC_R_SUCCESS is
 * returned.
 * NOTE(review): the add path is presumably gated on create -- confirm.
 */
2404 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2405 dns_dbnode_t **nodep)
2407 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2408 dns_rbtnode_t *node = NULL;
2409 dns_name_t nodename;
2410 isc_result_t result;
2411 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2413 REQUIRE(VALID_RBTDB(rbtdb));
2415 dns_name_init(&nodename, NULL);
2416 RWLOCK(&rbtdb->tree_lock, locktype);
2417 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2418 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2419 if (result != ISC_R_SUCCESS) {
2420 RWUNLOCK(&rbtdb->tree_lock, locktype);
2422 if (result == DNS_R_PARTIALMATCH)
2423 result = ISC_R_NOTFOUND;
2427 * It would be nice to try to upgrade the lock instead of
2428 * unlocking then relocking.
2430 locktype = isc_rwlocktype_write;
2431 RWLOCK(&rbtdb->tree_lock, locktype);
2433 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2434 if (result == ISC_R_SUCCESS) {
2435 dns_rbt_namefromnode(node, &nodename);
2436 #ifdef DNS_RBT_USEHASH
2437 node->locknum = node->hashval % rbtdb->node_lock_count;
2439 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2440 rbtdb->node_lock_count;
2443 add_empty_wildcards(rbtdb, name);
2445 if (dns_name_iswildcard(name)) {
2446 result = add_wildcard_magic(rbtdb, name);
2447 if (result != ISC_R_SUCCESS) {
2448 RWUNLOCK(&rbtdb->tree_lock, locktype);
2452 } else if (result != ISC_R_EXISTS) {
2453 RWUNLOCK(&rbtdb->tree_lock, locktype);
2457 reactivate_node(rbtdb, node, locktype);
2458 RWUNLOCK(&rbtdb->tree_lock, locktype);
2460 *nodep = (dns_dbnode_t *)node;
2462 return (ISC_R_SUCCESS);
/*
 * findnsec3node: look up name in the nsec3 tree, optionally adding it,
 * and hand back a referenced node through *nodep.
 *
 * The flow mirrors findnode() but operates on rbtdb->nsec3 and does no
 * wildcard processing.  The lookup runs under the tree read lock; on
 * the add path the lock is re-acquired as a write lock and a new node
 * gets its locknum from the name hash modulo node_lock_count.  The node
 * is INSISTed to carry the nsec3 flag, and the reference is taken with
 * new_reference() under the strong node lock rather than through
 * reactivate_node().  ISC_R_SUCCESS is returned after the tree lock is
 * released.
 * NOTE(review): the add path is presumably gated on create -- confirm.
 */
2466 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2467 dns_dbnode_t **nodep)
2469 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2470 dns_rbtnode_t *node = NULL;
2471 dns_name_t nodename;
2472 isc_result_t result;
2473 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2475 REQUIRE(VALID_RBTDB(rbtdb));
2477 dns_name_init(&nodename, NULL);
2478 RWLOCK(&rbtdb->tree_lock, locktype);
2479 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2480 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2481 if (result != ISC_R_SUCCESS) {
2482 RWUNLOCK(&rbtdb->tree_lock, locktype);
2484 if (result == DNS_R_PARTIALMATCH)
2485 result = ISC_R_NOTFOUND;
2489 * It would be nice to try to upgrade the lock instead of
2490 * unlocking then relocking.
2492 locktype = isc_rwlocktype_write;
2493 RWLOCK(&rbtdb->tree_lock, locktype);
2495 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2496 if (result == ISC_R_SUCCESS) {
2497 dns_rbt_namefromnode(node, &nodename);
2498 #ifdef DNS_RBT_USEHASH
2499 node->locknum = node->hashval % rbtdb->node_lock_count;
2501 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2502 rbtdb->node_lock_count;
2505 } else if (result != ISC_R_EXISTS) {
2506 RWUNLOCK(&rbtdb->tree_lock, locktype);
2510 INSIST(node->nsec3);
2511 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2512 new_reference(rbtdb, node);
2513 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2514 RWUNLOCK(&rbtdb->tree_lock, locktype);
2516 *nodep = (dns_dbnode_t *)node;
2518 return (ISC_R_SUCCESS);
/*
 * zone_zonecut_callback: tree-search callback that checks whether node is a
 * zone cut for the search described by arg (an rbtdb_search_t).
 *
 * If the search already has a zone cut recorded, DNS_R_CONTINUE is returned
 * straight away.  Otherwise the node read lock is taken and the rdataset
 * headers at the node are scanned for NS, DNAME and RRSIG(DNAME) entries
 * whose serial is not newer than search->serial.  A DNAME is preferred over
 * an NS; an NS only counts when node is not the origin node or the database
 * is a stub.
 *
 * When a cut is found, a reference is taken on node and the search block is
 * updated (zonecut, zonecut_rdataset, zonecut_sigrdataset, need_cleanup),
 * and search->wild is cleared.  Without DNS_DBFIND_GLUEOK the result becomes
 * DNS_R_PARTIALMATCH; with it, the node name is copied into
 * search->zonecut_name and copy_name is set.  When no cut is found, a wild
 * node sets search->wild unless DNS_DBFIND_NOWILD was given.
 *
 * NOTE(review): several short lines are missing from this listing (the
 * ns_header initialisation and assignment, the found = ns_header branch,
 * the zcname declaration, and the final return).  Presumably the function
 * ends by returning result -- confirm against the full file.
 */
2522 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2523 rbtdb_search_t *search = arg;
2524 rdatasetheader_t *header, *header_next;
2525 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2526 rdatasetheader_t *found;
2527 isc_result_t result;
2528 dns_rbtnode_t *onode;
2531 * We only want to remember the topmost zone cut, since it's the one
2532 * that counts, so we'll just continue if we've already found a
2535 if (search->zonecut != NULL)
2536 return (DNS_R_CONTINUE);
2539 result = DNS_R_CONTINUE;
2540 onode = search->rbtdb->origin_node;
2542 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2543 isc_rwlocktype_read);
2546 * Look for an NS or DNAME rdataset active in our version.
2549 dname_header = NULL;
2550 sigdname_header = NULL;
2551 for (header = node->data; header != NULL; header = header_next) {
2552 header_next = header->next;
2553 if (header->type == dns_rdatatype_ns ||
2554 header->type == dns_rdatatype_dname ||
2555 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2557 if (header->serial <= search->serial &&
2560 * Is this a "this rdataset doesn't
2563 if (NONEXISTENT(header))
2567 header = header->down;
2568 } while (header != NULL);
2569 if (header != NULL) {
2570 if (header->type == dns_rdatatype_dname)
2571 dname_header = header;
2572 else if (header->type ==
2573 RBTDB_RDATATYPE_SIGDNAME)
2574 sigdname_header = header;
2575 else if (node != onode ||
2576 IS_STUB(search->rbtdb)) {
2578 * We've found an NS rdataset that
2579 * isn't at the origin node. We check
2580 * that they're not at the origin node,
2581 * because otherwise we'd erroneously
2582 * treat the zone top as if it were
2592 * Did we find anything?
2594 if (dname_header != NULL) {
2596 * Note that DNAME has precedence over NS if both exist.
2598 found = dname_header;
2599 search->zonecut_sigrdataset = sigdname_header;
2600 } else if (ns_header != NULL) {
2602 search->zonecut_sigrdataset = NULL;
2605 if (found != NULL) {
2607 * We increment the reference count on node to ensure that
2608 * search->zonecut_rdataset will still be valid later.
2610 new_reference(search->rbtdb, node);
2611 search->zonecut = node;
2612 search->zonecut_rdataset = found;
2613 search->need_cleanup = ISC_TRUE;
2615 * Since we've found a zonecut, anything beneath it is
2616 * glue and is not subject to wildcard matching, so we
2617 * may clear search->wild.
2619 search->wild = ISC_FALSE;
2620 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2622 * If the caller does not want to find glue, then
2623 * this is the best answer and the search should
2626 result = DNS_R_PARTIALMATCH;
2631 * The search will continue beneath the zone cut.
2632 * This may or may not be the best match. In case it
2633 * is, we need to remember the node name.
2635 zcname = dns_fixedname_name(&search->zonecut_name);
2636 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2638 search->copy_name = ISC_TRUE;
2642 * There is no zonecut at this node which is active in this
2645 * If this is a "wild" node and the caller hasn't disabled
2646 * wildcard matching, remember that we've seen a wild node
2647 * in case we need to go searching for wildcard matches
2650 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2651 search->wild = ISC_TRUE;
2654 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2655 isc_rwlocktype_read);
/*
 * bind_rdataset: attach the caller-supplied rdataset to the slab data that
 * follows header at node.
 *
 * A NULL rdataset is tested for first (the action taken is on a line not
 * shown in this listing; presumably an early return -- confirm).  Otherwise
 * a reference is taken on node, the rdataset is asserted to be
 * disassociated (methods == NULL), and its fields are filled in:
 *   - methods, rdclass, type and covers (both derived from header->type),
 *     ttl (header->rdh_ttl minus now) and trust;
 *   - NXDOMAIN / OPTOUT / NOQNAME / CLOSEST / RESIGN attribute bits;
 *   - private1 = rbtdb, private2 = node, private3 = the raw slab located
 *     immediately after the header structure;
 *   - count, taken from header->count with a post-increment and reset to 0
 *     when it equals ISC_UINT32_MAX;
 *   - iterator state (privateuint4, private5) cleared;
 *   - private6 / private7 set from header->noqname / header->closest;
 *   - resign copied from the header when RESIGN(header) holds, else 0.
 *
 * Locking: the existing note below says the caller must hold the node
 * reader lock, and that header->count is incremented without a writer lock
 * on purpose.
 *
 * NOTE(review): the condition guarding the OPTOUT attribute and the else
 * line before the resign = 0 assignment are not visible in this listing.
 */
2661 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2662 rdatasetheader_t *header, isc_stdtime_t now,
2663 dns_rdataset_t *rdataset)
2665 unsigned char *raw; /* RDATASLAB */
2668 * Caller must be holding the node reader lock.
2669 * XXXJT: technically, we need a writer lock, since we'll increment
2670 * the header count below. However, since the actual counter value
2671 * doesn't matter, we prioritize performance here. (We may want to
2672 * use atomic increment when available).
2675 if (rdataset == NULL)
2678 new_reference(rbtdb, node);
2680 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2682 rdataset->methods = &rdataset_methods;
2683 rdataset->rdclass = rbtdb->common.rdclass;
2684 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2685 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2686 rdataset->ttl = header->rdh_ttl - now;
2687 rdataset->trust = header->trust;
2688 if (NXDOMAIN(header))
2689 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2691 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2692 rdataset->private1 = rbtdb;
2693 rdataset->private2 = node;
2694 raw = (unsigned char *)header + sizeof(*header);
2695 rdataset->private3 = raw;
2696 rdataset->count = header->count++;
2697 if (rdataset->count == ISC_UINT32_MAX)
2698 rdataset->count = 0;
2701 * Reset iterator state.
2703 rdataset->privateuint4 = 0;
2704 rdataset->private5 = NULL;
2707 * Add noqname proof.
2709 rdataset->private6 = header->noqname;
2710 if (rdataset->private6 != NULL)
2711 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2712 rdataset->private7 = header->closest;
2713 if (rdataset->private7 != NULL)
2714 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2717 * Copy out re-signing information.
2719 if (RESIGN(header)) {
2720 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2721 rdataset->resign = header->resign;
2723 rdataset->resign = 0;
/*
 * setup_delegation: turn the zone cut recorded in the search block into the
 * result handed back to the caller of the find operation.
 *
 * Steps, in order:
 *   1. If foundname is supplied and search->copy_name is set, the saved
 *      zone cut name is copied into foundname first, so that nothing needs
 *      undoing should the copy fail.
 *   2. If nodep is supplied, the reference already held by the search block
 *      is handed over, and need_cleanup is cleared so it is not released
 *      again.
 *   3. If rdataset is supplied, the zone cut rdataset (and its signature
 *      rdataset, when both sigrdataset and one were recorded) is bound
 *      under the node read lock.
 *
 * Returns DNS_R_DNAME when the zone cut rdataset type is DNAME, otherwise
 * DNS_R_DELEGATION.  The existing note below says the caller must not hold
 * any node locks.
 *
 * NOTE(review): the zcname declaration, the return taken when
 * dns_name_copy() fails, and the assignment to *nodep are on lines missing
 * from this listing.
 */
2726 static inline isc_result_t
2727 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2728 dns_name_t *foundname, dns_rdataset_t *rdataset,
2729 dns_rdataset_t *sigrdataset)
2731 isc_result_t result;
2733 rbtdb_rdatatype_t type;
2734 dns_rbtnode_t *node;
2737 * The caller MUST NOT be holding any node locks.
2740 node = search->zonecut;
2741 type = search->zonecut_rdataset->type;
2744 * If we have to set foundname, we do it before anything else.
2745 * If we were to set foundname after we had set nodep or bound the
2746 * rdataset, then we'd have to undo that work if dns_name_copy()
2747 * failed. By setting foundname first, there's nothing to undo if
2750 if (foundname != NULL && search->copy_name) {
2751 zcname = dns_fixedname_name(&search->zonecut_name);
2752 result = dns_name_copy(zcname, foundname, NULL);
2753 if (result != ISC_R_SUCCESS)
2756 if (nodep != NULL) {
2758 * Note that we don't have to increment the node's reference
2759 * count here because we're going to use the reference we
2760 * already have in the search block.
2763 search->need_cleanup = ISC_FALSE;
2765 if (rdataset != NULL) {
2766 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2767 isc_rwlocktype_read);
2768 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2769 search->now, rdataset);
2770 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2771 bind_rdataset(search->rbtdb, node,
2772 search->zonecut_sigrdataset,
2773 search->now, sigrdataset);
2774 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2775 isc_rwlocktype_read);
2778 if (type == dns_rdatatype_dname)
2779 return (DNS_R_DNAME);
2780 return (DNS_R_DELEGATION);
/*
 * valid_glue: decide whether an rdataset of the given type at node, owned
 * by name, is acceptable glue for the zone cut recorded in search.
 *
 * Type filter: NS is checked against node being the zone cut node itself;
 * any other type must be A, AAAA or A6.  Then the raw slab of the zone cut
 * rdataset (search->zonecut_rdataset) is walked and each record is read as
 * a domain name and compared with name.
 *
 * NOTE(review): the statements taken when the type filter fails, the loop
 * header, the region.base assignment and the final return are on lines
 * missing from this listing.  Presumably the filter failures return
 * ISC_FALSE and a name match sets valid to ISC_TRUE -- confirm against the
 * full file.
 */
2783 static inline isc_boolean_t
2784 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2785 dns_rbtnode_t *node)
2787 unsigned char *raw; /* RDATASLAB */
2788 unsigned int count, size;
2790 isc_boolean_t valid = ISC_FALSE;
2791 dns_offsets_t offsets;
2792 isc_region_t region;
2793 rdatasetheader_t *header;
2796 * No additional locking is required.
2800 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2801 * if it occurs at a zone cut, but is not valid below it.
2803 if (type == dns_rdatatype_ns) {
2804 if (node != search->zonecut) {
2807 } else if (type != dns_rdatatype_a &&
2808 type != dns_rdatatype_aaaa &&
2809 type != dns_rdatatype_a6) {
2813 header = search->zonecut_rdataset;
2814 raw = (unsigned char *)header + sizeof(*header);
/* The slab begins with a two-byte, high-byte-first record count. */
2815 count = raw[0] * 256 + raw[1];
2816 #if DNS_RDATASET_FIXED
2817 raw += 2 + (4 * count);
/* Each record is preceded by its two-byte, high-byte-first length. */
2824 size = raw[0] * 256 + raw[1];
2825 #if DNS_RDATASET_FIXED
2831 region.length = size;
2834 * XXX Until we have rdata structures, we have no choice but
2835 * to directly access the rdata format.
/*
 * NOTE(review): the second argument on the dns_name_fromregion() line below
 * looks like a mis-encoded address-of expression on the local region
 * variable -- confirm against the full file.
 */
2837 dns_name_init(&ns_name, offsets);
2838 dns_name_fromregion(&ns_name, ®ion);
2839 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * activeempty: starting from the position held in chain, step forward with
 * dns_rbtnodechain_next() until a node is reached that has at least one
 * header with serial <= search->serial that is neither ignored nor
 * nonexistent.  Each node is inspected under its node read lock.
 *
 * After the walk, the name of the node reached is built by joining its
 * prefix with the origin, and is tested for being a subdomain of name.
 *
 * NOTE(review): the last signature line, several declarations, the loop
 * exits, and the statements that set and return answer are missing from
 * this listing.  Presumably answer becomes ISC_TRUE when the subdomain
 * test passes, i.e. name is an empty non-terminal with active data below
 * it -- confirm against the full file.
 */
2848 static inline isc_boolean_t
2849 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2852 dns_fixedname_t fnext;
2853 dns_fixedname_t forigin;
2858 dns_rbtnode_t *node;
2859 isc_result_t result;
2860 isc_boolean_t answer = ISC_FALSE;
2861 rdatasetheader_t *header;
2863 rbtdb = search->rbtdb;
2865 dns_name_init(&prefix, NULL);
2866 dns_fixedname_init(&fnext);
2867 next = dns_fixedname_name(&fnext);
2868 dns_fixedname_init(&forigin);
2869 origin = dns_fixedname_name(&forigin);
/* Advance past the current chain position before inspecting nodes. */
2871 result = dns_rbtnodechain_next(chain, NULL, NULL);
2872 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2874 result = dns_rbtnodechain_current(chain, &prefix,
2876 if (result != ISC_R_SUCCESS)
/* Look for any header that is live in the version being searched. */
2878 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2879 isc_rwlocktype_read);
2880 for (header = node->data;
2882 header = header->next) {
2883 if (header->serial <= search->serial &&
2884 !IGNORE(header) && EXISTS(header))
2887 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2888 isc_rwlocktype_read);
2891 result = dns_rbtnodechain_next(chain, NULL, NULL);
2893 if (result == ISC_R_SUCCESS)
2894 result = dns_name_concatenate(&prefix, origin, next, NULL);
2895 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode: work on a private copy of search->chain to find the
 * nearest live node before the current position (walking with
 * dns_rbtnodechain_prev) and the nearest live node after it (walking with
 * dns_rbtnodechain_next).  A node counts as live when it has a header with
 * serial <= search->serial that is neither ignored nor nonexistent; each
 * node is inspected under its node read lock.
 *
 * The full names of those two neighbours are built into prev and next; if
 * either cannot be built, the matching check_prev / check_next flag is
 * cleared.  Then, starting from qname and removing one leftmost label per
 * iteration until the name equals wname with its wildcard label removed,
 * each candidate is tested for having prev or next as a subdomain.
 *
 * NOTE(review): several declarations, the loop headers and exits, and the
 * statements that set and return answer are missing from this listing.
 * Presumably answer becomes ISC_TRUE when a subdomain test passes, i.e.
 * qname is at or below an empty non-terminal -- confirm against the full
 * file.  The spelling of the function name is kept as in the source.
 */
2900 static inline isc_boolean_t
2901 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2902 dns_fixedname_t fnext;
2903 dns_fixedname_t forigin;
2904 dns_fixedname_t fprev;
2912 dns_rbtnode_t *node;
2913 dns_rbtnodechain_t chain;
2914 isc_boolean_t check_next = ISC_TRUE;
2915 isc_boolean_t check_prev = ISC_TRUE;
2916 isc_boolean_t answer = ISC_FALSE;
2917 isc_result_t result;
2918 rdatasetheader_t *header;
2921 rbtdb = search->rbtdb;
2923 dns_name_init(&name, NULL);
2924 dns_name_init(&tname, NULL);
2925 dns_name_init(&rname, NULL);
2926 dns_fixedname_init(&fnext);
2927 next = dns_fixedname_name(&fnext);
2928 dns_fixedname_init(&fprev);
2929 prev = dns_fixedname_name(&fprev);
2930 dns_fixedname_init(&forigin);
2931 origin = dns_fixedname_name(&forigin);
2934 * Find if qname is at or below a empty node.
2935 * Use our own copy of the chain.
2938 chain = search->chain;
/* Backward walk: find the closest live predecessor. */
2941 result = dns_rbtnodechain_current(&chain, &name,
2943 if (result != ISC_R_SUCCESS)
2945 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2946 isc_rwlocktype_read);
2947 for (header = node->data;
2949 header = header->next) {
2950 if (header->serial <= search->serial &&
2951 !IGNORE(header) && EXISTS(header))
2954 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2955 isc_rwlocktype_read);
2958 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2959 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2960 if (result == ISC_R_SUCCESS)
2961 result = dns_name_concatenate(&name, origin, prev, NULL);
2962 if (result != ISC_R_SUCCESS)
2963 check_prev = ISC_FALSE;
/* Forward walk: find the closest live successor. */
2965 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2966 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2968 result = dns_rbtnodechain_current(&chain, &name,
2970 if (result != ISC_R_SUCCESS)
2972 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2973 isc_rwlocktype_read);
2974 for (header = node->data;
2976 header = header->next) {
2977 if (header->serial <= search->serial &&
2978 !IGNORE(header) && EXISTS(header))
2981 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2982 isc_rwlocktype_read);
2985 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2987 if (result == ISC_R_SUCCESS)
2988 result = dns_name_concatenate(&name, origin, next, NULL);
2989 if (result != ISC_R_SUCCESS)
2990 check_next = ISC_FALSE;
2992 dns_name_clone(qname, &rname);
2995 * Remove the wildcard label to find the terminal name.
2997 n = dns_name_countlabels(wname);
2998 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3001 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3002 (check_next && dns_name_issubdomain(next, &rname))) {
3007 * Remove the left hand label.
3009 n = dns_name_countlabels(&rname);
3010 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3011 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard: search the ancestor levels recorded in search->chain for a
 * wildcard that matches the query name.
 *
 * For each level examined, the level node is inspected under its node read
 * lock for a header that is live in the searched version (serial <=
 * search->serial, not ignored, existing).  A wildcard name is then built by
 * joining dns_wildcardname with the level node name and the names of the
 * enclosing chain levels, and looked up in rbtdb->tree with
 * DNS_RBTFIND_EMPTYDATA.
 *
 * If the wildcard node exists and is live itself, or activeempty() reports
 * live data beneath it, activeemtpynode() is consulted: when it reports
 * true the function returns ISC_R_NOTFOUND, otherwise the wildcard is
 * accepted with result still ISC_R_SUCCESS.  A lookup result other than
 * ISC_R_NOTFOUND or DNS_R_PARTIALMATCH is treated as an error.  If the
 * level node itself is live, wildcards at higher levels cannot apply and
 * the result is ISC_R_NOTFOUND.
 *
 * Locking: per the existing note below, the caller holds the tree lock;
 * node locks are taken and released inside this function.
 *
 * NOTE(review): the rest of the signature (the qname parameter), several
 * declarations, the loop header, the handling of the wild and active
 * flags, the store to *nodep and the final return are on lines missing
 * from this listing.
 */
3015 static inline isc_result_t
3016 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3020 dns_rbtnode_t *node, *level_node, *wnode;
3021 rdatasetheader_t *header;
3022 isc_result_t result = ISC_R_NOTFOUND;
3025 dns_fixedname_t fwname;
3027 isc_boolean_t done, wild, active;
3028 dns_rbtnodechain_t wchain;
3031 * Caller must be holding the tree lock and MUST NOT be holding
3036 * Examine each ancestor level. If the level's wild bit
3037 * is set, then construct the corresponding wildcard name and
3038 * search for it. If the wildcard node exists, and is active in
3039 * this version, we're done. If not, then we next check to see
3040 * if the ancestor is active in this version. If so, then there
3041 * can be no possible wildcard match and again we're done. If not,
3042 * continue the search.
3045 rbtdb = search->rbtdb;
3046 i = search->chain.level_matches;
3050 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3051 isc_rwlocktype_read);
3054 * First we try to figure out if this node is active in
3055 * the search's version. We do this now, even though we
3056 * may not need the information, because it simplifies the
3057 * locking and code flow.
3059 for (header = node->data;
3061 header = header->next) {
3062 if (header->serial <= search->serial &&
3063 !IGNORE(header) && EXISTS(header))
3076 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3077 isc_rwlocktype_read);
3081 * Construct the wildcard name for this level.
3083 dns_name_init(&name, NULL);
3084 dns_rbt_namefromnode(node, &name);
3085 dns_fixedname_init(&fwname);
3086 wname = dns_fixedname_name(&fwname);
3087 result = dns_name_concatenate(dns_wildcardname, &name,
3090 while (result == ISC_R_SUCCESS && j != 0) {
3092 level_node = search->chain.levels[j];
3093 dns_name_init(&name, NULL);
3094 dns_rbt_namefromnode(level_node, &name);
3095 result = dns_name_concatenate(wname,
3100 if (result != ISC_R_SUCCESS)
3104 dns_rbtnodechain_init(&wchain, NULL);
3105 result = dns_rbt_findnode(rbtdb->tree, wname,
3106 NULL, &wnode, &wchain,
3107 DNS_RBTFIND_EMPTYDATA,
3109 if (result == ISC_R_SUCCESS) {
3113 * We have found the wildcard node. If it
3114 * is active in the search's version, we're
3117 lock = &rbtdb->node_locks[wnode->locknum].lock;
3118 NODE_LOCK(lock, isc_rwlocktype_read);
3119 for (header = wnode->data;
3121 header = header->next) {
3122 if (header->serial <= search->serial &&
3123 !IGNORE(header) && EXISTS(header))
3126 NODE_UNLOCK(lock, isc_rwlocktype_read);
3127 if (header != NULL ||
3128 activeempty(search, &wchain, wname)) {
3129 if (activeemtpynode(search, qname,
3131 return (ISC_R_NOTFOUND);
3134 * The wildcard node is active!
3136 * Note: result is still ISC_R_SUCCESS
3137 * so we don't have to set it.
3142 } else if (result != ISC_R_NOTFOUND &&
3143 result != DNS_R_PARTIALMATCH) {
3145 * An error has occurred. Bail out.
3153 * The level node is active. Any wildcarding
3154 * present at higher levels has no
3155 * effect and we're done.
3157 result = ISC_R_NOTFOUND;
3163 node = search->chain.levels[i];
/*
 * matchparams: test whether any NSEC3 record stored in the slab behind
 * header uses the same parameters as the version being searched.
 *
 * header must be of type NSEC3 (enforced with REQUIRE).  The slab is walked
 * record by record; each record is converted to a dns_rdata_nsec3_t and its
 * hash, iterations, salt_length and salt bytes are compared with the
 * corresponding fields of search->rbtversion.
 *
 * NOTE(review): the region.base assignment, the pointer advances inside
 * the loop, and the return statements are on lines missing from this
 * listing.  Presumably a full match returns ISC_TRUE and exhausting the
 * slab returns ISC_FALSE -- confirm against the full file.
 */
3171 static isc_boolean_t
3172 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3174 dns_rdata_t rdata = DNS_RDATA_INIT;
3175 dns_rdata_nsec3_t nsec3;
3176 unsigned char *raw; /* RDATASLAB */
3177 unsigned int rdlen, count;
3178 isc_region_t region;
3179 isc_result_t result;
3181 REQUIRE(header->type == dns_rdatatype_nsec3);
3183 raw = (unsigned char *)header + sizeof(*header);
3184 count = raw[0] * 256 + raw[1]; /* count */
3185 #if DNS_RDATASET_FIXED
3186 raw += count * 4 + 2;
3190 while (count-- > 0) {
/* Each record is preceded by its two-byte, high-byte-first length. */
3191 rdlen = raw[0] * 256 + raw[1];
3192 #if DNS_RDATASET_FIXED
3198 region.length = rdlen;
/*
 * NOTE(review): the last argument on the continuation line of the
 * dns_rdata_fromregion() call below looks like a mis-encoded address-of
 * expression on the local region variable -- confirm against the full file.
 */
3199 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3200 dns_rdatatype_nsec3, ®ion);
3202 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3203 INSIST(result == ISC_R_SUCCESS);
/* Compare every NSEC3 parameter, including the salt bytes. */
3204 if (nsec3.hash == search->rbtversion->hash &&
3205 nsec3.iterations == search->rbtversion->iterations &&
3206 nsec3.salt_length == search->rbtversion->salt_length &&
3207 memcmp(nsec3.salt, search->rbtversion->salt,
3208 nsec3.salt_length) == 0)
3210 dns_rdata_reset(&rdata);
/*
 * find_closest_nsec: walk backwards through search->chain looking for the
 * nearest node that carries the denial-of-existence record for this tree.
 *
 * The record type searched for is NSEC3 (with RRSIG NSEC3) when tree is the
 * database NSEC3 tree, otherwise NSEC (with RRSIG NSEC).  A signature is
 * required only when secure equals dns_db_secure.
 *
 * Each candidate node is examined under its node read lock:
 *   - an NSEC3 whose parameters do not match the searched version (checked
 *     with matchparams() when the version has NSEC3) is treated as an empty
 *     node and the walk continues;
 *   - when the record is found together with its signature (or no
 *     signature is needed), the owner name is assembled from name and
 *     origin, a reference is taken for nodep when it is supplied, and the
 *     rdatasets are bound;
 *   - a live node with neither record nor signature is treated as empty
 *     and the walk continues;
 *   - a live node with only one of the two yields DNS_R_BADDB;
 *   - a node with no live rdatasets just continues the walk.
 *
 * If the walk runs out of nodes (ISC_R_NOMORE) and wraps is set, the chain
 * is repositioned at the last node of the tree.  A remaining ISC_R_NOMORE
 * is converted to DNS_R_BADDB.
 *
 * NOTE(review): the initialisation of wraps, found and foundsig, the
 * do-loop opener and retry label, several call arguments and the final
 * return are on lines missing from this listing.
 */
3215 static inline isc_result_t
3216 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3217 dns_name_t *foundname, dns_rdataset_t *rdataset,
3218 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3219 dns_db_secure_t secure)
3221 dns_rbtnode_t *node;
3222 rdatasetheader_t *header, *header_next, *found, *foundsig;
3223 isc_boolean_t empty_node;
3224 isc_result_t result;
3225 dns_fixedname_t fname, forigin;
3226 dns_name_t *name, *origin;
3227 dns_rdatatype_t type;
3228 rbtdb_rdatatype_t sigtype;
3229 isc_boolean_t wraps;
3230 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3232 if (tree == search->rbtdb->nsec3) {
3233 type = dns_rdatatype_nsec3;
3234 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3237 type = dns_rdatatype_nsec;
3238 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3245 dns_fixedname_init(&fname);
3246 name = dns_fixedname_name(&fname);
3247 dns_fixedname_init(&forigin);
3248 origin = dns_fixedname_name(&forigin);
3249 result = dns_rbtnodechain_current(&search->chain, name,
3251 if (result != ISC_R_SUCCESS)
3253 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3254 isc_rwlocktype_read);
3257 empty_node = ISC_TRUE;
3258 for (header = node->data;
3260 header = header_next) {
3261 header_next = header->next;
3263 * Look for an active, extant NSEC or RRSIG NSEC.
3266 if (header->serial <= search->serial &&
3269 * Is this a "this rdataset doesn't
3272 if (NONEXISTENT(header))
3276 header = header->down;
3277 } while (header != NULL);
3278 if (header != NULL) {
3280 * We now know that there is at least one
3281 * active rdataset at this node.
3283 empty_node = ISC_FALSE;
3284 if (header->type == type) {
3286 if (foundsig != NULL)
3288 } else if (header->type == sigtype) {
3296 if (found != NULL && search->rbtversion->havensec3 &&
3297 found->type == dns_rdatatype_nsec3 &&
3298 !matchparams(found, search)) {
3299 empty_node = ISC_TRUE;
3302 result = dns_rbtnodechain_prev(&search->chain,
3304 } else if (found != NULL &&
3305 (foundsig != NULL || !need_sig))
3308 * We've found the right NSEC/NSEC3 record.
3310 * Note: for this to really be the right
3311 * NSEC record, it's essential that the NSEC
3312 * records of any nodes obscured by a zone
3313 * cut have been removed; we assume this is
3316 result = dns_name_concatenate(name, origin,
3318 if (result == ISC_R_SUCCESS) {
3319 if (nodep != NULL) {
3320 new_reference(search->rbtdb,
3324 bind_rdataset(search->rbtdb, node,
3327 if (foundsig != NULL)
3328 bind_rdataset(search->rbtdb,
3334 } else if (found == NULL && foundsig == NULL) {
3336 * This node is active, but has no NSEC or
3337 * RRSIG NSEC. That means it's glue or
3338 * other obscured zone data that isn't
3339 * relevant for our search. Treat the
3340 * node as if it were empty and keep looking.
3342 empty_node = ISC_TRUE;
3343 result = dns_rbtnodechain_prev(&search->chain,
3347 * We found an active node, but either the
3348 * NSEC or the RRSIG NSEC is missing. This
3351 result = DNS_R_BADDB;
3355 * This node isn't active. We've got to keep
3358 result = dns_rbtnodechain_prev(&search->chain, NULL,
3361 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3362 isc_rwlocktype_read);
3363 } while (empty_node && result == ISC_R_SUCCESS);
3365 if (result == ISC_R_NOMORE && wraps) {
3366 result = dns_rbtnodechain_last(&search->chain, tree,
3368 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3375 * If the result is ISC_R_NOMORE, then we got to the beginning of
3376 * the database and didn't find a NSEC record. This shouldn't
3379 if (result == ISC_R_NOMORE)
3380 result = DNS_R_BADDB;
/*
 * zone_find: look up name / type in a zone database version and report the
 * best answer through nodep, foundname, rdataset and sigrdataset.
 *
 * Setup: a search block is filled in from db, version and options.  When
 * version is NULL the current version is attached and close_version is set
 * (closeversion() is called near the end of the function).  The tree read
 * lock is held for the whole search.  The tree searched is the NSEC3 tree
 * when DNS_DBFIND_FORCENSEC3 is set (the alternative is on a line missing
 * from this listing; presumably the main tree).  dns_rbt_findnode() is
 * called with zone_zonecut_callback so zone cuts met on the way down are
 * recorded in the search block.
 *
 * Partial match (name itself not found):
 *   - with a recorded zone cut, setup_delegation() produces the result;
 *   - otherwise find_wildcard() is tried, and on success name is copied
 *     into foundname and processing continues as for a found node;
 *   - otherwise the name does not exist.  activeempty() decides between
 *     DNS_R_EMPTYNAME and DNS_R_NXDOMAIN, and for a secure zone without
 *     NSEC3, or when DNS_DBFIND_FORCENSEC / DNS_DBFIND_FORCENSEC3 is set,
 *     find_closest_nsec() is called first to supply the proof.
 *
 * Node found (exact or wildcard): the rdatasets at the node are scanned
 * under the node read lock for the requested type, its RRSIG, a CNAME
 * (when permitted), NSEC and RRSIG NSEC, and for an NS that turns the node
 * into a zone cut.  Result codes assigned on visible lines include
 * DNS_R_NXRRSET, DNS_R_BADDB, DNS_R_EMPTYWILD, DNS_R_CNAME, DNS_R_ZONECUT,
 * DNS_R_GLUE and ISC_R_SUCCESS; delegations again come from
 * setup_delegation().  With DNS_DBFIND_VALIDATEGLUE, glue that fails
 * valid_glue() is replaced by the delegation.
 *
 * Cleanup: the node lock and tree lock are released; if the search block
 * still owns a zone cut reference (need_cleanup), it is dropped with
 * decrement_reference() under the node lock; the node chain is reset.
 *
 * The now argument is not used by this function according to the existing
 * note below; rdatasets bound directly in this function are bound with a
 * now value of 0 on the visible call lines.
 *
 * NOTE(review): many short lines (labels, goto and break statements, else
 * branches, closing braces, some declarations and the final return) are
 * missing from this listing, so the exact control flow between the visible
 * statements must be confirmed against the full file.
 */
3386 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3387 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3388 dns_dbnode_t **nodep, dns_name_t *foundname,
3389 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3391 dns_rbtnode_t *node = NULL;
3392 isc_result_t result;
3393 rbtdb_search_t search;
3394 isc_boolean_t cname_ok = ISC_TRUE;
3395 isc_boolean_t close_version = ISC_FALSE;
3396 isc_boolean_t maybe_zonecut = ISC_FALSE;
3397 isc_boolean_t at_zonecut = ISC_FALSE;
3399 isc_boolean_t empty_node;
3400 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3401 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3402 rbtdb_rdatatype_t sigtype;
3403 isc_boolean_t active;
3404 dns_rbtnodechain_t chain;
3408 search.rbtdb = (dns_rbtdb_t *)db;
3410 REQUIRE(VALID_RBTDB(search.rbtdb));
3413 * We don't care about 'now'.
3418 * If the caller didn't supply a version, attach to the current
3421 if (version == NULL) {
3422 currentversion(db, &version);
3423 close_version = ISC_TRUE;
3426 search.rbtversion = version;
3427 search.serial = search.rbtversion->serial;
3428 search.options = options;
3429 search.copy_name = ISC_FALSE;
3430 search.need_cleanup = ISC_FALSE;
3431 search.wild = ISC_FALSE;
3432 search.zonecut = NULL;
3433 dns_fixedname_init(&search.zonecut_name);
3434 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3438 * 'wild' will be true iff. we've matched a wildcard.
3442 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3445 * Search down from the root of the tree. If, while going down, we
3446 * encounter a callback node, zone_zonecut_callback() will search the
3447 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3449 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3451 result = dns_rbt_findnode(tree, name, foundname, &node,
3452 &search.chain, DNS_RBTFIND_EMPTYDATA,
3453 zone_zonecut_callback, &search);
3455 if (result == DNS_R_PARTIALMATCH) {
3457 if (search.zonecut != NULL) {
3458 result = setup_delegation(&search, nodep, foundname,
3459 rdataset, sigrdataset);
3465 * At least one of the levels in the search chain
3466 * potentially has a wildcard. For each such level,
3467 * we must see if there's a matching wildcard active
3468 * in the current version.
3470 result = find_wildcard(&search, &node, name);
3471 if (result == ISC_R_SUCCESS) {
3472 result = dns_name_copy(name, foundname, NULL);
3473 if (result != ISC_R_SUCCESS)
3478 else if (result != ISC_R_NOTFOUND)
3482 chain = search.chain;
3483 active = activeempty(&search, &chain, name);
3486 * If we're here, then the name does not exist, is not
3487 * beneath a zonecut, and there's no matching wildcard.
3489 if ((search.rbtversion->secure == dns_db_secure &&
3490 !search.rbtversion->havensec3) ||
3491 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3492 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3494 result = find_closest_nsec(&search, nodep, foundname,
3495 rdataset, sigrdataset, tree,
3496 search.rbtversion->secure);
3497 if (result == ISC_R_SUCCESS)
3498 result = active ? DNS_R_EMPTYNAME :
3501 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3503 } else if (result != ISC_R_SUCCESS)
3508 * We have found a node whose name is the desired name, or we
3509 * have matched a wildcard.
3512 if (search.zonecut != NULL) {
3514 * If we're beneath a zone cut, we don't want to look for
3515 * CNAMEs because they're not legitimate zone glue.
3517 cname_ok = ISC_FALSE;
3520 * The node may be a zone cut itself. If it might be one,
3521 * make sure we check for it later.
3523 * DS records live above the zone cut in ordinary zone so
3524 * we want to ignore any referral.
3526 * Stub zones don't have anything "above" the delgation so
3527 * we always return a referral.
3529 if (node->find_callback &&
3530 ((node != search.rbtdb->origin_node &&
3531 !dns_rdatatype_atparent(type)) ||
3532 IS_STUB(search.rbtdb)))
3533 maybe_zonecut = ISC_TRUE;
3537 * Certain DNSSEC types are not subject to CNAME matching
3538 * (RFC4035, section 2.5 and RFC3007).
3540 * We don't check for RRSIG, because we don't store RRSIG records
3543 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3544 cname_ok = ISC_FALSE;
3547 * We now go looking for rdata...
3550 lock = &search.rbtdb->node_locks[node->locknum].lock;
3551 NODE_LOCK(lock, isc_rwlocktype_read);
3555 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3559 empty_node = ISC_TRUE;
3560 for (header = node->data; header != NULL; header = header_next) {
3561 header_next = header->next;
3563 * Look for an active, extant rdataset.
3566 if (header->serial <= search.serial &&
3569 * Is this a "this rdataset doesn't
3572 if (NONEXISTENT(header))
3576 header = header->down;
3577 } while (header != NULL);
3578 if (header != NULL) {
3580 * We now know that there is at least one active
3581 * rdataset at this node.
3583 empty_node = ISC_FALSE;
3586 * Do special zone cut handling, if requested.
3588 if (maybe_zonecut &&
3589 header->type == dns_rdatatype_ns) {
3591 * We increment the reference count on node to
3592 * ensure that search->zonecut_rdataset will
3593 * still be valid later.
3595 new_reference(search.rbtdb, node);
3596 search.zonecut = node;
3597 search.zonecut_rdataset = header;
3598 search.zonecut_sigrdataset = NULL;
3599 search.need_cleanup = ISC_TRUE;
3600 maybe_zonecut = ISC_FALSE;
3601 at_zonecut = ISC_TRUE;
3603 * It is not clear if KEY should still be
3604 * allowed at the parent side of the zone
3605 * cut or not. It is needed for RFC3007
3606 * validated updates.
3608 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3609 && type != dns_rdatatype_nsec
3610 && type != dns_rdatatype_key) {
3612 * Glue is not OK, but any answer we
3613 * could return would be glue. Return
3619 if (found != NULL && foundsig != NULL)
3625 * If the NSEC3 record doesn't match the chain
3626 * we are using behave as if it isn't here.
3628 if (header->type == dns_rdatatype_nsec3 &&
3629 !matchparams(header, &search)) {
3630 NODE_UNLOCK(lock, isc_rwlocktype_read);
3634 * If we found a type we were looking for,
3637 if (header->type == type ||
3638 type == dns_rdatatype_any ||
3639 (header->type == dns_rdatatype_cname &&
3642 * We've found the answer!
3645 if (header->type == dns_rdatatype_cname &&
3648 * We may be finding a CNAME instead
3649 * of the desired type.
3651 * If we've already got the CNAME RRSIG,
3652 * use it, otherwise change sigtype
3653 * so that we find it.
3655 if (cnamesig != NULL)
3656 foundsig = cnamesig;
3659 RBTDB_RDATATYPE_SIGCNAME;
3662 * If we've got all we need, end the search.
3664 if (!maybe_zonecut && foundsig != NULL)
3666 } else if (header->type == sigtype) {
3668 * We've found the RRSIG rdataset for our
3669 * target type. Remember it.
3673 * If we've got all we need, end the search.
3675 if (!maybe_zonecut && found != NULL)
3677 } else if (header->type == dns_rdatatype_nsec &&
3678 !search.rbtversion->havensec3) {
3680 * Remember a NSEC rdataset even if we're
3681 * not specifically looking for it, because
3682 * we might need it later.
3684 nsecheader = header;
3685 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3686 !search.rbtversion->havensec3) {
3688 * If we need the NSEC rdataset, we'll also
3689 * need its signature.
3692 } else if (cname_ok &&
3693 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3695 * If we get a CNAME match, we'll also need
3705 * We have an exact match for the name, but there are no
3706 * active rdatasets in the desired version. That means that
3707 * this node doesn't exist in the desired version, and that
3708 * we really have a partial match.
3711 NODE_UNLOCK(lock, isc_rwlocktype_read);
3717 * If we didn't find what we were looking for...
3719 if (found == NULL) {
3720 if (search.zonecut != NULL) {
3722 * We were trying to find glue at a node beneath a
3723 * zone cut, but didn't.
3725 * Return the delegation.
3727 NODE_UNLOCK(lock, isc_rwlocktype_read);
3728 result = setup_delegation(&search, nodep, foundname,
3729 rdataset, sigrdataset);
3733 * The desired type doesn't exist.
3735 result = DNS_R_NXRRSET;
3736 if (search.rbtversion->secure == dns_db_secure &&
3737 !search.rbtversion->havensec3 &&
3738 (nsecheader == NULL || nsecsig == NULL)) {
3740 * The zone is secure but there's no NSEC,
3741 * or the NSEC has no signature!
3744 result = DNS_R_BADDB;
3748 NODE_UNLOCK(lock, isc_rwlocktype_read);
3749 result = find_closest_nsec(&search, nodep, foundname,
3750 rdataset, sigrdataset,
3752 search.rbtversion->secure);
3753 if (result == ISC_R_SUCCESS)
3754 result = DNS_R_EMPTYWILD;
3757 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3761 * There's no NSEC record, and we were told
3764 result = DNS_R_BADDB;
3767 if (nodep != NULL) {
3768 new_reference(search.rbtdb, node);
3771 if ((search.rbtversion->secure == dns_db_secure &&
3772 !search.rbtversion->havensec3) ||
3773 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3775 bind_rdataset(search.rbtdb, node, nsecheader,
3777 if (nsecsig != NULL)
3778 bind_rdataset(search.rbtdb, node,
3779 nsecsig, 0, sigrdataset);
3782 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3787 * We found what we were looking for, or we found a CNAME.
3790 if (type != found->type &&
3791 type != dns_rdatatype_any &&
3792 found->type == dns_rdatatype_cname) {
3794 * We weren't doing an ANY query and we found a CNAME instead
3795 * of the type we were looking for, so we need to indicate
3796 * that result to the caller.
3798 result = DNS_R_CNAME;
3799 } else if (search.zonecut != NULL) {
3801 * If we're beneath a zone cut, we must indicate that the
3802 * result is glue, unless we're actually at the zone cut
3803 * and the type is NSEC or KEY.
3805 if (search.zonecut == node) {
3807 * It is not clear if KEY should still be
3808 * allowed at the parent side of the zone
3809 * cut or not. It is needed for RFC3007
3810 * validated updates.
3812 if (type == dns_rdatatype_nsec ||
3813 type == dns_rdatatype_nsec3 ||
3814 type == dns_rdatatype_key)
3815 result = ISC_R_SUCCESS;
3816 else if (type == dns_rdatatype_any)
3817 result = DNS_R_ZONECUT;
3819 result = DNS_R_GLUE;
3821 result = DNS_R_GLUE;
3823 * We might have found data that isn't glue, but was occluded
3824 * by a dynamic update. If the caller cares about this, they
3825 * will have told us to validate glue.
3827 * XXX We should cache the glue validity state!
3829 if (result == DNS_R_GLUE &&
3830 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3831 !valid_glue(&search, foundname, type, node)) {
3832 NODE_UNLOCK(lock, isc_rwlocktype_read);
3833 result = setup_delegation(&search, nodep, foundname,
3834 rdataset, sigrdataset);
3839 * An ordinary successful query!
3841 result = ISC_R_SUCCESS;
3844 if (nodep != NULL) {
3846 new_reference(search.rbtdb, node);
3848 search.need_cleanup = ISC_FALSE;
3852 if (type != dns_rdatatype_any) {
3853 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3854 if (foundsig != NULL)
3855 bind_rdataset(search.rbtdb, node, foundsig, 0,
3860 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3863 NODE_UNLOCK(lock, isc_rwlocktype_read);
3866 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3869 * If we found a zonecut but aren't going to use it, we have to
3872 if (search.need_cleanup) {
3873 node = search.zonecut;
3874 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3876 NODE_LOCK(lock, isc_rwlocktype_read);
3877 decrement_reference(search.rbtdb, node, 0,
3878 isc_rwlocktype_read, isc_rwlocktype_none,
3880 NODE_UNLOCK(lock, isc_rwlocktype_read);
3884 closeversion(db, &version, ISC_FALSE);
3886 dns_rbtnodechain_reset(&search.chain);
/*
 * Zone-cut lookup entry point for the "zone_" flavour of this database
 * (compare cache_findzonecut()).  It is not meant to be called: the body
 * raises FATAL_ERROR() and, should that return, yields
 * ISC_R_NOTIMPLEMENTED.
 *
 * NOTE(review): presumably every parameter is marked UNUSED() the same way
 * sigrdataset is -- confirm.
 */
3892 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3893 isc_stdtime_t now, dns_dbnode_t **nodep,
3894 dns_name_t *foundname,
3895 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3904 UNUSED(sigrdataset);
3906 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3908 return (ISC_R_NOTIMPLEMENTED);
/*
 * Node callback that cache_find() hands to dns_rbt_findnode().
 *
 * 'arg' is the rbtdb_search_t of the lookup in progress; search->zonecut
 * must still be NULL on entry.  With the node lock held (read mode, switched
 * to write mode when NODE_TRYUPGRADE() succeeds on a sufficiently old
 * header), the node's rdataset headers are scanned:
 *
 *   - headers with rdh_ttl <= search->now are treated as stale and are
 *     either freed (node has no references) or flagged
 *     RDATASET_ATTR_STALE;
 *   - a DNAME header and an RRSIG(DNAME) header are remembered.
 *
 * If a DNAME header was found and its trust is not pending (or the caller
 * set DNS_DBFIND_PENDINGOK), a reference is taken on 'node', the headers are
 * recorded in 'search', search->need_cleanup is set, and the result is
 * DNS_R_PARTIALMATCH.  Otherwise the result is DNS_R_CONTINUE.
 */
3912 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3913 rbtdb_search_t *search = arg;
3914 rdatasetheader_t *header, *header_prev, *header_next;
3915 rdatasetheader_t *dname_header, *sigdname_header;
3916 isc_result_t result;
3918 isc_rwlocktype_t locktype;
3922 REQUIRE(search->zonecut == NULL);
3925 * Keep compiler silent.
3929 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3930 locktype = isc_rwlocktype_read;
3931 NODE_LOCK(lock, locktype);
3934 * Look for a DNAME or RRSIG DNAME rdataset.
3936 dname_header = NULL;
3937 sigdname_header = NULL;
/*
 * header_next is captured at the top of each iteration because the
 * current header may be handed to free_rdataset() below.
 */
3939 for (header = node->data; header != NULL; header = header_next) {
3940 header_next = header->next;
3941 if (header->rdh_ttl <= search->now) {
3943 * This rdataset is stale. If no one else is
3944 * using the node, we can clean it up right
3945 * now, otherwise we mark it as stale, and
3946 * the node as dirty, so it will get cleaned
3949 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3950 (locktype == isc_rwlocktype_write ||
3951 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3953 * We update the node's status only when we
3954 * can get write access; otherwise, we leave
3955 * others to this work. Periodical cleaning
3956 * will eventually take the job as the last
3958 * We won't downgrade the lock, since other
3959 * rdatasets are probably stale, too.
3961 locktype = isc_rwlocktype_write;
3963 if (dns_rbtnode_refcurrent(node) == 0) {
3967 * header->down can be non-NULL if the
3968 * refcount has just decremented to 0
3969 * but decrement_reference() has not
3970 * performed clean_cache_node(), in
3971 * which case we need to purge the
3972 * stale headers first.
3974 mctx = search->rbtdb->common.mctx;
3975 clean_stale_headers(search->rbtdb,
3978 if (header_prev != NULL)
3982 node->data = header->next;
3983 free_rdataset(search->rbtdb, mctx,
3986 header->attributes |=
3987 RDATASET_ATTR_STALE;
3989 header_prev = header;
3992 header_prev = header;
3993 } else if (header->type == dns_rdatatype_dname &&
3995 dname_header = header;
3996 header_prev = header;
3997 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3999 sigdname_header = header;
4000 header_prev = header;
4002 header_prev = header;
4005 if (dname_header != NULL &&
4006 (!DNS_TRUST_PENDING(dname_header->trust) ||
4007 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4009 * We increment the reference count on node to ensure that
4010 * search->zonecut_rdataset will still be valid later.
4012 new_reference(search->rbtdb, node);
4013 INSIST(!ISC_LINK_LINKED(node, deadlink));
4014 search->zonecut = node;
4015 search->zonecut_rdataset = dname_header;
4016 search->zonecut_sigrdataset = sigdname_header;
4017 search->need_cleanup = ISC_TRUE;
4018 result = DNS_R_PARTIALMATCH;
4020 result = DNS_R_CONTINUE;
/* Release the lock in whichever mode we ended up holding it. */
4022 NODE_UNLOCK(lock, locktype);
/*
 * Starting at 'node' and moving up through search->chain.levels[], look for
 * the closest node that carries an NS rdataset (and remember its RRSIG(NS)
 * if present).  The tree lock must be held by the caller.
 *
 * At each node the node lock is taken in read mode; headers with
 * rdh_ttl <= search->now are treated as stale and are freed or flagged
 * RDATASET_ATTR_STALE when write access can be obtained.
 *
 * When an NS header is found:
 *   - 'foundname' (if non-NULL) is built from the node's name and the names
 *     of the remaining chain levels; a failure there leaves the failing
 *     result in 'result';
 *   - otherwise the result becomes DNS_R_DELEGATION, a reference is taken
 *     on the node when 'nodep' is non-NULL, and the NS / RRSIG(NS) headers
 *     are bound to 'rdataset' / 'sigrdataset';
 *   - update_header() is called for any header need_headerupdate() flags.
 *
 * 'result' starts out as ISC_R_NOTFOUND, which is what remains if no level
 * has an NS rdataset.
 */
4027 static inline isc_result_t
4028 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4029 dns_dbnode_t **nodep, dns_name_t *foundname,
4030 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4033 dns_rbtnode_t *level_node;
4034 rdatasetheader_t *header, *header_prev, *header_next;
4035 rdatasetheader_t *found, *foundsig;
4036 isc_result_t result = ISC_R_NOTFOUND;
4041 isc_rwlocktype_t locktype;
4044 * Caller must be holding the tree lock.
4047 rbtdb = search->rbtdb;
4048 i = search->chain.level_matches;
4051 locktype = isc_rwlocktype_read;
4052 lock = &rbtdb->node_locks[node->locknum].lock;
4053 NODE_LOCK(lock, locktype);
4056 * Look for NS and RRSIG NS rdatasets.
4061 for (header = node->data;
4063 header = header_next) {
4064 header_next = header->next;
4065 if (header->rdh_ttl <= search->now) {
4067 * This rdataset is stale. If no one else is
4068 * using the node, we can clean it up right
4069 * now, otherwise we mark it as stale, and
4070 * the node as dirty, so it will get cleaned
4073 if ((header->rdh_ttl <= search->now -
4075 (locktype == isc_rwlocktype_write ||
4076 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4078 * We update the node's status only
4079 * when we can get write access.
4081 locktype = isc_rwlocktype_write;
4083 if (dns_rbtnode_refcurrent(node)
4087 m = search->rbtdb->common.mctx;
4088 clean_stale_headers(
4091 if (header_prev != NULL)
4097 free_rdataset(rbtdb, m,
4100 header->attributes |=
4101 RDATASET_ATTR_STALE;
4103 header_prev = header;
4106 header_prev = header;
4107 } else if (EXISTS(header)) {
4109 * We've found an extant rdataset. See if
4110 * we're interested in it.
4112 if (header->type == dns_rdatatype_ns) {
4114 if (foundsig != NULL)
4116 } else if (header->type ==
4117 RBTDB_RDATATYPE_SIGNS) {
4122 header_prev = header;
4124 header_prev = header;
4127 if (found != NULL) {
4129 * If we have to set foundname, we do it before
4130 * anything else. If we were to set foundname after
4131 * we had set nodep or bound the rdataset, then we'd
4132 * have to undo that work if dns_name_concatenate()
4133 * failed. By setting foundname first, there's
4134 * nothing to undo if we have trouble.
4136 if (foundname != NULL) {
4137 dns_name_init(&name, NULL);
4138 dns_rbt_namefromnode(node, &name);
4139 result = dns_name_copy(&name, foundname, NULL);
4140 while (result == ISC_R_SUCCESS && i > 0) {
4142 level_node = search->chain.levels[i];
4143 dns_name_init(&name, NULL);
4144 dns_rbt_namefromnode(level_node,
4147 dns_name_concatenate(foundname,
4152 if (result != ISC_R_SUCCESS) {
4157 result = DNS_R_DELEGATION;
4158 if (nodep != NULL) {
4159 new_reference(search->rbtdb, node);
4162 bind_rdataset(search->rbtdb, node, found, search->now,
4164 if (foundsig != NULL)
4165 bind_rdataset(search->rbtdb, node, foundsig,
4166 search->now, sigrdataset);
4167 if (need_headerupdate(found, search->now) ||
4168 (foundsig != NULL &&
4169 need_headerupdate(foundsig, search->now))) {
/*
 * Switch to the write lock by unlocking and relocking (not an
 * atomic upgrade); need_headerupdate() is evaluated again
 * afterwards before each update_header() call.
 */
4170 if (locktype != isc_rwlocktype_write) {
4171 NODE_UNLOCK(lock, locktype);
4172 NODE_LOCK(lock, isc_rwlocktype_write);
4173 locktype = isc_rwlocktype_write;
4175 if (need_headerupdate(found, search->now))
4176 update_header(search->rbtdb, found,
4178 if (foundsig != NULL &&
4179 need_headerupdate(foundsig, search->now)) {
4180 update_header(search->rbtdb, foundsig,
4187 NODE_UNLOCK(lock, locktype);
/* Nothing at this level: step to the next node recorded in the chain. */
4189 if (found == NULL && i > 0) {
4191 node = search->chain.levels[i];
/*
 * Look for an NSEC rdataset (and its RRSIG) at the node the search chain
 * currently points to, moving to the previous node with
 * dns_rbtnodechain_prev() for as long as the nodes examined are "empty"
 * (no extant, non-stale, positive rdataset).
 *
 * For each node: the name and origin are fetched with
 * dns_rbtnodechain_current(), the node lock is taken in read mode, and the
 * headers are scanned.  Headers with rdh_ttl <= now are treated as stale
 * and are freed or flagged RDATASET_ATTR_STALE when write access can be
 * obtained; nonexistent headers and headers whose base type is 0 are
 * skipped.
 *
 * Outcome per node:
 *   - NSEC found: name and origin are concatenated, the NSEC and
 *     RRSIG(NSEC) headers are bound, a reference is taken on the node, and
 *     the result is DNS_R_COVERINGNSEC;
 *   - node not empty but no NSEC: result is ISC_R_NOTFOUND;
 *   - node empty: the chain is stepped backwards and the loop repeats
 *     while that step succeeds.
 */
4201 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4202 isc_stdtime_t now, dns_name_t *foundname,
4203 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4205 dns_rbtnode_t *node;
4206 rdatasetheader_t *header, *header_next, *header_prev;
4207 rdatasetheader_t *found, *foundsig;
4208 isc_boolean_t empty_node;
4209 isc_result_t result;
4210 dns_fixedname_t fname, forigin;
4211 dns_name_t *name, *origin;
4212 rbtdb_rdatatype_t matchtype, sigmatchtype;
4214 isc_rwlocktype_t locktype;
4216 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4217 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4218 dns_rdatatype_nsec);
4222 dns_fixedname_init(&fname);
4223 name = dns_fixedname_name(&fname);
4224 dns_fixedname_init(&forigin);
4225 origin = dns_fixedname_name(&forigin);
4226 result = dns_rbtnodechain_current(&search->chain, name,
4228 if (result != ISC_R_SUCCESS)
4230 locktype = isc_rwlocktype_read;
4231 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4232 NODE_LOCK(lock, locktype);
4235 empty_node = ISC_TRUE;
4237 for (header = node->data;
4239 header = header_next) {
4240 header_next = header->next;
4241 if (header->rdh_ttl <= now) {
4243 * This rdataset is stale. If no one else is
4244 * using the node, we can clean it up right
4245 * now, otherwise we mark it as stale, and the
4246 * node as dirty, so it will get cleaned up
4249 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4250 (locktype == isc_rwlocktype_write ||
4251 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4253 * We update the node's status only
4254 * when we can get write access.
4256 locktype = isc_rwlocktype_write;
4258 if (dns_rbtnode_refcurrent(node)
4262 m = search->rbtdb->common.mctx;
4263 clean_stale_headers(
4266 if (header_prev != NULL)
4270 node->data = header->next;
4271 free_rdataset(search->rbtdb, m,
4274 header->attributes |=
4275 RDATASET_ATTR_STALE;
4277 header_prev = header;
4280 header_prev = header;
4283 if (NONEXISTENT(header) ||
4284 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4285 header_prev = header;
4288 empty_node = ISC_FALSE;
4289 if (header->type == matchtype)
4291 else if (header->type == sigmatchtype)
4293 header_prev = header;
4295 if (found != NULL) {
4296 result = dns_name_concatenate(name, origin,
4298 if (result != ISC_R_SUCCESS)
4300 bind_rdataset(search->rbtdb, node, found,
4302 if (foundsig != NULL)
4303 bind_rdataset(search->rbtdb, node, foundsig,
4305 new_reference(search->rbtdb, node);
4307 result = DNS_R_COVERINGNSEC;
4308 } else if (!empty_node) {
4309 result = ISC_R_NOTFOUND;
4311 result = dns_rbtnodechain_prev(&search->chain, NULL,
4314 NODE_UNLOCK(lock, locktype);
4315 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * Look up 'name' / 'type' in the cache database.
 *
 * Requirements checked here: 'db' is a valid rbtdb and 'version' is NULL.
 * NOTE(review): the isc_stdtime_get(&now) call is presumably guarded by
 * "now == 0" -- confirm.
 *
 * Outline (all under the tree read lock):
 *   1. dns_rbt_findnode() walks the tree with cache_zonecut_callback(), so
 *      a DNAME met on the way down is recorded in 'search'.
 *   2. On DNS_R_PARTIALMATCH the function may consult find_coveringnsec()
 *      (only with DNS_DBFIND_COVERINGNSEC), setup_delegation() (a zone cut
 *      was recorded) or find_deepest_zonecut().
 *   3. On an exact match the node's headers are scanned under the node
 *      lock.  Stale headers (rdh_ttl <= now) are freed or flagged; among
 *      the extant ones the code remembers the answer (exact type, any type
 *      for ANY queries, or a CNAME when cname_ok), its RRSIG, negative
 *      cache entries, and NS / RRSIG(NS) / RRSIG(CNAME) for later use.
 *      KEY and NSEC queries never match a CNAME (cname_ok is cleared).
 *   4. If no usable answer was found -- none at all, or one whose trust
 *      level is additional/glue/pending without the matching *OK option --
 *      an NS rdataset at the node yields DNS_R_DELEGATION.
 *   5. Otherwise the result is DNS_R_NCACHENXDOMAIN / DNS_R_NCACHENXRRSET
 *      for negative entries, DNS_R_CNAME when a CNAME was found instead of
 *      the requested type, or ISC_R_SUCCESS.
 *
 * Headers flagged by need_headerupdate() are passed to update_header()
 * under the node write lock.  Before leaving, a zone-cut reference recorded
 * by the callback but not consumed (search.need_cleanup) is released with
 * decrement_reference(), and the node chain is reset.
 */
4320 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4321 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4322 dns_dbnode_t **nodep, dns_name_t *foundname,
4323 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4325 dns_rbtnode_t *node = NULL;
4326 isc_result_t result;
4327 rbtdb_search_t search;
4328 isc_boolean_t cname_ok = ISC_TRUE;
4329 isc_boolean_t empty_node;
4331 isc_rwlocktype_t locktype;
4332 rdatasetheader_t *header, *header_prev, *header_next;
4333 rdatasetheader_t *found, *nsheader;
4334 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4335 rdatasetheader_t *update, *updatesig;
4336 rbtdb_rdatatype_t sigtype, negtype;
4340 search.rbtdb = (dns_rbtdb_t *)db;
4342 REQUIRE(VALID_RBTDB(search.rbtdb));
4343 REQUIRE(version == NULL);
4346 isc_stdtime_get(&now);
4348 search.rbtversion = NULL;
4350 search.options = options;
4351 search.copy_name = ISC_FALSE;
4352 search.need_cleanup = ISC_FALSE;
4353 search.wild = ISC_FALSE;
4354 search.zonecut = NULL;
4355 dns_fixedname_init(&search.zonecut_name);
4356 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4361 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4364 * Search down from the root of the tree. If, while going down, we
4365 * encounter a callback node, cache_zonecut_callback() will search the
4366 * rdatasets at the zone cut for a DNAME rdataset.
4368 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4369 &search.chain, DNS_RBTFIND_EMPTYDATA,
4370 cache_zonecut_callback, &search);
4372 if (result == DNS_R_PARTIALMATCH) {
4373 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4374 result = find_coveringnsec(&search, nodep, now,
4375 foundname, rdataset,
4377 if (result == DNS_R_COVERINGNSEC)
4380 if (search.zonecut != NULL) {
4381 result = setup_delegation(&search, nodep, foundname,
4382 rdataset, sigrdataset);
4386 result = find_deepest_zonecut(&search, node, nodep,
4387 foundname, rdataset,
4391 } else if (result != ISC_R_SUCCESS)
4395 * Certain DNSSEC types are not subject to CNAME matching
4396 * (RFC4035, section 2.5 and RFC3007).
4398 * We don't check for RRSIG, because we don't store RRSIG records
4401 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4402 cname_ok = ISC_FALSE;
4405 * We now go looking for rdata...
4408 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4409 locktype = isc_rwlocktype_read;
4410 NODE_LOCK(lock, locktype);
4414 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4415 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4419 empty_node = ISC_TRUE;
4421 for (header = node->data; header != NULL; header = header_next) {
4422 header_next = header->next;
4423 if (header->rdh_ttl <= now) {
4425 * This rdataset is stale. If no one else is using the
4426 * node, we can clean it up right now, otherwise we
4427 * mark it as stale, and the node as dirty, so it will
4428 * get cleaned up later.
4430 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4431 (locktype == isc_rwlocktype_write ||
4432 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4434 * We update the node's status only when we
4435 * can get write access.
4437 locktype = isc_rwlocktype_write;
4439 if (dns_rbtnode_refcurrent(node) == 0) {
4442 mctx = search.rbtdb->common.mctx;
4443 clean_stale_headers(search.rbtdb, mctx,
4445 if (header_prev != NULL)
4449 node->data = header->next;
4450 free_rdataset(search.rbtdb, mctx,
4453 header->attributes |=
4454 RDATASET_ATTR_STALE;
4456 header_prev = header;
4459 header_prev = header;
4460 } else if (EXISTS(header)) {
4462 * We now know that there is at least one active
4463 * non-stale rdataset at this node.
4465 empty_node = ISC_FALSE;
4468 * If we found a type we were looking for, remember
4471 if (header->type == type ||
4472 (type == dns_rdatatype_any &&
4473 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4474 (cname_ok && header->type ==
4475 dns_rdatatype_cname)) {
4477 * We've found the answer.
4480 if (header->type == dns_rdatatype_cname &&
4484 * If we've already got the CNAME RRSIG,
4485 * use it, otherwise change sigtype
4486 * so that we find it.
4488 if (cnamesig != NULL)
4489 foundsig = cnamesig;
4492 RBTDB_RDATATYPE_SIGCNAME;
4493 foundsig = cnamesig;
4495 } else if (header->type == sigtype) {
4497 * We've found the RRSIG rdataset for our
4498 * target type. Remember it.
4501 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4502 header->type == negtype) {
4504 * We've found a negative cache entry.
4507 } else if (header->type == dns_rdatatype_ns) {
4509 * Remember a NS rdataset even if we're
4510 * not specifically looking for it, because
4511 * we might need it later.
4514 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4516 * If we need the NS rdataset, we'll also
4517 * need its signature.
4520 } else if (cname_ok &&
4521 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4523 * If we get a CNAME match, we'll also need
4528 header_prev = header;
4530 header_prev = header;
4535 * We have an exact match for the name, but there are no
4536 * extant rdatasets. That means that this node doesn't
4537 * meaningfully exist, and that we really have a partial match.
4539 NODE_UNLOCK(lock, locktype);
4544 * If we didn't find what we were looking for...
4546 if (found == NULL ||
4547 (DNS_TRUST_ADDITIONAL(found->trust) &&
4548 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4549 (found->trust == dns_trust_glue &&
4550 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4551 (DNS_TRUST_PENDING(found->trust) &&
4552 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4554 * If there is an NS rdataset at this node, then this is the
4557 if (nsheader != NULL) {
4558 if (nodep != NULL) {
4559 new_reference(search.rbtdb, node);
4560 INSIST(!ISC_LINK_LINKED(node, deadlink));
4563 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4565 if (need_headerupdate(nsheader, search.now))
4567 if (nssig != NULL) {
4568 bind_rdataset(search.rbtdb, node, nssig,
4569 search.now, sigrdataset);
4570 if (need_headerupdate(nssig, search.now))
4573 result = DNS_R_DELEGATION;
4578 * Go find the deepest zone cut.
4580 NODE_UNLOCK(lock, locktype);
4585 * We found what we were looking for, or we found a CNAME.
4588 if (nodep != NULL) {
4589 new_reference(search.rbtdb, node);
4590 INSIST(!ISC_LINK_LINKED(node, deadlink));
4594 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4596 * We found a negative cache entry.
4598 if (NXDOMAIN(found))
4599 result = DNS_R_NCACHENXDOMAIN;
4601 result = DNS_R_NCACHENXRRSET;
4602 } else if (type != found->type &&
4603 type != dns_rdatatype_any &&
4604 found->type == dns_rdatatype_cname) {
4606 * We weren't doing an ANY query and we found a CNAME instead
4607 * of the type we were looking for, so we need to indicate
4608 * that result to the caller.
4610 result = DNS_R_CNAME;
4613 * An ordinary successful query!
4615 result = ISC_R_SUCCESS;
4618 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4619 result == DNS_R_NCACHENXRRSET) {
4620 bind_rdataset(search.rbtdb, node, found, search.now,
4622 if (need_headerupdate(found, search.now))
4624 if (!NEGATIVE(found) && foundsig != NULL) {
4625 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4627 if (need_headerupdate(foundsig, search.now))
4628 updatesig = foundsig;
/*
 * Header updates need the write lock.  It is obtained by unlocking and
 * relocking (not an atomic upgrade), which is why need_headerupdate() is
 * evaluated again before each update_header() call.
 */
4633 if ((update != NULL || updatesig != NULL) &&
4634 locktype != isc_rwlocktype_write) {
4635 NODE_UNLOCK(lock, locktype);
4636 NODE_LOCK(lock, isc_rwlocktype_write);
4637 locktype = isc_rwlocktype_write;
4639 if (update != NULL && need_headerupdate(update, search.now))
4640 update_header(search.rbtdb, update, search.now);
4641 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4642 update_header(search.rbtdb, updatesig, search.now);
4644 NODE_UNLOCK(lock, locktype);
4647 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4650 * If we found a zonecut but aren't going to use it, we have to
4653 if (search.need_cleanup) {
4654 node = search.zonecut;
4655 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4657 NODE_LOCK(lock, isc_rwlocktype_read);
4658 decrement_reference(search.rbtdb, node, 0,
4659 isc_rwlocktype_read, isc_rwlocktype_none,
4661 NODE_UNLOCK(lock, isc_rwlocktype_read);
4664 dns_rbtnodechain_reset(&search.chain);
/*
 * Find the deepest known zone cut (NS rdataset) at or above 'name' in the
 * cache database.
 *
 * DNS_DBFIND_NOEXACT in 'options' is translated to DNS_RBTFIND_NOEXACT for
 * the tree search.  The search runs under the tree read lock with no node
 * callback (NULL is passed to dns_rbt_findnode()).
 *
 *   - Partial match: the work is delegated to find_deepest_zonecut().
 *   - Exact match: the node's headers are scanned under the node lock.
 *     Stale headers (rdh_ttl <= now) are freed or flagged; the NS and
 *     RRSIG(NS) headers are remembered.  With an NS rdataset present, a
 *     reference is taken on the node when 'nodep' is non-NULL, the headers
 *     are bound to 'rdataset' / 'sigrdataset', and update_header() is
 *     called (under the write lock) for headers flagged by
 *     need_headerupdate().
 *
 * A DNS_R_DELEGATION result is reported to the caller as ISC_R_SUCCESS.
 * search.need_cleanup is asserted to be false at the end; in this listing
 * only cache_zonecut_callback() sets it, and no callback is installed here.
 *
 * NOTE(review): the isc_stdtime_get(&now) call is presumably guarded by
 * "now == 0" -- confirm.
 */
4670 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4671 isc_stdtime_t now, dns_dbnode_t **nodep,
4672 dns_name_t *foundname,
4673 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4675 dns_rbtnode_t *node = NULL;
4677 isc_result_t result;
4678 rbtdb_search_t search;
4679 rdatasetheader_t *header, *header_prev, *header_next;
4680 rdatasetheader_t *found, *foundsig;
4681 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4682 isc_rwlocktype_t locktype;
4684 search.rbtdb = (dns_rbtdb_t *)db;
4686 REQUIRE(VALID_RBTDB(search.rbtdb));
4689 isc_stdtime_get(&now);
4691 search.rbtversion = NULL;
4693 search.options = options;
4694 search.copy_name = ISC_FALSE;
4695 search.need_cleanup = ISC_FALSE;
4696 search.wild = ISC_FALSE;
4697 search.zonecut = NULL;
4698 dns_fixedname_init(&search.zonecut_name);
4699 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4702 if ((options & DNS_DBFIND_NOEXACT) != 0)
4703 rbtoptions |= DNS_RBTFIND_NOEXACT;
4705 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4708 * Search down from the root of the tree.
4710 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4711 &search.chain, rbtoptions, NULL, &search);
4713 if (result == DNS_R_PARTIALMATCH) {
4715 result = find_deepest_zonecut(&search, node, nodep, foundname,
4716 rdataset, sigrdataset);
4718 } else if (result != ISC_R_SUCCESS)
4722 * We now go looking for an NS rdataset at the node.
4725 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4726 locktype = isc_rwlocktype_read;
4727 NODE_LOCK(lock, locktype);
4732 for (header = node->data; header != NULL; header = header_next) {
4733 header_next = header->next;
4734 if (header->rdh_ttl <= now) {
4736 * This rdataset is stale. If no one else is using the
4737 * node, we can clean it up right now, otherwise we
4738 * mark it as stale, and the node as dirty, so it will
4739 * get cleaned up later.
4741 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4742 (locktype == isc_rwlocktype_write ||
4743 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4745 * We update the node's status only when we
4746 * can get write access.
4748 locktype = isc_rwlocktype_write;
4750 if (dns_rbtnode_refcurrent(node) == 0) {
4753 mctx = search.rbtdb->common.mctx;
4754 clean_stale_headers(search.rbtdb, mctx,
4756 if (header_prev != NULL)
4760 node->data = header->next;
4761 free_rdataset(search.rbtdb, mctx,
4764 header->attributes |=
4765 RDATASET_ATTR_STALE;
4767 header_prev = header;
4770 header_prev = header;
4771 } else if (EXISTS(header)) {
4773 * If we found a type we were looking for, remember
4776 if (header->type == dns_rdatatype_ns) {
4778 * Remember a NS rdataset even if we're
4779 * not specifically looking for it, because
4780 * we might need it later.
4783 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4785 * If we need the NS rdataset, we'll also
4786 * need its signature.
4790 header_prev = header;
4792 header_prev = header;
4795 if (found == NULL) {
4797 * No NS records here.
4799 NODE_UNLOCK(lock, locktype);
4803 if (nodep != NULL) {
4804 new_reference(search.rbtdb, node);
4805 INSIST(!ISC_LINK_LINKED(node, deadlink));
4809 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4810 if (foundsig != NULL)
4811 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4814 if (need_headerupdate(found, search.now) ||
4815 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
/*
 * Switch to the write lock by unlocking and relocking; the
 * need_headerupdate() checks are repeated after the relock.
 */
4816 if (locktype != isc_rwlocktype_write) {
4817 NODE_UNLOCK(lock, locktype);
4818 NODE_LOCK(lock, isc_rwlocktype_write);
4819 locktype = isc_rwlocktype_write;
4821 if (need_headerupdate(found, search.now))
4822 update_header(search.rbtdb, found, search.now);
4823 if (foundsig != NULL &&
4824 need_headerupdate(foundsig, search.now)) {
4825 update_header(search.rbtdb, foundsig, search.now);
4829 NODE_UNLOCK(lock, locktype);
4832 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4834 INSIST(!search.need_cleanup);
4836 dns_rbtnodechain_reset(&search.chain);
/* Callers of this entry point see a found zone cut as plain success. */
4838 if (result == DNS_R_DELEGATION)
4839 result = ISC_R_SUCCESS;
/*
 * Take an additional reference on node 'source'.
 *
 * Requires a valid rbtdb and a non-NULL 'targetp' that currently points to
 * NULL.  The node's reference count is incremented while the node's lock
 * bucket is held via NODE_STRONGLOCK().
 *
 * NOTE(review): presumably *targetp is then set to 'source' -- confirm.
 */
4845 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4846 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4847 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4850 REQUIRE(VALID_RBTDB(rbtdb));
4851 REQUIRE(targetp != NULL && *targetp == NULL);
4853 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4854 dns_rbtnode_refincrement(node, &refs);
4856 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Drop the reference held through '*targetp'.
 *
 * Requires a valid rbtdb and a non-NULL '*targetp'.  The reference is
 * released with decrement_reference() under the node's read lock.  If that
 * call reports true and the node's lock bucket has no references left
 * while it is marked 'exiting', the bucket is considered inactive.
 *
 * In the inactive case the database write lock is taken and, when
 * rbtdb->active is 0, the database is torn down: the origin name (or
 * "<UNKNOWN>" if it is not dynamic) is logged at debug level 1 and
 * free_rbtdb() is called.
 *
 * NOTE(review): presumably rbtdb->active is decremented under the write
 * lock before being tested, and *targetp is cleared -- confirm.
 */
4862 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4863 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4864 dns_rbtnode_t *node;
4865 isc_boolean_t want_free = ISC_FALSE;
4866 isc_boolean_t inactive = ISC_FALSE;
4867 rbtdb_nodelock_t *nodelock;
4869 REQUIRE(VALID_RBTDB(rbtdb));
4870 REQUIRE(targetp != NULL && *targetp != NULL);
4872 node = (dns_rbtnode_t *)(*targetp);
4873 nodelock = &rbtdb->node_locks[node->locknum];
4875 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4877 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4878 isc_rwlocktype_none, ISC_FALSE)) {
4879 if (isc_refcount_current(&nodelock->references) == 0 &&
4880 nodelock->exiting) {
4881 inactive = ISC_TRUE;
4885 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4890 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4892 if (rbtdb->active == 0)
4893 want_free = ISC_TRUE;
4894 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4896 char buf[DNS_NAME_FORMATSIZE];
4897 if (dns_name_dynamic(&rbtdb->common.origin))
4898 dns_name_format(&rbtdb->common.origin, buf,
4901 strcpy(buf, "<UNKNOWN>");
4902 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4903 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4904 "calling free_rbtdb(%s)", buf);
4905 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Mark expired rdatasets at 'node' as stale and, when the database is over
 * its memory limit, possibly force-expire the rest.
 *
 * The caller must hold a tree lock.  Always returns ISC_R_SUCCESS.
 *
 * When rbtdb->overmem is set, a random value decides whether this node is
 * force-expired: only nodes with no 'down' pointer qualify, and of those
 * one in four (val % 4 == 0).  Progress is logged at debug level 2 when the
 * log context would emit it.
 *
 * Under the node write lock every top-level header is visited:
 *   - rdh_ttl <= now - RBTDB_VIRTUAL: flagged RDATASET_ATTR_STALE;
 *   - else, when force-expiring: TTL set to 0 and flagged stale unless
 *     RETAIN(header) holds, in which case it is spared;
 *   - else: left alone (logged as "saved" when overmem logging is on).
 *
 * NOTE(review): the isc_stdtime_get(&now) call is presumably guarded by
 * "now == 0" -- confirm.
 */
4911 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4912 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4913 dns_rbtnode_t *rbtnode = node;
4914 rdatasetheader_t *header;
4915 isc_boolean_t force_expire = ISC_FALSE;
4917 * These are the category and module used by the cache cleaner.
4919 isc_boolean_t log = ISC_FALSE;
4920 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4921 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4922 int level = ISC_LOG_DEBUG(2);
4923 char printname[DNS_NAME_FORMATSIZE];
4925 REQUIRE(VALID_RBTDB(rbtdb));
4928 * Caller must hold a tree lock.
4932 isc_stdtime_get(&now);
4934 if (rbtdb->overmem) {
4937 isc_random_get(&val);
4939 * XXXDCL Could stand to have a better policy, like LRU.
4941 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4944 * Note that 'log' can be true IFF rbtdb->overmem is also true.
4945 * rbtdb->overmem can currently only be true for cache
4946 * databases -- hence all of the "overmem cache" log strings.
4948 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4950 isc_log_write(dns_lctx, category, module, level,
4951 "overmem cache: %s %s",
4952 force_expire ? "FORCE" : "check",
4953 dns_rbt_formatnodename(rbtnode,
4955 sizeof(printname)));
4959 * We may not need write access, but this code path is not performance
4960 * sensitive, so it should be okay to always lock as a writer.
4962 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4963 isc_rwlocktype_write);
4965 for (header = rbtnode->data; header != NULL; header = header->next)
4966 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4968 * We don't check if refcurrent(rbtnode) == 0 and try
4969 * to free like we do in cache_find(), because
4970 * refcurrent(rbtnode) must be non-zero. This is so
4971 * because 'node' is an argument to the function.
4973 header->attributes |= RDATASET_ATTR_STALE;
4976 isc_log_write(dns_lctx, category, module,
4977 level, "overmem cache: stale %s",
4979 } else if (force_expire) {
4980 if (! RETAIN(header)) {
4981 set_ttl(rbtdb, header, 0);
4982 header->attributes |= RDATASET_ATTR_STALE;
4985 isc_log_write(dns_lctx, category, module,
4986 level, "overmem cache: "
4987 "reprieve by RETAIN() %s",
4990 } else if (rbtdb->overmem && log)
4991 isc_log_write(dns_lctx, category, module, level,
4992 "overmem cache: saved %s", printname);
4994 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4995 isc_rwlocktype_write);
4997 return (ISC_R_SUCCESS);
/*
 * Record the over-memory state of the database.  The flag is stored in
 * rbtdb->overmem only when IS_CACHE(rbtdb) holds; for other databases the
 * call has no effect.
 */
5001 overmem(dns_db_t *db, isc_boolean_t overmem) {
5002 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5004 if (IS_CACHE(rbtdb))
5005 rbtdb->overmem = overmem;
/*
 * Debugging aid: dump a description of 'node' to 'out'.
 *
 * Under the node's read lock this prints the node address, its current
 * reference count and lock number, then, for every top-level rdataset
 * header, the type followed by one line per header on the 'down' chain
 * (serial, ttl, trust, attributes, ...).  A node without data is reported
 * as "(empty)".
 *
 * NOTE(review): 'rbtnode' is passed to the %p conversion without a
 * (void *) cast.
 */
5009 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5010 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5011 dns_rbtnode_t *rbtnode = node;
5012 isc_boolean_t first;
5014 REQUIRE(VALID_RBTDB(rbtdb));
5016 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5017 isc_rwlocktype_read);
5019 fprintf(out, "node %p, %u references, locknum = %u\n",
5020 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5022 if (rbtnode->data != NULL) {
5023 rdatasetheader_t *current, *top_next;
5025 for (current = rbtnode->data; current != NULL;
5026 current = top_next) {
5027 top_next = current->next;
5029 fprintf(out, "\ttype %u", current->type);
5035 "\tserial = %lu, ttl = %u, "
5036 "trust = %u, attributes = %u, "
5038 (unsigned long)current->serial,
5041 current->attributes,
5043 current = current->down;
5044 } while (current != NULL);
5047 fprintf(out, "(empty)\n");
5049 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5050 isc_rwlocktype_read);
/*
 * Allocate and initialise a database iterator for 'db' and hand it back
 * through '*iteratorp'.
 *
 * Returns ISC_R_NOMEMORY if the iterator cannot be allocated from the
 * database's memory context, ISC_R_SUCCESS otherwise.
 *
 * The iterator attaches to 'db' (dns_db_attach), starts out paused with no
 * tree lock held and no current node, and owns two node chains: one for the
 * main tree and one for the NSEC3 tree.  Option bits:
 *   DNS_DB_RELATIVENAMES -> common.relative_names
 *   DNS_DB_NSEC3ONLY     -> nsec3only (iteration starts on the NSEC3 chain)
 *   DNS_DB_NONSEC3       -> nonsec3
 */
5054 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5056 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5057 rbtdb_dbiterator_t *rbtdbiter;
5059 REQUIRE(VALID_RBTDB(rbtdb));
5061 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5062 if (rbtdbiter == NULL)
5063 return (ISC_R_NOMEMORY);
5065 rbtdbiter->common.methods = &dbiterator_methods;
5066 rbtdbiter->common.db = NULL;
5067 dns_db_attach(db, &rbtdbiter->common.db);
5068 rbtdbiter->common.relative_names =
5069 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5070 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5071 rbtdbiter->common.cleaning = ISC_FALSE;
5072 rbtdbiter->paused = ISC_TRUE;
5073 rbtdbiter->tree_locked = isc_rwlocktype_none;
5074 rbtdbiter->result = ISC_R_SUCCESS;
5075 dns_fixedname_init(&rbtdbiter->name);
5076 dns_fixedname_init(&rbtdbiter->origin);
5077 rbtdbiter->node = NULL;
5078 rbtdbiter->delete = 0;
5079 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5080 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5081 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5082 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5083 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
/* Choose which chain iteration begins on. */
5084 if (rbtdbiter->nsec3only)
5085 rbtdbiter->current = &rbtdbiter->nsec3chain;
5087 rbtdbiter->current = &rbtdbiter->chain;
5089 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5091 return (ISC_R_SUCCESS);
/*
 * Find the rdataset of ('type', 'covers') at 'node' as seen by 'version'
 * in a zone database.
 *
 * 'type' must not be dns_rdatatype_any.  When 'version' is NULL the current
 * version is opened for the duration of the call and closed again before
 * returning.
 *
 * Under the node's read lock each top-level header is examined: its 'down'
 * chain is followed to a header whose serial is <= the version's serial,
 * and a header that says "this rdataset doesn't exist" is treated as
 * absent.  A header matching the requested type is remembered as the
 * answer, and one matching RRSIG('type') as its signature.
 *
 * On success the answer (and signature, if found) are bound to 'rdataset'
 * and 'sigrdataset'.  Returns ISC_R_SUCCESS, or ISC_R_NOTFOUND
 * (presumably when no matching header was found -- confirm).
 */
5095 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5096 dns_rdatatype_t type, dns_rdatatype_t covers,
5097 isc_stdtime_t now, dns_rdataset_t *rdataset,
5098 dns_rdataset_t *sigrdataset)
5100 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5101 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5102 rdatasetheader_t *header, *header_next, *found, *foundsig;
5103 rbtdb_serial_t serial;
5104 rbtdb_version_t *rbtversion = version;
5105 isc_boolean_t close_version = ISC_FALSE;
5106 rbtdb_rdatatype_t matchtype, sigmatchtype;
5108 REQUIRE(VALID_RBTDB(rbtdb));
5109 REQUIRE(type != dns_rdatatype_any);
5111 if (rbtversion == NULL) {
5112 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5113 close_version = ISC_TRUE;
5115 serial = rbtversion->serial;
5118 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5119 isc_rwlocktype_read);
5123 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5125 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5129 for (header = rbtnode->data; header != NULL; header = header_next) {
5130 header_next = header->next;
5132 if (header->serial <= serial &&
5135 * Is this a "this rdataset doesn't
5138 if (NONEXISTENT(header))
5142 header = header->down;
5143 } while (header != NULL);
5144 if (header != NULL) {
5146 * We have an active, extant rdataset. If it's a
5147 * type we're looking for, remember it.
5149 if (header->type == matchtype) {
5151 if (foundsig != NULL)
5153 } else if (header->type == sigmatchtype) {
5160 if (found != NULL) {
5161 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5162 if (foundsig != NULL)
5163 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5167 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5168 isc_rwlocktype_read);
5171 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5175 return (ISC_R_NOTFOUND);
5177 return (ISC_R_SUCCESS);
/*
 * Find the rdataset of ('type', 'covers') at 'node' in a cache database.
 *
 * 'type' must not be dns_rdatatype_any.
 *
 * Under the node lock (read mode, switched to write mode when
 * NODE_TRYUPGRADE() succeeds on a sufficiently old header) each top-level
 * header is examined:
 *   - rdh_ttl <= now: the header is expired and is not a candidate; it is
 *     only flagged RDATASET_ATTR_STALE (never freed here, because the
 *     caller holds a reference on the node);
 *   - otherwise, if it exists: a header of the requested type, a negative
 *     cache header (RBTDB_RDATATYPE_NCACHEANY or the negative form of
 *     'type'), or the RRSIG for 'type' is noted.
 *
 * The answer is bound to 'rdataset'; the signature is bound to
 * 'sigrdataset' only when the answer is not NEGATIVE().
 *
 * Result: ISC_R_NOTFOUND (presumably when nothing matched -- confirm),
 * DNS_R_NCACHENXDOMAIN or DNS_R_NCACHENXRRSET for a negative cache entry,
 * and otherwise the initial ISC_R_SUCCESS.
 *
 * NOTE(review): the isc_stdtime_get(&now) call is presumably guarded by
 * "now == 0" -- confirm.
 */
5181 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5182 dns_rdatatype_t type, dns_rdatatype_t covers,
5183 isc_stdtime_t now, dns_rdataset_t *rdataset,
5184 dns_rdataset_t *sigrdataset)
5186 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5187 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5188 rdatasetheader_t *header, *header_next, *found, *foundsig;
5189 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5190 isc_result_t result;
5192 isc_rwlocktype_t locktype;
5194 REQUIRE(VALID_RBTDB(rbtdb));
5195 REQUIRE(type != dns_rdatatype_any);
5199 result = ISC_R_SUCCESS;
5202 isc_stdtime_get(&now);
5204 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5205 locktype = isc_rwlocktype_read;
5206 NODE_LOCK(lock, locktype);
5210 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5211 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5213 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5217 for (header = rbtnode->data; header != NULL; header = header_next) {
5218 header_next = header->next;
5219 if (header->rdh_ttl <= now) {
5220 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5221 (locktype == isc_rwlocktype_write ||
5222 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5224 * We update the node's status only when we
5225 * can get write access.
5227 locktype = isc_rwlocktype_write;
5230 * We don't check if refcurrent(rbtnode) == 0
5231 * and try to free like we do in cache_find(),
5232 * because refcurrent(rbtnode) must be
5233 * non-zero. This is so because 'node' is an
5234 * argument to the function.
5236 header->attributes |= RDATASET_ATTR_STALE;
5239 } else if (EXISTS(header)) {
5240 if (header->type == matchtype)
5242 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5243 header->type == negtype)
5245 else if (header->type == sigmatchtype)
5249 if (found != NULL) {
5250 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5251 if (!NEGATIVE(found) && foundsig != NULL)
5252 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5256 NODE_UNLOCK(lock, locktype);
5259 return (ISC_R_NOTFOUND);
5261 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5263 * We found a negative cache entry.
5265 if (NXDOMAIN(found))
5266 result = DNS_R_NCACHENXDOMAIN;
5268 result = DNS_R_NCACHENXRRSET;
/*
 * allrdatasets: create an rdataset iterator over node and return it
 * through *iteratorp.
 *
 * The iterator is allocated from the memory context of the database;
 * ISC_R_NOMEMORY is returned if that allocation fails, otherwise
 * ISC_R_SUCCESS.  For a database without DNS_DBATTR_CACHE the version
 * is either resolved (when NULL was passed) or has its reference count
 * incremented.  A reference to the node is taken under the strong node
 * lock, and the iterator starts with no current header.
 *
 * NOTE(review): part of the branch structure around the version and
 * now handling is missing from this excerpt; confirm against the full
 * file.
 */
5275 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5276 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5278 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5279 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5280 rbtdb_version_t *rbtversion = version;
5281 rbtdb_rdatasetiter_t *iterator;
5284 REQUIRE(VALID_RBTDB(rbtdb));
5286 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5287 if (iterator == NULL)
5288 return (ISC_R_NOMEMORY);
/* Zone (non-cache) databases pin a version for the life of the iterator. */
5290 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5292 if (rbtversion == NULL)
5294 (dns_dbversion_t **) (void *)(&rbtversion));
5298 isc_refcount_increment(&rbtversion->references,
5304 isc_stdtime_get(&now);
/* Fill in the generic iterator fields. */
5308 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5309 iterator->common.methods = &rdatasetiter_methods;
5310 iterator->common.db = db;
5311 iterator->common.node = node;
5312 iterator->common.version = (dns_dbversion_t *)rbtversion;
5313 iterator->common.now = now;
/* Take the node reference held by the iterator under the strong node lock. */
5315 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5317 dns_rbtnode_refincrement(rbtnode, &refs);
5320 iterator->current = NULL;
5322 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5324 *iteratorp = (dns_rdatasetiter_t *)iterator;
5326 return (ISC_R_SUCCESS);
/*
 * cname_and_other_data: scan the header list of node for rdatasets
 * visible at serial and track two flags, cname and other_data.
 *
 * Headers of type CNAME feed the first flag; headers whose base type
 * is not KEY, SIG, NSEC or RRSIG feed the second.  In both cases the
 * down chain of the header is walked looking for an entry whose serial
 * is <= serial, and NONEXISTENT entries are tested for.
 *
 * NOTE(review): the lines that set cname, complete the serial test and
 * produce the return value are not visible in this excerpt; presumably
 * ISC_TRUE is returned once both flags are set - confirm.
 */
5329 static isc_boolean_t
5330 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5331 rdatasetheader_t *header, *header_next;
5332 isc_boolean_t cname, other_data;
5333 dns_rdatatype_t rdtype;
5336 * The caller must hold the node lock.
5340 * Look for CNAME and "other data" rdatasets active in our version.
5343 other_data = ISC_FALSE;
/*
 * header is advanced down the version chain inside the loop body, so
 * the next top-level header is saved before that happens.
 */
5344 for (header = node->data; header != NULL; header = header_next) {
5345 header_next = header->next;
5346 if (header->type == dns_rdatatype_cname) {
5348 * Look for an active extant CNAME.
5351 if (header->serial <= serial &&
5354 * Is this a "this rdataset doesn't
5357 if (NONEXISTENT(header))
5361 header = header->down;
5362 } while (header != NULL);
5367 * Look for active extant "other data".
5369 * "Other data" is any rdataset whose type is not
5370 * KEY, NSEC, SIG or RRSIG.
5372 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5373 if (rdtype != dns_rdatatype_key &&
5374 rdtype != dns_rdatatype_sig &&
5375 rdtype != dns_rdatatype_nsec &&
5376 rdtype != dns_rdatatype_rrsig) {
5378 * Is it active and extant?
5381 if (header->serial <= serial &&
5384 * Is this a "this rdataset
5385 * doesn't exist" record?
5387 if (NONEXISTENT(header))
5391 header = header->down;
5392 } while (header != NULL);
5394 other_data = ISC_TRUE;
5399 if (cname && other_data)
/*
 * resign_insert: insert newheader into the heap rbtdb->heaps[idx].
 *
 * Only valid for a non-cache database, and only for a header that is
 * not already in a heap (heap_index == 0) and not linked on a list.
 *
 * NOTE(review): what is done with result after the insert is not
 * visible in this excerpt.
 */
5406 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5407 isc_result_t result;
5409 INSIST(!IS_CACHE(rbtdb));
5410 INSIST(newheader->heap_index == 0);
5411 INSIST(!ISC_LINK_LINKED(newheader, link));
5413 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * add: add the rdatasetheader_t newheader to rbtnode.
 *
 * rbtversion    - version being modified; NULL is handled throughout
 *                 as the cache case (see the rbtversion == NULL tests).
 * newheader     - header to add; it is released with free_rdataset()
 *                 on the early-return paths shown below.
 * options       - DNS_DBADD_* flags; MERGE, FORCE, EXACT and EXACTTTL
 *                 are consulted.
 * loading       - when set, no changed record is created.
 * addedrdataset - if non-NULL, bound to the header that ends up
 *                 representing the data.
 * now           - time used for TTL comparisons on cache data.
 *
 * Visible return values: ISC_R_SUCCESS, DNS_R_UNCHANGED,
 * ISC_R_NOMEMORY and DNS_R_CNAMEANDOTHER.
 *
 * NOTE(review): a number of lines of this function are missing from
 * this excerpt (braces, several conditions and assignments), so the
 * exact branch structure must be confirmed against the full file.
 */
5418 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5419 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5420 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5422 rbtdb_changed_t *changed = NULL;
5423 rdatasetheader_t *topheader, *topheader_prev, *header;
5424 unsigned char *merged;
5425 isc_result_t result;
5426 isc_boolean_t header_nx;
5427 isc_boolean_t newheader_nx;
5428 isc_boolean_t merge;
5429 dns_rdatatype_t rdtype, covers;
5430 rbtdb_rdatatype_t negtype;
5435 * Add an rdatasetheader_t to a node.
5439 * Caller must be holding the node lock.
/* Merging is only meaningful against a version, hence the REQUIRE. */
5442 if ((options & DNS_DBADD_MERGE) != 0) {
5443 REQUIRE(rbtversion != NULL);
/*
 * DNS_DBADD_FORCE makes the new data compare as dns_trust_ultimate in
 * the trust checks below; otherwise the trust of newheader is used.
 */
5448 if ((options & DNS_DBADD_FORCE) != 0)
5449 trust = dns_trust_ultimate;
5451 trust = newheader->trust;
5453 if (rbtversion != NULL && !loading) {
5455 * We always add a changed record, even if no changes end up
5456 * being made to this node, because it's harmless and
5457 * simplifies the code.
5459 changed = add_changed(rbtdb, rbtversion, rbtnode);
5460 if (changed == NULL) {
5461 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5462 return (ISC_R_NOMEMORY);
5466 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5467 topheader_prev = NULL;
5470 if (rbtversion == NULL && !newheader_nx) {
5471 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5474 * We're adding a negative cache entry.
5476 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5477 if (covers == dns_rdatatype_any) {
5479 * We're adding an negative cache entry
5480 * which covers all types (NXDOMAIN,
5481 * NODATA(QTYPE=ANY)).
5483 * We make all other data stale so that the
5484 * only rdataset that can be found at this
5485 * node is the negative cache entry.
5487 for (topheader = rbtnode->data;
5489 topheader = topheader->next) {
5490 set_ttl(rbtdb, topheader, 0);
5491 topheader->attributes |=
5492 RDATASET_ATTR_STALE;
5497 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5500 * We're adding something that isn't a
5501 * negative cache entry. Look for an extant
5502 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5505 for (topheader = rbtnode->data;
5507 topheader = topheader->next) {
5508 if (topheader->type ==
5509 RBTDB_RDATATYPE_NCACHEANY)
5512 if (topheader != NULL && EXISTS(topheader) &&
5513 topheader->rdh_ttl > now) {
5517 if (trust < topheader->trust) {
5519 * The NXDOMAIN/NODATA(QTYPE=ANY)
5522 free_rdataset(rbtdb,
5525 if (addedrdataset != NULL)
5526 bind_rdataset(rbtdb, rbtnode,
5529 return (DNS_R_UNCHANGED);
5532 * The new rdataset is better. Expire the
5533 * NXDOMAIN/NODATA(QTYPE=ANY).
5535 set_ttl(rbtdb, topheader, 0);
5536 topheader->attributes |= RDATASET_ATTR_STALE;
5541 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5545 for (topheader = rbtnode->data;
5547 topheader = topheader->next) {
5548 if (topheader->type == newheader->type ||
5549 topheader->type == negtype)
5551 topheader_prev = topheader;
5556 * If header isn't NULL, we've found the right type. There may be
5557 * IGNORE rdatasets between the top of the chain and the first real
5558 * data. We skip over them.
5561 while (header != NULL && IGNORE(header))
5562 header = header->down;
5563 if (header != NULL) {
5564 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5567 * Deleting an already non-existent rdataset has no effect.
5569 if (header_nx && newheader_nx) {
5570 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5571 return (DNS_R_UNCHANGED);
5575 * Trying to add an rdataset with lower trust to a cache DB
5576 * has no effect, provided that the cache data isn't stale.
5578 if (rbtversion == NULL && trust < header->trust &&
5579 (header->rdh_ttl > now || header_nx)) {
5580 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5581 if (addedrdataset != NULL)
5582 bind_rdataset(rbtdb, rbtnode, header, now,
5584 return (DNS_R_UNCHANGED);
5588 * Don't merge if a nonexistent rdataset is involved.
5590 if (merge && (header_nx || newheader_nx))
5594 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5595 * that is the union of 'newheader' and 'header'.
5598 unsigned int flags = 0;
5599 INSIST(rbtversion->serial >= header->serial);
5601 result = ISC_R_SUCCESS;
/*
 * Translate the DNS_DBADD_* options into rdataslab flags.  With
 * EXACTTTL a TTL mismatch is an error; without it a TTL mismatch
 * sets DNS_RDATASLAB_FORCE.
 */
5603 if ((options & DNS_DBADD_EXACT) != 0)
5604 flags |= DNS_RDATASLAB_EXACT;
5605 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5606 newheader->rdh_ttl != header->rdh_ttl)
5607 result = DNS_R_NOTEXACT;
5608 else if (newheader->rdh_ttl != header->rdh_ttl)
5609 flags |= DNS_RDATASLAB_FORCE;
5610 if (result == ISC_R_SUCCESS)
5611 result = dns_rdataslab_merge(
5612 (unsigned char *)header,
5613 (unsigned char *)newheader,
5614 (unsigned int)(sizeof(*newheader)),
5616 rbtdb->common.rdclass,
5617 (dns_rdatatype_t)header->type,
5619 if (result == ISC_R_SUCCESS) {
5621 * If 'header' has the same serial number as
5622 * we do, we could clean it up now if we knew
5623 * that our caller had no references to it.
5624 * We don't know this, however, so we leave it
5625 * alone. It will get cleaned up when
5626 * clean_zone_node() runs.
5628 free_rdataset(rbtdb, rbtdb->common.mctx,
5630 newheader = (rdatasetheader_t *)merged;
5631 if (loading && RESIGN(newheader) &&
5633 header->resign < newheader->resign)
5634 newheader->resign = header->resign;
5636 free_rdataset(rbtdb, rbtdb->common.mctx,
5642 * Don't replace existing NS, A and AAAA RRsets
5643 * in the cache if they are already exist. This
5644 * prevents named being locked to old servers.
5645 * Don't lower trust of existing record if the
5648 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5649 header->type == dns_rdatatype_ns &&
5650 !header_nx && !newheader_nx &&
5651 header->trust >= newheader->trust &&
5652 dns_rdataslab_equalx((unsigned char *)header,
5653 (unsigned char *)newheader,
5654 (unsigned int)(sizeof(*newheader)),
5655 rbtdb->common.rdclass,
5656 (dns_rdatatype_t)header->type)) {
5658 * Honour the new ttl if it is less than the
5661 if (header->rdh_ttl > newheader->rdh_ttl)
5662 set_ttl(rbtdb, header, newheader->rdh_ttl);
5663 if (header->noqname == NULL &&
5664 newheader->noqname != NULL) {
5665 header->noqname = newheader->noqname;
5666 newheader->noqname = NULL;
5668 if (header->closest == NULL &&
5669 newheader->closest != NULL) {
5670 header->closest = newheader->closest;
5671 newheader->closest = NULL;
5673 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5674 if (addedrdataset != NULL)
5675 bind_rdataset(rbtdb, rbtnode, header, now,
5677 return (ISC_R_SUCCESS);
5679 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5680 (header->type == dns_rdatatype_a ||
5681 header->type == dns_rdatatype_aaaa) &&
5682 !header_nx && !newheader_nx &&
5683 header->trust >= newheader->trust &&
5684 dns_rdataslab_equal((unsigned char *)header,
5685 (unsigned char *)newheader,
5686 (unsigned int)(sizeof(*newheader)))) {
5688 * Honour the new ttl if it is less than the
5691 if (header->rdh_ttl > newheader->rdh_ttl)
5692 set_ttl(rbtdb, header, newheader->rdh_ttl);
5693 if (header->noqname == NULL &&
5694 newheader->noqname != NULL) {
5695 header->noqname = newheader->noqname;
5696 newheader->noqname = NULL;
5698 if (header->closest == NULL &&
5699 newheader->closest != NULL) {
5700 header->closest = newheader->closest;
5701 newheader->closest = NULL;
5703 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5704 if (addedrdataset != NULL)
5705 bind_rdataset(rbtdb, rbtnode, header, now,
5707 return (ISC_R_SUCCESS);
5709 INSIST(rbtversion == NULL ||
5710 rbtversion->serial >= topheader->serial);
5711 if (topheader_prev != NULL)
5712 topheader_prev->next = newheader;
5714 rbtnode->data = newheader;
5715 newheader->next = topheader->next;
5718 * There are no other references to 'header' when
5719 * loading, so we MAY clean up 'header' now.
5720 * Since we don't generate changed records when
5721 * loading, we MUST clean up 'header' now.
5723 newheader->down = NULL;
5724 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5726 newheader->down = topheader;
5727 topheader->next = newheader;
/* Record that this node was modified in the version being changed. */
5729 if (changed != NULL)
5730 changed->dirty = ISC_TRUE;
/* No version (cache case): expire the header that was just superseded. */
5731 if (rbtversion == NULL) {
5732 set_ttl(rbtdb, header, 0);
5733 header->attributes |= RDATASET_ATTR_STALE;
/*
 * Cache headers go on the per-bucket rdatasets list and heap; headers
 * of other databases with RESIGN set go through resign_insert().
 */
5735 idx = newheader->node->locknum;
5736 if (IS_CACHE(rbtdb)) {
5737 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5740 * XXXMLG We don't check the return value
5741 * here. If it fails, we will not do TTL
5742 * based expiry on this node. However, we
5743 * will do it on the LRU side, so memory
5744 * will not leak... for long.
5746 isc_heap_insert(rbtdb->heaps[idx], newheader);
5747 } else if (RESIGN(newheader))
5748 resign_insert(rbtdb, idx, newheader);
5752 * No non-IGNORED rdatasets of the given type exist at
5757 * If we're trying to delete the type, don't bother.
5760 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5761 return (DNS_R_UNCHANGED);
5764 if (topheader != NULL) {
5766 * We have an list of rdatasets of the given type,
5767 * but they're all marked IGNORE. We simply insert
5768 * the new rdataset at the head of the list.
5770 * Ignored rdatasets cannot occur during loading, so
5774 INSIST(rbtversion == NULL ||
5775 rbtversion->serial >= topheader->serial);
5776 if (topheader_prev != NULL)
5777 topheader_prev->next = newheader;
5779 rbtnode->data = newheader;
5780 newheader->next = topheader->next;
5781 newheader->down = topheader;
5782 topheader->next = newheader;
5784 if (changed != NULL)
5785 changed->dirty = ISC_TRUE;
5788 * No rdatasets of the given type exist at the node.
/* Push newheader onto the front of the node list as a new top-level header. */
5790 newheader->next = rbtnode->data;
5791 newheader->down = NULL;
5792 rbtnode->data = newheader;
5794 idx = newheader->node->locknum;
5795 if (IS_CACHE(rbtdb)) {
5796 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5798 isc_heap_insert(rbtdb->heaps[idx], newheader);
5799 } else if (RESIGN(newheader)) {
5800 resign_insert(rbtdb, idx, newheader);
5805 * Check if the node now contains CNAME and other data.
5807 if (rbtversion != NULL &&
5808 cname_and_other_data(rbtnode, rbtversion->serial))
5809 return (DNS_R_CNAMEANDOTHER);
5811 if (addedrdataset != NULL)
5812 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5814 return (ISC_R_SUCCESS);
/*
 * delegating_type: test whether type is a delegation type at node.
 *
 * For a cache only DNAME is tested.  Otherwise the test is DNAME, or
 * NS at a node other than the origin node (or in a stub database).
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably ISC_TRUE is returned when a test matches and ISC_FALSE
 * otherwise - confirm.
 */
5817 static inline isc_boolean_t
5818 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5819 rbtdb_rdatatype_t type)
5821 if (IS_CACHE(rbtdb)) {
5822 if (type == dns_rdatatype_dname)
5826 } else if (type == dns_rdatatype_dname ||
5827 (type == dns_rdatatype_ns &&
5828 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * addnoqname: copy the noqname data attached to rdataset into a newly
 * allocated struct noqname and store it in newheader->noqname.
 *
 * The owner name is duplicated and the neg and negsig rdatasets are
 * converted to slabs, all using the memory context of the database.
 * Returns ISC_R_SUCCESS once newheader->noqname has been set; result
 * is set to ISC_R_NOMEMORY when the struct cannot be allocated.
 * Failure of dns_rdataset_getnoqname() is treated as fatal
 * (RUNTIME_CHECK).
 *
 * NOTE(review): the statements that follow each failed result check
 * are not visible in this excerpt; presumably they jump to the
 * trailing cleanup statements.  If so, the allocation-failure path
 * reaches free_noqname() with noqname == NULL - confirm that
 * free_noqname() tolerates that, or guard the call.
 */
5833 static inline isc_result_t
5834 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5835 dns_rdataset_t *rdataset)
5837 struct noqname *noqname;
5838 isc_mem_t *mctx = rbtdb->common.mctx;
5840 dns_rdataset_t neg, negsig;
5841 isc_result_t result;
5844 dns_name_init(&name, NULL);
5845 dns_rdataset_init(&neg);
5846 dns_rdataset_init(&negsig);
5848 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5849 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5851 noqname = isc_mem_get(mctx, sizeof(*noqname));
5852 if (noqname == NULL) {
5853 result = ISC_R_NOMEMORY;
/* Start from an empty struct so partially built state can be told apart. */
5856 dns_name_init(&noqname->name, NULL);
5857 noqname->neg = NULL;
5858 noqname->negsig = NULL;
5859 noqname->type = neg.type;
5860 result = dns_name_dup(&name, mctx, &noqname->name);
5861 if (result != ISC_R_SUCCESS)
5863 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5864 if (result != ISC_R_SUCCESS)
5866 noqname->neg = r.base;
5867 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5868 if (result != ISC_R_SUCCESS)
5870 noqname->negsig = r.base;
/* Success: drop the temporary rdatasets and store the struct in newheader. */
5871 dns_rdataset_disassociate(&neg);
5872 dns_rdataset_disassociate(&negsig);
5873 newheader->noqname = noqname;
5874 return (ISC_R_SUCCESS);
/* The remaining statements release the temporaries and the struct. */
5877 dns_rdataset_disassociate(&neg);
5878 dns_rdataset_disassociate(&negsig);
5879 free_noqname(mctx, &noqname);
/*
 * addclosest: copy the closest-encloser data attached to rdataset into
 * a newly allocated struct noqname and store it in newheader->closest.
 *
 * The owner name is duplicated and the neg and negsig rdatasets are
 * converted to slabs, all using the memory context of the database.
 * Returns ISC_R_SUCCESS once newheader->closest has been set; result
 * is set to ISC_R_NOMEMORY when the struct cannot be allocated.
 * Failure of dns_rdataset_getclosest() is treated as fatal
 * (RUNTIME_CHECK).
 *
 * NOTE(review): the statements that follow each failed result check
 * are not visible in this excerpt; presumably they jump to the
 * trailing cleanup statements.  If so, the allocation-failure path
 * reaches free_noqname() with closest == NULL - confirm that
 * free_noqname() tolerates that, or guard the call.
 */
5883 static inline isc_result_t
5884 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5885 dns_rdataset_t *rdataset)
5887 struct noqname *closest;
5888 isc_mem_t *mctx = rbtdb->common.mctx;
5890 dns_rdataset_t neg, negsig;
5891 isc_result_t result;
5894 dns_name_init(&name, NULL);
5895 dns_rdataset_init(&neg);
5896 dns_rdataset_init(&negsig);
5898 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5899 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5901 closest = isc_mem_get(mctx, sizeof(*closest));
5902 if (closest == NULL) {
5903 result = ISC_R_NOMEMORY;
/* Start from an empty struct so partially built state can be told apart. */
5906 dns_name_init(&closest->name, NULL);
5907 closest->neg = NULL;
5908 closest->negsig = NULL;
5909 closest->type = neg.type;
5910 result = dns_name_dup(&name, mctx, &closest->name);
5911 if (result != ISC_R_SUCCESS)
5913 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5914 if (result != ISC_R_SUCCESS)
5916 closest->neg = r.base;
5917 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5918 if (result != ISC_R_SUCCESS)
5920 closest->negsig = r.base;
/* Success: drop the temporary rdatasets and store the struct in newheader. */
5921 dns_rdataset_disassociate(&neg);
5922 dns_rdataset_disassociate(&negsig);
5923 newheader->closest = closest;
5924 return (ISC_R_SUCCESS);
/* The remaining statements release the temporaries and the struct. */
5927 dns_rdataset_disassociate(&neg);
5928 dns_rdataset_disassociate(&negsig);
5929 free_noqname(mctx, &closest);
/*
 * Forward declaration of the zone method table.  The functions below
 * compare rbtdb->common.methods against its address to tell whether
 * the database is a zone database.
 */
5933 static dns_dbmethods_t zone_methods;
/*
 * addrdataset: convert rdataset into an rdatasetheader_t slab and add
 * it to node through add().
 *
 * version       - version to add to.  With a version the header takes
 *                 the version serial and the RESIGN attribute is
 *                 honoured; the lines setting serial 1 and copying the
 *                 NXDOMAIN, OPTOUT, NOQNAME and CLOSEST attributes are
 *                 presumably the no-version branch (structure partly
 *                 hidden in this excerpt).
 * now           - base time; the stored TTL is rdataset->ttl + now.
 * options       - passed through to add().
 * addedrdataset - passed through to add().
 *
 * For a zone database the REQUIRE checks that NSEC3-related rdatasets
 * and NSEC3 nodes go together.
 *
 * Locking: the tree write lock is taken when a delegating type is
 * added or when a cache is over memory, and the node lock is held in
 * write mode around add().  On success with a delegating type the
 * find_callback bit of the node is set.
 */
5936 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5937 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5938 dns_rdataset_t *addedrdataset)
5940 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5941 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5942 rbtdb_version_t *rbtversion = version;
5943 isc_region_t region;
5944 rdatasetheader_t *newheader;
5945 rdatasetheader_t *header;
5946 isc_result_t result;
5947 isc_boolean_t delegating;
5948 isc_boolean_t tree_locked = ISC_FALSE;
5950 REQUIRE(VALID_RBTDB(rbtdb));
5952 if (rbtdb->common.methods == &zone_methods)
5953 REQUIRE(((rbtnode->nsec3 &&
5954 (rdataset->type == dns_rdatatype_nsec3 ||
5955 rdataset->covers == dns_rdatatype_nsec3)) ||
5957 rdataset->type != dns_rdatatype_nsec3 &&
5958 rdataset->covers != dns_rdatatype_nsec3)));
5960 if (rbtversion == NULL) {
5962 isc_stdtime_get(&now);
/*
 * Build the slab with sizeof(rdatasetheader_t) passed as the reserved
 * length; the base of the region is then used as the header itself.
 */
5966 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5968 sizeof(rdatasetheader_t));
5969 if (result != ISC_R_SUCCESS)
5972 newheader = (rdatasetheader_t *)region.base;
5973 init_rdataset(rbtdb, newheader);
5974 set_ttl(rbtdb, newheader, rdataset->ttl + now);
5975 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5977 newheader->attributes = 0;
5978 newheader->noqname = NULL;
5979 newheader->closest = NULL;
5980 newheader->count = init_count++;
5981 newheader->trust = rdataset->trust;
5982 newheader->additional_auth = NULL;
5983 newheader->additional_glue = NULL;
5984 newheader->last_used = now;
5985 newheader->node = rbtnode;
5986 if (rbtversion != NULL) {
5987 newheader->serial = rbtversion->serial;
5990 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5991 newheader->attributes |= RDATASET_ATTR_RESIGN;
5992 newheader->resign = rdataset->resign;
5994 newheader->resign = 0;
5996 newheader->serial = 1;
5997 newheader->resign = 0;
5998 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5999 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6000 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6001 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6002 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6003 result = addnoqname(rbtdb, newheader, rdataset);
6004 if (result != ISC_R_SUCCESS) {
6005 free_rdataset(rbtdb, rbtdb->common.mctx,
6010 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6011 result = addclosest(rbtdb, newheader, rdataset);
6012 if (result != ISC_R_SUCCESS) {
6013 free_rdataset(rbtdb, rbtdb->common.mctx,
6021 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6022 * just DNAME for the cache), then we need to set the callback bit
6025 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6026 delegating = ISC_TRUE;
6028 delegating = ISC_FALSE;
6031 * If we're adding a delegation type or the DB is a cache in an overmem
6032 * state, hold an exclusive lock on the tree. In the latter case
6033 * the lock does not necessarily have to be acquired but it will help
6034 * purge stale entries more effectively.
6036 if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6037 tree_locked = ISC_TRUE;
6038 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
/* Over memory: purge entries from the lock bucket of this node first. */
6041 if (IS_CACHE(rbtdb) && rbtdb->overmem)
6042 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6044 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6045 isc_rwlocktype_write);
/* Count the new header in the rrset statistics when they are enabled. */
6047 if (rbtdb->rrsetstats != NULL) {
6048 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6049 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6052 if (IS_CACHE(rbtdb)) {
6054 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
/*
 * Expire the element at position 1 of the heap for this bucket if its
 * TTL is at or before now - RBTDB_VIRTUAL.
 */
6056 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6057 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6058 expire_header(rbtdb, header, tree_locked);
6061 * If we've been holding a write lock on the tree just for
6062 * cleaning, we can release it now. However, we still need the
6065 if (tree_locked && !delegating) {
6066 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6067 tree_locked = ISC_FALSE;
6071 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6072 addedrdataset, now);
6073 if (result == ISC_R_SUCCESS && delegating)
6074 rbtnode->find_callback = 1;
6076 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6077 isc_rwlocktype_write);
6080 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6083 * Update the zone's secure status. If version is non-NULL
6084 * this is deferred until closeversion() is called.
6086 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6087 iszonesecure(db, version, rbtdb->origin_node);
/*
 * subtractrdataset: remove the rdata in rdataset from the rdataset of
 * the same type at node, in the given version.
 *
 * rdataset is first converted to a slab header (newheader).  If an
 * existing header of the same type is found after skipping IGNORE
 * entries, dns_rdataslab_subtract() computes the difference:
 *   - ISC_R_SUCCESS: the resulting slab replaces newheader;
 *   - DNS_R_NXRRSET: all rdata would be removed, so a header flagged
 *     RDATASET_ATTR_NONEXISTENT is created instead;
 *   - otherwise newheader is freed.
 * The new header is then linked in front of topheader and the changed
 * record is marked dirty.  If no existing header is found the result
 * is DNS_R_NOTEXACT (with DNS_DBSUB_EXACT) or DNS_R_UNCHANGED.
 *
 * newrdataset, if non-NULL, is bound to the new header when result is
 * ISC_R_SUCCESS.  ISC_R_NOMEMORY is returned if the changed record
 * cannot be allocated.
 *
 * NOTE(review): rbtversion->serial is dereferenced unconditionally,
 * while the tail of the function still tests version == NULL; confirm
 * whether a NULL version can ever reach this function.
 */
6093 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6094 dns_rdataset_t *rdataset, unsigned int options,
6095 dns_rdataset_t *newrdataset)
6097 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6098 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6099 rbtdb_version_t *rbtversion = version;
6100 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6101 unsigned char *subresult;
6102 isc_region_t region;
6103 isc_result_t result;
6104 rbtdb_changed_t *changed;
6106 REQUIRE(VALID_RBTDB(rbtdb));
6108 if (rbtdb->common.methods == &zone_methods)
6109 REQUIRE(((rbtnode->nsec3 &&
6110 (rdataset->type == dns_rdatatype_nsec3 ||
6111 rdataset->covers == dns_rdatatype_nsec3)) ||
6113 rdataset->type != dns_rdatatype_nsec3 &&
6114 rdataset->covers != dns_rdatatype_nsec3)));
6116 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6118 sizeof(rdatasetheader_t));
6119 if (result != ISC_R_SUCCESS)
6121 newheader = (rdatasetheader_t *)region.base;
6122 init_rdataset(rbtdb, newheader);
6123 set_ttl(rbtdb, newheader, rdataset->ttl);
6124 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6126 newheader->attributes = 0;
6127 newheader->serial = rbtversion->serial;
6128 newheader->trust = 0;
6129 newheader->noqname = NULL;
6130 newheader->closest = NULL;
6131 newheader->count = init_count++;
6132 newheader->additional_auth = NULL;
6133 newheader->additional_glue = NULL;
6134 newheader->last_used = 0;
6135 newheader->node = rbtnode;
6136 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6137 newheader->attributes |= RDATASET_ATTR_RESIGN;
6138 newheader->resign = rdataset->resign;
6140 newheader->resign = 0;
/* The list manipulation below is done under the node write lock. */
6142 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6143 isc_rwlocktype_write);
6145 changed = add_changed(rbtdb, rbtversion, rbtnode);
6146 if (changed == NULL) {
6147 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6148 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6149 isc_rwlocktype_write);
6150 return (ISC_R_NOMEMORY);
/*
 * Find the top-level header of this type, remembering its predecessor
 * so the new header can be linked in its place.
 */
6153 topheader_prev = NULL;
6154 for (topheader = rbtnode->data;
6156 topheader = topheader->next) {
6157 if (topheader->type == newheader->type)
6159 topheader_prev = topheader;
6162 * If header isn't NULL, we've found the right type. There may be
6163 * IGNORE rdatasets between the top of the chain and the first real
6164 * data. We skip over them.
6167 while (header != NULL && IGNORE(header))
6168 header = header->down;
6169 if (header != NULL && EXISTS(header)) {
6170 unsigned int flags = 0;
6172 result = ISC_R_SUCCESS;
/* DNS_DBSUB_EXACT additionally requires the TTLs to match. */
6173 if ((options & DNS_DBSUB_EXACT) != 0) {
6174 flags |= DNS_RDATASLAB_EXACT;
6175 if (newheader->rdh_ttl != header->rdh_ttl)
6176 result = DNS_R_NOTEXACT;
6178 if (result == ISC_R_SUCCESS)
6179 result = dns_rdataslab_subtract(
6180 (unsigned char *)header,
6181 (unsigned char *)newheader,
6182 (unsigned int)(sizeof(*newheader)),
6184 rbtdb->common.rdclass,
6185 (dns_rdatatype_t)header->type,
6187 if (result == ISC_R_SUCCESS) {
6188 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6189 newheader = (rdatasetheader_t *)subresult;
6190 init_rdataset(rbtdb, newheader);
6192 * We have to set the serial since the rdataslab
6193 * subtraction routine copies the reserved portion of
6194 * header, not newheader.
6196 newheader->serial = rbtversion->serial;
6198 * XXXJT: dns_rdataslab_subtract() copied the pointers
6199 * to additional info. We need to clear these fields
6200 * to avoid having duplicated references.
6202 newheader->additional_auth = NULL;
6203 newheader->additional_glue = NULL;
6204 } else if (result == DNS_R_NXRRSET) {
6206 * This subtraction would remove all of the rdata;
6207 * add a nonexistent header instead.
6209 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6210 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6211 if (newheader == NULL) {
6212 result = ISC_R_NOMEMORY;
6215 set_ttl(rbtdb, newheader, 0);
6216 newheader->type = topheader->type;
6217 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6218 newheader->trust = 0;
6219 newheader->serial = rbtversion->serial;
6220 newheader->noqname = NULL;
6221 newheader->closest = NULL;
6222 newheader->count = 0;
6223 newheader->additional_auth = NULL;
6224 newheader->additional_glue = NULL;
6225 newheader->node = rbtnode;
6226 newheader->resign = 0;
6227 newheader->last_used = 0;
6229 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6234 * If we're here, we want to link newheader in front of
6237 INSIST(rbtversion->serial >= topheader->serial);
6238 if (topheader_prev != NULL)
6239 topheader_prev->next = newheader;
6241 rbtnode->data = newheader;
6242 newheader->next = topheader->next;
6243 newheader->down = topheader;
6244 topheader->next = newheader;
6246 changed->dirty = ISC_TRUE;
6249 * The rdataset doesn't exist, so we don't need to do anything
6250 * to satisfy the deletion request.
6252 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6253 if ((options & DNS_DBSUB_EXACT) != 0)
6254 result = DNS_R_NOTEXACT;
6256 result = DNS_R_UNCHANGED;
6259 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6260 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6263 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6264 isc_rwlocktype_write);
6267 * Update the zone's secure status. If version is non-NULL
6268 * this is deferred until closeversion() is called.
6270 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6271 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * deleterdataset: delete the rdataset of type/covers at node by adding
 * a header flagged RDATASET_ATTR_NONEXISTENT through add() with
 * DNS_DBADD_FORCE.
 *
 * Returns ISC_R_NOTIMPLEMENTED for type ANY and for RRSIG with
 * covers == 0, and ISC_R_NOMEMORY if the header cannot be allocated.
 * The header takes the serial of the version when one is given; the
 * serial = 0 assignment is presumably the branch for no version.
 * add() is called under the node write lock.
 *
 * NOTE(review): the final return is not visible in this excerpt;
 * presumably the result of add() is returned - confirm.
 */
6277 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6278 dns_rdatatype_t type, dns_rdatatype_t covers)
6280 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6281 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6282 rbtdb_version_t *rbtversion = version;
6283 isc_result_t result;
6284 rdatasetheader_t *newheader;
6286 REQUIRE(VALID_RBTDB(rbtdb));
6288 if (type == dns_rdatatype_any)
6289 return (ISC_R_NOTIMPLEMENTED);
6290 if (type == dns_rdatatype_rrsig && covers == 0)
6291 return (ISC_R_NOTIMPLEMENTED);
/* Build an empty header that only marks the type as nonexistent. */
6293 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6294 if (newheader == NULL)
6295 return (ISC_R_NOMEMORY);
6296 set_ttl(rbtdb, newheader, 0);
6297 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6298 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6299 newheader->trust = 0;
6300 newheader->noqname = NULL;
6301 newheader->closest = NULL;
6302 newheader->additional_auth = NULL;
6303 newheader->additional_glue = NULL;
6304 if (rbtversion != NULL)
6305 newheader->serial = rbtversion->serial;
6307 newheader->serial = 0;
6308 newheader->count = 0;
6309 newheader->last_used = 0;
6310 newheader->node = rbtnode;
6312 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6313 isc_rwlocktype_write);
/* No rdataset is bound (NULL) and now is passed as 0. */
6315 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6316 ISC_FALSE, NULL, 0);
6318 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6319 isc_rwlocktype_write);
6322 * Update the zone's secure status. If version is non-NULL
6323 * this is deferred until closeversion() is called.
6325 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6326 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * loading_addrdataset: callback that adds one rdataset at name while
 * the database is being loaded.  arg is an rbtdb_load_t carrying the
 * database and the load time.
 *
 * Rejected input:
 *   - SOA not at the origin of a non-cache database: DNS_R_NOTZONETOP;
 *   - NS at a wildcard name: DNS_R_INVALIDNS;
 *   - NSEC3 at a wildcard name: DNS_R_INVALIDNSEC3.
 *
 * NSEC3 rdatasets (and rdatasets covering NSEC3) are added to the
 * nsec3 tree, everything else to the main tree.  For a node that did
 * not already exist the lock bucket number is computed.  The header is
 * then added to the current version through add() with
 * DNS_DBADD_MERGE and loading set; DNS_R_UNCHANGED from add() is
 * mapped to ISC_R_SUCCESS, and find_callback is set on the node for
 * delegating types.
 *
 * NOTE(review): the statements following several result checks and the
 * final return are not visible in this excerpt.
 */
6332 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6333 rbtdb_load_t *loadctx = arg;
6334 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6335 dns_rbtnode_t *node;
6336 isc_result_t result;
6337 isc_region_t region;
6338 rdatasetheader_t *newheader;
6341 * This routine does no node locking. See comments in
6342 * 'load' below for more information on loading and
6348 * SOA records are only allowed at top of zone.
6350 if (rdataset->type == dns_rdatatype_soa &&
6351 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6352 return (DNS_R_NOTZONETOP);
/* NSEC3-related rdatasets do not get empty wildcard nodes added. */
6354 if (rdataset->type != dns_rdatatype_nsec3 &&
6355 rdataset->covers != dns_rdatatype_nsec3)
6356 add_empty_wildcards(rbtdb, name);
6358 if (dns_name_iswildcard(name)) {
6360 * NS record owners cannot legally be wild cards.
6362 if (rdataset->type == dns_rdatatype_ns)
6363 return (DNS_R_INVALIDNS);
6365 * NSEC3 record owners cannot legally be wild cards.
6367 if (rdataset->type == dns_rdatatype_nsec3)
6368 return (DNS_R_INVALIDNSEC3);
6369 result = add_wildcard_magic(rbtdb, name);
6370 if (result != ISC_R_SUCCESS)
/* Pick the tree: the nsec3 tree for NSEC3-related data, else the main tree. */
6375 if (rdataset->type == dns_rdatatype_nsec3 ||
6376 rdataset->covers == dns_rdatatype_nsec3) {
6377 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6378 if (result == ISC_R_SUCCESS)
6381 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6382 if (result == ISC_R_SUCCESS)
6385 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
/* A newly created node still needs its lock bucket number assigned. */
6387 if (result != ISC_R_EXISTS) {
6388 dns_name_t foundname;
6389 dns_name_init(&foundname, NULL);
6390 dns_rbt_namefromnode(node, &foundname);
6391 #ifdef DNS_RBT_USEHASH
6392 node->locknum = node->hashval % rbtdb->node_lock_count;
6394 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6395 rbtdb->node_lock_count;
6399 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6401 sizeof(rdatasetheader_t));
6402 if (result != ISC_R_SUCCESS)
6404 newheader = (rdatasetheader_t *)region.base;
6405 init_rdataset(rbtdb, newheader);
6406 set_ttl(rbtdb, newheader,
6407 rdataset->ttl + loadctx->now); /* XXX overflow check */
6408 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6410 newheader->attributes = 0;
6411 newheader->trust = rdataset->trust;
6412 newheader->serial = 1;
6413 newheader->noqname = NULL;
6414 newheader->closest = NULL;
6415 newheader->count = init_count++;
6416 newheader->additional_auth = NULL;
6417 newheader->additional_glue = NULL;
6418 newheader->last_used = 0;
6419 newheader->node = node;
6420 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6421 newheader->attributes |= RDATASET_ATTR_RESIGN;
6422 newheader->resign = rdataset->resign;
6424 newheader->resign = 0;
/* Merge into the current version with the loading flag set. */
6426 result = add(rbtdb, node, rbtdb->current_version, newheader,
6427 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6428 if (result == ISC_R_SUCCESS &&
6429 delegating_type(rbtdb, node, rdataset->type))
6430 node->find_callback = 1;
6431 else if (result == DNS_R_UNCHANGED)
6432 result = ISC_R_SUCCESS;
/*
 * beginload: start loading the database.
 *
 * Allocates an rbtdb_load_t from the memory context of the database
 * (ISC_R_NOMEMORY on failure), records the database in it and, for a
 * cache, stores the current time in loadctx->now.  Under the database
 * write lock the LOADED/LOADING attributes are checked and
 * RBTDB_ATTR_LOADING is set.  *addp is set to loading_addrdataset and
 * ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the non-cache value of loadctx->now, the right-hand
 * side of the REQUIRE and the assignment to *dbloadp are not visible
 * in this excerpt.
 */
6438 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6439 rbtdb_load_t *loadctx;
6442 rbtdb = (dns_rbtdb_t *)db;
6444 REQUIRE(VALID_RBTDB(rbtdb));
6446 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6447 if (loadctx == NULL)
6448 return (ISC_R_NOMEMORY);
6450 loadctx->rbtdb = rbtdb;
6451 if (IS_CACHE(rbtdb))
6452 isc_stdtime_get(&loadctx->now);
/* The attribute check and update are done under the database write lock. */
6456 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6458 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6460 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6462 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6464 *addp = loading_addrdataset;
6467 return (ISC_R_SUCCESS);
/*
 * Finish a load started on this database.
 *
 * Under the database write lock the LOADING attribute is cleared and the
 * LOADED attribute is set; the function requires that LOADING was set and
 * LOADED was not.  For non-cache databases iszonesecure() is then run on
 * the current version and origin node.  Finally the load context is
 * returned to the database memory context.
 *
 * db      - database being loaded; must pass VALID_RBTDB.
 * dbloadp - must be non-NULL; the load context it designates must belong
 *           to this database (the line that reads loadctx from it is not
 *           visible in this excerpt).
 *
 * Returns ISC_R_SUCCESS.
 */
6471 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6472 rbtdb_load_t *loadctx;
6473 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6475 REQUIRE(VALID_RBTDB(rbtdb));
6476 REQUIRE(dbloadp != NULL);
6478 REQUIRE(loadctx->rbtdb == rbtdb);
6480 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6482 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6483 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6485 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6486 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6488 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6491 * If there's a KEY rdataset at the zone origin containing a
6492 * zone key, we consider the zone secure.
6494 if (! IS_CACHE(rbtdb))
6495 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6499 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6501 return (ISC_R_SUCCESS);
/*
 * Dump one version of the database to a file.
 *
 * Thin wrapper: after validating the database it forwards to
 * dns_master_dump2() using the database memory context, the style
 * dns_master_style_default and the caller supplied filename and
 * masterformat, and returns that call's result unchanged.
 */
6505 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6506 dns_masterformat_t masterformat) {
6509 rbtdb = (dns_rbtdb_t *)db;
6511 REQUIRE(VALID_RBTDB(rbtdb));
6513 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6514 &dns_master_style_default,
6515 filename, masterformat));
/*
 * Data deletion callback; arg is the owning dns_rbtdb_t.
 *
 * Walks a chain of rdataset headers through their next pointers and
 * releases each one with free_rdataset().  The whole walk happens under
 * the write lock of the node lock bucket taken from the first header.
 *
 * NOTE(review): the line that initialises current is not visible in this
 * excerpt; presumably it is set from data, the head of the chain.
 */
6519 delete_callback(void *data, void *arg) {
6520 dns_rbtdb_t *rbtdb = arg;
6521 rdatasetheader_t *current, *next;
6522 unsigned int locknum;
6525 locknum = current->node->locknum;
6526 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6527 while (current != NULL) {
/* Save the successor before the header is freed. */
6528 next = current->next;
6529 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6532 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/*
 * Report whether the current version of the database is marked
 * dns_db_secure.  The flag is sampled while holding the tree lock for
 * reading.
 */
6535 static isc_boolean_t
6536 issecure(dns_db_t *db) {
6538 isc_boolean_t secure;
6540 rbtdb = (dns_rbtdb_t *)db;
6542 REQUIRE(VALID_RBTDB(rbtdb));
6544 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6545 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6546 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Report whether the current version carries any DNSSEC state, that is,
 * whether its secure field is anything other than dns_db_insecure.  The
 * field is sampled while holding the tree lock for reading.
 */
6551 static isc_boolean_t
6552 isdnssec(dns_db_t *db) {
6554 isc_boolean_t dnssec;
6556 rbtdb = (dns_rbtdb_t *)db;
6558 REQUIRE(VALID_RBTDB(rbtdb));
6560 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6561 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6562 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Count the nodes of the main tree (rbtdb->tree) with dns_rbt_nodecount()
 * while holding the tree lock for reading.  Only the main tree is
 * consulted in the lines visible here.
 */
6568 nodecount(dns_db_t *db) {
6572 rbtdb = (dns_rbtdb_t *)db;
6574 REQUIRE(VALID_RBTDB(rbtdb));
6576 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6577 count = dns_rbt_nodecount(rbtdb->tree);
6578 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Replace the task associated with the database.
 *
 * Under the database write lock any task currently attached is detached
 * and the database attaches to the supplied task.
 *
 * NOTE(review): a line between the detach and the attach is not visible
 * in this excerpt; it may guard the attach against a NULL task.
 */
6584 settask(dns_db_t *db, isc_task_t *task) {
6587 rbtdb = (dns_rbtdb_t *)db;
6589 REQUIRE(VALID_RBTDB(rbtdb));
6591 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6592 if (rbtdb->task != NULL)
6593 isc_task_detach(&rbtdb->task);
6595 isc_task_attach(task, &rbtdb->task);
6596 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Boolean query on the database.
 *
 * NOTE(review): the body is not visible in this excerpt.  Judging by the
 * name it presumably reports whether the database has persistent backing
 * storage; confirm against the full source.
 */
6599 static isc_boolean_t
6600 ispersistent(dns_db_t *db) {
/*
 * Hand out a referenced pointer to the origin node of the database.
 *
 * db    - database; must pass VALID_RBTDB.
 * nodep - must be non-NULL and point at NULL; receives the origin node.
 *
 * When an origin node exists a new reference is taken on it under the
 * strong node lock of its bucket and the node is stored in *nodep.  When
 * there is none the database must be a cache (INSIST) and the result is
 * ISC_R_NOTFOUND.
 */
6606 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6607 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6608 dns_rbtnode_t *onode;
6609 isc_result_t result = ISC_R_SUCCESS;
6611 REQUIRE(VALID_RBTDB(rbtdb));
6612 REQUIRE(nodep != NULL && *nodep == NULL);
6614 /* Note that the access to origin_node doesn't require a DB lock */
6615 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6616 if (onode != NULL) {
/* The reference count is adjusted under the bucket lock of the node. */
6617 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6618 new_reference(rbtdb, onode);
6619 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6621 *nodep = rbtdb->origin_node;
6623 INSIST(IS_CACHE(rbtdb));
6624 result = ISC_R_NOTFOUND;
/*
 * Copy the NSEC3 parameters recorded in a database version to the caller.
 *
 * db          - database; must pass VALID_RBTDB.
 * version     - version to query; NULL selects the current version.
 * hash, flags, iterations - receive the corresponding stored values.
 * salt        - buffer receiving the salt; only written when both salt
 *               and salt_length are non-NULL.
 * salt_length - on entry the capacity of salt, which must be at least the
 *               stored salt length (REQUIRE); on exit the stored length.
 *
 * The version is read while holding the tree lock for reading.  The
 * result is ISC_R_SUCCESS when the version has NSEC3 parameters
 * (havensec3) and ISC_R_NOTFOUND otherwise.
 *
 * NOTE(review): the lines immediately before the hash and flags stores
 * are not visible in this excerpt; they may be NULL guards like the ones
 * shown for salt_length and iterations.
 */
6631 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6632 isc_uint8_t *flags, isc_uint16_t *iterations,
6633 unsigned char *salt, size_t *salt_length)
6636 isc_result_t result = ISC_R_NOTFOUND;
6637 rbtdb_version_t *rbtversion = version;
6639 rbtdb = (dns_rbtdb_t *)db;
6641 REQUIRE(VALID_RBTDB(rbtdb));
6643 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6645 if (rbtversion == NULL)
6646 rbtversion = rbtdb->current_version;
6648 if (rbtversion->havensec3) {
6650 *hash = rbtversion->hash;
6651 if (salt != NULL && salt_length != NULL) {
/* The caller buffer must be large enough for the stored salt. */
6652 REQUIRE(*salt_length >= rbtversion->salt_length);
6653 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6655 if (salt_length != NULL)
6656 *salt_length = rbtversion->salt_length;
6657 if (iterations != NULL)
6658 *iterations = rbtversion->iterations;
6660 *flags = rbtversion->flags;
6661 result = ISC_R_SUCCESS;
6663 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Set the re-signing time of an rdataset and keep the per-bucket
 * re-signing heap consistent with it.
 *
 * db       - zone database (cache databases are rejected by REQUIRE).
 * rdataset - bound rdataset; its header is derived from private3.
 * resign   - new re-signing time.
 *
 * Everything happens under the write lock of the node lock bucket of the
 * header.  The new time is stored in the header.  If the header is
 * already on the heap (heap_index nonzero) it is either removed from the
 * heap or repositioned with isc_heap_increased() when the new time is
 * earlier than the old one, and with isc_heap_decreased() on the remaining
 * path.  If it is not on the heap and resign is nonzero, the header is
 * flagged RDATASET_ATTR_RESIGN and inserted with resign_insert(), whose
 * result becomes the return value.
 *
 * NOTE(review): the condition selecting removal from the heap, and an
 * adjustment of header after it is read from private3, are on lines not
 * visible in this excerpt.
 */
6669 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6670 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6671 isc_stdtime_t oldresign;
6672 isc_result_t result = ISC_R_SUCCESS;
6673 rdatasetheader_t *header;
6675 REQUIRE(VALID_RBTDB(rbtdb));
6676 REQUIRE(!IS_CACHE(rbtdb));
6677 REQUIRE(rdataset != NULL);
6679 header = rdataset->private3;
6682 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6683 isc_rwlocktype_write);
/* Remember the old time so the direction of the heap move can be chosen. */
6685 oldresign = header->resign;
6686 header->resign = resign;
6687 if (header->heap_index != 0) {
6688 INSIST(RESIGN(header));
6690 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6691 header->heap_index);
6692 header->heap_index = 0;
6693 } else if (resign < oldresign)
6694 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6695 header->heap_index);
6697 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6698 header->heap_index);
6699 } else if (resign && header->heap_index == 0) {
6700 header->attributes |= RDATASET_ATTR_RESIGN;
6701 result = resign_insert(rbtdb, header->node->locknum, header);
6703 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6704 isc_rwlocktype_write);
/*
 * Find the rdataset that is due for re-signing first.
 *
 * db        - database; must pass VALID_RBTDB.
 * rdataset  - receives the binding of the selected header.
 * foundname - when non-NULL receives the full name of the owning node.
 *
 * With the database lock held for reading, the top element (index 1) of
 * every per-bucket heap is examined under the read lock of that bucket.
 * A candidate replaces the best header found so far when its resign time
 * is earlier in serial-number order (isc_serial_lt).  The bucket lock of
 * the current best header stays held until a better one is found or the
 * result has been bound, and is released after bind_rdataset().
 *
 * The result starts as ISC_R_NOTFOUND and becomes ISC_R_SUCCESS once a
 * header has been bound.
 *
 * NOTE(review): the tests for an empty heap and for the first candidate
 * are on lines not visible in this excerpt.
 */
6709 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6710 dns_name_t *foundname)
6712 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6713 rdatasetheader_t *header = NULL, *this;
6715 isc_result_t result = ISC_R_NOTFOUND;
6716 unsigned int locknum;
6718 REQUIRE(VALID_RBTDB(rbtdb));
6720 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6722 for (i = 0; i < rbtdb->node_lock_count; i++) {
6723 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
/* Index 1 is the top of the heap for this bucket. */
6724 this = isc_heap_element(rbtdb->heaps[i], 1);
6726 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6727 isc_rwlocktype_read);
6732 else if (isc_serial_lt(this->resign, header->resign)) {
/* A better candidate was found: release the bucket of the previous one. */
6733 locknum = header->node->locknum;
6734 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6735 isc_rwlocktype_read);
6738 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6739 isc_rwlocktype_read);
6745 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6747 if (foundname != NULL)
6748 dns_rbt_fullnamefromnode(header->node, foundname);
6750 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6751 isc_rwlocktype_read);
6753 result = ISC_R_SUCCESS;
6756 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
/*
 * Record that an rdataset has been re-signed in the open writer version.
 *
 * db       - database; must pass VALID_RBTDB.
 * rdataset - bound rdataset; node comes from private2 and the header is
 *            derived from private3.
 * version  - must be the future (writer) version of the database.
 *
 * Under the database write lock and the write lock of the node bucket,
 * the node gains a reference, the header is removed from the re-signing
 * heap of its bucket, its heap_index is cleared, and it is appended to
 * the resigned_list of the version.
 *
 * NOTE(review): a line between reading private3 and taking the locks is
 * not visible in this excerpt; it may adjust header.
 */
6762 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6764 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6765 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6766 dns_rbtnode_t *node;
6767 rdatasetheader_t *header;
6769 REQUIRE(VALID_RBTDB(rbtdb));
6770 REQUIRE(rdataset != NULL);
6771 REQUIRE(rbtdb->future_version == rbtversion);
6772 REQUIRE(rbtversion->writer);
6774 node = rdataset->private2;
6775 header = rdataset->private3;
6778 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6779 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6780 isc_rwlocktype_write);
6782 * Delete from heap and save to re-signed list so that it can
6783 * be restored if we backout of this change.
6785 new_reference(rbtdb, node);
6786 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6787 header->heap_index = 0;
6788 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6790 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6791 isc_rwlocktype_write);
6792 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Return the rrset statistics object stored in the database.  Only cache
 * databases are accepted (REQUIRE).  The pointer is returned as stored;
 * no attach call appears in this function.
 */
6795 static dns_stats_t *
6796 getrrsetstats(dns_db_t *db) {
6797 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6799 REQUIRE(VALID_RBTDB(rbtdb));
6800 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6802 return (rbtdb->rrsetstats);
/*
 * Method table installed as common.methods by the database constructor
 * for every database type other than dns_dbtype_cache (stub and ordinary
 * zone databases).  The initialiser entries are not visible in this
 * excerpt.
 */
6805 static dns_dbmethods_t zone_methods = {
/*
 * Method table installed as common.methods by the database constructor
 * when the database type is dns_dbtype_cache.  The initialiser entries
 * are not visible in this excerpt.
 */
6844 static dns_dbmethods_t cache_methods = {
/*
 * Database constructor.  The function name lines sit between the #ifdef
 * below and the parameter list and are not visible in this excerpt.
 *
 * mctx    - memory context used for every allocation; the database
 *           attaches to it once the lock arrays have been built.
 * origin  - origin name; duplicated into common.origin.
 * type    - dns_dbtype_cache selects cache_methods and DNS_DBATTR_CACHE,
 *           dns_dbtype_stub selects zone_methods and DNS_DBATTR_STUB, any
 *           other value selects zone_methods.
 * rdclass - stored in common.rdclass.
 * dbp     - receives the new database on success.
 *
 * Construction order: database lock, tree lock, node lock array, (cache
 * only) rrset statistics and per-bucket rdataset lists, per-bucket heaps,
 * dead node lists, per-bucket node locks and reference counters, memory
 * context attachment, origin name copy, main tree, nsec3 tree, (non-cache
 * only) origin node and its lock bucket number, database reference count,
 * initial version.
 *
 * Failures before the memory context is attached jump to the unwinding
 * code at the end of the function; later failures call free_rbtdb().
 * ISC_R_SUCCESS is returned once the magic numbers are set and *dbp is
 * stored.
 */
6884 #ifdef DNS_RBTDB_VERSION64
6889 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6890 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6891 void *driverarg, dns_db_t **dbp)
6894 isc_result_t result;
6897 isc_boolean_t (*sooner)(void *, void *);
6899 /* Keep the compiler happy. */
6904 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6906 return (ISC_R_NOMEMORY);
6908 memset(rbtdb, '\0', sizeof(*rbtdb));
6909 dns_name_init(&rbtdb->common.origin, NULL);
6910 rbtdb->common.attributes = 0;
6911 if (type == dns_dbtype_cache) {
6912 rbtdb->common.methods = &cache_methods;
6913 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6914 } else if (type == dns_dbtype_stub) {
6915 rbtdb->common.methods = &zone_methods;
6916 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6918 rbtdb->common.methods = &zone_methods;
6919 rbtdb->common.rdclass = rdclass;
6920 rbtdb->common.mctx = NULL;
/* The two database-wide locks come first; the unwinding code destroys them last. */
6922 result = RBTDB_INITLOCK(&rbtdb->lock);
6923 if (result != ISC_R_SUCCESS)
6926 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6927 if (result != ISC_R_SUCCESS)
6931 * Initialize node_lock_count in a generic way to support future
6932 * extension which allows the user to specify this value on creation.
6933 * Note that when specified for a cache DB it must be larger than 1
6934 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6936 if (rbtdb->node_lock_count == 0) {
6937 if (IS_CACHE(rbtdb))
6938 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6940 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6941 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6942 result = ISC_R_RANGE;
6943 goto cleanup_tree_lock;
/* The bucket number has to fit in DNS_RBT_LOCKLENGTH bits. */
6945 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6946 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6947 sizeof(rbtdb_nodelock_t));
6948 if (rbtdb->node_locks == NULL) {
6949 result = ISC_R_NOMEMORY;
6950 goto cleanup_tree_lock;
6953 rbtdb->rrsetstats = NULL;
/* Statistics and per-bucket rdataset lists exist only for cache databases. */
6954 if (IS_CACHE(rbtdb)) {
6955 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6956 if (result != ISC_R_SUCCESS)
6957 goto cleanup_node_locks;
6958 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6959 sizeof(rdatasetheaderlist_t));
6960 if (rbtdb->rdatasets == NULL) {
6961 result = ISC_R_NOMEMORY;
6962 goto cleanup_rrsetstats;
6964 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6965 ISC_LIST_INIT(rbtdb->rdatasets[i]);
6967 rbtdb->rdatasets = NULL;
6972 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6973 sizeof(isc_heap_t *));
6974 if (rbtdb->heaps == NULL) {
6975 result = ISC_R_NOMEMORY;
6976 goto cleanup_rdatasets;
/* Clear every slot first so the unwinding code can tell created heaps from missing ones. */
6978 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6979 rbtdb->heaps[i] = NULL;
/* Cache databases use ttl_sooner as the heap comparison, all others resign_sooner. */
6980 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6981 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6982 result = isc_heap_create(mctx, sooner, set_index, 0,
6984 if (result != ISC_R_SUCCESS)
6989 * Create deadnode lists.
6991 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6992 sizeof(rbtnodelist_t));
6993 if (rbtdb->deadnodes == NULL) {
6994 result = ISC_R_NOMEMORY;
6997 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6998 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7000 rbtdb->active = rbtdb->node_lock_count;
/*
 * Give every bucket a lock and a reference counter starting at zero.  The
 * failure branch destroys locks and counters again before unwinding;
 * NOTE(review): its loop header is not visible in this excerpt, presumably
 * it walks back over the buckets already initialised.
 */
7002 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7003 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7004 if (result == ISC_R_SUCCESS) {
7005 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7006 if (result != ISC_R_SUCCESS)
7007 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7009 if (result != ISC_R_SUCCESS) {
7011 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7012 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7013 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7015 goto cleanup_deadnodes;
7017 rbtdb->node_locks[i].exiting = ISC_FALSE;
7021 * Attach to the mctx. The database will persist so long as there
7022 * are references to it, and attaching to the mctx ensures that our
7023 * mctx won't disappear out from under us.
7025 isc_mem_attach(mctx, &rbtdb->common.mctx);
7028 * Must be initialized before free_rbtdb() is called.
7030 isc_ondestroy_init(&rbtdb->common.ondest);
7033 * Make a copy of the origin name.
7035 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7036 if (result != ISC_R_SUCCESS) {
7037 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7042 * Make the Red-Black Trees.
7044 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7045 if (result != ISC_R_SUCCESS) {
7046 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7050 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7051 if (result != ISC_R_SUCCESS) {
7052 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7057 * In order to set the node callback bit correctly in zone databases,
7058 * we need to know if the node has the origin name of the zone.
7059 * In loading_addrdataset() we could simply compare the new name
7060 * to the origin name, but this is expensive. Also, we don't know the
7061 * node name in addrdataset(), so we need another way of knowing the
7064 * We now explicitly create a node for the zone's origin, and then
7065 * we simply remember the node's address. This is safe, because
7066 * the top-of-zone node can never be deleted, nor can its address
7069 if (!IS_CACHE(rbtdb)) {
7070 rbtdb->origin_node = NULL;
7071 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7072 &rbtdb->origin_node);
7073 if (result != ISC_R_SUCCESS) {
7074 INSIST(result != ISC_R_EXISTS);
7075 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7078 rbtdb->origin_node->nsec3 = 0;
7080 * We need to give the origin node the right locknum.
7082 dns_name_init(&name, NULL);
7083 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7084 #ifdef DNS_RBT_USEHASH
7085 rbtdb->origin_node->locknum =
7086 rbtdb->origin_node->hashval %
7087 rbtdb->node_lock_count;
7089 rbtdb->origin_node->locknum =
7090 dns_name_hash(&name, ISC_TRUE) %
7091 rbtdb->node_lock_count;
7096 * Misc. Initialization.
7098 result = isc_refcount_init(&rbtdb->references, 1);
7099 if (result != ISC_R_SUCCESS) {
7100 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7103 rbtdb->attributes = 0;
7104 rbtdb->overmem = ISC_FALSE;
7108 * Version Initialization.
7110 rbtdb->current_serial = 1;
7111 rbtdb->least_serial = 1;
7112 rbtdb->next_serial = 2;
7113 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7114 if (rbtdb->current_version == NULL) {
7115 isc_refcount_decrement(&rbtdb->references, NULL);
7116 isc_refcount_destroy(&rbtdb->references);
7117 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7118 return (ISC_R_NOMEMORY);
7120 rbtdb->current_version->secure = dns_db_insecure;
7121 rbtdb->current_version->havensec3 = ISC_FALSE;
7122 rbtdb->current_version->flags = 0;
7123 rbtdb->current_version->iterations = 0;
7124 rbtdb->current_version->hash = 0;
7125 rbtdb->current_version->salt_length = 0;
7126 memset(rbtdb->current_version->salt, 0,
7127 sizeof(rbtdb->current_version->salt));
7128 rbtdb->future_version = NULL;
7129 ISC_LIST_INIT(rbtdb->open_versions);
7131 * Keep the current version in the open list so that list operation
7132 * won't happen in normal lookup operations.
7134 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
/* The magic numbers are set last, so only a fully built database validates. */
7136 rbtdb->common.magic = DNS_DB_MAGIC;
7137 rbtdb->common.impmagic = RBTDB_MAGIC;
7139 *dbp = (dns_db_t *)rbtdb;
7141 return (ISC_R_SUCCESS);
/* Error unwinding: resources are released in the reverse order of their creation. */
7144 isc_mem_put(mctx, rbtdb->deadnodes,
7145 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7148 if (rbtdb->heaps != NULL) {
7149 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7150 if (rbtdb->heaps[i] != NULL)
7151 isc_heap_destroy(&rbtdb->heaps[i]);
7152 isc_mem_put(mctx, rbtdb->heaps,
7153 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7157 if (rbtdb->rdatasets != NULL)
7158 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7159 sizeof(rdatasetheaderlist_t));
7161 if (rbtdb->rrsetstats != NULL)
7162 dns_stats_detach(&rbtdb->rrsetstats);
7165 isc_mem_put(mctx, rbtdb->node_locks,
7166 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7169 isc_rwlock_destroy(&rbtdb->tree_lock);
7172 RBTDB_DESTROYLOCK(&rbtdb->lock);
7175 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7181 * Slabbed Rdataset Methods
/*
 * Drop the node reference held by a bound rdataset: the database is taken
 * from private1, the node from private2, and the pair is handed to
 * detachnode().
 */
7185 rdataset_disassociate(dns_rdataset_t *rdataset) {
7186 dns_db_t *db = rdataset->private1;
7187 dns_dbnode_t *node = rdataset->private2;
7189 detachnode(db, &node);
/*
 * Position the rdataset cursor on the first rdata of its slab.
 *
 * The slab pointer is read from private3; its first two bytes hold the
 * record count, high byte first.  On the ISC_R_NOMORE path private5 is
 * cleared.  Otherwise the remaining count is stored in privateuint4, the
 * cursor in private5, and ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the test guarding the ISC_R_NOMORE return and the
 * statements between the #if block and the final stores are on lines not
 * visible in this excerpt; presumably the test is for a count of zero.
 */
7193 rdataset_first(dns_rdataset_t *rdataset) {
7194 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7197 count = raw[0] * 256 + raw[1];
7199 rdataset->private5 = NULL;
7200 return (ISC_R_NOMORE);
7203 #if DNS_RDATASET_FIXED
7204 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7205 raw += 2 + (4 * count);
7211 * The privateuint4 field is the number of rdata beyond the
7212 * cursor position, so we decrement the total count by one
7213 * before storing it.
7215 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7216 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7217 * to the first entry in the offset table.
7220 rdataset->privateuint4 = count;
7221 rdataset->private5 = raw;
7223 return (ISC_R_SUCCESS);
/*
 * Advance the rdataset cursor by one rdata.
 *
 * privateuint4 holds the number of rdata remaining and is written back
 * after being read; private5 holds the cursor.  Without
 * DNS_RDATASET_FIXED the cursor moves past a two byte length field; with
 * it the step is four bytes (length plus order).  The record length is
 * read from the first two bytes at the cursor, high byte first.
 *
 * Returns ISC_R_NOMORE on the exhausted path and ISC_R_SUCCESS otherwise.
 *
 * NOTE(review): the exhaustion test, the decrement of count and the use
 * of length are on lines not visible in this excerpt.
 */
7227 rdataset_next(dns_rdataset_t *rdataset) {
7229 unsigned int length;
7230 unsigned char *raw; /* RDATASLAB */
7232 count = rdataset->privateuint4;
7234 return (ISC_R_NOMORE);
7236 rdataset->privateuint4 = count;
7239 * Skip forward one record (length + 4) or one offset (4).
7241 raw = rdataset->private5;
7242 #if DNS_RDATASET_FIXED
7243 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7245 length = raw[0] * 256 + raw[1];
7247 #if DNS_RDATASET_FIXED
7249 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7251 rdataset->private5 = raw + 2; /* length(2) */
7254 return (ISC_R_SUCCESS);
/*
 * Fill rdata with the record under the rdataset cursor.
 *
 * The cursor in private5 must be non-NULL (REQUIRE).  With
 * DNS_RDATASET_FIXED and DNS_RDATASETATTR_LOADORDER set, the cursor
 * designates a four byte offset, high byte first, relative to the slab in
 * private3.  The record length is read from two bytes, high byte first.
 * For RRSIG rdatasets the DNS_RDATASLAB_OFFLINE bit of the byte at raw is
 * translated into DNS_RDATA_OFFLINE on the resulting rdata.  The rdata is
 * built with dns_rdata_fromregion() using the class and type of the
 * rdataset.
 *
 * NOTE(review): the declaration and setup of the region r and the pointer
 * adjustments past the length field are on lines not visible in this
 * excerpt.
 */
7258 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7259 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7260 #if DNS_RDATASET_FIXED
7261 unsigned int offset;
7263 unsigned int length;
7265 unsigned int flags = 0;
7267 REQUIRE(raw != NULL);
7270 * Find the start of the record if not already in private5
7271 * then skip the length and order fields.
7273 #if DNS_RDATASET_FIXED
7274 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7275 offset = (raw[0] << 24) + (raw[1] << 16) +
7276 (raw[2] << 8) + raw[3];
7277 raw = rdataset->private3;
7281 length = raw[0] * 256 + raw[1];
7282 #if DNS_RDATASET_FIXED
7287 if (rdataset->type == dns_rdatatype_rrsig) {
7288 if (*raw & DNS_RDATASLAB_OFFLINE)
7289 flags |= DNS_RDATA_OFFLINE;
7295 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7296 rdata->flags |= flags;
/*
 * Clone a bound rdataset.
 *
 * A further node reference is taken with attachnode() on the node held in
 * source->private2, and the iterator state of target (privateuint4 and
 * private5) is reset.
 *
 * NOTE(review): the line that copies source into target is not visible in
 * this excerpt.
 */
7300 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7301 dns_db_t *db = source->private1;
7302 dns_dbnode_t *node = source->private2;
7303 dns_dbnode_t *cloned_node = NULL;
7305 attachnode(db, node, &cloned_node);
7309 * Reset iterator state.
7311 target->privateuint4 = 0;
7312 target->private5 = NULL;
/*
 * Read the record count from the first two bytes (high byte first) of the
 * slab in private3.  The return statement is not visible in this excerpt;
 * presumably count is returned.
 */
7316 rdataset_count(dns_rdataset_t *rdataset) {
7317 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7320 count = raw[0] * 256 + raw[1];
/*
 * Bind nsec and nsecsig to the negative proof stored with this rdataset
 * (the struct noqname reached through private6) and clone the owner name
 * of the proof into name.
 *
 * rdataset - bound rdataset carrying the proof.
 * name     - receives a clone of noqname->name.
 * nsec     - bound to noqname->neg with type noqname->type.
 * nsecsig  - bound to noqname->negsig as an RRSIG set covering
 *            noqname->type.
 *
 * Each output takes its own node reference through attachnode() and
 * inherits ttl, trust, private1 and private2 from rdataset.  Iterator
 * state and the proof pointers (private6, private7) of both outputs are
 * cleared.
 *
 * Fix: the second private6/private7 reset now clears nsecsig.  It used to
 * clear nsec a second time, which left those two fields of nsecsig
 * untouched by this function.
 *
 * Returns ISC_R_SUCCESS.
 */
7326 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7327 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7329 dns_db_t *db = rdataset->private1;
7330 dns_dbnode_t *node = rdataset->private2;
7331 dns_dbnode_t *cloned_node;
7332 struct noqname *noqname = rdataset->private6;
/* First reference: owned by nsec. */
7335 attachnode(db, node, &cloned_node);
7336 nsec->methods = &rdataset_methods;
7337 nsec->rdclass = db->rdclass;
7338 nsec->type = noqname->type;
7340 nsec->ttl = rdataset->ttl;
7341 nsec->trust = rdataset->trust;
7342 nsec->private1 = rdataset->private1;
7343 nsec->private2 = rdataset->private2;
7344 nsec->private3 = noqname->neg;
7345 nsec->privateuint4 = 0;
7346 nsec->private5 = NULL;
7347 nsec->private6 = NULL;
7348 nsec->private7 = NULL;
/* Second reference: owned by nsecsig. */
7351 attachnode(db, node, &cloned_node);
7352 nsecsig->methods = &rdataset_methods;
7353 nsecsig->rdclass = db->rdclass;
7354 nsecsig->type = dns_rdatatype_rrsig;
7355 nsecsig->covers = noqname->type;
7356 nsecsig->ttl = rdataset->ttl;
7357 nsecsig->trust = rdataset->trust;
7358 nsecsig->private1 = rdataset->private1;
7359 nsecsig->private2 = rdataset->private2;
7360 nsecsig->private3 = noqname->negsig;
7361 nsecsig->privateuint4 = 0;
7362 nsecsig->private5 = NULL;
/* The signature set carries no proof of its own. */
7363 nsecsig->private6 = NULL;
7364 nsecsig->private7 = NULL;
7366 dns_name_clone(&noqname->name, name);
7368 return (ISC_R_SUCCESS);
/*
 * Bind nsec and nsecsig to the closest-encloser proof stored with this
 * rdataset (the struct noqname reached through private7) and clone the
 * owner name of the proof into name.
 *
 * rdataset - bound rdataset carrying the proof.
 * name     - receives a clone of closest->name.
 * nsec     - bound to closest->neg with type closest->type.
 * nsecsig  - bound to closest->negsig as an RRSIG set covering
 *            closest->type.
 *
 * Each output takes its own node reference through attachnode() and
 * inherits ttl, trust, private1 and private2 from rdataset.  Iterator
 * state and the proof pointers (private6, private7) of both outputs are
 * cleared.
 *
 * Fix: the second private6/private7 reset now clears nsecsig.  It used to
 * clear nsec a second time, which left those two fields of nsecsig
 * untouched by this function.
 *
 * Returns ISC_R_SUCCESS.
 */
7372 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7373 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7375 dns_db_t *db = rdataset->private1;
7376 dns_dbnode_t *node = rdataset->private2;
7377 dns_dbnode_t *cloned_node;
7378 struct noqname *closest = rdataset->private7;
/* First reference: owned by nsec. */
7381 attachnode(db, node, &cloned_node);
7382 nsec->methods = &rdataset_methods;
7383 nsec->rdclass = db->rdclass;
7384 nsec->type = closest->type;
7386 nsec->ttl = rdataset->ttl;
7387 nsec->trust = rdataset->trust;
7388 nsec->private1 = rdataset->private1;
7389 nsec->private2 = rdataset->private2;
7390 nsec->private3 = closest->neg;
7391 nsec->privateuint4 = 0;
7392 nsec->private5 = NULL;
7393 nsec->private6 = NULL;
7394 nsec->private7 = NULL;
/* Second reference: owned by nsecsig. */
7397 attachnode(db, node, &cloned_node);
7398 nsecsig->methods = &rdataset_methods;
7399 nsecsig->rdclass = db->rdclass;
7400 nsecsig->type = dns_rdatatype_rrsig;
7401 nsecsig->covers = closest->type;
7402 nsecsig->ttl = rdataset->ttl;
7403 nsecsig->trust = rdataset->trust;
7404 nsecsig->private1 = rdataset->private1;
7405 nsecsig->private2 = rdataset->private2;
7406 nsecsig->private3 = closest->negsig;
7407 nsecsig->privateuint4 = 0;
7408 nsecsig->private5 = NULL;
/* The signature set carries no proof of its own. */
7409 nsecsig->private6 = NULL;
7410 nsecsig->private7 = NULL;
7412 dns_name_clone(&closest->name, name);
7414 return (ISC_R_SUCCESS);
/*
 * Change the trust level of a bound rdataset.
 *
 * The new value is written both to the stored header and to the rdataset
 * itself, under the write lock of the lock bucket of the node taken from
 * private2.
 *
 * NOTE(review): a line between reading private3 and taking the lock is
 * not visible in this excerpt; it may adjust header.
 */
7418 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7419 dns_rbtdb_t *rbtdb = rdataset->private1;
7420 dns_rbtnode_t *rbtnode = rdataset->private2;
7421 rdatasetheader_t *header = rdataset->private3;
7424 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7425 isc_rwlocktype_write);
7426 header->trust = rdataset->trust = trust;
7427 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7428 isc_rwlocktype_write);
/*
 * Expire the stored header behind a bound rdataset by calling
 * expire_header() with ISC_FALSE as its last argument, under the write
 * lock of the lock bucket of the node taken from private2.
 *
 * NOTE(review): a line between reading private3 and taking the lock is
 * not visible in this excerpt; it may adjust header.
 */
7432 rdataset_expire(dns_rdataset_t *rdataset) {
7433 dns_rbtdb_t *rbtdb = rdataset->private1;
7434 dns_rbtnode_t *rbtnode = rdataset->private2;
7435 rdatasetheader_t *header = rdataset->private3;
7438 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7439 isc_rwlocktype_write);
7440 expire_header(rbtdb, header, ISC_FALSE);
7441 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7442 isc_rwlocktype_write);
7446 * Rdataset Iterator Methods
/*
 * Destroy an rdataset iterator.
 *
 * If the iterator holds a version it is closed with closeversion()
 * (commit flag ISC_FALSE).  The node reference is released with
 * detachnode() and the iterator memory is returned to the memory context
 * of its database.
 *
 * NOTE(review): the line that clears *iteratorp is not visible in this
 * excerpt.
 */
7450 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7451 rbtdb_rdatasetiter_t *rbtiterator;
7453 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7455 if (rbtiterator->common.version != NULL)
7456 closeversion(rbtiterator->common.db,
7457 &rbtiterator->common.version, ISC_FALSE);
7458 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7459 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7460 sizeof(*rbtiterator));
/*
 * Position an rdataset iterator on the first usable rdataset of its node.
 *
 * Under the read lock of the lock bucket of the node, the header list of
 * the node is walked through next; within each entry the down chain is
 * followed looking for a header whose serial does not exceed the serial
 * in use and that is not marked IGNORE.  Such a header is then tested for
 * NONEXISTENT and, when now is nonzero, for an rdh_ttl earlier than now.
 * The header selected (or NULL) is stored in the iterator as current.
 *
 * Returns ISC_R_NOMORE when nothing was selected and ISC_R_SUCCESS
 * otherwise.
 *
 * NOTE(review): the non-cache branch values of now and serial, the action
 * taken on a nonexistent or expired header, and the loop exits are on
 * lines not visible in this excerpt.
 */
7466 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7467 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7468 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7469 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7470 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7471 rdatasetheader_t *header, *top_next;
7472 rbtdb_serial_t serial;
7475 if (IS_CACHE(rbtdb)) {
7477 now = rbtiterator->common.now;
7479 serial = rbtversion->serial;
7483 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7484 isc_rwlocktype_read);
7486 for (header = rbtnode->data; header != NULL; header = top_next) {
7487 top_next = header->next;
7489 if (header->serial <= serial && !IGNORE(header)) {
7491 * Is this a "this rdataset doesn't exist"
7492 * record? Or is it too old in the cache?
7494 * Note: unlike everywhere else, we
7495 * check for now > header->rdh_ttl instead
7496 * of now >= header->rdh_ttl. This allows
7497 * ANY and RRSIG queries for 0 TTL
7498 * rdatasets to work.
7500 if (NONEXISTENT(header) ||
7501 (now != 0 && now > header->rdh_ttl))
7505 header = header->down;
7506 } while (header != NULL);
7511 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7512 isc_rwlocktype_read);
7514 rbtiterator->current = header;
7517 return (ISC_R_NOMORE);
7519 return (ISC_R_SUCCESS);
/*
 * Advance an rdataset iterator to the next usable rdataset of its node.
 *
 * Starting after the current header, and under the read lock of the lock
 * bucket of the node, the header list is walked through next.  Entries
 * whose type equals the type of the previous header, or the computed
 * negative counterpart of that type (negtype), are skipped.  For the
 * others the down chain is followed looking for a header whose serial
 * does not exceed the serial in use; that header is then tested for
 * RDATASET_ATTR_NONEXISTENT and, when now is nonzero, for an rdh_ttl
 * earlier than now.  The header selected (or NULL) becomes current.
 *
 * Returns ISC_R_NOMORE when the iterator had no current header or nothing
 * further was selected, ISC_R_SUCCESS otherwise.
 *
 * NOTE(review): the condition choosing between the two negtype formulas,
 * the rest of the serial test, the action on a nonexistent or expired
 * header and the loop exits are on lines not visible in this excerpt.
 */
7523 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7524 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7525 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7526 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7527 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7528 rdatasetheader_t *header, *top_next;
7529 rbtdb_serial_t serial;
7531 rbtdb_rdatatype_t type, negtype;
7532 dns_rdatatype_t rdtype, covers;
7534 header = rbtiterator->current;
7536 return (ISC_R_NOMORE);
7538 if (IS_CACHE(rbtdb)) {
7540 now = rbtiterator->common.now;
7542 serial = rbtversion->serial;
7546 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7547 isc_rwlocktype_read);
7549 type = header->type;
7550 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7552 covers = RBTDB_RDATATYPE_EXT(header->type);
7553 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7555 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7556 for (header = header->next; header != NULL; header = top_next) {
7557 top_next = header->next;
7559 * If not walking back up the down list.
7561 if (header->type != type && header->type != negtype) {
7563 if (header->serial <= serial &&
7566 * Is this a "this rdataset doesn't
7569 * Note: unlike everywhere else, we
7570 * check for now > header->ttl instead
7571 * of now >= header->ttl. This allows
7572 * ANY and RRSIG queries for 0 TTL
7573 * rdatasets to work.
7575 if ((header->attributes &
7576 RDATASET_ATTR_NONEXISTENT) != 0 ||
7577 (now != 0 && now > header->rdh_ttl))
7581 header = header->down;
7582 } while (header != NULL);
7588 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7589 isc_rwlocktype_read);
7591 rbtiterator->current = header;
7594 return (ISC_R_NOMORE);
7596 return (ISC_R_SUCCESS);
/*
 * Bind rdataset to the header the iterator is positioned on.
 *
 * The iterator must have a current header (REQUIRE).  bind_rdataset() is
 * called with the iterator timestamp (common.now) while holding the read
 * lock of the lock bucket of the node.
 */
7600 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7601 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7602 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7603 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7604 rdatasetheader_t *header;
7606 header = rbtiterator->current;
7607 REQUIRE(header != NULL);
7609 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7610 isc_rwlocktype_read);
7612 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7615 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7616 isc_rwlocktype_read);
7621 * Database Iterator Methods
/*
 * Take a reference on the node the database iterator currently points at
 * by calling reactivate_node() with the tree lock mode recorded in the
 * iterator.  The iterator must hold the tree lock in some mode (INSIST).
 *
 * NOTE(review): lines between the declarations and the INSIST are not
 * visible in this excerpt; they may return early when there is no node.
 */
7625 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7626 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7627 dns_rbtnode_t *node = rbtdbiter->node;
7632 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7633 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
/*
 * Release the reference the database iterator holds on its current node.
 *
 * decrement_reference() is called under the read lock of the lock bucket
 * of the node, passing the tree lock mode recorded in the iterator, and
 * the node pointer of the iterator is then cleared.
 *
 * NOTE(review): lines between the declarations and the lock lookup are
 * not visible in this excerpt; they may return early when there is no
 * node.
 */
7637 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7638 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7639 dns_rbtnode_t *node = rbtdbiter->node;
7645 lock = &rbtdb->node_locks[node->locknum].lock;
7646 NODE_LOCK(lock, isc_rwlocktype_read);
7647 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7648 rbtdbiter->tree_locked, ISC_FALSE);
7649 NODE_UNLOCK(lock, isc_rwlocktype_read);
7651 rbtdbiter->node = NULL;
/*
 * Release the references held for the nodes queued in the deletions
 * array of the iterator.
 *
 * Nothing is done when the queue is empty.  Otherwise a debug message is
 * logged, the tree lock is taken for writing (a read lock held by the
 * iterator is released first and remembered), decrement_reference() is
 * called for every queued node under the read lock of its bucket, and the
 * queue length is reset to zero.  Afterwards the write lock is released;
 * the read lock is re-acquired if it was held on entry, otherwise the
 * iterator is marked as holding no tree lock.
 */
7655 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7656 dns_rbtnode_t *node;
7657 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7658 isc_boolean_t was_read_locked = ISC_FALSE;
7662 if (rbtdbiter->delete != 0) {
7664 * Note that "%d node of %d in tree" can report things like
7665 * "flush_deletions: 59 nodes of 41 in tree". This means
7666 * That some nodes appear on the deletions list more than
7667 * once. Only the last occurence will actually be deleted.
7669 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7670 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7671 "flush_deletions: %d nodes of %d in tree",
7673 dns_rbt_nodecount(rbtdb->tree));
/* The read lock is dropped before the write lock is requested. */
7675 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7676 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7677 was_read_locked = ISC_TRUE;
7679 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7680 rbtdbiter->tree_locked = isc_rwlocktype_write;
7682 for (i = 0; i < rbtdbiter->delete; i++) {
7683 node = rbtdbiter->deletions[i];
7684 lock = &rbtdb->node_locks[node->locknum].lock;
7686 NODE_LOCK(lock, isc_rwlocktype_read);
7687 decrement_reference(rbtdb, node, 0,
7688 isc_rwlocktype_read,
7689 rbtdbiter->tree_locked, ISC_FALSE);
7690 NODE_UNLOCK(lock, isc_rwlocktype_read);
7693 rbtdbiter->delete = 0;
/* Restore the tree lock mode the iterator had on entry. */
7695 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7696 if (was_read_locked) {
7697 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7698 rbtdbiter->tree_locked = isc_rwlocktype_read;
7701 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * Resume a paused database iterator: the iterator must be paused and
 * must not hold the tree lock (REQUIRE).  The tree lock is taken for
 * reading, recorded in the iterator, and the paused flag is cleared.
 */
7707 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7708 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7710 REQUIRE(rbtdbiter->paused);
7711 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7713 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7714 rbtdbiter->tree_locked = isc_rwlocktype_read;
7716 rbtdbiter->paused = ISC_FALSE;
/*
 * Destroy a database iterator.
 *
 * A tree read lock still held by the iterator is released, the reference
 * on the current node is dropped, and queued deletions are flushed.  A
 * temporary database reference (db) is attached before the reference of
 * the iterator is detached, so the memory context of the database is
 * still reachable when the iterator memory is returned.  Both node chains
 * are reset before the memory is freed.
 *
 * NOTE(review): the lines after the isc_mem_put, presumably releasing the
 * temporary reference and clearing *iteratorp, are not visible in this
 * excerpt.
 */
7720 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7721 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7722 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7723 dns_db_t *db = NULL;
7725 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7726 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7727 rbtdbiter->tree_locked = isc_rwlocktype_none;
7729 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7731 dereference_iter_node(rbtdbiter);
7733 flush_deletions(rbtdbiter);
/* Keep the database alive through db while the iterator lets go of it. */
7735 dns_db_attach(rbtdbiter->common.db, &db);
7736 dns_db_detach(&rbtdbiter->common.db);
7738 dns_rbtnodechain_reset(&rbtdbiter->chain);
7739 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7740 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * Position a database iterator on the first node.
 *
 * A stored iterator result other than ISC_R_SUCCESS or ISC_R_NOMORE is
 * returned unchanged.  Otherwise a paused iterator is resumed, the
 * reference on the previous node is dropped and both node chains are
 * reset.  With nsec3only set the nsec3 tree is searched; otherwise the
 * main tree is searched first and, unless nonsec3 is set, an
 * ISC_R_NOTFOUND from it falls through to a second search on the nsec3
 * chain.
 *
 * When the chain search yields ISC_R_SUCCESS or DNS_R_NEWORIGIN the
 * current node is fetched; on success new_origin is set and the node is
 * referenced.  On the other path the result must be ISC_R_NOTFOUND
 * (INSIST) and is turned into ISC_R_NOMORE.  The outcome is stored in the
 * iterator.
 */
7747 dbiterator_first(dns_dbiterator_t *iterator) {
7748 isc_result_t result;
7749 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7750 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7751 dns_name_t *name, *origin;
7753 if (rbtdbiter->result != ISC_R_SUCCESS &&
7754 rbtdbiter->result != ISC_R_NOMORE)
7755 return (rbtdbiter->result);
7757 if (rbtdbiter->paused)
7758 resume_iteration(rbtdbiter);
7760 dereference_iter_node(rbtdbiter);
7762 name = dns_fixedname_name(&rbtdbiter->name);
7763 origin = dns_fixedname_name(&rbtdbiter->origin);
7764 dns_rbtnodechain_reset(&rbtdbiter->chain);
7765 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7767 if (rbtdbiter->nsec3only) {
7768 rbtdbiter->current = &rbtdbiter->nsec3chain;
7769 result = dns_rbtnodechain_first(rbtdbiter->current,
7770 rbtdb->nsec3, name, origin);
7772 rbtdbiter->current = &rbtdbiter->chain;
7773 result = dns_rbtnodechain_first(rbtdbiter->current,
7774 rbtdb->tree, name, origin);
/* Main tree came up empty: fall back to the nsec3 chain unless it is excluded. */
7775 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7776 rbtdbiter->current = &rbtdbiter->nsec3chain;
7777 result = dns_rbtnodechain_first(rbtdbiter->current,
7782 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7783 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7784 NULL, &rbtdbiter->node);
7785 if (result == ISC_R_SUCCESS) {
7786 rbtdbiter->new_origin = ISC_TRUE;
7787 reference_iter_node(rbtdbiter);
7790 INSIST(result == ISC_R_NOTFOUND);
7791 result = ISC_R_NOMORE; /* The tree is empty. */
7794 rbtdbiter->result = result;
/*
 * Move the iterator to the last node.
 *
 * If an earlier operation left a result other than ISC_R_SUCCESS or
 * ISC_R_NOMORE in rbtdbiter->result, that result is returned and nothing
 * else is done.  Otherwise a paused iterator is resumed, the current node
 * is passed to dereference_iter_node(), and both node chains are reset.
 *
 * The search starts with 'result' preset to ISC_R_NOTFOUND.  The last
 * node of rbtdb->nsec3 is looked up when 'nsec3only' is set and 'nonsec3'
 * is clear; the last node of rbtdb->tree is looked up when 'nsec3only' is
 * not set and the result is still ISC_R_NOTFOUND.
 *
 * ISC_R_SUCCESS and DNS_R_NEWORIGIN lead to fetching the node with
 * dns_rbtnodechain_current(); on success 'new_origin' is set and
 * reference_iter_node() is called.  The remaining outcome is asserted to
 * be ISC_R_NOTFOUND and is recorded as ISC_R_NOMORE (empty tree).  The
 * final result is stored in rbtdbiter->result.
 *
 * NOTE(review): when neither 'nsec3only' nor 'nonsec3' is set, the search
 * begins at the last node of the main tree and never consults the NSEC3
 * tree, although dbiterator_next() continues from the main chain into the
 * NSEC3 chain in that mode -- confirm this asymmetry is intended.
 */
7800 dbiterator_last(dns_dbiterator_t *iterator) {
7801 isc_result_t result;
7802 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7803 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7804 dns_name_t *name, *origin;
/* Hand back a previously stored failure unchanged. */
7806 if (rbtdbiter->result != ISC_R_SUCCESS &&
7807 rbtdbiter->result != ISC_R_NOMORE)
7808 return (rbtdbiter->result);
7810 if (rbtdbiter->paused)
7811 resume_iteration(rbtdbiter);
7813 dereference_iter_node(rbtdbiter);
7815 name = dns_fixedname_name(&rbtdbiter->name);
7816 origin = dns_fixedname_name(&rbtdbiter->origin);
/* Both chains are reset before the new lookup. */
7817 dns_rbtnodechain_reset(&rbtdbiter->chain);
7818 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/* Preset so that the main-tree lookup runs when the NSEC3 one is skipped. */
7820 result = ISC_R_NOTFOUND;
7821 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7822 rbtdbiter->current = &rbtdbiter->nsec3chain;
7823 result = dns_rbtnodechain_last(rbtdbiter->current,
7824 rbtdb->nsec3, name, origin);
7826 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7827 rbtdbiter->current = &rbtdbiter->chain;
7828 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7831 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7832 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7833 NULL, &rbtdbiter->node);
7834 if (result == ISC_R_SUCCESS) {
7835 rbtdbiter->new_origin = ISC_TRUE;
7836 reference_iter_node(rbtdbiter);
7839 INSIST(result == ISC_R_NOTFOUND);
7840 result = ISC_R_NOMORE; /* The tree is empty. */
/* The stored result is what later iterator calls check on entry. */
7843 rbtdbiter->result = result;
/*
 * Position the iterator on the node named 'name'.
 *
 * A stored result other than ISC_R_SUCCESS, ISC_R_NOTFOUND or ISC_R_NOMORE
 * is returned unchanged.  Otherwise a paused iterator is resumed, the
 * current node is passed to dereference_iter_node() and both chains are
 * reset.
 *
 * The lookup uses dns_rbt_findnode() with DNS_RBTFIND_EMPTYDATA:
 *   - 'nsec3only': search rbtdb->nsec3 on the NSEC3 chain;
 *   - 'nonsec3':   search rbtdb->tree on the main chain;
 *   - otherwise:   search rbtdb->tree and, on DNS_R_PARTIALMATCH, search
 *                  rbtdb->nsec3 as well, switching to the NSEC3 chain
 *                  only when that second search succeeds.
 *
 * NOTE(review): two result-handling sequences follow the lookup.  The
 * first turns DNS_R_PARTIALMATCH into ISC_R_NOTFOUND; the second stores
 * ISC_R_SUCCESS for DNS_R_PARTIALMATCH.  Presumably only one of them is
 * compiled in -- confirm which before relying on either behaviour.
 */
7849 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7850 isc_result_t result;
7851 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7852 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7853 dns_name_t *iname, *origin;
/* Unlike first/last, a stored ISC_R_NOTFOUND does not block a new seek. */
7855 if (rbtdbiter->result != ISC_R_SUCCESS &&
7856 rbtdbiter->result != ISC_R_NOTFOUND &&
7857 rbtdbiter->result != ISC_R_NOMORE)
7858 return (rbtdbiter->result);
7860 if (rbtdbiter->paused)
7861 resume_iteration(rbtdbiter);
7863 dereference_iter_node(rbtdbiter);
7865 iname = dns_fixedname_name(&rbtdbiter->name);
7866 origin = dns_fixedname_name(&rbtdbiter->origin);
/* Both chains are reset before the new lookup. */
7867 dns_rbtnodechain_reset(&rbtdbiter->chain);
7868 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7870 if (rbtdbiter->nsec3only) {
7871 rbtdbiter->current = &rbtdbiter->nsec3chain;
7872 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7875 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7876 } else if (rbtdbiter->nonsec3) {
7877 rbtdbiter->current = &rbtdbiter->chain;
7878 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7881 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7884 * Stay on main chain if not found on either chain.
7886 rbtdbiter->current = &rbtdbiter->chain;
7887 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7890 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7891 if (result == DNS_R_PARTIALMATCH) {
7892 dns_rbtnode_t *node = NULL;
7893 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7894 &node, &rbtdbiter->nsec3chain,
7895 DNS_RBTFIND_EMPTYDATA,
7897 if (result == ISC_R_SUCCESS) {
7898 rbtdbiter->node = node;
7899 rbtdbiter->current = &rbtdbiter->nsec3chain;
7905 if (result == ISC_R_SUCCESS) {
7906 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7908 if (result == ISC_R_SUCCESS) {
7909 rbtdbiter->new_origin = ISC_TRUE;
7910 reference_iter_node(rbtdbiter);
7912 } else if (result == DNS_R_PARTIALMATCH) {
7913 result = ISC_R_NOTFOUND;
7914 rbtdbiter->node = NULL;
7917 rbtdbiter->result = result;
7919 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7920 isc_result_t tresult;
7921 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7923 if (tresult == ISC_R_SUCCESS) {
7924 rbtdbiter->new_origin = ISC_TRUE;
7925 reference_iter_node(rbtdbiter);
7928 rbtdbiter->node = NULL;
7931 rbtdbiter->node = NULL;
7933 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7934 ISC_R_SUCCESS : result;
/*
 * Step the iterator back to the previous node.
 *
 * Requires that the iterator currently has a node.  A stored result other
 * than ISC_R_SUCCESS is returned unchanged; a paused iterator is resumed.
 *
 * When the current chain reports ISC_R_NOMORE while it is the NSEC3 chain
 * and neither 'nsec3only' nor 'nonsec3' is set, iteration switches to the
 * main chain and continues from the last node of rbtdb->tree; an empty
 * main tree (ISC_R_NOTFOUND) is reported as ISC_R_NOMORE.
 *
 * The old node is passed to dereference_iter_node().  For ISC_R_SUCCESS
 * or DNS_R_NEWORIGIN the new node is fetched and 'new_origin' records
 * whether DNS_R_NEWORIGIN was reported; reference_iter_node() is called
 * once the result is ISC_R_SUCCESS.  The result is stored in
 * rbtdbiter->result.
 */
7941 dbiterator_prev(dns_dbiterator_t *iterator) {
7942 isc_result_t result;
7943 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7944 dns_name_t *name, *origin;
7945 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7947 REQUIRE(rbtdbiter->node != NULL);
7949 if (rbtdbiter->result != ISC_R_SUCCESS)
7950 return (rbtdbiter->result);
7952 if (rbtdbiter->paused)
7953 resume_iteration(rbtdbiter);
7955 name = dns_fixedname_name(&rbtdbiter->name);
7956 origin = dns_fixedname_name(&rbtdbiter->origin);
7957 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
/* Start of the NSEC3 chain reached while walking both trees. */
7958 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7959 !rbtdbiter->nonsec3 &&
7960 &rbtdbiter->nsec3chain == rbtdbiter->current) {
7961 rbtdbiter->current = &rbtdbiter->chain;
7962 dns_rbtnodechain_reset(rbtdbiter->current);
7963 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7965 if (result == ISC_R_NOTFOUND)
7966 result = ISC_R_NOMORE;
7969 dereference_iter_node(rbtdbiter);
7971 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7972 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7973 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7974 NULL, &rbtdbiter->node);
7977 if (result == ISC_R_SUCCESS)
7978 reference_iter_node(rbtdbiter);
/* The stored result is what later iterator calls check on entry. */
7980 rbtdbiter->result = result;
/*
 * Advance the iterator to the next node.
 *
 * Requires that the iterator currently has a node.  A stored result other
 * than ISC_R_SUCCESS is returned unchanged; a paused iterator is resumed.
 *
 * When the current chain reports ISC_R_NOMORE while it is the main chain
 * and neither 'nsec3only' nor 'nonsec3' is set, iteration switches to the
 * NSEC3 chain and continues from the first node of rbtdb->nsec3; an empty
 * NSEC3 tree (ISC_R_NOTFOUND) is reported as ISC_R_NOMORE.
 *
 * The old node is passed to dereference_iter_node().  For ISC_R_SUCCESS
 * or DNS_R_NEWORIGIN the new node is fetched and 'new_origin' records
 * whether DNS_R_NEWORIGIN was reported; reference_iter_node() is called
 * once the result is ISC_R_SUCCESS.  The result is stored in
 * rbtdbiter->result.
 */
7986 dbiterator_next(dns_dbiterator_t *iterator) {
7987 isc_result_t result;
7988 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7989 dns_name_t *name, *origin;
7990 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7992 REQUIRE(rbtdbiter->node != NULL);
7994 if (rbtdbiter->result != ISC_R_SUCCESS)
7995 return (rbtdbiter->result);
7997 if (rbtdbiter->paused)
7998 resume_iteration(rbtdbiter);
8000 name = dns_fixedname_name(&rbtdbiter->name);
8001 origin = dns_fixedname_name(&rbtdbiter->origin);
8002 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
/* End of the main chain reached while walking both trees. */
8003 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8004 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8005 rbtdbiter->current = &rbtdbiter->nsec3chain;
8006 dns_rbtnodechain_reset(rbtdbiter->current);
8007 result = dns_rbtnodechain_first(rbtdbiter->current,
8008 rbtdb->nsec3, name, origin);
8009 if (result == ISC_R_NOTFOUND)
8010 result = ISC_R_NOMORE;
8013 dereference_iter_node(rbtdbiter);
8015 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8016 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8017 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8018 NULL, &rbtdbiter->node);
8020 if (result == ISC_R_SUCCESS)
8021 reference_iter_node(rbtdbiter);
/* The stored result is what later iterator calls check on entry. */
8023 rbtdbiter->result = result;
/*
 * Return the node the iterator is positioned on through '*nodep', taking
 * a new reference to it while holding the node lock.
 *
 * Requires rbtdbiter->result == ISC_R_SUCCESS and a non-NULL current
 * node.  A paused iterator is resumed first.
 *
 * The name is built with dns_name_concatenate() from the iterator's
 * stored node name and origin, and the outcome of that call is checked.
 * DNS_R_NEWORIGIN is selected when relative names are in use and
 * 'new_origin' is set; ISC_R_SUCCESS is the other visible result value.
 *
 * When the iterator is in cleaning mode and the result is ISC_R_SUCCESS,
 * the node is passed to expirenode().  If that succeeds and the node has
 * no 'down' pointer, the node is appended to the deletions batch and its
 * reference count is incremented while holding the node lock; a full
 * batch (DELETION_BATCH_MAX entries) is flushed beforehand.
 */
8029 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8032 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8033 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8034 dns_rbtnode_t *node = rbtdbiter->node;
8035 isc_result_t result;
8036 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8037 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8039 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8040 REQUIRE(rbtdbiter->node != NULL);
8042 if (rbtdbiter->paused)
8043 resume_iteration(rbtdbiter);
8046 if (rbtdbiter->common.relative_names)
8048 result = dns_name_concatenate(nodename, origin, name, NULL);
8049 if (result != ISC_R_SUCCESS)
8051 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8052 result = DNS_R_NEWORIGIN;
8054 result = ISC_R_SUCCESS;
/* The reference handed to the caller is taken under the node's lock. */
8056 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8057 new_reference(rbtdb, node);
8058 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8060 *nodep = rbtdbiter->node;
8062 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8063 isc_result_t expire_result;
8066 * If the deletion array is full, flush it before trying
8067 * to expire the current node. The current node can't
8068 * fully deleted while the iteration cursor is still on it.
8070 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8071 flush_deletions(rbtdbiter);
8073 expire_result = expirenode(iterator->db, *nodep, 0);
8076 * expirenode() currently always returns success.
8078 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8081 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8082 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8083 dns_rbtnode_refincrement(node, &refs);
8085 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the iterator.
 *
 * A stored result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Pausing an already paused iterator returns ISC_R_SUCCESS
 * without doing anything.  Otherwise the iterator is marked paused, the
 * tree read lock is released if the iterator holds it, pending deletions
 * are flushed, and ISC_R_SUCCESS is returned.
 */
8093 dbiterator_pause(dns_dbiterator_t *iterator) {
8094 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8095 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8097 if (rbtdbiter->result != ISC_R_SUCCESS &&
8098 rbtdbiter->result != ISC_R_NOMORE)
8099 return (rbtdbiter->result);
8101 if (rbtdbiter->paused)
8102 return (ISC_R_SUCCESS);
8104 rbtdbiter->paused = ISC_TRUE;
/* The only lock type the iterator is expected to hold is a read lock. */
8106 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8107 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8108 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8109 rbtdbiter->tree_locked = isc_rwlocktype_none;
8112 flush_deletions(rbtdbiter);
8114 return (ISC_R_SUCCESS);
/*
 * Copy the iterator's stored origin into 'name'.
 *
 * Returns the stored iterator result when it is not ISC_R_SUCCESS;
 * otherwise returns whatever dns_name_copy() returns.
 */
8118 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8119 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8120 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8122 if (rbtdbiter->result != ISC_R_SUCCESS)
8123 return (rbtdbiter->result);
8125 return (dns_name_copy(origin, name, NULL));
8129 * Additional cache routines.
/*
 * Look up the additional-section cache entry attached to the current
 * rdata of 'rdataset' and fetch its contents.
 *
 * The database, node, raw slab pointer and remaining-rdata count are read
 * from the rdataset's private fields.  The rdataset header is located
 * immediately before the raw slab, and the slot index is
 * total_count - current_count - 1, where total_count is the 16-bit
 * big-endian value in the first two slab bytes.
 *
 * The node lock is held for reading while the per-type array
 * (header->additional_auth or header->additional_glue) is examined.  A
 * missing array or an empty slot yields ISC_R_NOTFOUND; a query miss is
 * counted on 'acache' in both cases, except that a missing array is not
 * counted for dns_rdatasetadditional_fromcache.  Otherwise the entry is
 * attached, the lock is dropped, dns_acache_getentry() fills in the
 * caller's output arguments, and the entry is detached again.
 */
8132 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8133 dns_rdatatype_t qtype, dns_acache_t *acache,
8134 dns_zone_t **zonep, dns_db_t **dbp,
8135 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8136 dns_name_t *fname, dns_message_t *msg,
8139 dns_rbtdb_t *rbtdb = rdataset->private1;
8140 dns_rbtnode_t *rbtnode = rdataset->private2;
8141 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8142 unsigned int current_count = rdataset->privateuint4;
8144 rdatasetheader_t *header;
8145 nodelock_t *nodelock;
8146 unsigned int total_count;
8147 acachectl_t *acarray;
8148 dns_acacheentry_t *entry;
8149 isc_result_t result;
8151 UNUSED(qtype); /* we do not use this value at least for now */
/* The header sits directly in front of the raw slab data. */
8154 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* First two slab bytes: rdata count, most significant byte first. */
8156 total_count = raw[0] * 256 + raw[1];
8157 INSIST(total_count > current_count);
8158 count = total_count - current_count - 1;
8162 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8163 NODE_LOCK(nodelock, isc_rwlocktype_read);
8166 case dns_rdatasetadditional_fromauth:
8167 acarray = header->additional_auth;
8169 case dns_rdatasetadditional_fromcache:
8172 case dns_rdatasetadditional_fromglue:
8173 acarray = header->additional_glue;
8179 if (acarray == NULL) {
8180 if (type != dns_rdatasetadditional_fromcache)
8181 dns_acache_countquerymiss(acache);
8182 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8183 return (ISC_R_NOTFOUND);
8186 if (acarray[count].entry == NULL) {
8187 dns_acache_countquerymiss(acache);
8188 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8189 return (ISC_R_NOTFOUND);
/* Attach before unlocking; the entry is used after the lock is dropped. */
8193 dns_acache_attachentry(acarray[count].entry, &entry);
8195 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8197 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8198 nodep, fname, msg, now);
8200 dns_acache_detachentry(&entry);
/*
 * Callback registered with dns_acache_createentry() for entries stored in
 * a header's additional_auth / additional_glue array.
 *
 * 'arg' must be non-NULL; the callback argument it carries identifies the
 * database, node, header, array type and slot index.  While holding the
 * node lock for writing, the slot is cleared if it still refers to
 * 'entry' (its cbarg is asserted to be this one), the callback argument
 * is freed, and 'entry' is detached.  After unlocking, the node and
 * database references taken from the callback argument are released.
 *
 * NOTE(review): two isc_mem_put() calls on 'cbarg' are visible; presumably
 * they sit on mutually exclusive branches -- confirm.
 */
8206 acache_callback(dns_acacheentry_t *entry, void **arg) {
8208 dns_rbtnode_t *rbtnode;
8209 nodelock_t *nodelock;
8210 acachectl_t *acarray = NULL;
8211 acache_cbarg_t *cbarg;
8214 REQUIRE(arg != NULL);
8218 * The caller must hold the entry lock.
8221 rbtdb = (dns_rbtdb_t *)cbarg->db;
8222 rbtnode = (dns_rbtnode_t *)cbarg->node;
8224 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8225 NODE_LOCK(nodelock, isc_rwlocktype_write);
8227 switch (cbarg->type) {
8228 case dns_rdatasetadditional_fromauth:
8229 acarray = cbarg->header->additional_auth;
8231 case dns_rdatasetadditional_fromglue:
8232 acarray = cbarg->header->additional_glue;
8238 count = cbarg->count;
/* Only clear the slot if it still refers to this entry. */
8239 if (acarray != NULL && acarray[count].entry == entry) {
8240 acarray[count].entry = NULL;
8241 INSIST(acarray[count].cbarg == cbarg);
8242 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8243 acarray[count].cbarg = NULL;
8245 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8247 dns_acache_detachentry(&entry);
8249 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* 'rbtnode' and 'rbtdb' were copied from cbarg before it was freed. */
8251 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8252 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an acache entry and dispose of its callback argument.
 *
 * Requires non-NULL 'mctx', 'entry', 'cbargp' and '*cbargp'.  The entry
 * is cancelled with dns_acache_cancelentry(), the node and database
 * references held by the callback argument are released, and the
 * argument's memory is returned to 'mctx'.
 */
8258 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8259 acache_cbarg_t **cbargp)
8261 acache_cbarg_t *cbarg;
8263 REQUIRE(mctx != NULL);
8264 REQUIRE(entry != NULL);
8265 REQUIRE(cbargp != NULL && *cbargp != NULL);
8269 dns_acache_cancelentry(entry);
8270 dns_db_detachnode(cbarg->db, &cbarg->node);
8271 dns_db_detach(&cbarg->db);
8273 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Store an additional-section cache entry for the current rdata of
 * 'rdataset'.
 *
 * dns_rdatasetadditional_fromcache is accepted and ignored (returns
 * ISC_R_SUCCESS).  For the other types a callback argument is allocated
 * (ISC_R_NOMEMORY on failure) and given references to the database and
 * node, a new acache entry is created with acache_callback() as its
 * callback, and the supplied zone/db/version/node/name data are stored in
 * it with dns_acache_setentry().
 *
 * While holding the node lock for writing, the per-type array is
 * allocated on first use with one slot per rdata (total_count slots, all
 * cleared) and hooked into the header; the new entry and callback
 * argument are then installed in the slot
 * total_count - current_count - 1.  An entry already occupying the slot
 * is cancelled and detached after the lock has been released.
 *
 * The tail of the function is the failure path: it undoes the callback
 * argument's references and allocation, cancelling and detaching the new
 * entry when one exists.
 */
8279 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8280 dns_rdatatype_t qtype, dns_acache_t *acache,
8281 dns_zone_t *zone, dns_db_t *db,
8282 dns_dbversion_t *version, dns_dbnode_t *node,
8285 dns_rbtdb_t *rbtdb = rdataset->private1;
8286 dns_rbtnode_t *rbtnode = rdataset->private2;
8287 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8288 unsigned int current_count = rdataset->privateuint4;
8289 rdatasetheader_t *header;
8290 unsigned int total_count, count;
8291 nodelock_t *nodelock;
8292 isc_result_t result;
8293 acachectl_t *acarray;
8294 dns_acacheentry_t *newentry, *oldentry = NULL;
8295 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8299 if (type == dns_rdatasetadditional_fromcache)
8300 return (ISC_R_SUCCESS);
/* The header sits directly in front of the raw slab data. */
8302 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* First two slab bytes: rdata count, most significant byte first. */
8304 total_count = raw[0] * 256 + raw[1];
8305 INSIST(total_count > current_count);
8306 count = total_count - current_count - 1; /* should be private data */
8308 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8309 if (newcbarg == NULL)
8310 return (ISC_R_NOMEMORY);
8311 newcbarg->type = type;
8312 newcbarg->count = count;
8313 newcbarg->header = header;
8314 newcbarg->db = NULL;
8315 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8316 newcbarg->node = NULL;
8317 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8320 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8321 acache_callback, newcbarg, &newentry);
8322 if (result != ISC_R_SUCCESS)
8324 /* Set cache data in the new entry. */
8325 result = dns_acache_setentry(acache, newentry, zone, db,
8326 version, node, fname);
8327 if (result != ISC_R_SUCCESS)
8330 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8331 NODE_LOCK(nodelock, isc_rwlocktype_write);
8335 case dns_rdatasetadditional_fromauth:
8336 acarray = header->additional_auth;
8338 case dns_rdatasetadditional_fromglue:
8339 acarray = header->additional_glue;
8345 if (acarray == NULL) {
8348 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8349 sizeof(acachectl_t));
8351 if (acarray == NULL) {
8352 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8356 for (i = 0; i < total_count; i++) {
8357 acarray[i].entry = NULL;
8358 acarray[i].cbarg = NULL;
8362 case dns_rdatasetadditional_fromauth:
8363 header->additional_auth = acarray;
8365 case dns_rdatasetadditional_fromglue:
8366 header->additional_glue = acarray;
8372 if (acarray[count].entry != NULL) {
8374 * Swap the entry. Delay cleaning-up the old entry since
8375 * it would require a node lock.
8377 oldentry = acarray[count].entry;
8378 INSIST(acarray[count].cbarg != NULL);
8379 oldcbarg = acarray[count].cbarg;
8381 acarray[count].entry = newentry;
8382 acarray[count].cbarg = newcbarg;
8384 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8386 if (oldentry != NULL) {
8387 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8388 dns_acache_detachentry(&oldentry);
8391 return (ISC_R_SUCCESS);
8394 if (newcbarg != NULL) {
8395 if (newentry != NULL) {
8396 acache_cancelentry(rbtdb->common.mctx, newentry,
8398 dns_acache_detachentry(&newentry);
8400 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8401 dns_db_detach(&newcbarg->db);
8402 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Remove the additional-section cache entry attached to the current
 * rdata of 'rdataset'.
 *
 * dns_rdatasetadditional_fromcache is accepted and ignored (returns
 * ISC_R_SUCCESS).  For the other types the slot
 * total_count - current_count - 1 of the per-type array is examined while
 * holding the node lock for writing.  A missing array or an empty slot
 * yields ISC_R_NOTFOUND.  Otherwise the slot's entry and callback argument
 * are cleared under the lock and, after unlocking, the entry is cancelled
 * and detached; ISC_R_SUCCESS is returned.
 */
8411 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8412 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8414 dns_rbtdb_t *rbtdb = rdataset->private1;
8415 dns_rbtnode_t *rbtnode = rdataset->private2;
8416 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8417 unsigned int current_count = rdataset->privateuint4;
8418 rdatasetheader_t *header;
8419 nodelock_t *nodelock;
8420 unsigned int total_count, count;
8421 acachectl_t *acarray;
8422 dns_acacheentry_t *entry;
8423 acache_cbarg_t *cbarg;
8425 UNUSED(qtype); /* we do not use this value at least for now */
8428 if (type == dns_rdatasetadditional_fromcache)
8429 return (ISC_R_SUCCESS);
/* The header sits directly in front of the raw slab data. */
8431 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* First two slab bytes: rdata count, most significant byte first. */
8433 total_count = raw[0] * 256 + raw[1];
8434 INSIST(total_count > current_count);
8435 count = total_count - current_count - 1;
8440 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8441 NODE_LOCK(nodelock, isc_rwlocktype_write);
8444 case dns_rdatasetadditional_fromauth:
8445 acarray = header->additional_auth;
8447 case dns_rdatasetadditional_fromglue:
8448 acarray = header->additional_glue;
8454 if (acarray == NULL) {
8455 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8456 return (ISC_R_NOTFOUND);
8459 entry = acarray[count].entry;
8460 if (entry == NULL) {
8461 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8462 return (ISC_R_NOTFOUND);
/* Empty the slot under the lock; the entry is cancelled after unlocking. */
8465 acarray[count].entry = NULL;
8466 cbarg = acarray[count].cbarg;
8467 acarray[count].cbarg = NULL;
8469 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8471 if (entry != NULL) {
8473 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8474 dns_acache_detachentry(&entry);
8477 return (ISC_R_SUCCESS);
8481 * Routines for LRU-based cache management.
8485 * See if a given cache entry that is being reused needs to be updated
8486 * in the LRU-list. From the LRU management point of view, this function is
8487 * expected to return true for almost all cases. When used with threads,
8488 * however, this may cause a non-negligible performance penalty because a
8489 * writer lock will have to be acquired before updating the list.
8490 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8491 * function returns true if the entry has not been updated for some period of
8492 * time. We differentiate the NS or glue address case and the others since
8493 * experiments have shown that the former tends to be accessed relatively
8494 * infrequently and the cost of a cache miss is higher (e.g., a missing NS record
8495 * may cause external queries at a higher level zone, involving more
8498 * Caller must hold the node (read or write) lock.
8500 static inline isc_boolean_t
8501 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
/* Headers flagged NONEXISTENT or STALE are singled out before any timing test. */
8502 if ((header->attributes &
8503 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
/* The age thresholds below apply only when DNS_RBTDB_LIMITLRUUPDATE is non-zero. */
8506 #if DNS_RBTDB_LIMITLRUUPDATE
/* NS rdatasets, and A/AAAA rdatasets held at glue trust, use the 60 s threshold. */
8507 if (header->type == dns_rdatatype_ns ||
8508 (header->trust == dns_trust_glue &&
8509 (header->type == dns_rdatatype_a ||
8510 header->type == dns_rdatatype_aaaa))) {
8512 * Glue records are updated if at least 60 seconds have passed
8513 * since the previous update time.
8515 return (header->last_used + 60 <= now);
8518 /* Other records are updated if 5 minutes have passed. */
8519 return (header->last_used + 300 <= now);
8528 * Update the timestamp of a given cache entry and move it to the head
8529 * of the corresponding LRU list.
8531 * Caller must hold the node (write) lock.
8533 * Note that we do NOT touch the heap here, as the TTL has not changed.
8536 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8539 INSIST(IS_CACHE(rbtdb));
8541 /* To be checked: can we really assume this? XXXMLG */
8542 INSIST(ISC_LINK_LINKED(header, link));
/*
 * Take the header out of the list selected by its node's lock number,
 * stamp it with 'now', and put it back at the head of that same list.
 */
8544 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8545 header->last_used = now;
8546 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8550 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8551 * under an overmem condition. To recover from this condition quickly, up to
8552 * 2 entries will be purged. This process is triggered while adding a new
8553 * entry, and we specifically avoid purging entries in the same LRU bucket as
8554 * the one to which the new entry will belong. Otherwise, we might purge
8555 * entries of the same name of different RR types while adding RRsets from a
8556 * single response (consider the case where we're adding A and AAAA glue records
8557 * of the same NS name).
8560 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8561 isc_stdtime_t now, isc_boolean_t tree_locked)
8563 rdatasetheader_t *header, *header_prev;
8564 unsigned int locknum;
/*
 * Visit the lock buckets in order, starting just after 'locknum_start'
 * and wrapping around; stop on returning to 'locknum_start' (which is
 * never visited) or once 'purgecount' is no longer positive.
 */
8567 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8568 locknum != locknum_start && purgecount > 0;
8569 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8570 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8571 isc_rwlocktype_write);
/* Element 1 of this bucket's heap is expired if its TTL has already passed. */
8573 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8574 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8575 expire_header(rbtdb, header, tree_locked);
/* Then walk this bucket's list from its tail towards its head. */
8579 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8580 header != NULL && purgecount > 0;
8581 header = header_prev) {
8582 header_prev = ISC_LIST_PREV(header, link);
8584 * Unlink the entry at this point to avoid checking it
8585 * again even if it's currently used someone else and
8586 * cannot be purged at this moment. This entry won't be
8587 * referenced any more (so unlinking is safe) since the
8588 * TTL was reset to 0.
8590 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8592 expire_header(rbtdb, header, tree_locked);
8596 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8597 isc_rwlocktype_write);
8602 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8603 isc_boolean_t tree_locked)
8605 set_ttl(rbtdb, header, 0);
8606 header->attributes |= RDATASET_ATTR_STALE;
8607 header->node->dirty = 1;
8610 * Caller must hold the node (write) lock.
8613 if (dns_rbtnode_refcurrent(header->node) == 0) {
8615 * If no one else is using the node, we can clean it up now.
8616 * We first need to gain a new reference to the node to meet a
8617 * requirement of decrement_reference().
8619 new_reference(rbtdb, header->node);
8620 decrement_reference(rbtdb, header->node, 0,
8621 isc_rwlocktype_write,
8622 tree_locked ? isc_rwlocktype_write :
8623 isc_rwlocktype_none, ISC_FALSE);