2 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.270.12.16 2009/12/30 08:34:30 jinmei Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
65 #include <dns/zonekey.h>
67 #ifdef DNS_RBTDB_VERSION64
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
80 * Note that "impmagic" is not the first four bytes of the struct, so
81 * ISC_MAGIC_VALID cannot be used.
83 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
84 (rbtdb)->common.impmagic == RBTDB_MAGIC)
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t rbtdb_serial_t;
89 * Make casting easier in symbolic debuggers by using different names
90 * for the 64 bit version.
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
96 typedef isc_uint32_t rbtdb_serial_t;
99 typedef isc_uint32_t rbtdb_rdatatype_t;
101 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
102 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
103 #define RBTDB_RDATATYPE_VALUE(b, e) ((rbtdb_rdatatype_t)((e) << 16) | (b))
105 #define RBTDB_RDATATYPE_SIGNSEC \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
119 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120 * Using rwlock is effective with regard to lookup performance only when
121 * it is implemented in an efficient way.
122 * Otherwise, it is generally wise to stick to the simple locking since rwlock
123 * would require more memory or can even make lookups slower due to its own
124 * overhead (when it internally calls mutex locks).
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
129 #define DNS_RBTDB_USERWLOCK 0
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
138 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t) LOCK(l)
141 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
145 * Since node locking is sensitive to both performance and memory footprint,
146 * we need some trick here. If we have both high-performance rwlock and
147 * high performance and small-memory reference counters, we use rwlock for
148 * node lock and isc_refcount for node references. In this case, we don't have
149 * to protect the access to the counters by locks.
150 * Otherwise, we simply use ordinary mutex lock for node locking, and use
151 * simple integers as reference counters which is protected by the lock.
152 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153 * NODE_UNLOCK. In some other cases, however, we need to protect reference
154 * counters first and then protect other parts of a node as read-only data.
155 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156 * provided for these special cases. When we can use the efficient backend
157 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159 * section including the access to the reference counter.
160 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161 * section is also protected by NODE_STRONGLOCK().
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
166 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t) RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
172 #define NODE_STRONGLOCK(l) ((void)0)
173 #define NODE_STRONGUNLOCK(l) ((void)0)
174 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
178 typedef isc_mutex_t nodelock_t;
180 #define NODE_INITLOCK(l) isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
182 #define NODE_LOCK(l, t) LOCK(l)
183 #define NODE_UNLOCK(l, t) UNLOCK(l)
184 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
186 #define NODE_STRONGLOCK(l) LOCK(l)
187 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t) ((void)0)
189 #define NODE_WEAKUNLOCK(l, t) ((void)0)
190 #define NODE_WEAKDOWNGRADE(l) ((void)0)
194 * Whether to rate-limit updating the LRU to avoid possible thread contention.
195 * Our performance measurement has shown the cost is marginal, so it's defined
196 * to be 0 by default either with or without threads.
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 * Allow clients with a virtual time of up to 5 minutes in the past to see
204 * records that would otherwise have expired.
206 #define RBTDB_VIRTUAL 300
212 dns_rdatatype_t type;
215 typedef struct acachectl acachectl_t;
217 typedef struct rdatasetheader {
219 * Locked by the owning node's lock.
221 rbtdb_serial_t serial;
223 rbtdb_rdatatype_t type;
224 isc_uint16_t attributes;
226 struct noqname *noqname;
227 struct noqname *closest;
229 * We don't use the LIST macros, because the LIST structure has
230 * both head and tail pointers, and is doubly linked.
233 struct rdatasetheader *next;
235 * If this is the top header for an rdataset, 'next' points
236 * to the top header for the next rdataset (i.e., the next type).
237 * Otherwise, it points up to the header whose down pointer points
241 struct rdatasetheader *down;
243 * Points to the header for the next older version of
249 * Monotonically increased every time this rdataset is bound so that
250 * it is used as the base of the starting point in DNS responses
251 * when the "cyclic" rrset-order is required. Since the ordering
252 * should not be so crucial, no lock is set for the counter for
253 * performance reasons.
256 acachectl_t *additional_auth;
257 acachectl_t *additional_glue;
260 isc_stdtime_t last_used;
261 ISC_LINK(struct rdatasetheader) link;
263 unsigned int heap_index;
265 * Used for TTL-based cache cleaning.
267 isc_stdtime_t resign;
270 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
273 #define RDATASET_ATTR_NONEXISTENT 0x0001
274 #define RDATASET_ATTR_STALE 0x0002
275 #define RDATASET_ATTR_IGNORE 0x0004
276 #define RDATASET_ATTR_RETAIN 0x0008
277 #define RDATASET_ATTR_NXDOMAIN 0x0010
278 #define RDATASET_ATTR_RESIGN 0x0020
279 #define RDATASET_ATTR_STATCOUNT 0x0040
280 #define RDATASET_ATTR_OPTOUT 0x0080
282 typedef struct acache_cbarg {
283 dns_rdatasetadditional_t type;
287 rdatasetheader_t *header;
291 dns_acacheentry_t *entry;
292 acache_cbarg_t *cbarg;
297 * When the cache will pre-expire data (due to memory low or other
298 * situations) before the rdataset's TTL has expired, it MUST
299 * respect the RETAIN bit and not expire the data until its TTL is
303 #undef IGNORE /* WIN32 winbase.h defines this. */
305 #define EXISTS(header) \
306 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
307 #define NONEXISTENT(header) \
308 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
309 #define IGNORE(header) \
310 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
311 #define RETAIN(header) \
312 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
313 #define NXDOMAIN(header) \
314 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
315 #define RESIGN(header) \
316 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
317 #define OPTOUT(header) \
318 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
320 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
323 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
324 * There is a tradeoff issue about configuring this value: if this is too
325 * small, it may cause heavier contention between threads; if this is too large,
326 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
327 * The default value should work well for most environments, but this can
328 * also be configurable at compilation time via the
329 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
330 * 1 due to the assumption of overmem_purge().
332 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
333 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
334 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
340 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
344 /* Protected in the refcount routines. */
345 isc_refcount_t references;
346 /* Locked by lock. */
347 isc_boolean_t exiting;
/*
 * One entry on a version's 'changed_list'.  add_changed() allocates an
 * entry, stores the node in 'node' (after incrementing the node's
 * reference count), clears a 'dirty' flag and appends the entry through
 * 'link'.
 * NOTE(review): the declaration of 'dirty' and the closing line of this
 * typedef are not present in this listing.
 */
350 typedef struct rbtdb_changed {
351 dns_rbtnode_t * node;
353 ISC_LINK(struct rbtdb_changed) link;
356 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
/*
 * State kept for one version of the database.
 *
 * allocate_version() fills in 'serial', 'references', 'writer',
 * 'commit_ok' (ISC_FALSE) and initializes 'changed_list',
 * 'resigned_list' and 'link'.  newversion() then sets 'commit_ok' to
 * ISC_TRUE and copies 'secure', 'havensec3' and the NSEC3 parameters
 * ('flags', 'hash', 'iterations', 'salt_length', 'salt') from the
 * current version.
 * NOTE(review): the declarations of 'flags' and 'hash' and the closing
 * line of this typedef are not present in this listing.
 */
364 typedef struct rbtdb_version {
366 rbtdb_serial_t serial;
368 * Protected in the refcount routines.
369 * XXXJT: should we change the lock policy based on the refcount
372 isc_refcount_t references;
373 /* Locked by database lock. */
374 isc_boolean_t writer;
375 isc_boolean_t commit_ok;
376 rbtdb_changedlist_t changed_list;
377 rdatasetheaderlist_t resigned_list;
378 ISC_LINK(struct rbtdb_version) link;
379 dns_db_secure_t secure;
380 isc_boolean_t havensec3;
381 /* NSEC3 parameters */
384 isc_uint16_t iterations;
385 isc_uint8_t salt_length;
386 unsigned char salt[DNS_NSEC3_SALTSIZE];
389 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
394 #if DNS_RBTDB_USERWLOCK
399 isc_rwlock_t tree_lock;
400 unsigned int node_lock_count;
401 rbtdb_nodelock_t * node_locks;
402 dns_rbtnode_t * origin_node;
403 dns_stats_t * rrsetstats; /* cache DB only */
404 /* Locked by lock. */
406 isc_refcount_t references;
407 unsigned int attributes;
408 rbtdb_serial_t current_serial;
409 rbtdb_serial_t least_serial;
410 rbtdb_serial_t next_serial;
411 rbtdb_version_t * current_version;
412 rbtdb_version_t * future_version;
413 rbtdb_versionlist_t open_versions;
414 isc_boolean_t overmem;
416 dns_dbnode_t *soanode;
417 dns_dbnode_t *nsnode;
420 * This is a linked list used to implement the LRU cache. There will
421 * be node_lock_count linked lists here. Nodes in bucket 1 will be
422 * placed on the linked list rdatasets[1].
424 rdatasetheaderlist_t *rdatasets;
427 * Temporary storage for stale cache nodes and dynamically deleted
428 * nodes that await being cleaned up.
430 rbtnodelist_t *deadnodes;
433 * Heaps. Each of these is used for TTL based expiry.
437 /* Locked by tree_lock. */
442 unsigned int quantum;
445 #define RBTDB_ATTR_LOADED 0x01
446 #define RBTDB_ATTR_LOADING 0x02
453 rbtdb_version_t * rbtversion;
454 rbtdb_serial_t serial;
455 unsigned int options;
456 dns_rbtnodechain_t chain;
457 isc_boolean_t copy_name;
458 isc_boolean_t need_cleanup;
460 dns_rbtnode_t * zonecut;
461 rdatasetheader_t * zonecut_rdataset;
462 rdatasetheader_t * zonecut_sigrdataset;
463 dns_fixedname_t zonecut_name;
475 static void rdataset_disassociate(dns_rdataset_t *rdataset);
476 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
477 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
478 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
479 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
480 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
481 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
484 dns_rdataset_t *negsig);
485 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
488 dns_rdataset_t *negsig);
489 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
490 dns_rdatasetadditional_t type,
491 dns_rdatatype_t qtype,
492 dns_acache_t *acache,
495 dns_dbversion_t **versionp,
496 dns_dbnode_t **nodep,
500 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
501 dns_rdatasetadditional_t type,
502 dns_rdatatype_t qtype,
503 dns_acache_t *acache,
506 dns_dbversion_t *version,
509 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
510 dns_rdataset_t *rdataset,
511 dns_rdatasetadditional_t type,
512 dns_rdatatype_t qtype);
513 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
515 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
517 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
518 isc_boolean_t tree_locked);
519 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
520 isc_stdtime_t now, isc_boolean_t tree_locked);
521 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
522 rdatasetheader_t *newheader);
523 static void prune_tree(isc_task_t *task, isc_event_t *event);
525 static dns_rdatasetmethods_t rdataset_methods = {
526 rdataset_disassociate,
536 rdataset_getadditional,
537 rdataset_setadditional,
538 rdataset_putadditional
541 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
542 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
543 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
544 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
545 dns_rdataset_t *rdataset);
547 static dns_rdatasetitermethods_t rdatasetiter_methods = {
548 rdatasetiter_destroy,
/*
 * rdataset iterator for this database implementation: the generic
 * dns_rdatasetiter_t ('common') followed by a pointer to an
 * rdatasetheader_t ('current').
 * NOTE(review): presumably 'current' is the header the iterator is
 * positioned on -- confirm against rdatasetiter_first()/_next().
 */
554 typedef struct rbtdb_rdatasetiter {
555 dns_rdatasetiter_t common;
556 rdatasetheader_t * current;
557 } rbtdb_rdatasetiter_t;
559 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
560 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
561 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
562 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
564 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
565 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
566 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
567 dns_dbnode_t **nodep,
569 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
570 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
573 static dns_dbiteratormethods_t dbiterator_methods = {
585 #define DELETION_BATCH_MAX 64
588 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
/*
 * Database iterator state.
 *
 * The structure carries two node chains ('chain' and 'nsec3chain') plus
 * 'current', a pointer to a chain; 'deletions' can hold up to
 * DELETION_BATCH_MAX node pointers.
 * NOTE(review): presumably 'current' selects whichever of the two chains
 * is being walked, and 'nsec3only' / 'nonsec3' restrict the walk to one
 * tree -- confirm against the dbiterator_*() functions.  Some member
 * declarations appear to be absent from this listing.
 */
590 typedef struct rbtdb_dbiterator {
591 dns_dbiterator_t common;
592 isc_boolean_t paused;
593 isc_boolean_t new_origin;
594 isc_rwlocktype_t tree_locked;
596 dns_fixedname_t name;
597 dns_fixedname_t origin;
598 dns_rbtnodechain_t chain;
599 dns_rbtnodechain_t nsec3chain;
600 dns_rbtnodechain_t *current;
602 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
604 isc_boolean_t nsec3only;
605 isc_boolean_t nonsec3;
606 } rbtdb_dbiterator_t;
609 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
610 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
612 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
614 static void overmem(dns_db_t *db, isc_boolean_t overmem);
615 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
616 isc_boolean_t *nsec3createflag);
619 * 'init_count' is used to initialize 'newheader->count' which in turn
620 * is used to determine where in the cycle rrset-order cyclic starts.
621 * We don't lock this as we don't care about simultaneous updates.
624 * Both init_count and header->count can be ISC_UINT32_MAX.
625 * The count on the returned rdataset however can't be as
626 * that indicates that the database does not implement cyclic
629 static unsigned int init_count;
634 * If a routine is going to lock more than one lock in this module, then
635 * the locking must be done in the following order:
639 * Node Lock (Only one from the set may be locked at one time by
644 * Failure to follow this hierarchy can result in deadlock.
650 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * attach: take one more reference on the database 'source'.
 *
 * 'source' is treated as a dns_rbtdb_t, checked with VALID_RBTDB(), and
 * its 'references' counter is incremented (the resulting count is not
 * requested: NULL is passed).
 * NOTE(review): 'targetp' is not used on any line present in this
 * listing; presumably *targetp is set to 'source' -- confirm.
 */
659 attach(dns_db_t *source, dns_db_t **targetp) {
660 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
662 REQUIRE(VALID_RBTDB(rbtdb));
664 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * Task event handler that continues freeing a database: the rbtdb is
 * taken from event->ev_arg and handed to free_rbtdb() with logging
 * enabled (ISC_TRUE), together with the event itself.
 * NOTE(review): 'task' is not referenced on the lines present here.
 */
670 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
671 dns_rbtdb_t *rbtdb = event->ev_arg;
675 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * Adjust the rdataset statistics in rbtdb->rrsetstats for 'header'.
 *
 * The counter key is DNS_RDATASTATSTYPE_VALUE(base, statattributes):
 *  - header flagged NXDOMAIN: the NXDOMAIN attribute is used and 'base'
 *    keeps its initial value 0;
 *  - base part of header->type is 0: the NXRRSET attribute is used and
 *    the extended (upper 16 bit) part of the type becomes 'base';
 *  - the remaining assignment takes the base part of header->type
 *    (presumably the final else branch; that line is absent here).
 *
 * The database must be a cache (INSIST).
 * NOTE(review): the test on 'increment' that selects between the
 * increment and decrement calls is not present in this listing.
 */
679 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
680 isc_boolean_t increment)
682 dns_rdatastatstype_t statattributes = 0;
683 dns_rdatastatstype_t base = 0;
684 dns_rdatastatstype_t type;
686 /* At the moment we count statistics only for cache DB */
687 INSIST(IS_CACHE(rbtdb));
689 if (NXDOMAIN(header))
690 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
691 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
692 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
693 base = RBTDB_RDATATYPE_EXT(header->type);
695 base = RBTDB_RDATATYPE_BASE(header->type);
697 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
699 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
701 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * Store 'newttl' in header->rdh_ttl and, when the header sits in a heap,
 * tell that heap its key changed.
 *
 * The heap is rbtdb->heaps[header->node->locknum]; the header position
 * is header->heap_index.  Conditions tested before touching the heap:
 * the database is not a cache, heap_index is 0, the TTL is unchanged,
 * or the heap array / bucket heap is NULL.
 * NOTE(review): the statements executed for those conditions (presumably
 * early returns) and the comparison choosing between
 * isc_heap_increased() and isc_heap_decreased() are not present in this
 * listing.
 */
705 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
710 oldttl = header->rdh_ttl;
711 header->rdh_ttl = newttl;
713 if (!IS_CACHE(rbtdb))
717 * It's possible the rbtdb is not a cache. If this is the case,
718 * we will not have a heap, and we move on. If we do, though,
719 * we might need to adjust things.
721 if (header->heap_index == 0 || newttl == oldttl)
723 idx = header->node->locknum;
724 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
726 heap = rbtdb->heaps[idx];
729 isc_heap_increased(heap, header->heap_index);
731 isc_heap_decreased(heap, header->heap_index);
735 * These functions allow the heap code to rank the priority of each
736 * element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * Heap ordering callback keyed on TTL: both arguments are treated as
 * rdatasetheader_t pointers and their 'rdh_ttl' values are compared
 * with a strict less-than.
 * NOTE(review): the return statements are not present in this listing.
 */
739 ttl_sooner(void *v1, void *v2) {
740 rdatasetheader_t *h1 = v1;
741 rdatasetheader_t *h2 = v2;
743 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * Heap ordering callback keyed on re-signing time: both arguments are
 * treated as rdatasetheader_t pointers and their 'resign' values are
 * compared with a strict less-than.
 * NOTE(review): the return statements are not present in this listing.
 */
749 resign_sooner(void *v1, void *v2) {
750 rdatasetheader_t *h1 = v1;
751 rdatasetheader_t *h2 = v2;
753 if (h1->resign < h2->resign)
759 * This function sets the heap index into the header.
/*
 * Heap index callback: 'what' is treated as an rdatasetheader_t and
 * 'index' is recorded in its 'heap_index' member.
 */
762 set_index(void *what, unsigned int index) {
763 rdatasetheader_t *h = what;
765 h->heap_index = index;
769 * Work out how many nodes can be deleted in the time between two
770 * requests to the nameserver. Smooth the resulting number and use it
771 * as an estimate for the number of nodes to be deleted in the next
/*
 * Derive the next deletion quantum from the previous one ('old') and
 * the time that has passed since 'start'.
 *
 * Visible arithmetic:
 *   interval = 1000000 / pps         (microseconds per packet, where
 *                                     pps starts as dns_pps)
 *   new      = old * interval / usecs (usecs = time since 'start')
 *   new      = (new + 3 * old) / 4   (smoothing weighted to 'old')
 * The result is written to the log at debug level 1.
 *
 * NOTE(review): the declarations of 'usecs', 'end' and 'new', any
 * clamping of 'pps', 'interval' or 'new', the handling of usecs == 0
 * described by the comment in the body, and the return statement are
 * not present in this listing.
 * NOTE(review): 'new' is logged with a signed conversion; if it is
 * declared unsigned int, an unsigned conversion would be the matching
 * specifier -- confirm against the declaration.
 */
775 adjust_quantum(unsigned int old, isc_time_t *start) {
776 unsigned int pps = dns_pps; /* packets per second */
777 unsigned int interval;
786 interval = 1000000 / pps; /* interval in usec */
789 usecs = isc_time_microdiff(&end, start);
792 * We were unable to measure the amount of time taken.
793 * Double the nodes deleted next time.
800 new = old * interval;
801 new /= (unsigned int)usecs;
808 new = (new + old * 3) / 4;
810 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
811 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * Tear down 'rbtdb' and release everything it owns.
 *
 * Order of work visible in this listing:
 *  1. For a class IN cache, overmem() is called with (isc_boolean_t)-1.
 *  2. Preconditions: no future version; a current version exists or the
 *     open version list is empty.
 *  3. The current version (if any) has its reference dropped, is
 *     unlinked from 'open_versions' and its memory is returned.
 *  4. Every node is unlinked from each 'deadnodes' bucket.
 *  5. 'quantum' is set to 100 when a task is attached, otherwise 0.
 *  6. The main tree, then the NSEC3 tree, is destroyed with
 *     dns_rbt_destroy2() under that quantum.  On ISC_R_QUOTA the
 *     quantum is re-tuned with adjust_quantum(), a
 *     DNS_EVENT_FREESTORAGE event is allocated and it is sent to
 *     rbtdb->task.
 *  7. The event is freed, a "done" message is logged, and the origin
 *     name, node locks, LRU lists, dead node buckets, heaps, statistics,
 *     tree lock, reference counter, task and database lock are released.
 *  8. Both magic numbers are cleared, the rbtdb memory is returned and
 *     the on-destroy notification is delivered.
 *
 * NOTE(review): the declarations of 'i', 'result', 'start' and 'node',
 * the callback/argument passed to isc_event_allocate(), the statements
 * that leave the function after isc_task_send(), and the conditions
 * guarding steps 5 and 7 (presumably tests on 'event' and 'log') are not
 * present in this listing -- confirm against the complete source.
 */
817 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
819 isc_ondestroy_t ondest;
821 char buf[DNS_NAME_FORMATSIZE];
824 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
825 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
827 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
828 REQUIRE(rbtdb->future_version == NULL);
830 if (rbtdb->current_version != NULL) {
833 isc_refcount_decrement(&rbtdb->current_version->references,
836 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
837 isc_refcount_destroy(&rbtdb->current_version->references);
838 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
839 sizeof(rbtdb_version_t));
843 * We assume the number of remaining dead nodes is reasonably small;
844 * the overhead of unlinking all nodes here should be negligible.
846 for (i = 0; i < rbtdb->node_lock_count; i++) {
849 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
850 while (node != NULL) {
851 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
852 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
857 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
859 if (rbtdb->tree != NULL) {
860 isc_time_now(&start);
861 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
862 if (result == ISC_R_QUOTA) {
863 INSIST(rbtdb->task != NULL);
864 if (rbtdb->quantum != 0)
865 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
868 event = isc_event_allocate(rbtdb->common.mctx,
870 DNS_EVENT_FREESTORAGE,
873 sizeof(isc_event_t));
876 isc_task_send(rbtdb->task, &event);
879 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
882 if (rbtdb->nsec3 != NULL) {
883 isc_time_now(&start);
884 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
885 if (result == ISC_R_QUOTA) {
886 INSIST(rbtdb->task != NULL);
887 if (rbtdb->quantum != 0)
888 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
891 event = isc_event_allocate(rbtdb->common.mctx,
893 DNS_EVENT_FREESTORAGE,
896 sizeof(isc_event_t));
899 isc_task_send(rbtdb->task, &event);
902 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
906 isc_event_free(&event);
908 if (dns_name_dynamic(&rbtdb->common.origin))
909 dns_name_format(&rbtdb->common.origin, buf,
912 strcpy(buf, "<UNKNOWN>");
913 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
914 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
915 "done free_rbtdb(%s)", buf);
917 if (dns_name_dynamic(&rbtdb->common.origin))
918 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
919 for (i = 0; i < rbtdb->node_lock_count; i++) {
920 isc_refcount_destroy(&rbtdb->node_locks[i].references);
921 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
925 * Clean up LRU / re-signing order lists.
927 if (rbtdb->rdatasets != NULL) {
928 for (i = 0; i < rbtdb->node_lock_count; i++)
929 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
930 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
931 rbtdb->node_lock_count *
932 sizeof(rdatasetheaderlist_t));
935 * Clean up dead node buckets.
937 if (rbtdb->deadnodes != NULL) {
938 for (i = 0; i < rbtdb->node_lock_count; i++)
939 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
940 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
941 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
944 * Clean up heap objects.
946 if (rbtdb->heaps != NULL) {
947 for (i = 0; i < rbtdb->node_lock_count; i++)
948 isc_heap_destroy(&rbtdb->heaps[i]);
949 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
950 rbtdb->node_lock_count *
951 sizeof(isc_heap_t *));
954 if (rbtdb->rrsetstats != NULL)
955 dns_stats_detach(&rbtdb->rrsetstats);
957 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
958 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
959 isc_rwlock_destroy(&rbtdb->tree_lock);
960 isc_refcount_destroy(&rbtdb->references);
961 if (rbtdb->task != NULL)
962 isc_task_detach(&rbtdb->task);
964 RBTDB_DESTROYLOCK(&rbtdb->lock);
965 rbtdb->common.magic = 0;
966 rbtdb->common.impmagic = 0;
/* Copy 'ondest' out first: the rbtdb memory is returned by the next call. */
967 ondest = rbtdb->common.ondest;
968 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
969 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * Begin shutting the database down and free it once nothing is active.
 *
 * Visible steps:
 *  - the cached 'soanode' and 'nsnode' references are detached;
 *  - every node lock bucket is flagged 'exiting' under its write lock
 *    and its reference counter is read;
 *  - under the database write lock 'inactive' is subtracted from
 *    rbtdb->active, and 'want_free' is set when 'active' reaches 0;
 *  - a "calling free_rbtdb" message is logged and free_rbtdb() is
 *    invoked with logging enabled and no event.
 *
 * NOTE(review): the declaration of 'i', the comparison applied to the
 * bucket reference count, the increment of 'inactive' and the test of
 * 'want_free' guarding the final call are not present in this listing.
 */
973 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
974 isc_boolean_t want_free = ISC_FALSE;
976 unsigned int inactive = 0;
978 /* XXX check for open versions here */
980 if (rbtdb->soanode != NULL)
981 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
982 if (rbtdb->nsnode != NULL)
983 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
986 * Even though there are no external direct references, there still
987 * may be nodes in use.
989 for (i = 0; i < rbtdb->node_lock_count; i++) {
990 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
991 rbtdb->node_locks[i].exiting = ISC_TRUE;
992 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
993 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1000 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1001 rbtdb->active -= inactive;
1002 if (rbtdb->active == 0)
1003 want_free = ISC_TRUE;
1004 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1006 char buf[DNS_NAME_FORMATSIZE];
1007 if (dns_name_dynamic(&rbtdb->common.origin))
1008 dns_name_format(&rbtdb->common.origin, buf,
1011 strcpy(buf, "<UNKNOWN>");
1012 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1013 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1014 "calling free_rbtdb(%s)", buf);
1015 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * detach: drop one reference on the database *dbp.
 *
 * The database is validated, its 'references' counter is decremented
 * (the new count is written to 'refs') and maybe_free_rbtdb() is called.
 * NOTE(review): the declaration of 'refs', the condition guarding the
 * call (presumably refs == 0) and any clearing of *dbp are not present
 * in this listing.
 */
1021 detach(dns_db_t **dbp) {
1022 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1025 REQUIRE(VALID_RBTDB(rbtdb));
1027 isc_refcount_decrement(&rbtdb->references, &refs);
1030 maybe_free_rbtdb(rbtdb);
/*
 * currentversion: hand out a reference to the current version.
 *
 * While holding the database lock for reading, rbtdb->current_version
 * is fetched and its reference counter incremented; after the lock is
 * released the version pointer is stored in *versionp.
 * NOTE(review): the declaration of 'refs' and any check on its value are
 * not present in this listing.
 */
1036 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1037 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1038 rbtdb_version_t *version;
1041 REQUIRE(VALID_RBTDB(rbtdb));
1043 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1044 version = rbtdb->current_version;
1045 isc_refcount_increment(&version->references, &refs);
1046 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1048 *versionp = (dns_dbversion_t *)version;
/*
 * Allocate a version object from 'mctx' and initialize it.
 *
 * Fields set on the visible lines: 'serial', the reference counter
 * (initialized to 'references'), 'writer', 'commit_ok' (ISC_FALSE), the
 * 'changed_list' and 'resigned_list' lists and the 'link' member.  When
 * isc_refcount_init() fails, the memory is returned to 'mctx'.
 * The 'secure', 'havensec3' and NSEC3 parameter members are not
 * assigned on the visible lines.
 * NOTE(review): the return statements (presumably NULL on either
 * failure, otherwise the new version) are not present in this listing.
 */
1051 static inline rbtdb_version_t *
1052 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1053 unsigned int references, isc_boolean_t writer)
1055 isc_result_t result;
1056 rbtdb_version_t *version;
1058 version = isc_mem_get(mctx, sizeof(*version));
1059 if (version == NULL)
1061 version->serial = serial;
1062 result = isc_refcount_init(&version->references, references);
1063 if (result != ISC_R_SUCCESS) {
1064 isc_mem_put(mctx, version, sizeof(*version));
1067 version->writer = writer;
1068 version->commit_ok = ISC_FALSE;
1069 ISC_LIST_INIT(version->changed_list);
1070 ISC_LIST_INIT(version->resigned_list);
1071 ISC_LINK_INIT(version, link);
/*
 * newversion: open a new version of the database.
 *
 * Requires a valid rbtdb, versionp != NULL with *versionp == NULL, and
 * no future version already open.
 *
 * Under the database write lock a version with serial 'next_serial' and
 * one reference is allocated.  On success it is marked commit_ok,
 * inherits 'secure' and 'havensec3' from the current version, and
 * either copies the NSEC3 parameters (when havensec3 is set) or has
 * them zeroed; 'next_serial' is then advanced and the version becomes
 * 'future_version'.
 *
 * Returns ISC_R_NOMEMORY when the allocation failed, otherwise stores
 * the version in *versionp and returns ISC_R_SUCCESS.
 * NOTE(review): the final argument of allocate_version(), the else line
 * between the copy and zero branches, and the zeroing of 'flags' and
 * 'hash' are not present in this listing.
 */
1077 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1078 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1079 rbtdb_version_t *version;
1081 REQUIRE(VALID_RBTDB(rbtdb));
1082 REQUIRE(versionp != NULL && *versionp == NULL);
1083 REQUIRE(rbtdb->future_version == NULL);
1085 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1086 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1087 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1089 if (version != NULL) {
1090 version->commit_ok = ISC_TRUE;
1091 version->secure = rbtdb->current_version->secure;
1092 version->havensec3 = rbtdb->current_version->havensec3;
1093 if (version->havensec3) {
1094 version->flags = rbtdb->current_version->flags;
1095 version->iterations =
1096 rbtdb->current_version->iterations;
1097 version->hash = rbtdb->current_version->hash;
1098 version->salt_length =
1099 rbtdb->current_version->salt_length;
1100 memcpy(version->salt, rbtdb->current_version->salt,
1101 version->salt_length);
1104 version->iterations = 0;
1106 version->salt_length = 0;
1107 memset(version->salt, 0, sizeof(version->salt));
1109 rbtdb->next_serial++;
1110 rbtdb->future_version = version;
1112 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1114 if (version == NULL)
1115 return (ISC_R_NOMEMORY);
1117 *versionp = version;
1119 return (ISC_R_SUCCESS);
/*
 * attachversion: take one more reference on the version 'source'.
 *
 * The version reference counter is incremented (new count written to
 * 'refs') and the version pointer is stored in *targetp.
 * NOTE(review): the declaration of 'refs' and any check on its value are
 * not present in this listing.
 */
1123 attachversion(dns_db_t *db, dns_dbversion_t *source,
1124 dns_dbversion_t **targetp)
1126 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1127 rbtdb_version_t *rbtversion = source;
1130 REQUIRE(VALID_RBTDB(rbtdb));
1132 isc_refcount_increment(&rbtversion->references, &refs);
1135 *targetp = rbtversion;
/*
 * Record that 'node' was changed in the writer version 'version'.
 *
 * The record is allocated before the database write lock is taken.
 * With the lock held, and provided the allocation succeeded, the node
 * reference count is incremented, the record is filled in (node, dirty
 * = ISC_FALSE) and appended to version->changed_list.
 * 'version' must be a writer version (REQUIRE).
 *
 * NOTE(review): version->commit_ok is set to ISC_FALSE on a visible
 * line; presumably that happens only when the allocation failed, but
 * the else line is absent from this listing -- confirm.  The
 * declaration of 'refs' and the return statement (presumably returning
 * 'changed') are also absent.
 */
1138 static rbtdb_changed_t *
1139 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1140 dns_rbtnode_t *node)
1142 rbtdb_changed_t *changed;
1146 * Caller must be holding the node lock if its reference must be
1147 * protected by the lock.
1150 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1152 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1154 REQUIRE(version->writer);
1156 if (changed != NULL) {
1157 dns_rbtnode_refincrement(node, &refs);
1159 changed->node = node;
1160 changed->dirty = ISC_FALSE;
1161 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1163 version->commit_ok = ISC_FALSE;
1165 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Release an additional-cache control array belonging to 'header'.
 *
 * The element count is read from the two bytes that immediately follow
 * the header structure (first byte * 256 + second byte).  Every element
 * must already have NULL 'entry' and 'cbarg' members (INSIST); the
 * array of 'count' acachectl_t elements is then returned to 'mctx'.
 * NOTE(review): the declaration of the 'array' parameter, of 'count'
 * and 'i', and any early exit for a NULL array are not present in this
 * listing.
 */
1171 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1176 unsigned char *raw; /* RDATASLAB */
1179 * The caller must be holding the corresponding node lock.
1185 raw = (unsigned char *)header + sizeof(*header);
1186 count = raw[0] * 256 + raw[1];
1189 * Sanity check: since an additional cache entry has a reference to
1190 * the original DB node (in the callback arg), there should be no
1191 * acache entries when the node can be freed.
1193 for (i = 0; i < count; i++)
1194 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1196 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * Free the noqname structure *noqname and what it owns: the name (when
 * dynamically allocated), the 'neg' and 'negsig' slabs (sizes obtained
 * from dns_rdataslab_size() with a reserve of 0) and finally the
 * structure itself.
 * NOTE(review): any reset of *noqname to NULL is not present in this
 * listing.
 */
1200 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1202 if (dns_name_dynamic(&(*noqname)->name))
1203 dns_name_free(&(*noqname)->name, mctx);
1204 if ((*noqname)->neg != NULL)
1205 isc_mem_put(mctx, (*noqname)->neg,
1206 dns_rdataslab_size((*noqname)->neg, 0));
1207 if ((*noqname)->negsig != NULL)
1208 isc_mem_put(mctx, (*noqname)->negsig,
1209 dns_rdataslab_size((*noqname)->negsig, 0));
1210 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * Initialize a freshly obtained rdataset header: its 'link' member is
 * initialized, and for a class IN cache the header address is printed
 * to stderr.
 * NOTE(review): other member initializations, and any conditional
 * compilation around the debugging output, are not present in this
 * listing.
 */
1215 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1217 ISC_LINK_INIT(h, link);
1221 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1222 fprintf(stderr, "initialized header: %p\n", h);
/*
 * Allocate an rdataset header of sizeof(*h) bytes from 'mctx' and pass
 * it to init_rdataset().  For a class IN cache the header address is
 * printed to stderr.
 * NOTE(review): the allocation failure check, any conditional
 * compilation around the debugging output, and the return statement are
 * not present in this listing.
 */
1228 static inline rdatasetheader_t *
1229 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1231 rdatasetheader_t *h;
1233 h = isc_mem_get(mctx, sizeof(*h));
1238 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1239 fprintf(stderr, "allocated header: %p\n", h);
1241 init_rdataset(rbtdb, h);
/*
 * Release the rdataset header 'rdataset' and everything attached to it.
 *
 * Visible steps, in order:
 *  - if the header exists and carries RDATASET_ATTR_STATCOUNT, its
 *    statistics counter is decremented via update_rrsetstats();
 *  - if linked, it is removed from the LRU list of its lock bucket
 *    (only legal for a cache: INSIST);
 *  - if it has a heap index, it is deleted from the bucket heap and the
 *    index is reset to 0;
 *  - the 'noqname' and 'closest' proofs and both additional-cache
 *    arrays are freed;
 *  - the header memory is returned: sizeof(*rdataset) for a header
 *    flagged NONEXISTENT, otherwise the size reported by
 *    dns_rdataslab_size().
 * NOTE(review): the declarations of 'size' and 'idx', the else line
 * between the two size computations and the second argument of
 * dns_rdataslab_size() are not present in this listing.
 */
1246 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1251 if (EXISTS(rdataset) &&
1252 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1253 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1256 idx = rdataset->node->locknum;
1257 if (ISC_LINK_LINKED(rdataset, link)) {
1258 INSIST(IS_CACHE(rbtdb));
1259 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1261 if (rdataset->heap_index != 0)
1262 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1263 rdataset->heap_index = 0;
1265 if (rdataset->noqname != NULL)
1266 free_noqname(mctx, &rdataset->noqname);
1267 if (rdataset->closest != NULL)
1268 free_noqname(mctx, &rdataset->closest);
1270 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1271 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1273 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1274 size = sizeof(*rdataset);
1276 size = dns_rdataslab_size((unsigned char *)rdataset,
1278 isc_mem_put(mctx, rdataset, size);
/*
 * Mark every rdataset header at 'node' whose serial equals 'serial' with
 * RDATASET_ATTR_IGNORE.  Both the top-level headers (linked by 'next') and
 * the headers on each 'down' chain are examined.  'make_dirty' records
 * whether anything was marked.
 * NOTE(review): what is done with 'make_dirty' afterwards is not visible in
 * this excerpt -- presumably it sets node->dirty; confirm.
 */
1282 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1283 rdatasetheader_t *header, *dcurrent;
1284 isc_boolean_t make_dirty = ISC_FALSE;
1287 * Caller must hold the node lock.
1291 * We set the IGNORE attribute on rdatasets with serial number
1292 * 'serial'. When the reference count goes to zero, these rdatasets
1293 * will be cleaned up; until that time, they will be ignored.
1295 for (header = node->data; header != NULL; header = header->next) {
1296 if (header->serial == serial) {
1297 header->attributes |= RDATASET_ATTR_IGNORE;
1298 make_dirty = ISC_TRUE;
1300 for (dcurrent = header->down;
1302 dcurrent = dcurrent->down) {
1303 if (dcurrent->serial == serial) {
1304 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1305 make_dirty = ISC_TRUE;
/*
 * Free every header on the 'down' chain of 'top' with free_rdataset().
 * 'top' itself is not freed here.
 */
1314 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1316 rdatasetheader_t *d, *down_next;
1318 for (d = top->down; d != NULL; d = down_next) {
/* Fetch the successor first: 'd' is released by the call below. */
1319 down_next = d->down;
1320 free_rdataset(rbtdb, mctx, d);
/*
 * Clean up the rdataset headers of a cache node: for every top-level
 * header the whole 'down' chain is freed, and the top-level header itself
 * is unlinked and freed when it is NONEXISTENT or STALE.
 * NOTE(review): the initialization and advancing of 'top_prev' are not
 * visible in this excerpt -- confirm against the full file.
 */
1326 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1327 rdatasetheader_t *current, *top_prev, *top_next;
1328 isc_mem_t *mctx = rbtdb->common.mctx;
1331 * Caller must be holding the node lock.
1335 for (current = node->data; current != NULL; current = top_next) {
/* Remember the successor: 'current' may be freed in this iteration. */
1336 top_next = current->next;
1337 clean_stale_headers(rbtdb, mctx, current);
1339 * If current is nonexistent or stale, we can clean it up.
1341 if ((current->attributes &
1342 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1343 if (top_prev != NULL)
1344 top_prev->next = current->next;
1346 node->data = current->next;
1347 free_rdataset(rbtdb, mctx, current);
/*
 * Remove rdataset headers at a zone node that are no longer needed.
 *
 * For each top-level header the visible code: (1) drops entries on the
 * 'down' chain that duplicate their parent's serial (further conditions on
 * that test are not visible here); (2) if the top-level header is marked
 * IGNORE, frees it and pulls up the next entry of its 'down' chain, if
 * any; (3) frees the first 'down' entry older than 'least_serial' together
 * with everything below it; (4) frees a NONEXISTENT top-level header.
 * 'still_dirty' is set when a top-level header keeps a 'down' chain.
 *
 * 'least_serial' must be non-zero.
 * NOTE(review): how 'still_dirty' is consumed, and the maintenance of
 * 'top_prev' and 'dparent', are not visible in this excerpt -- confirm.
 */
1355 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1356 rbtdb_serial_t least_serial)
1358 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1359 rdatasetheader_t *top_prev, *top_next;
1360 isc_mem_t *mctx = rbtdb->common.mctx;
1361 isc_boolean_t still_dirty = ISC_FALSE;
1364 * Caller must be holding the node lock.
1366 REQUIRE(least_serial != 0);
1369 for (current = node->data; current != NULL; current = top_next) {
1370 top_next = current->next;
1373 * First, we clean up any instances of multiple rdatasets
1374 * with the same serial number, or that have the IGNORE
1378 for (dcurrent = current->down;
1380 dcurrent = down_next) {
1381 down_next = dcurrent->down;
/* Serials are expected never to increase when walking down the chain. */
1382 INSIST(dcurrent->serial <= dparent->serial);
1383 if (dcurrent->serial == dparent->serial ||
1385 if (down_next != NULL)
1386 down_next->next = dparent;
1387 dparent->down = down_next;
1388 free_rdataset(rbtdb, mctx, dcurrent);
1394 * We've now eliminated all IGNORE datasets with the possible
1395 * exception of current, which we now check.
1397 if (IGNORE(current)) {
1398 down_next = current->down;
1399 if (down_next == NULL) {
1400 if (top_prev != NULL)
1401 top_prev->next = current->next;
1403 node->data = current->next;
1404 free_rdataset(rbtdb, mctx, current);
1406 * current no longer exists, so we can
1407 * just continue with the loop.
1412 * Pull up current->down, making it the new
1415 if (top_prev != NULL)
1416 top_prev->next = down_next;
1418 node->data = down_next;
1419 down_next->next = top_next;
1420 free_rdataset(rbtdb, mctx, current);
1421 current = down_next;
1426 * We now try to find the first down node less than the
1430 for (dcurrent = current->down;
1432 dcurrent = down_next) {
1433 down_next = dcurrent->down;
1434 if (dcurrent->serial < least_serial)
1440 * If there is a such an rdataset, delete it and any older
1443 if (dcurrent != NULL) {
1445 down_next = dcurrent->down;
1446 INSIST(dcurrent->serial <= least_serial);
1447 free_rdataset(rbtdb, mctx, dcurrent);
1448 dcurrent = down_next;
1449 } while (dcurrent != NULL);
/* Everything below 'dparent' has been freed; terminate the chain there. */
1450 dparent->down = NULL;
1454 * Note. The serial number of 'current' might be less than
1455 * least_serial too, but we cannot delete it because it is
1456 * the most recent version, unless it is a NONEXISTENT
1459 if (current->down != NULL) {
1460 still_dirty = ISC_TRUE;
1464 * If this is a NONEXISTENT rdataset, we can delete it.
1466 if (NONEXISTENT(current)) {
1467 if (top_prev != NULL)
1468 top_prev->next = current->next;
1470 node->data = current->next;
1471 free_rdataset(rbtdb, mctx, current);
1481 * Clean up dead nodes. These are nodes which have no references, and
1482 * have no data. They are dead but we could not or chose not to delete
1483 * them when we deleted all the data at that node because we did not want
1484 * to wait for the tree write lock.
1486 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * Take nodes off the head of rbtdb->deadnodes[bucketnum] and delete them
 * from their tree with dns_rbt_deletenode().  A failed deletion is only
 * logged as a warning.  The loop is bounded by 'count' (initially 10).
 * NOTE(review): the decrement of 'count' and the test that chooses between
 * the nsec3 tree and the main tree are not visible in this excerpt --
 * confirm against the full file.
 */
1489 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1490 dns_rbtnode_t *node;
1491 isc_result_t result;
1492 int count = 10; /* XXXJT: should be adjustable */
1494 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1495 while (node != NULL && count > 0) {
1496 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1499 * Since we're holding a tree write lock, it should be
1500 * impossible for this node to be referenced by others.
1502 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1503 node->data == NULL);
1505 INSIST(!ISC_LINK_LINKED(node, deadlink));
1507 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1510 result = dns_rbt_deletenode(rbtdb->tree, node,
1512 if (result != ISC_R_SUCCESS)
1513 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1514 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1515 "cleanup_dead_nodes: "
1516 "dns_rbt_deletenode: %s",
1517 isc_result_totext(result));
/* Continue with whatever is now at the head of the list. */
1518 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1524 * Caller must be holding the node lock if its reference must be protected
/*
 * Add one reference to 'node'.  When this is the node's first reference,
 * the reference counter of the node's lock bucket is incremented as well,
 * so the bucket counter changes only on a node's 0 -> 1 transition.
 */
1528 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1529 unsigned int lockrefs, noderefs;
1530 isc_refcount_t *lockref;
1532 dns_rbtnode_refincrement0(node, &noderefs);
1533 if (noderefs == 1) { /* this is the first reference to the node */
1534 lockref = &rbtdb->node_locks[node->locknum].references;
1535 isc_refcount_increment0(lockref, &lockrefs);
1536 INSIST(lockrefs != 0);
1538 INSIST(noderefs != 0);
1542 * This function is assumed to be called when a node is newly referenced
1543 * and can be in the deadnode list. In that case the node must be retrieved
1544 * from the list because it is going to be used. In addition, if the caller
1545 * happens to hold a write lock on the tree, it's a good chance to purge dead
1547 * Note: while a new reference is gained in multiple places, there are only very
1548 * few cases where the node can be in the deadnode list (only empty nodes can
1549 * have been added to the list).
/*
 * Take a new reference on 'node' and, when needed, remove it from its
 * bucket's dead-node list.
 *
 * 'treelocktype' is the tree lock type held by the caller; with a write
 * lock, cleanup_dead_nodes() is also run for the node's bucket.
 *
 * The need for the write-locked section is first checked under the weak
 * read lock, and the node's dead-list membership is checked again once
 * the weak write lock is held.
 * NOTE(review): the condition guarding the write-locked section
 * (presumably 'need_relock') is not visible in this excerpt -- confirm.
 */
1552 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1553 isc_rwlocktype_t treelocktype)
1555 isc_boolean_t need_relock = ISC_FALSE;
1557 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1558 new_reference(rbtdb, node);
1560 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1561 isc_rwlocktype_read);
/* Relock if the node is on the dead list ... */
1562 if (ISC_LINK_LINKED(node, deadlink))
1563 need_relock = ISC_TRUE;
/* ... or if other dead nodes are waiting and the tree is write-locked. */
1564 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1565 treelocktype == isc_rwlocktype_write)
1566 need_relock = ISC_TRUE;
1567 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1568 isc_rwlocktype_read);
1570 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1571 isc_rwlocktype_write);
1572 if (ISC_LINK_LINKED(node, deadlink))
1573 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1575 if (treelocktype == isc_rwlocktype_write)
1576 cleanup_dead_nodes(rbtdb, node->locknum);
1577 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1578 isc_rwlocktype_write);
1581 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1585 * Caller must be holding the node lock; either the "strong", read or write
1586 * lock. Note that the lock must be held even when node references are
1587 * atomically modified; in that case the decrement operation itself does not
1588 * have to be protected, but we must avoid a race condition where multiple
1589 * threads are decreasing the reference to zero simultaneously and at least
1590 * one of them is going to free the node.
1591 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * Drop one reference to 'node'; when the node ends up unreferenced, clean
 * its rdataset headers and try to delete it from the tree.
 *
 * 'least_serial': least serial still in use, or 0 when the caller does not
 *                 know it (it is then read from rbtdb->least_serial under
 *                 the database read lock).
 * 'nlock':        node lock type held by the caller; a read lock is
 *                 exchanged for a write lock on the slow path and
 *                 downgraded again before returning.
 * 'tlock':        tree lock type held by the caller (none, read or write).
 * 'pruning':      when true, no pruning event is dispatched; prune_tree()
 *                 calls this function with ISC_TRUE.
 *
 * Fast path: returns ISC_TRUE iff the node's count reached zero.
 * Slow path: returns 'no_reference'.
 * NOTE(review): several 'else' branches, early returns and closing braces
 * are not visible in this excerpt; the comments below describe only the
 * visible statements.
 */
1594 static isc_boolean_t
1595 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1596 rbtdb_serial_t least_serial,
1597 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1598 isc_boolean_t pruning)
1600 isc_result_t result;
1601 isc_boolean_t write_locked;
1602 rbtdb_nodelock_t *nodelock;
1603 unsigned int refs, nrefs;
1604 int bucket = node->locknum;
1605 isc_boolean_t no_reference;
1607 nodelock = &rbtdb->node_locks[bucket];
1609 /* Handle easy and typical case first. */
1610 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1611 dns_rbtnode_refdecrement(node, &nrefs);
1612 INSIST((int)nrefs >= 0);
1614 isc_refcount_decrement(&nodelock->references, &refs);
1615 INSIST((int)refs >= 0);
1617 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1620 /* Upgrade the lock? */
1621 if (nlock == isc_rwlocktype_read) {
/* Not atomic: the read lock is released before the write lock is taken. */
1622 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1623 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1625 dns_rbtnode_refdecrement(node, &nrefs);
1626 INSIST((int)nrefs >= 0);
1628 /* Restore the lock? */
1629 if (nlock == isc_rwlocktype_read)
1630 NODE_WEAKDOWNGRADE(&nodelock->lock);
/* Headers are cleaned only for a dirty node with no references left. */
1634 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1635 if (IS_CACHE(rbtdb))
1636 clean_cache_node(rbtdb, node);
1638 if (least_serial == 0) {
1640 * Caller doesn't know the least serial.
1643 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1644 least_serial = rbtdb->least_serial;
1645 RBTDB_UNLOCK(&rbtdb->lock,
1646 isc_rwlocktype_read);
1648 clean_zone_node(rbtdb, node, least_serial);
1652 isc_refcount_decrement(&nodelock->references, &refs);
1653 INSIST((int)refs >= 0);
1656 * XXXDCL should this only be done for cache zones?
1658 if (node->data != NULL || node->down != NULL) {
1659 /* Restore the lock? */
1660 if (nlock == isc_rwlocktype_read)
1661 NODE_WEAKDOWNGRADE(&nodelock->lock);
1666 * Attempt to switch to a write lock on the tree. If this fails,
1667 * we will add this node to a linked list of nodes in this locking
1668 * bucket which we will free later.
1670 if (tlock != isc_rwlocktype_write) {
1672 * Locking hierarchy notwithstanding, we don't need to free
1673 * the node lock before acquiring the tree write lock because
1674 * we only do a trylock.
1676 if (tlock == isc_rwlocktype_read)
1677 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1679 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1680 isc_rwlocktype_write);
/* Any result other than success or "lock busy" is treated as fatal. */
1681 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1682 result == ISC_R_LOCKBUSY);
1684 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1686 write_locked = ISC_TRUE;
/* Start from "unreferenced"; branches below reset this where it is not. */
1688 no_reference = ISC_TRUE;
1689 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1691 * We can now delete the node if the reference counter is
1692 * zero. This should be typically the case, but a different
1693 * thread may still gain a (new) reference just before the
1694 * current thread locks the tree (e.g., in findnode()).
1698 * If this node is the only one in the level it's in, deleting
1699 * this node may recursively make its parent the only node in
1700 * the parent level; if so, and if no one is currently using
1701 * the parent node, this is almost the only opportunity to
1702 * clean it up. But the recursive cleanup is not that trivial
1703 * since the child and parent may be in different lock buckets,
1704 * which would cause a lock order reversal problem. To avoid
1705 * the trouble, we'll dispatch a separate event for batch
1706 * cleaning. We need to check whether we're deleting the node
1707 * as a result of pruning to avoid infinite dispatching.
1708 * Note: pruning happens only when a task has been set for the
1709 * rbtdb. If the user of the rbtdb chooses not to set a task,
1710 * it's their responsibility to purge stale leaves (e.g. by
1711 * periodic walk-through).
1713 if (!pruning && node->parent != NULL &&
1714 node->parent->down == node && node->left == NULL &&
1715 node->right == NULL && rbtdb->task != NULL) {
1719 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1722 sizeof(isc_event_t));
/*
 * The queued event holds a node reference and a database reference;
 * prune_tree() calls decrement_reference() and detach() when it runs.
 */
1724 new_reference(rbtdb, node);
1726 attach((dns_db_t *)rbtdb, &db);
1728 isc_task_send(rbtdb->task, &ev);
1729 no_reference = ISC_FALSE;
1732 * XXX: this is a weird situation. We could
1733 * ignore this error case, but then the stale
1734 * node will unlikely be purged except via a
1735 * rare condition such as manual cleanup. So
1736 * we queue it in the deadnodes list, hoping
1737 * the memory shortage is temporary and the node
1738 * will be deleted later.
1740 isc_log_write(dns_lctx,
1741 DNS_LOGCATEGORY_DATABASE,
1742 DNS_LOGMODULE_CACHE,
1744 "decrement_reference: failed to "
1745 "allocate pruning event");
1746 INSIST(!ISC_LINK_LINKED(node, deadlink));
1747 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
/* Format the node name only when the debug message would be logged. */
1751 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1752 char printname[DNS_NAME_FORMATSIZE];
1754 isc_log_write(dns_lctx,
1755 DNS_LOGCATEGORY_DATABASE,
1756 DNS_LOGMODULE_CACHE,
1758 "decrement_reference: "
1759 "delete from rbt: %p %s",
1761 dns_rbt_formatnodename(node,
1763 sizeof(printname)));
1766 INSIST(!ISC_LINK_LINKED(node, deadlink));
1768 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1771 result = dns_rbt_deletenode(rbtdb->tree, node,
1773 if (result != ISC_R_SUCCESS) {
1774 isc_log_write(dns_lctx,
1775 DNS_LOGCATEGORY_DATABASE,
1776 DNS_LOGMODULE_CACHE,
1778 "decrement_reference: "
1779 "dns_rbt_deletenode: %s",
1780 isc_result_totext(result));
/* Unreferenced but not deleted here: queue it on the dead-node list. */
1783 } else if (dns_rbtnode_refcurrent(node) == 0) {
1784 INSIST(!ISC_LINK_LINKED(node, deadlink));
1785 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1787 no_reference = ISC_FALSE;
1789 /* Restore the lock? */
1790 if (nlock == isc_rwlocktype_read)
1791 NODE_WEAKDOWNGRADE(&nodelock->lock);
1794 * Relock a read lock, or unlock the write lock if no lock was held.
1796 if (tlock == isc_rwlocktype_none)
1798 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1800 if (tlock == isc_rwlocktype_read)
1802 isc_rwlock_downgrade(&rbtdb->tree_lock);
1804 return (no_reference);
1808 * Prune the tree by recursively cleaning-up single leaves. In the worst
1809 * case, the number of iterations is the number of tree levels, which is at
1810 * most the maximum number of domain name labels, i.e., 127. In practice, this
1811 * should be much smaller (only a few times), and even the worst case would be
1812 * acceptable for a single event.
/*
 * Task event handler for pruning.  The database arrives in
 * event->ev_sender and the starting node in event->ev_arg.
 *
 * With the tree write lock and the node's bucket lock held, the node's
 * reference is dropped via decrement_reference() with pruning = ISC_TRUE.
 * If the parent is then left without a 'down' subtree, the parent is
 * referenced and becomes the next candidate.
 * NOTE(review): the loop header and the statements that advance 'node'
 * are not visible in this excerpt -- confirm against the full file.
 */
1815 prune_tree(isc_task_t *task, isc_event_t *event) {
1816 dns_rbtdb_t *rbtdb = event->ev_sender;
1817 dns_rbtnode_t *node = event->ev_arg;
1818 dns_rbtnode_t *parent;
1819 unsigned int locknum;
/* Both fields needed from the event were copied out above. */
1823 isc_event_free(&event);
1825 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1826 locknum = node->locknum;
1827 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/* Read the parent first: the call below may delete 'node'. */
1829 parent = node->parent;
1830 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1831 isc_rwlocktype_write, ISC_TRUE);
1833 if (parent != NULL && parent->down == NULL) {
1835 * node was the only down child of the parent and has
1836 * just been removed. We'll then need to examine the
1837 * parent. Keep the lock if possible; otherwise,
1838 * release the old lock and acquire one for the parent.
1840 if (parent->locknum != locknum) {
1841 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1842 isc_rwlocktype_write);
1843 locknum = parent->locknum;
1844 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1845 isc_rwlocktype_write);
1849 * We need to gain a reference to the node before
1850 * decrementing it in the next iteration. In addition,
1851 * if the node is in the dead-nodes list, extract it
1852 * from the list beforehand as we do in
1853 * reactivate_node().
1855 new_reference(rbtdb, parent);
1856 if (ISC_LINK_LINKED(parent, deadlink)) {
1857 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1864 } while (node != NULL);
/* 'locknum' names whichever bucket lock is held at this point. */
1865 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1866 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
/* Pairs with the attach() made when the pruning event was dispatched. */
1868 detach((dns_db_t **)&rbtdb);
/*
 * Make 'version' the least open version: rbtdb->least_serial takes its
 * serial, and its changed list is moved into '*cleanup_list', leaving
 * version->changed_list empty.
 */
1872 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1873 rbtdb_changedlist_t *cleanup_list)
1876 * Caller must be holding the database lock.
1879 rbtdb->least_serial = version->serial;
/* Copy the list head, then reset the source so only one list owns it. */
1880 *cleanup_list = version->changed_list;
1881 ISC_LIST_INIT(version->changed_list);
/*
 * Move every non-dirty record from version->changed_list to
 * '*cleanup_list'; dirty records stay on the version's list.
 */
1885 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1886 rbtdb_changed_t *changed, *next_changed;
1889 * If the changed record is dirty, then
1890 * an update created multiple versions of
1891 * a given rdataset. We keep this list
1892 * until we're the least open version, at
1893 * which point it's safe to get rid of any
1896 * If the changed record isn't dirty, then
1897 * we don't need it anymore since we're
1898 * committing and not rolling back.
1900 * The caller must be holding the database lock.
1902 for (changed = HEAD(version->changed_list);
1904 changed = next_changed) {
/* Fetch the successor before 'changed' is unlinked below. */
1905 next_changed = NEXT(changed, link);
1906 if (!changed->dirty) {
1907 UNLINK(version->changed_list,
1909 APPEND(*cleanup_list,
/*
 * Work out the security status of 'version' and store it in
 * version->secure (dns_db_insecure, dns_db_partial or dns_db_secure).
 *
 * Visible steps: scan the DNSKEY rdataset at 'origin' for a zone key;
 * look up the NSEC rdataset and its signature at 'origin'; call
 * setnsec3parameters(); then choose the status from 'havensec3',
 * 'hasnsec', 'hasoptbit' and 'nsec3createflag'.
 * NOTE(review): the early exit for a zone without a zone key, the
 * assignment of 'hasnsec', and the type tested by dns_nsec_typepresent()
 * are not visible in this excerpt -- confirm against the full file.
 */
1916 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1917 dns_rdataset_t keyset;
1918 dns_rdataset_t nsecset, signsecset;
1919 dns_rdata_t rdata = DNS_RDATA_INIT;
1920 isc_boolean_t haszonekey = ISC_FALSE;
1921 isc_boolean_t hasnsec = ISC_FALSE;
1922 isc_boolean_t hasoptbit = ISC_FALSE;
1923 isc_boolean_t nsec3createflag = ISC_FALSE;
1924 isc_result_t result;
1926 dns_rdataset_init(&keyset);
1927 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1928 0, 0, &keyset, NULL);
1929 if (result == ISC_R_SUCCESS) {
1930 dns_rdata_t keyrdata = DNS_RDATA_INIT;
/* Walk the DNSKEY records looking for one that is a zone key. */
1931 result = dns_rdataset_first(&keyset);
1932 while (result == ISC_R_SUCCESS) {
1933 dns_rdataset_current(&keyset, &keyrdata);
1934 if (dns_zonekey_iszonekey(&keyrdata)) {
1935 haszonekey = ISC_TRUE;
1938 result = dns_rdataset_next(&keyset);
1940 dns_rdataset_disassociate(&keyset);
1943 version->secure = dns_db_insecure;
1944 version->havensec3 = ISC_FALSE;
1948 dns_rdataset_init(&nsecset);
1949 dns_rdataset_init(&signsecset);
1950 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1951 0, 0, &nsecset, &signsecset);
1952 if (result == ISC_R_SUCCESS) {
/* The NSEC rdataset is examined only when its signature was found too. */
1953 if (dns_rdataset_isassociated(&signsecset)) {
1955 result = dns_rdataset_first(&nsecset);
1956 if (result == ISC_R_SUCCESS) {
1957 dns_rdataset_current(&nsecset, &rdata);
1958 hasoptbit = dns_nsec_typepresent(&rdata,
1961 dns_rdataset_disassociate(&signsecset);
1963 dns_rdataset_disassociate(&nsecset);
/* Sets version->havensec3 and may set 'nsec3createflag'. */
1966 setnsec3parameters(db, version, &nsec3createflag);
1969 * Do we have a valid NSEC/NSEC3 chain?
1971 if (version->havensec3 || (hasnsec && !hasoptbit))
1972 version->secure = dns_db_secure;
1974 * Do we have a NSEC/NSEC3 chain under creation?
1976 else if (hasoptbit || nsec3createflag)
1977 version->secure = dns_db_partial;
1979 version->secure = dns_db_insecure;
1983 * Walk the origin node looking for NSEC3PARAM records.
1984 * Cache the nsec3 parameters.
/*
 * Scan the rdataset headers at the origin node for an NSEC3PARAM rdataset
 * visible in 'version' and copy a usable parameter set (hash, salt,
 * iterations, flags) into 'version', setting version->havensec3.
 * '*nsec3createflag' is set to ISC_TRUE on the visible path that tests
 * DNS_NSEC3FLAG_CREATE.
 *
 * The tree read lock and the origin node's read lock are held for the
 * whole scan.
 * NOTE(review): several 'continue'/'break' statements, the closing parts
 * of multi-line conditions and the raw-pointer advances are not visible in
 * this excerpt; comments describe only the visible statements.
 */
1987 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1988 isc_boolean_t *nsec3createflag)
1990 dns_rbtnode_t *node;
1991 dns_rdata_nsec3param_t nsec3param;
1992 dns_rdata_t rdata = DNS_RDATA_INIT;
1993 isc_region_t region;
1994 isc_result_t result;
1995 rdatasetheader_t *header, *header_next;
1996 unsigned char *raw; /* RDATASLAB */
1997 unsigned int count, length;
1998 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2000 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2001 version->havensec3 = ISC_FALSE;
2002 node = rbtdb->origin_node;
2003 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2004 isc_rwlocktype_read);
2005 for (header = node->data;
2007 header = header_next) {
/* Saved first: 'header' is re-pointed while walking the down chain. */
2008 header_next = header->next;
2010 if (header->serial <= version->serial &&
2012 if (NONEXISTENT(header))
2016 header = header->down;
2017 } while (header != NULL);
2019 if (header != NULL &&
2020 header->type == dns_rdatatype_nsec3param) {
2022 * Find A NSEC3PARAM with a supported algorithm.
/* The slab follows the header; its first two bytes are a big-endian count. */
2024 raw = (unsigned char *)header + sizeof(*header);
2025 count = raw[0] * 256 + raw[1]; /* count */
2026 #if DNS_RDATASET_FIXED
2027 raw += count * 4 + 2;
2031 while (count-- > 0U) {
/* Each record starts with a two-byte big-endian length. */
2032 length = raw[0] * 256 + raw[1];
2033 #if DNS_RDATASET_FIXED
2039 region.length = length;
2041 dns_rdata_fromregion(&rdata,
2042 rbtdb->common.rdclass,
2043 dns_rdatatype_nsec3param,
2045 result = dns_rdata_tostruct(&rdata,
2048 INSIST(result == ISC_R_SUCCESS);
2049 dns_rdata_reset(&rdata);
2051 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2052 !dns_nsec3_supportedhash(nsec3param.hash))
2055 #ifdef RFC5155_STRICT
2056 if (nsec3param.flags != 0)
2059 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2061 *nsec3createflag = ISC_TRUE;
2062 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
/* Record this parameter set in the version. */
2067 memcpy(version->salt, nsec3param.salt,
2068 nsec3param.salt_length);
2069 version->hash = nsec3param.hash;
2070 version->salt_length = nsec3param.salt_length;
2071 version->iterations = nsec3param.iterations;
2072 version->flags = nsec3param.flags;
2073 version->havensec3 = ISC_TRUE;
2075 * Look for a better algorithm than the
2076 * unknown test algorithm.
2078 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2084 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2085 isc_rwlocktype_read);
2086 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Close the version referenced by '*versionp'.
 *
 * One reference to the version is dropped.  If references remain, only a
 * sanity check (the version is not a writer) is visible.  Otherwise, under
 * the database write lock:
 *   - for a writer, the visible code has a path that installs the version
 *     as the current version and a path that collects its changed and
 *     resigned lists for rollback;
 *   - for a reader that is not the current version, its pending cleanups
 *     are taken over (when it was the least open version) or appended to
 *     the next greater version, and it is unlinked from the open list.
 * After the database lock is released: the zone's secure status is
 * recomputed for a committed writer on a non-cache database, the obsolete
 * version structure is freed, the re-signed headers are processed, and the
 * nodes on the cleanup list are processed under the tree write lock.
 *
 * NOTE(review): many branch conditions (presumably on 'commit' and
 * 'rollback'), 'else' lines and early returns are not visible in this
 * excerpt; the comments describe only the visible statements.
 */
2090 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2091 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2092 rbtdb_version_t *version, *cleanup_version, *least_greater;
2093 isc_boolean_t rollback = ISC_FALSE;
2094 rbtdb_changedlist_t cleanup_list;
2095 rdatasetheaderlist_t resigned_list;
2096 rbtdb_changed_t *changed, *next_changed;
2097 rbtdb_serial_t serial, least_serial;
2098 dns_rbtnode_t *rbtnode;
2100 rdatasetheader_t *header;
2101 isc_boolean_t writer;
2103 REQUIRE(VALID_RBTDB(rbtdb));
2104 version = (rbtdb_version_t *)*versionp;
2106 cleanup_version = NULL;
2107 ISC_LIST_INIT(cleanup_list);
2108 ISC_LIST_INIT(resigned_list);
2110 isc_refcount_decrement(&version->references, &refs);
2111 if (refs > 0) { /* typical and easy case first */
2113 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2114 INSIST(!version->writer);
2115 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2120 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
/* Captured under the lock; both are used after the lock is released. */
2121 serial = version->serial;
2122 writer = version->writer;
2123 if (version->writer) {
2126 rbtdb_version_t *cur_version;
2128 INSIST(version->commit_ok);
2129 INSIST(version == rbtdb->future_version);
2131 * The current version is going to be replaced.
2132 * Release the (likely last) reference to it from the
2133 * DB itself and unlink it from the open list.
2135 cur_version = rbtdb->current_version;
2136 isc_refcount_decrement(&cur_version->references,
2139 if (cur_version->serial == rbtdb->least_serial)
2140 INSIST(EMPTY(cur_version->changed_list));
2141 UNLINK(rbtdb->open_versions,
2144 if (EMPTY(rbtdb->open_versions)) {
2146 * We're going to become the least open
2149 make_least_version(rbtdb, version,
2153 * Some other open version is the
2154 * least version. We can't cleanup
2155 * records that were changed in this
2156 * version because the older versions
2157 * may still be in use by an open
2160 * We can, however, discard the
2161 * changed records for things that
2162 * we've added that didn't exist in
2165 cleanup_nondirty(version, &cleanup_list);
2168 * If the (soon to be former) current version
2169 * isn't being used by anyone, we can clean
2173 cleanup_version = cur_version;
2174 APPENDLIST(version->changed_list,
2175 cleanup_version->changed_list,
2179 * Become the current version.
2181 version->writer = ISC_FALSE;
2182 rbtdb->current_version = version;
2183 rbtdb->current_serial = version->serial;
2184 rbtdb->future_version = NULL;
2187 * Keep the current version in the open list, and
2188 * gain a reference for the DB itself (see the DB
2189 * creation function below). This must be the only
2190 * case where we need to increment the counter from
2191 * zero and need to use isc_refcount_increment0().
2193 isc_refcount_increment0(&version->references,
2195 INSIST(cur_ref == 1);
2196 PREPEND(rbtdb->open_versions,
2197 rbtdb->current_version, link);
/* Take over the resigned list; it is processed after the DB lock is dropped. */
2198 resigned_list = version->resigned_list;
2199 ISC_LIST_INIT(version->resigned_list);
2202 * We're rolling back this transaction.
2204 cleanup_list = version->changed_list;
2205 ISC_LIST_INIT(version->changed_list);
2206 resigned_list = version->resigned_list;
2207 ISC_LIST_INIT(version->resigned_list);
2208 rollback = ISC_TRUE;
2209 cleanup_version = version;
2210 rbtdb->future_version = NULL;
2213 if (version != rbtdb->current_version) {
2215 * There are no external or internal references
2216 * to this version and it can be cleaned up.
2218 cleanup_version = version;
2221 * Find the version with the least serial
2222 * number greater than ours.
2224 least_greater = PREV(version, link);
2225 if (least_greater == NULL)
2226 least_greater = rbtdb->current_version;
2228 INSIST(version->serial < least_greater->serial);
2230 * Is this the least open version?
2232 if (version->serial == rbtdb->least_serial) {
2234 * Yes. Install the new least open
2237 make_least_version(rbtdb,
2242 * Add any unexecuted cleanups to
2243 * those of the least greater version.
2245 APPENDLIST(least_greater->changed_list,
2246 version->changed_list,
2249 } else if (version->serial == rbtdb->least_serial)
2250 INSIST(EMPTY(version->changed_list));
2251 UNLINK(rbtdb->open_versions, version, link);
/* Snapshot for the decrement_reference() calls made after unlocking. */
2253 least_serial = rbtdb->least_serial;
2254 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2257 * Update the zone's secure status.
2259 if (writer && commit && !IS_CACHE(rbtdb))
2260 iszonesecure(db, version, rbtdb->origin_node);
2262 if (cleanup_version != NULL) {
/* Its changed list must already have been moved elsewhere. */
2263 INSIST(EMPTY(cleanup_version->changed_list));
2264 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2265 sizeof(*cleanup_version));
2269 * Commit/rollback re-signed headers.
2271 for (header = HEAD(resigned_list);
2273 header = HEAD(resigned_list)) {
2276 ISC_LIST_UNLINK(resigned_list, header, link);
2278 lock = &rbtdb->node_locks[header->node->locknum].lock;
2279 NODE_LOCK(lock, isc_rwlocktype_write);
2281 resign_insert(rbtdb, header->node->locknum, header);
/* No tree lock is held at this point, hence isc_rwlocktype_none. */
2282 decrement_reference(rbtdb, header->node, least_serial,
2283 isc_rwlocktype_write, isc_rwlocktype_none,
2285 NODE_UNLOCK(lock, isc_rwlocktype_write);
2288 if (!EMPTY(cleanup_list)) {
2290 * We acquire a tree write lock here in order to make sure
2291 * that stale nodes will be removed in decrement_reference().
2292 * If we didn't have the lock, those nodes could miss the
2293 * chance to be removed until the server stops. The write lock
2294 * is expensive, but this event should be rare enough to justify
2297 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2298 for (changed = HEAD(cleanup_list);
2300 changed = next_changed) {
/* Fetch the successor first: 'changed' is freed at the end of the body. */
2303 next_changed = NEXT(changed, link);
2304 rbtnode = changed->node;
2305 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2307 NODE_LOCK(lock, isc_rwlocktype_write);
2309 * This is a good opportunity to purge any dead nodes,
2312 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2315 rollback_node(rbtnode, serial);
2316 decrement_reference(rbtdb, rbtnode, least_serial,
2317 isc_rwlocktype_write,
2318 isc_rwlocktype_write, ISC_FALSE);
2320 NODE_UNLOCK(lock, isc_rwlocktype_write);
2322 isc_mem_put(rbtdb->common.mctx, changed,
2325 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2333 * Add the necessary magic for the wildcard name 'name'
2334 * to be found in 'rbtdb'.
2336 * In order for wildcard matching to work correctly in
2337 * zone_find(), we must ensure that a node for the wildcarding
2338 * level exists in the database, and has its 'find_callback'
2339 * and 'wild' bits set.
2341 * E.g. if the wildcard name is "*.sub.example." then we
2342 * must ensure that "sub.example." exists and is marked as
/*
 * Ensure a node exists in rbtdb->tree for the label sequence of 'name'
 * that starts at label 1 (i.e. 'name' without its leftmost label), and
 * set that node's 'find_callback' bit.
 *
 * An ISC_R_EXISTS result from dns_rbt_addnode() is accepted along with
 * success.  Returns ISC_R_SUCCESS on the visible success path.
 * NOTE(review): the adjustment of 'n', the failure return and the setting
 * of the 'wild' bit are not visible in this excerpt -- confirm.
 */
2346 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2347 isc_result_t result;
2348 dns_name_t foundname;
2349 dns_offsets_t offsets;
2351 dns_rbtnode_t *node = NULL;
2353 dns_name_init(&foundname, offsets);
2354 n = dns_name_countlabels(name);
2357 dns_name_getlabelsequence(name, 1, n, &foundname);
2358 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2359 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2362 node->find_callback = 1;
2364 return (ISC_R_SUCCESS);
/*
 * For trailing label sequences of 'name' (the last 'i' labels), apply
 * add_wildcard_magic() to each one that is a wildcard name and add a node
 * for it to rbtdb->tree.  ISC_R_EXISTS from dns_rbt_addnode() is accepted
 * along with success.  Returns ISC_R_SUCCESS on the visible success path.
 * NOTE(review): the loop bounds (presumably derived from 'l' and 'n') and
 * the failure returns are not visible in this excerpt -- confirm.
 */
2368 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2369 isc_result_t result;
2370 dns_name_t foundname;
2371 dns_offsets_t offsets;
2372 unsigned int n, l, i;
2374 dns_name_init(&foundname, offsets);
/* 'n' counts the labels of 'name', 'l' those of the database origin. */
2375 n = dns_name_countlabels(name);
2376 l = dns_name_countlabels(&rbtdb->common.origin);
2379 dns_rbtnode_t *node = NULL; /* dummy */
2380 dns_name_getlabelsequence(name, n - i, i, &foundname);
2381 if (dns_name_iswildcard(&foundname)) {
2382 result = add_wildcard_magic(rbtdb, &foundname);
2383 if (result != ISC_R_SUCCESS)
2385 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2387 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2393 return (ISC_R_SUCCESS);
/*
 * Find the node for 'name' in rbtdb->tree and return it in '*nodep' with
 * a new reference taken via reactivate_node().
 *
 * The lookup runs under the tree read lock.  On a miss the read lock is
 * released; the visible creation path then takes the tree write lock,
 * adds the node, assigns its lock bucket, and sets up wildcard support
 * for 'name'.  A DNS_R_PARTIALMATCH lookup result is mapped to
 * ISC_R_NOTFOUND.  Returns ISC_R_SUCCESS when a node is handed back.
 * NOTE(review): the test on 'create' and the failure returns are not
 * visible in this excerpt -- confirm against the full file.
 */
2397 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2398 dns_dbnode_t **nodep)
2400 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2401 dns_rbtnode_t *node = NULL;
2402 dns_name_t nodename;
2403 isc_result_t result;
2404 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2406 REQUIRE(VALID_RBTDB(rbtdb));
2408 dns_name_init(&nodename, NULL);
2409 RWLOCK(&rbtdb->tree_lock, locktype);
2410 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2411 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2412 if (result != ISC_R_SUCCESS) {
2413 RWUNLOCK(&rbtdb->tree_lock, locktype);
2415 if (result == DNS_R_PARTIALMATCH)
2416 result = ISC_R_NOTFOUND;
2420 * It would be nice to try to upgrade the lock instead of
2421 * unlocking then relocking.
2423 locktype = isc_rwlocktype_write;
2424 RWLOCK(&rbtdb->tree_lock, locktype);
2426 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2427 if (result == ISC_R_SUCCESS) {
/* A new node gets its lock bucket from a hash modulo the bucket count. */
2428 dns_rbt_namefromnode(node, &nodename);
2429 #ifdef DNS_RBT_USEHASH
2430 node->locknum = node->hashval % rbtdb->node_lock_count;
2432 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2433 rbtdb->node_lock_count;
/* NOTE(review): the result of this call is not checked here. */
2436 add_empty_wildcards(rbtdb, name);
2438 if (dns_name_iswildcard(name)) {
2439 result = add_wildcard_magic(rbtdb, name);
2440 if (result != ISC_R_SUCCESS) {
2441 RWUNLOCK(&rbtdb->tree_lock, locktype);
2445 } else if (result != ISC_R_EXISTS) {
2446 RWUNLOCK(&rbtdb->tree_lock, locktype);
/* 'locktype' is whichever tree lock (read or write) is held here. */
2450 reactivate_node(rbtdb, node, locktype);
2451 RWUNLOCK(&rbtdb->tree_lock, locktype);
2453 *nodep = (dns_dbnode_t *)node;
2455 return (ISC_R_SUCCESS);
/*
 * Find the node for 'name' in rbtdb->nsec3 and return it in '*nodep' with
 * a new reference.
 *
 * Same shape as findnode(), with two visible differences: the nsec3 tree
 * is used, and the reference is taken with new_reference() under the
 * node's strong lock rather than through reactivate_node().  No wildcard
 * handling is visible.  A DNS_R_PARTIALMATCH lookup result is mapped to
 * ISC_R_NOTFOUND.  Returns ISC_R_SUCCESS when a node is handed back.
 * NOTE(review): the test on 'create', the failure returns and the setting
 * of node->nsec3 for new nodes are not visible in this excerpt -- confirm.
 */
2459 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2460 dns_dbnode_t **nodep)
2462 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2463 dns_rbtnode_t *node = NULL;
2464 dns_name_t nodename;
2465 isc_result_t result;
2466 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2468 REQUIRE(VALID_RBTDB(rbtdb));
2470 dns_name_init(&nodename, NULL);
2471 RWLOCK(&rbtdb->tree_lock, locktype);
2472 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2473 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2474 if (result != ISC_R_SUCCESS) {
2475 RWUNLOCK(&rbtdb->tree_lock, locktype);
2477 if (result == DNS_R_PARTIALMATCH)
2478 result = ISC_R_NOTFOUND;
2482 * It would be nice to try to upgrade the lock instead of
2483 * unlocking then relocking.
2485 locktype = isc_rwlocktype_write;
2486 RWLOCK(&rbtdb->tree_lock, locktype);
2488 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2489 if (result == ISC_R_SUCCESS) {
/* A new node gets its lock bucket from a hash modulo the bucket count. */
2490 dns_rbt_namefromnode(node, &nodename);
2491 #ifdef DNS_RBT_USEHASH
2492 node->locknum = node->hashval % rbtdb->node_lock_count;
2494 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2495 rbtdb->node_lock_count;
2498 } else if (result != ISC_R_EXISTS) {
2499 RWUNLOCK(&rbtdb->tree_lock, locktype);
/* Every node reached here must be flagged as an nsec3 node. */
2503 INSIST(node->nsec3);
2504 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2505 new_reference(rbtdb, node);
2506 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2507 RWUNLOCK(&rbtdb->tree_lock, locktype);
2509 *nodep = (dns_dbnode_t *)node;
2511 return (ISC_R_SUCCESS);
/*
 * zone_zonecut_callback: tree-walk callback (passed to dns_rbt_findnode()
 * by zone_find()) that records the topmost zone cut in the search block
 * supplied through 'arg'.
 *
 * If search->zonecut is already set the callback returns DNS_R_CONTINUE at
 * once.  Otherwise, with the node's read lock held, it scans node->data for
 * NS, DNAME and RRSIG(DNAME) headers, following each header's 'down' chain
 * and applying the serial and NONEXISTENT() tests visible below.  An NS
 * header is only considered when the node is not the origin node or the
 * database is a stub.  DNAME takes precedence over NS.
 *
 * When a cut is found: a node reference is taken, search->zonecut,
 * search->zonecut_rdataset and search->zonecut_sigrdataset are filled in,
 * search->need_cleanup is set and search->wild is cleared.  If
 * DNS_DBFIND_GLUEOK is not set the result becomes DNS_R_PARTIALMATCH;
 * otherwise 'name' is copied into search->zonecut_name and
 * search->copy_name is set.
 *
 * When no cut is found and the node has its 'wild' bit set (and
 * DNS_DBFIND_NOWILD is not requested), search->wild is set.
 *
 * NOTE(review): several source lines (part of the serial test, the ns_header
 * bookkeeping, the 'zcname' declaration, the final return) are absent from
 * this listing; presumably 'result' is what gets returned -- confirm.
 */
2515 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2516 rbtdb_search_t *search = arg;
2517 rdatasetheader_t *header, *header_next;
2518 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2519 rdatasetheader_t *found;
2520 isc_result_t result;
2521 dns_rbtnode_t *onode;
2524 * We only want to remember the topmost zone cut, since it's the one
2525 * that counts, so we'll just continue if we've already found a
2528 if (search->zonecut != NULL)
2529 return (DNS_R_CONTINUE);
2532 result = DNS_R_CONTINUE;
2533 onode = search->rbtdb->origin_node;
2535 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2536 isc_rwlocktype_read);
2539 * Look for an NS or DNAME rdataset active in our version.
2542 dname_header = NULL;
2543 sigdname_header = NULL;
2544 for (header = node->data; header != NULL; header = header_next) {
2545 header_next = header->next;
2546 if (header->type == dns_rdatatype_ns ||
2547 header->type == dns_rdatatype_dname ||
2548 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2550 if (header->serial <= search->serial &&
2553 * Is this a "this rdataset doesn't
2556 if (NONEXISTENT(header))
2560 header = header->down;
2561 } while (header != NULL);
2562 if (header != NULL) {
2563 if (header->type == dns_rdatatype_dname)
2564 dname_header = header;
2565 else if (header->type ==
2566 RBTDB_RDATATYPE_SIGDNAME)
2567 sigdname_header = header;
2568 else if (node != onode ||
2569 IS_STUB(search->rbtdb)) {
2571 * We've found an NS rdataset that
2572 * isn't at the origin node. We check
2573 * that they're not at the origin node,
2574 * because otherwise we'd erroneously
2575 * treat the zone top as if it were
2585 * Did we find anything?
2587 if (dname_header != NULL) {
2589 * Note that DNAME has precedence over NS if both exist.
2591 found = dname_header;
2592 search->zonecut_sigrdataset = sigdname_header;
2593 } else if (ns_header != NULL) {
2595 search->zonecut_sigrdataset = NULL;
2598 if (found != NULL) {
2600 * We increment the reference count on node to ensure that
2601 * search->zonecut_rdataset will still be valid later.
2603 new_reference(search->rbtdb, node);
2604 search->zonecut = node;
2605 search->zonecut_rdataset = found;
2606 search->need_cleanup = ISC_TRUE;
2608 * Since we've found a zonecut, anything beneath it is
2609 * glue and is not subject to wildcard matching, so we
2610 * may clear search->wild.
2612 search->wild = ISC_FALSE;
2613 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2615 * If the caller does not want to find glue, then
2616 * this is the best answer and the search should
2619 result = DNS_R_PARTIALMATCH;
2624 * The search will continue beneath the zone cut.
2625 * This may or may not be the best match. In case it
2626 * is, we need to remember the node name.
2628 zcname = dns_fixedname_name(&search->zonecut_name);
2629 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2631 search->copy_name = ISC_TRUE;
2635 * There is no zonecut at this node which is active in this
2638 * If this is a "wild" node and the caller hasn't disabled
2639 * wildcard matching, remember that we've seen a wild node
2640 * in case we need to go searching for wildcard matches
2643 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2644 search->wild = ISC_TRUE;
2647 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2648 isc_rwlocktype_read);
/*
 * bind_rdataset: associate the caller's disassociated 'rdataset' with the
 * slab data that follows 'header' on 'node'.
 *
 * A NULL 'rdataset' is tested for first (the statement taken in that case is
 * not visible in this listing -- presumably an early return; confirm).
 * Otherwise a reference to 'node' is taken and the rdataset is filled in:
 *   - methods, rdclass, type / covers (split out of header->type), trust;
 *   - ttl is header->rdh_ttl minus 'now';
 *   - NXDOMAIN / OPTOUT / NOQNAME / CLOSEST / RESIGN attribute bits;
 *   - private1..private3 point at the database, the node and the raw slab
 *     (the bytes immediately after the header structure);
 *   - count is taken from header->count, which is post-incremented, and is
 *     reset to 0 when the value read equals ISC_UINT32_MAX;
 *   - iterator state (privateuint4, private5) is cleared;
 *   - private6 / private7 carry header->noqname / header->closest;
 *   - resign is copied when RESIGN(header) holds, else set to 0.
 *
 * The increment of header->count is performed while the caller holds only
 * the node read lock; see the XXXJT remark below.
 *
 * NOTE(review): the condition guarding the OPTOUT attribute line is missing
 * from this listing.
 */
2654 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2655 rdatasetheader_t *header, isc_stdtime_t now,
2656 dns_rdataset_t *rdataset)
2658 unsigned char *raw; /* RDATASLAB */
2661 * Caller must be holding the node reader lock.
2662 * XXXJT: technically, we need a writer lock, since we'll increment
2663 * the header count below. However, since the actual counter value
2664 * doesn't matter, we prioritize performance here. (We may want to
2665 * use atomic increment when available).
2668 if (rdataset == NULL)
2671 new_reference(rbtdb, node);
2673 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2675 rdataset->methods = &rdataset_methods;
2676 rdataset->rdclass = rbtdb->common.rdclass;
2677 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2678 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2679 rdataset->ttl = header->rdh_ttl - now;
2680 rdataset->trust = header->trust;
2681 if (NXDOMAIN(header))
2682 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2684 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2685 rdataset->private1 = rbtdb;
2686 rdataset->private2 = node;
2687 raw = (unsigned char *)header + sizeof(*header);
2688 rdataset->private3 = raw;
2689 rdataset->count = header->count++;
2690 if (rdataset->count == ISC_UINT32_MAX)
2691 rdataset->count = 0;
2694 * Reset iterator state.
2696 rdataset->privateuint4 = 0;
2697 rdataset->private5 = NULL;
2700 * Add noqname proof.
2702 rdataset->private6 = header->noqname;
2703 if (rdataset->private6 != NULL)
2704 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2705 rdataset->private7 = header->closest;
2706 if (rdataset->private7 != NULL)
2707 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2710 * Copy out re-signing information.
2712 if (RESIGN(header)) {
2713 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2714 rdataset->resign = header->resign;
2716 rdataset->resign = 0;
/*
 * setup_delegation: turn the zone cut remembered in 'search' into the
 * caller-visible answer.
 *
 *  - 'foundname' (when non-NULL and search->copy_name is set) receives a
 *    copy of search->zonecut_name; this is done first so that nothing has
 *    to be undone if the copy fails.
 *  - 'nodep' (when non-NULL) takes over the node reference already held in
 *    the search block, which is why search->need_cleanup is cleared instead
 *    of a new reference being taken.
 *  - 'rdataset' (when non-NULL) is bound to search->zonecut_rdataset under
 *    the node read lock, using search->now; 'sigrdataset' is bound as well
 *    when both it and search->zonecut_sigrdataset are non-NULL.
 *
 * Returns DNS_R_DNAME when the zone cut rdataset is a DNAME, otherwise
 * DNS_R_DELEGATION.
 *
 * NOTE(review): the 'zcname' declaration, the error return after
 * dns_name_copy() and the assignment to '*nodep' are not visible in this
 * listing.
 */
2719 static inline isc_result_t
2720 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2721 dns_name_t *foundname, dns_rdataset_t *rdataset,
2722 dns_rdataset_t *sigrdataset)
2724 isc_result_t result;
2726 rbtdb_rdatatype_t type;
2727 dns_rbtnode_t *node;
2730 * The caller MUST NOT be holding any node locks.
2733 node = search->zonecut;
2734 type = search->zonecut_rdataset->type;
2737 * If we have to set foundname, we do it before anything else.
2738 * If we were to set foundname after we had set nodep or bound the
2739 * rdataset, then we'd have to undo that work if dns_name_copy()
2740 * failed. By setting foundname first, there's nothing to undo if
2743 if (foundname != NULL && search->copy_name) {
2744 zcname = dns_fixedname_name(&search->zonecut_name);
2745 result = dns_name_copy(zcname, foundname, NULL);
2746 if (result != ISC_R_SUCCESS)
2749 if (nodep != NULL) {
2751 * Note that we don't have to increment the node's reference
2752 * count here because we're going to use the reference we
2753 * already have in the search block.
2756 search->need_cleanup = ISC_FALSE;
2758 if (rdataset != NULL) {
2759 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2760 isc_rwlocktype_read);
2761 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2762 search->now, rdataset);
2763 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2764 bind_rdataset(search->rbtdb, node,
2765 search->zonecut_sigrdataset,
2766 search->now, sigrdataset);
2767 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2768 isc_rwlocktype_read);
2771 if (type == dns_rdatatype_dname)
2772 return (DNS_R_DNAME);
2773 return (DNS_R_DELEGATION);
/*
 * valid_glue: decide whether data of 'type' found at 'node' (owner 'name')
 * may be treated as glue for the zone cut recorded in 'search'.
 *
 * Only A, AAAA and A6 are acceptable glue types; NS is acceptable too, but
 * only when 'node' is the zone cut node itself.  After that type filter the
 * raw rdataslab stored directly behind search->zonecut_rdataset is walked:
 * the 16-bit big-endian record count is read, then each record's 16-bit
 * length; the record is described by 'region', turned into a name with
 * dns_name_fromregion() and compared against 'name'.
 *
 * 'valid' starts out as ISC_FALSE.
 *
 * Fix: the second argument of dns_name_fromregion() had been corrupted into
 * a registered-trademark sign followed by "ion"; it is restored to the
 * address of the local 'region'.
 *
 * NOTE(review): the 'ns_name' declaration, the loop header, the region.base
 * assignment, the early returns and the final return are missing from this
 * listing; presumably 'valid' is set on a match and returned -- confirm.
 */
2776 static inline isc_boolean_t
2777 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2778 dns_rbtnode_t *node)
2780 unsigned char *raw; /* RDATASLAB */
2781 unsigned int count, size;
2783 isc_boolean_t valid = ISC_FALSE;
2784 dns_offsets_t offsets;
2785 isc_region_t region;
2786 rdatasetheader_t *header;
2789 * No additional locking is required.
2793 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2794 * if it occurs at a zone cut, but is not valid below it.
2796 if (type == dns_rdatatype_ns) {
2797 if (node != search->zonecut) {
2800 } else if (type != dns_rdatatype_a &&
2801 type != dns_rdatatype_aaaa &&
2802 type != dns_rdatatype_a6) {
2806 header = search->zonecut_rdataset;
2807 raw = (unsigned char *)header + sizeof(*header);
2808 count = raw[0] * 256 + raw[1];
2809 #if DNS_RDATASET_FIXED
2810 raw += 2 + (4 * count);
2817 size = raw[0] * 256 + raw[1];
2818 #if DNS_RDATASET_FIXED
2824 region.length = size;
2827 * XXX Until we have rdata structures, we have no choice but
2828 * to directly access the rdata format.
2830 dns_name_init(&ns_name, offsets);
2831 dns_name_fromregion(&ns_name, &region);
2832 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * activeempty: starting from the position held in 'chain', step forward
 * with dns_rbtnodechain_next() looking for a node that is active in the
 * search's version, then test whether that node's full name is a subdomain
 * of 'name'.
 *
 * A node counts as active when, with its read lock held, some header in
 * node->data satisfies: serial <= search->serial, !IGNORE(header) and
 * EXISTS(header).  Once the walk stops with ISC_R_SUCCESS the current
 * prefix and origin are concatenated into 'next' and checked with
 * dns_name_issubdomain(next, name).
 *
 * 'answer' is initialised to ISC_FALSE.
 *
 * NOTE(review): the third parameter (used below as 'name'), several local
 * declarations, the loop exits and the final return are not visible in this
 * listing; presumably 'answer' is set to true on the subdomain match and
 * returned -- confirm.  The caller passes its own copy of the chain where
 * the search chain must be preserved (see zone_find()).
 */
2841 static inline isc_boolean_t
2842 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2845 dns_fixedname_t fnext;
2846 dns_fixedname_t forigin;
2851 dns_rbtnode_t *node;
2852 isc_result_t result;
2853 isc_boolean_t answer = ISC_FALSE;
2854 rdatasetheader_t *header;
2856 rbtdb = search->rbtdb;
2858 dns_name_init(&prefix, NULL);
2859 dns_fixedname_init(&fnext);
2860 next = dns_fixedname_name(&fnext);
2861 dns_fixedname_init(&forigin);
2862 origin = dns_fixedname_name(&forigin);
2864 result = dns_rbtnodechain_next(chain, NULL, NULL);
2865 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2867 result = dns_rbtnodechain_current(chain, &prefix,
2869 if (result != ISC_R_SUCCESS)
2871 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2872 isc_rwlocktype_read);
2873 for (header = node->data;
2875 header = header->next) {
2876 if (header->serial <= search->serial &&
2877 !IGNORE(header) && EXISTS(header))
2880 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2881 isc_rwlocktype_read);
2884 result = dns_rbtnodechain_next(chain, NULL, NULL);
2886 if (result == ISC_R_SUCCESS)
2887 result = dns_name_concatenate(&prefix, origin, next, NULL);
2888 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode (sic -- the identifier is spelled this way and is called
 * by that name from find_wildcard()): determine whether 'qname' lies at or
 * below an empty node, given the wildcard name 'wname'.
 *
 * Works on a private copy of search->chain:
 *   1. Walk backwards (dns_rbtnodechain_prev) until a node active in the
 *      search's version is found; its full name goes into 'prev'.  If that
 *      fails, check_prev is cleared.
 *   2. Walk forwards (dns_rbtnodechain_next) likewise; the name goes into
 *      'next', or check_next is cleared.
 *   3. Clone 'qname' into 'rname' and strip the leading (wildcard) label of
 *      'wname' to get the terminal name 'tname'.  Then, repeatedly, test
 *      whether 'prev' or 'next' is a subdomain of 'rname', removing the
 *      left-most label of 'rname' each round until it equals 'tname'.
 *
 * "Active" means some header at the node has serial <= search->serial,
 * is not IGNOREd and EXISTS; nodes are inspected under their read lock.
 *
 * 'answer' is initialised to ISC_FALSE.
 *
 * NOTE(review): several declarations, the loop openers / exits and the
 * final return are missing from this listing; presumably 'answer' becomes
 * true when the subdomain test in step 3 succeeds -- confirm.
 */
2893 static inline isc_boolean_t
2894 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2895 dns_fixedname_t fnext;
2896 dns_fixedname_t forigin;
2897 dns_fixedname_t fprev;
2905 dns_rbtnode_t *node;
2906 dns_rbtnodechain_t chain;
2907 isc_boolean_t check_next = ISC_TRUE;
2908 isc_boolean_t check_prev = ISC_TRUE;
2909 isc_boolean_t answer = ISC_FALSE;
2910 isc_result_t result;
2911 rdatasetheader_t *header;
2914 rbtdb = search->rbtdb;
2916 dns_name_init(&name, NULL);
2917 dns_name_init(&tname, NULL);
2918 dns_name_init(&rname, NULL);
2919 dns_fixedname_init(&fnext);
2920 next = dns_fixedname_name(&fnext);
2921 dns_fixedname_init(&fprev);
2922 prev = dns_fixedname_name(&fprev);
2923 dns_fixedname_init(&forigin);
2924 origin = dns_fixedname_name(&forigin);
2927 * Find if qname is at or below a empty node.
2928 * Use our own copy of the chain.
2931 chain = search->chain;
2934 result = dns_rbtnodechain_current(&chain, &name,
2936 if (result != ISC_R_SUCCESS)
2938 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2939 isc_rwlocktype_read);
2940 for (header = node->data;
2942 header = header->next) {
2943 if (header->serial <= search->serial &&
2944 !IGNORE(header) && EXISTS(header))
2947 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2948 isc_rwlocktype_read);
2951 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2952 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2953 if (result == ISC_R_SUCCESS)
2954 result = dns_name_concatenate(&name, origin, prev, NULL);
2955 if (result != ISC_R_SUCCESS)
2956 check_prev = ISC_FALSE;
2958 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2959 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2961 result = dns_rbtnodechain_current(&chain, &name,
2963 if (result != ISC_R_SUCCESS)
2965 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2966 isc_rwlocktype_read);
2967 for (header = node->data;
2969 header = header->next) {
2970 if (header->serial <= search->serial &&
2971 !IGNORE(header) && EXISTS(header))
2974 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2975 isc_rwlocktype_read);
2978 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2980 if (result == ISC_R_SUCCESS)
2981 result = dns_name_concatenate(&name, origin, next, NULL);
2982 if (result != ISC_R_SUCCESS)
2983 check_next = ISC_FALSE;
2985 dns_name_clone(qname, &rname);
2988 * Remove the wildcard label to find the terminal name.
2990 n = dns_name_countlabels(wname);
2991 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2994 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2995 (check_next && dns_name_issubdomain(next, &rname))) {
3000 * Remove the left hand label.
3002 n = dns_name_countlabels(&rname);
3003 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3004 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard: look for a wildcard node that matches the query name,
 * examining the ancestor levels recorded in search->chain (starting from
 * search->chain.level_matches).
 *
 * For each level:
 *   - under the node read lock, determine whether the level node is active
 *     in the search's version (some header with serial <= search->serial,
 *     not IGNOREd, EXISTS);
 *   - build the wildcard name by prepending dns_wildcardname to the level
 *     node's name and then appending the names of the enclosing levels;
 *   - look that name up in rbtdb->tree with dns_rbt_findnode(), using a
 *     fresh chain 'wchain';
 *   - if the wildcard node exists and is active (or activeempty() reports
 *     an active name beneath it), activeemtpynode() is consulted: when it
 *     returns true the function returns ISC_R_NOTFOUND, otherwise the
 *     wildcard is accepted with result still ISC_R_SUCCESS;
 *   - a lookup result other than ISC_R_NOTFOUND / DNS_R_PARTIALMATCH is
 *     treated as an error and ends the search;
 *   - if the level node itself is active, wildcards higher up cannot apply
 *     and the result is ISC_R_NOTFOUND.
 *
 * 'result' starts as ISC_R_NOTFOUND.
 *
 * NOTE(review): the remaining parameters (one is used as 'qname'), several
 * declarations, the loop framing, the use of the node's 'wild' bit, the
 * store into '*nodep' and the final return are missing from this listing.
 */
3008 static inline isc_result_t
3009 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3013 dns_rbtnode_t *node, *level_node, *wnode;
3014 rdatasetheader_t *header;
3015 isc_result_t result = ISC_R_NOTFOUND;
3018 dns_fixedname_t fwname;
3020 isc_boolean_t done, wild, active;
3021 dns_rbtnodechain_t wchain;
3024 * Caller must be holding the tree lock and MUST NOT be holding
3029 * Examine each ancestor level. If the level's wild bit
3030 * is set, then construct the corresponding wildcard name and
3031 * search for it. If the wildcard node exists, and is active in
3032 * this version, we're done. If not, then we next check to see
3033 * if the ancestor is active in this version. If so, then there
3034 * can be no possible wildcard match and again we're done. If not,
3035 * continue the search.
3038 rbtdb = search->rbtdb;
3039 i = search->chain.level_matches;
3043 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3044 isc_rwlocktype_read);
3047 * First we try to figure out if this node is active in
3048 * the search's version. We do this now, even though we
3049 * may not need the information, because it simplifies the
3050 * locking and code flow.
3052 for (header = node->data;
3054 header = header->next) {
3055 if (header->serial <= search->serial &&
3056 !IGNORE(header) && EXISTS(header))
3069 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3070 isc_rwlocktype_read);
3074 * Construct the wildcard name for this level.
3076 dns_name_init(&name, NULL);
3077 dns_rbt_namefromnode(node, &name);
3078 dns_fixedname_init(&fwname);
3079 wname = dns_fixedname_name(&fwname);
3080 result = dns_name_concatenate(dns_wildcardname, &name,
3083 while (result == ISC_R_SUCCESS && j != 0) {
3085 level_node = search->chain.levels[j];
3086 dns_name_init(&name, NULL);
3087 dns_rbt_namefromnode(level_node, &name);
3088 result = dns_name_concatenate(wname,
3093 if (result != ISC_R_SUCCESS)
3097 dns_rbtnodechain_init(&wchain, NULL);
3098 result = dns_rbt_findnode(rbtdb->tree, wname,
3099 NULL, &wnode, &wchain,
3100 DNS_RBTFIND_EMPTYDATA,
3102 if (result == ISC_R_SUCCESS) {
3106 * We have found the wildcard node. If it
3107 * is active in the search's version, we're
3110 lock = &rbtdb->node_locks[wnode->locknum].lock;
3111 NODE_LOCK(lock, isc_rwlocktype_read);
3112 for (header = wnode->data;
3114 header = header->next) {
3115 if (header->serial <= search->serial &&
3116 !IGNORE(header) && EXISTS(header))
3119 NODE_UNLOCK(lock, isc_rwlocktype_read);
3120 if (header != NULL ||
3121 activeempty(search, &wchain, wname)) {
3122 if (activeemtpynode(search, qname,
3124 return (ISC_R_NOTFOUND);
3127 * The wildcard node is active!
3129 * Note: result is still ISC_R_SUCCESS
3130 * so we don't have to set it.
3135 } else if (result != ISC_R_NOTFOUND &&
3136 result != DNS_R_PARTIALMATCH) {
3138 * An error has occurred. Bail out.
3146 * The level node is active. Any wildcarding
3147 * present at higher levels has no
3148 * effect and we're done.
3150 result = ISC_R_NOTFOUND;
3156 node = search->chain.levels[i];
/*
 * matchparams: check whether any NSEC3 record stored in 'header' uses the
 * same NSEC3 parameters as the version being searched.
 *
 * 'header' must describe an NSEC3 rdataset (REQUIRE).  The raw rdataslab
 * that follows the header is walked: a 16-bit big-endian record count, then
 * for every record a 16-bit length and the rdata bytes.  Each record is
 * wrapped in 'region', turned into a dns_rdata_t with
 * dns_rdata_fromregion() (class taken from the database, type NSEC3) and
 * decoded with dns_rdata_tostruct(), which is required to succeed (INSIST).
 * The decoded hash, iterations, salt_length and salt bytes are compared
 * with the corresponding fields of search->rbtversion; 'rdata' is reset
 * before the next record.
 *
 * Fix: the last argument of dns_rdata_fromregion() had been corrupted into
 * a registered-trademark sign followed by "ion"; it is restored to the
 * address of the local 'region'.
 *
 * NOTE(review): the region.base assignment, the raw-pointer advances and
 * both return statements are missing from this listing; presumably the
 * function returns true on the first full match and false otherwise --
 * confirm against the complete file.
 */
3164 static isc_boolean_t
3165 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3167 dns_rdata_t rdata = DNS_RDATA_INIT;
3168 dns_rdata_nsec3_t nsec3;
3169 unsigned char *raw; /* RDATASLAB */
3170 unsigned int rdlen, count;
3171 isc_region_t region;
3172 isc_result_t result;
3174 REQUIRE(header->type == dns_rdatatype_nsec3);
3176 raw = (unsigned char *)header + sizeof(*header);
3177 count = raw[0] * 256 + raw[1]; /* count */
3178 #if DNS_RDATASET_FIXED
3179 raw += count * 4 + 2;
3183 while (count-- > 0) {
3184 rdlen = raw[0] * 256 + raw[1];
3185 #if DNS_RDATASET_FIXED
3191 region.length = rdlen;
3192 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3193 dns_rdatatype_nsec3, &region);
3195 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3196 INSIST(result == ISC_R_SUCCESS);
3197 if (nsec3.hash == search->rbtversion->hash &&
3198 nsec3.iterations == search->rbtversion->iterations &&
3199 nsec3.salt_length == search->rbtversion->salt_length &&
3200 memcmp(nsec3.salt, search->rbtversion->salt,
3201 nsec3.salt_length) == 0)
3203 dns_rdata_reset(&rdata);
/*
 * find_closest_nsec: walk backwards through 'tree' from the position held
 * in search->chain until a node carrying the wanted NSEC or NSEC3 rdataset
 * (and, when needed, its RRSIG) is found, and bind the result for the
 * caller.
 *
 * The wanted type is NSEC3 / RRSIG(NSEC3) when 'tree' is the database's
 * nsec3 tree, otherwise NSEC / RRSIG(NSEC).  A signature is required
 * ('need_sig') only when 'secure' equals dns_db_secure.
 *
 * For each node visited (under its read lock) the headers are scanned for
 * an active, existing rdataset of the wanted type and signature type:
 *   - an NSEC3 whose parameters do not match the version's chain
 *     (matchparams() false while rbtversion->havensec3 is set) makes the
 *     node count as empty and the walk continues with the previous node;
 *   - a usable record (found, and signed unless no signature is needed)
 *     has the node's full name concatenated into 'foundname'; on success
 *     'nodep' gets a new reference and the rdataset(s) are bound;
 *   - an active node with neither record is treated as empty (glue or
 *     otherwise obscured data) and the walk continues;
 *   - an active node with only one of the two yields DNS_R_BADDB;
 *   - an inactive node just moves the walk to the previous node.
 *
 * After the walk, ISC_R_NOMORE with 'wraps' set restarts from the last node
 * of the tree (dns_rbtnodechain_last); a remaining ISC_R_NOMORE is mapped
 * to DNS_R_BADDB.
 *
 * NOTE(review): the initialisation of 'wraps', 'found' and 'foundsig', the
 * loop opener, parts of the header tests and the final return are missing
 * from this listing.
 */
3208 static inline isc_result_t
3209 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3210 dns_name_t *foundname, dns_rdataset_t *rdataset,
3211 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3212 dns_db_secure_t secure)
3214 dns_rbtnode_t *node;
3215 rdatasetheader_t *header, *header_next, *found, *foundsig;
3216 isc_boolean_t empty_node;
3217 isc_result_t result;
3218 dns_fixedname_t fname, forigin;
3219 dns_name_t *name, *origin;
3220 dns_rdatatype_t type;
3221 rbtdb_rdatatype_t sigtype;
3222 isc_boolean_t wraps;
3223 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3225 if (tree == search->rbtdb->nsec3) {
3226 type = dns_rdatatype_nsec3;
3227 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3230 type = dns_rdatatype_nsec;
3231 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3238 dns_fixedname_init(&fname);
3239 name = dns_fixedname_name(&fname);
3240 dns_fixedname_init(&forigin);
3241 origin = dns_fixedname_name(&forigin);
3242 result = dns_rbtnodechain_current(&search->chain, name,
3244 if (result != ISC_R_SUCCESS)
3246 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3247 isc_rwlocktype_read);
3250 empty_node = ISC_TRUE;
3251 for (header = node->data;
3253 header = header_next) {
3254 header_next = header->next;
3256 * Look for an active, extant NSEC or RRSIG NSEC.
3259 if (header->serial <= search->serial &&
3262 * Is this a "this rdataset doesn't
3265 if (NONEXISTENT(header))
3269 header = header->down;
3270 } while (header != NULL);
3271 if (header != NULL) {
3273 * We now know that there is at least one
3274 * active rdataset at this node.
3276 empty_node = ISC_FALSE;
3277 if (header->type == type) {
3279 if (foundsig != NULL)
3281 } else if (header->type == sigtype) {
3289 if (found != NULL && search->rbtversion->havensec3 &&
3290 found->type == dns_rdatatype_nsec3 &&
3291 !matchparams(found, search)) {
3292 empty_node = ISC_TRUE;
3295 result = dns_rbtnodechain_prev(&search->chain,
3297 } else if (found != NULL &&
3298 (foundsig != NULL || !need_sig))
3301 * We've found the right NSEC/NSEC3 record.
3303 * Note: for this to really be the right
3304 * NSEC record, it's essential that the NSEC
3305 * records of any nodes obscured by a zone
3306 * cut have been removed; we assume this is
3309 result = dns_name_concatenate(name, origin,
3311 if (result == ISC_R_SUCCESS) {
3312 if (nodep != NULL) {
3313 new_reference(search->rbtdb,
3317 bind_rdataset(search->rbtdb, node,
3320 if (foundsig != NULL)
3321 bind_rdataset(search->rbtdb,
3327 } else if (found == NULL && foundsig == NULL) {
3329 * This node is active, but has no NSEC or
3330 * RRSIG NSEC. That means it's glue or
3331 * other obscured zone data that isn't
3332 * relevant for our search. Treat the
3333 * node as if it were empty and keep looking.
3335 empty_node = ISC_TRUE;
3336 result = dns_rbtnodechain_prev(&search->chain,
3340 * We found an active node, but either the
3341 * NSEC or the RRSIG NSEC is missing. This
3344 result = DNS_R_BADDB;
3348 * This node isn't active. We've got to keep
3351 result = dns_rbtnodechain_prev(&search->chain, NULL,
3354 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3355 isc_rwlocktype_read);
3356 } while (empty_node && result == ISC_R_SUCCESS);
3358 if (result == ISC_R_NOMORE && wraps) {
3359 result = dns_rbtnodechain_last(&search->chain, tree,
3361 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3368 * If the result is ISC_R_NOMORE, then we got to the beginning of
3369 * the database and didn't find a NSEC record. This shouldn't
3372 if (result == ISC_R_NOMORE)
3373 result = DNS_R_BADDB;
/*
 * zone_find: search zone database 'db' for rdata of 'type' at 'name' in
 * 'version', returning the node, the name found and the rdataset(s) through
 * the output parameters.  'now' is not used for zone lookups (rdatasets
 * are bound here with a time of 0).
 *
 * Outline of the visible flow:
 *   - If 'version' is NULL the current version is attached and
 *     'close_version' remembers to release it.
 *   - The search block is initialised and the tree read lock taken.  The
 *     NSEC3 tree is selected when DNS_DBFIND_FORCENSEC3 is set.
 *   - dns_rbt_findnode() walks the tree with zone_zonecut_callback().
 *   - DNS_R_PARTIALMATCH: a recorded zone cut becomes a delegation via
 *     setup_delegation(); otherwise find_wildcard() is tried; failing
 *     that, for secure versions without NSEC3 or when FORCENSEC /
 *     FORCENSEC3 is requested, find_closest_nsec() supplies the proof and
 *     the result is DNS_R_EMPTYNAME or the non-existence code; without a
 *     proof it is DNS_R_EMPTYNAME or DNS_R_NXDOMAIN depending on
 *     activeempty().
 *   - Exact (or wildcard) match: CNAME matching is disabled beneath a zone
 *     cut and for KEY / NSEC queries; the node may itself be a zone cut
 *     ('maybe_zonecut').  Under the node read lock the headers are scanned
 *     for the wanted type, its RRSIG, a CNAME and its RRSIG, and the
 *     NSEC / RRSIG(NSEC) pair (kept only when the version has no NSEC3).
 *     An NSEC3 header whose parameters do not match is handled as if the
 *     node were not there.
 *   - Nothing found: delegation if beneath a cut, otherwise DNS_R_NXRRSET
 *     (or DNS_R_BADDB when a secure zone lacks the NSEC or its signature,
 *     DNS_R_EMPTYWILD after a successful closest-NSEC lookup), with the
 *     NSEC bound as proof where applicable.
 *   - Found: DNS_R_CNAME, DNS_R_ZONECUT, DNS_R_GLUE (optionally validated
 *     with valid_glue(), falling back to the delegation) or ISC_R_SUCCESS;
 *     the node reference and rdatasets are handed to the caller and
 *     DNS_NAMEATTR_WILDCARD is set on 'foundname' where the code does so.
 *   - Cleanup: node and tree locks are released; an unused zone cut
 *     reference is dropped with decrement_reference(); the version is
 *     closed; the search chain is reset.
 *
 * NOTE(review): many source lines (labels, gotos, several conditions,
 * declarations of 'tree', 'lock', 'wild', and the final return) are absent
 * from this listing, so the control flow between the stages above cannot be
 * fully confirmed from here.
 */
3379 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3380 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3381 dns_dbnode_t **nodep, dns_name_t *foundname,
3382 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3384 dns_rbtnode_t *node = NULL;
3385 isc_result_t result;
3386 rbtdb_search_t search;
3387 isc_boolean_t cname_ok = ISC_TRUE;
3388 isc_boolean_t close_version = ISC_FALSE;
3389 isc_boolean_t maybe_zonecut = ISC_FALSE;
3390 isc_boolean_t at_zonecut = ISC_FALSE;
3392 isc_boolean_t empty_node;
3393 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3394 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3395 rbtdb_rdatatype_t sigtype;
3396 isc_boolean_t active;
3397 dns_rbtnodechain_t chain;
3401 search.rbtdb = (dns_rbtdb_t *)db;
3403 REQUIRE(VALID_RBTDB(search.rbtdb));
3406 * We don't care about 'now'.
3411 * If the caller didn't supply a version, attach to the current
3414 if (version == NULL) {
3415 currentversion(db, &version);
3416 close_version = ISC_TRUE;
3419 search.rbtversion = version;
3420 search.serial = search.rbtversion->serial;
3421 search.options = options;
3422 search.copy_name = ISC_FALSE;
3423 search.need_cleanup = ISC_FALSE;
3424 search.wild = ISC_FALSE;
3425 search.zonecut = NULL;
3426 dns_fixedname_init(&search.zonecut_name);
3427 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3431 * 'wild' will be true iff. we've matched a wildcard.
3435 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3438 * Search down from the root of the tree. If, while going down, we
3439 * encounter a callback node, zone_zonecut_callback() will search the
3440 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3442 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3444 result = dns_rbt_findnode(tree, name, foundname, &node,
3445 &search.chain, DNS_RBTFIND_EMPTYDATA,
3446 zone_zonecut_callback, &search);
3448 if (result == DNS_R_PARTIALMATCH) {
3450 if (search.zonecut != NULL) {
3451 result = setup_delegation(&search, nodep, foundname,
3452 rdataset, sigrdataset);
3458 * At least one of the levels in the search chain
3459 * potentially has a wildcard. For each such level,
3460 * we must see if there's a matching wildcard active
3461 * in the current version.
3463 result = find_wildcard(&search, &node, name);
3464 if (result == ISC_R_SUCCESS) {
3465 result = dns_name_copy(name, foundname, NULL);
3466 if (result != ISC_R_SUCCESS)
3471 else if (result != ISC_R_NOTFOUND)
3475 chain = search.chain;
3476 active = activeempty(&search, &chain, name);
3479 * If we're here, then the name does not exist, is not
3480 * beneath a zonecut, and there's no matching wildcard.
3482 if ((search.rbtversion->secure == dns_db_secure &&
3483 !search.rbtversion->havensec3) ||
3484 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3485 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3487 result = find_closest_nsec(&search, nodep, foundname,
3488 rdataset, sigrdataset, tree,
3489 search.rbtversion->secure);
3490 if (result == ISC_R_SUCCESS)
3491 result = active ? DNS_R_EMPTYNAME :
3494 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3496 } else if (result != ISC_R_SUCCESS)
3501 * We have found a node whose name is the desired name, or we
3502 * have matched a wildcard.
3505 if (search.zonecut != NULL) {
3507 * If we're beneath a zone cut, we don't want to look for
3508 * CNAMEs because they're not legitimate zone glue.
3510 cname_ok = ISC_FALSE;
3513 * The node may be a zone cut itself. If it might be one,
3514 * make sure we check for it later.
3516 * DS records live above the zone cut in ordinary zone so
3517 * we want to ignore any referral.
3519 * Stub zones don't have anything "above" the delgation so
3520 * we always return a referral.
3522 if (node->find_callback &&
3523 ((node != search.rbtdb->origin_node &&
3524 !dns_rdatatype_atparent(type)) ||
3525 IS_STUB(search.rbtdb)))
3526 maybe_zonecut = ISC_TRUE;
3530 * Certain DNSSEC types are not subject to CNAME matching
3531 * (RFC4035, section 2.5 and RFC3007).
3533 * We don't check for RRSIG, because we don't store RRSIG records
3536 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3537 cname_ok = ISC_FALSE;
3540 * We now go looking for rdata...
3543 lock = &search.rbtdb->node_locks[node->locknum].lock;
3544 NODE_LOCK(lock, isc_rwlocktype_read);
3548 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3552 empty_node = ISC_TRUE;
3553 for (header = node->data; header != NULL; header = header_next) {
3554 header_next = header->next;
3556 * Look for an active, extant rdataset.
3559 if (header->serial <= search.serial &&
3562 * Is this a "this rdataset doesn't
3565 if (NONEXISTENT(header))
3569 header = header->down;
3570 } while (header != NULL);
3571 if (header != NULL) {
3573 * We now know that there is at least one active
3574 * rdataset at this node.
3576 empty_node = ISC_FALSE;
3579 * Do special zone cut handling, if requested.
3581 if (maybe_zonecut &&
3582 header->type == dns_rdatatype_ns) {
3584 * We increment the reference count on node to
3585 * ensure that search->zonecut_rdataset will
3586 * still be valid later.
3588 new_reference(search.rbtdb, node);
3589 search.zonecut = node;
3590 search.zonecut_rdataset = header;
3591 search.zonecut_sigrdataset = NULL;
3592 search.need_cleanup = ISC_TRUE;
3593 maybe_zonecut = ISC_FALSE;
3594 at_zonecut = ISC_TRUE;
3596 * It is not clear if KEY should still be
3597 * allowed at the parent side of the zone
3598 * cut or not. It is needed for RFC3007
3599 * validated updates.
3601 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3602 && type != dns_rdatatype_nsec
3603 && type != dns_rdatatype_key) {
3605 * Glue is not OK, but any answer we
3606 * could return would be glue. Return
3612 if (found != NULL && foundsig != NULL)
3618 * If the NSEC3 record doesn't match the chain
3619 * we are using behave as if it isn't here.
3621 if (header->type == dns_rdatatype_nsec3 &&
3622 !matchparams(header, &search)) {
3623 NODE_UNLOCK(lock, isc_rwlocktype_read);
3627 * If we found a type we were looking for,
3630 if (header->type == type ||
3631 type == dns_rdatatype_any ||
3632 (header->type == dns_rdatatype_cname &&
3635 * We've found the answer!
3638 if (header->type == dns_rdatatype_cname &&
3641 * We may be finding a CNAME instead
3642 * of the desired type.
3644 * If we've already got the CNAME RRSIG,
3645 * use it, otherwise change sigtype
3646 * so that we find it.
3648 if (cnamesig != NULL)
3649 foundsig = cnamesig;
3652 RBTDB_RDATATYPE_SIGCNAME;
3655 * If we've got all we need, end the search.
3657 if (!maybe_zonecut && foundsig != NULL)
3659 } else if (header->type == sigtype) {
3661 * We've found the RRSIG rdataset for our
3662 * target type. Remember it.
3666 * If we've got all we need, end the search.
3668 if (!maybe_zonecut && found != NULL)
3670 } else if (header->type == dns_rdatatype_nsec &&
3671 !search.rbtversion->havensec3) {
3673 * Remember a NSEC rdataset even if we're
3674 * not specifically looking for it, because
3675 * we might need it later.
3677 nsecheader = header;
3678 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3679 !search.rbtversion->havensec3) {
3681 * If we need the NSEC rdataset, we'll also
3682 * need its signature.
3685 } else if (cname_ok &&
3686 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3688 * If we get a CNAME match, we'll also need
3698 * We have an exact match for the name, but there are no
3699 * active rdatasets in the desired version. That means that
3700 * this node doesn't exist in the desired version, and that
3701 * we really have a partial match.
3704 NODE_UNLOCK(lock, isc_rwlocktype_read);
3710 * If we didn't find what we were looking for...
3712 if (found == NULL) {
3713 if (search.zonecut != NULL) {
3715 * We were trying to find glue at a node beneath a
3716 * zone cut, but didn't.
3718 * Return the delegation.
3720 NODE_UNLOCK(lock, isc_rwlocktype_read);
3721 result = setup_delegation(&search, nodep, foundname,
3722 rdataset, sigrdataset);
3726 * The desired type doesn't exist.
3728 result = DNS_R_NXRRSET;
3729 if (search.rbtversion->secure == dns_db_secure &&
3730 !search.rbtversion->havensec3 &&
3731 (nsecheader == NULL || nsecsig == NULL)) {
3733 * The zone is secure but there's no NSEC,
3734 * or the NSEC has no signature!
3737 result = DNS_R_BADDB;
3741 NODE_UNLOCK(lock, isc_rwlocktype_read);
3742 result = find_closest_nsec(&search, nodep, foundname,
3743 rdataset, sigrdataset,
3745 search.rbtversion->secure);
3746 if (result == ISC_R_SUCCESS)
3747 result = DNS_R_EMPTYWILD;
3750 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3754 * There's no NSEC record, and we were told
3757 result = DNS_R_BADDB;
3760 if (nodep != NULL) {
3761 new_reference(search.rbtdb, node);
3764 if ((search.rbtversion->secure == dns_db_secure &&
3765 !search.rbtversion->havensec3) ||
3766 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3768 bind_rdataset(search.rbtdb, node, nsecheader,
3770 if (nsecsig != NULL)
3771 bind_rdataset(search.rbtdb, node,
3772 nsecsig, 0, sigrdataset);
3775 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3780 * We found what we were looking for, or we found a CNAME.
3783 if (type != found->type &&
3784 type != dns_rdatatype_any &&
3785 found->type == dns_rdatatype_cname) {
3787 * We weren't doing an ANY query and we found a CNAME instead
3788 * of the type we were looking for, so we need to indicate
3789 * that result to the caller.
3791 result = DNS_R_CNAME;
3792 } else if (search.zonecut != NULL) {
3794 * If we're beneath a zone cut, we must indicate that the
3795 * result is glue, unless we're actually at the zone cut
3796 * and the type is NSEC or KEY.
3798 if (search.zonecut == node) {
3800 * It is not clear if KEY should still be
3801 * allowed at the parent side of the zone
3802 * cut or not. It is needed for RFC3007
3803 * validated updates.
3805 if (type == dns_rdatatype_nsec ||
3806 type == dns_rdatatype_nsec3 ||
3807 type == dns_rdatatype_key)
3808 result = ISC_R_SUCCESS;
3809 else if (type == dns_rdatatype_any)
3810 result = DNS_R_ZONECUT;
3812 result = DNS_R_GLUE;
3814 result = DNS_R_GLUE;
3816 * We might have found data that isn't glue, but was occluded
3817 * by a dynamic update. If the caller cares about this, they
3818 * will have told us to validate glue.
3820 * XXX We should cache the glue validity state!
3822 if (result == DNS_R_GLUE &&
3823 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3824 !valid_glue(&search, foundname, type, node)) {
3825 NODE_UNLOCK(lock, isc_rwlocktype_read);
3826 result = setup_delegation(&search, nodep, foundname,
3827 rdataset, sigrdataset);
3832 * An ordinary successful query!
3834 result = ISC_R_SUCCESS;
3837 if (nodep != NULL) {
3839 new_reference(search.rbtdb, node);
3841 search.need_cleanup = ISC_FALSE;
3845 if (type != dns_rdatatype_any) {
3846 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3847 if (foundsig != NULL)
3848 bind_rdataset(search.rbtdb, node, foundsig, 0,
3853 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3856 NODE_UNLOCK(lock, isc_rwlocktype_read);
3859 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3862 * If we found a zonecut but aren't going to use it, we have to
3865 if (search.need_cleanup) {
3866 node = search.zonecut;
3867 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3869 NODE_LOCK(lock, isc_rwlocktype_read);
3870 decrement_reference(search.rbtdb, node, 0,
3871 isc_rwlocktype_read, isc_rwlocktype_none,
3873 NODE_UNLOCK(lock, isc_rwlocktype_read);
3877 closeversion(db, &version, ISC_FALSE);
3879 dns_rbtnodechain_reset(&search.chain);
/*
 * zone_findzonecut: placeholder for the findzonecut method on zone
 * databases.  The visible body raises FATAL_ERROR with a fixed message and
 * then returns ISC_R_NOTIMPLEMENTED; sigrdataset is marked UNUSED.
 * NOTE(review): the lines between the signature and UNUSED(sigrdataset) are
 * not visible in this listing; presumably the remaining arguments are
 * marked unused there as well -- confirm against the full source.
 */
3885 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3886 isc_stdtime_t now, dns_dbnode_t **nodep,
3887 dns_name_t *foundname,
3888 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3897 UNUSED(sigrdataset);
3899 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3901 return (ISC_R_NOTIMPLEMENTED);
/*
 * cache_zonecut_callback: callback handed to dns_rbt_findnode() by
 * cache_find().  arg is the rbtdb_search_t of the lookup in progress and
 * must not have a zonecut recorded yet.  With the node lock held, the
 * rdataset headers at node are scanned for a DNAME rdataset and the
 * RRSIG DNAME rdataset; expired headers met on the way are freed (when the
 * node has no references and write access was obtained) or marked stale.
 * When a DNAME is found and its trust is not pending, or
 * DNS_DBFIND_PENDINGOK is set, a node reference is taken, the node and both
 * headers are recorded in the search, need_cleanup is set, and result is
 * set to DNS_R_PARTIALMATCH; otherwise result is set to DNS_R_CONTINUE.
 * NOTE(review): several short lines are missing from this listing, so
 * conditions that continue onto a missing line are not described here.
 */
3905 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3906 rbtdb_search_t *search = arg;
3907 rdatasetheader_t *header, *header_prev, *header_next;
3908 rdatasetheader_t *dname_header, *sigdname_header;
3909 isc_result_t result;
3911 isc_rwlocktype_t locktype;
3915 REQUIRE(search->zonecut == NULL);
3918 * Keep compiler silent.
3922 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3923 locktype = isc_rwlocktype_read;
3924 NODE_LOCK(lock, locktype);
3927 * Look for a DNAME or RRSIG DNAME rdataset.
3929 dname_header = NULL;
3930 sigdname_header = NULL;
/* header_next is saved up front because the current header may be freed. */
3932 for (header = node->data; header != NULL; header = header_next) {
3933 header_next = header->next;
3934 if (header->rdh_ttl <= search->now) {
3936 * This rdataset is stale. If no one else is
3937 * using the node, we can clean it up right
3938 * now, otherwise we mark it as stale, and
3939 * the node as dirty, so it will get cleaned
3942 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3943 (locktype == isc_rwlocktype_write ||
3944 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3946 * We update the node's status only when we
3947 * can get write access; otherwise, we leave
3948 * others to this work. Periodical cleaning
3949 * will eventually take the job as the last
3951 * We won't downgrade the lock, since other
3952 * rdatasets are probably stale, too.
3954 locktype = isc_rwlocktype_write;
3956 if (dns_rbtnode_refcurrent(node) == 0) {
3960 * header->down can be non-NULL if the
3961 * refcount has just decremented to 0
3962 * but decrement_reference() has not
3963 * performed clean_cache_node(), in
3964 * which case we need to purge the
3965 * stale headers first.
3967 mctx = search->rbtdb->common.mctx;
3968 clean_stale_headers(search->rbtdb,
3971 if (header_prev != NULL)
3975 node->data = header->next;
3976 free_rdataset(search->rbtdb, mctx,
3979 header->attributes |=
3980 RDATASET_ATTR_STALE;
3982 header_prev = header;
3985 header_prev = header;
3986 } else if (header->type == dns_rdatatype_dname &&
3988 dname_header = header;
3989 header_prev = header;
3990 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3992 sigdname_header = header;
3993 header_prev = header;
3995 header_prev = header;
/* A DNAME with pending trust is used only if the caller allows it. */
3998 if (dname_header != NULL &&
3999 (!DNS_TRUST_PENDING(dname_header->trust) ||
4000 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4002 * We increment the reference count on node to ensure that
4003 * search->zonecut_rdataset will still be valid later.
4005 new_reference(search->rbtdb, node);
4006 INSIST(!ISC_LINK_LINKED(node, deadlink));
4007 search->zonecut = node;
4008 search->zonecut_rdataset = dname_header;
4009 search->zonecut_sigrdataset = sigdname_header;
4010 search->need_cleanup = ISC_TRUE;
4011 result = DNS_R_PARTIALMATCH;
4013 result = DNS_R_CONTINUE;
4015 NODE_UNLOCK(lock, locktype);
/*
 * find_deepest_zonecut: starting at node, look for a non-stale NS rdataset
 * (and its RRSIG) under the node lock; when none is found and chain levels
 * remain, node is replaced by an entry of search->chain.levels.  The caller
 * must hold the tree lock.  When an NS rdataset is found, foundname (if
 * non-NULL) is built from the node name plus names taken from the chain
 * levels, a reference is added when nodep is non-NULL, the rdatasets are
 * bound, and result becomes DNS_R_DELEGATION.  result starts out as
 * ISC_R_NOTFOUND.
 * NOTE(review): the enclosing loop header and several statements are
 * missing from this listing; only the visible lines are described.
 */
4020 static inline isc_result_t
4021 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4022 dns_dbnode_t **nodep, dns_name_t *foundname,
4023 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4026 dns_rbtnode_t *level_node;
4027 rdatasetheader_t *header, *header_prev, *header_next;
4028 rdatasetheader_t *found, *foundsig;
4029 isc_result_t result = ISC_R_NOTFOUND;
4034 isc_rwlocktype_t locktype;
4037 * Caller must be holding the tree lock.
4040 rbtdb = search->rbtdb;
4041 i = search->chain.level_matches;
4044 locktype = isc_rwlocktype_read;
4045 lock = &rbtdb->node_locks[node->locknum].lock;
4046 NODE_LOCK(lock, locktype);
4049 * Look for NS and RRSIG NS rdatasets.
4054 for (header = node->data;
4056 header = header_next) {
4057 header_next = header->next;
4058 if (header->rdh_ttl <= search->now) {
4060 * This rdataset is stale. If no one else is
4061 * using the node, we can clean it up right
4062 * now, otherwise we mark it as stale, and
4063 * the node as dirty, so it will get cleaned
4066 if ((header->rdh_ttl <= search->now -
4068 (locktype == isc_rwlocktype_write ||
4069 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4071 * We update the node's status only
4072 * when we can get write access.
4074 locktype = isc_rwlocktype_write;
4076 if (dns_rbtnode_refcurrent(node)
4080 m = search->rbtdb->common.mctx;
4081 clean_stale_headers(
4084 if (header_prev != NULL)
4090 free_rdataset(rbtdb, m,
4093 header->attributes |=
4094 RDATASET_ATTR_STALE;
4096 header_prev = header;
4099 header_prev = header;
4100 } else if (EXISTS(header)) {
4102 * We've found an extant rdataset. See if
4103 * we're interested in it.
4105 if (header->type == dns_rdatatype_ns) {
4107 if (foundsig != NULL)
4109 } else if (header->type ==
4110 RBTDB_RDATATYPE_SIGNS) {
4115 header_prev = header;
4117 header_prev = header;
4120 if (found != NULL) {
4122 * If we have to set foundname, we do it before
4123 * anything else. If we were to set foundname after
4124 * we had set nodep or bound the rdataset, then we'd
4125 * have to undo that work if dns_name_concatenate()
4126 * failed. By setting foundname first, there's
4127 * nothing to undo if we have trouble.
4129 if (foundname != NULL) {
4130 dns_name_init(&name, NULL);
4131 dns_rbt_namefromnode(node, &name);
4132 result = dns_name_copy(&name, foundname, NULL);
4133 while (result == ISC_R_SUCCESS && i > 0) {
4135 level_node = search->chain.levels[i];
4136 dns_name_init(&name, NULL);
4137 dns_rbt_namefromnode(level_node,
4140 dns_name_concatenate(foundname,
4145 if (result != ISC_R_SUCCESS) {
4150 result = DNS_R_DELEGATION;
4151 if (nodep != NULL) {
4152 new_reference(search->rbtdb, node);
4155 bind_rdataset(search->rbtdb, node, found, search->now,
4157 if (foundsig != NULL)
4158 bind_rdataset(search->rbtdb, node, foundsig,
4159 search->now, sigrdataset);
/*
 * update_header() is only reached with the node lock held for writing; a
 * read lock is released and re-acquired as a write lock first, and
 * need_headerupdate() is evaluated again after that.
 */
4160 if (need_headerupdate(found, search->now) ||
4161 (foundsig != NULL &&
4162 need_headerupdate(foundsig, search->now))) {
4163 if (locktype != isc_rwlocktype_write) {
4164 NODE_UNLOCK(lock, locktype);
4165 NODE_LOCK(lock, isc_rwlocktype_write);
4166 locktype = isc_rwlocktype_write;
4168 if (need_headerupdate(found, search->now))
4169 update_header(search->rbtdb, found,
4171 if (foundsig != NULL &&
4172 need_headerupdate(foundsig, search->now)) {
4173 update_header(search->rbtdb, foundsig,
4180 NODE_UNLOCK(lock, locktype);
4182 if (found == NULL && i > 0) {
4184 node = search->chain.levels[i];
/*
 * find_coveringnsec: walk backwards through search->chain looking for a
 * node that holds an NSEC rdataset.  Each pass fetches the current chain
 * node with dns_rbtnodechain_current(), scans its headers under the node
 * lock (freeing or marking expired ones), and then either
 *   - binds the NSEC (and RRSIG NSEC if present), takes a node reference
 *     and sets result to DNS_R_COVERINGNSEC,
 *   - sets result to ISC_R_NOTFOUND when the node has other live data but
 *     no NSEC, or
 *   - steps to the previous node with dns_rbtnodechain_prev().
 * The loop repeats while the node was empty and the step succeeded.
 * Headers that are NONEXISTENT or whose base type is 0 do not count as
 * live data for the empty-node test.
 */
4194 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4195 isc_stdtime_t now, dns_name_t *foundname,
4196 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4198 dns_rbtnode_t *node;
4199 rdatasetheader_t *header, *header_next, *header_prev;
4200 rdatasetheader_t *found, *foundsig;
4201 isc_boolean_t empty_node;
4202 isc_result_t result;
4203 dns_fixedname_t fname, forigin;
4204 dns_name_t *name, *origin;
4205 rbtdb_rdatatype_t matchtype, sigmatchtype;
4207 isc_rwlocktype_t locktype;
/* Header type codes for NSEC and for the RRSIG covering NSEC. */
4209 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4210 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4211 dns_rdatatype_nsec);
4215 dns_fixedname_init(&fname);
4216 name = dns_fixedname_name(&fname);
4217 dns_fixedname_init(&forigin);
4218 origin = dns_fixedname_name(&forigin);
4219 result = dns_rbtnodechain_current(&search->chain, name,
4221 if (result != ISC_R_SUCCESS)
4223 locktype = isc_rwlocktype_read;
4224 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4225 NODE_LOCK(lock, locktype);
4228 empty_node = ISC_TRUE;
4230 for (header = node->data;
4232 header = header_next) {
4233 header_next = header->next;
4234 if (header->rdh_ttl <= now) {
4236 * This rdataset is stale. If no one else is
4237 * using the node, we can clean it up right
4238 * now, otherwise we mark it as stale, and the
4239 * node as dirty, so it will get cleaned up
4242 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4243 (locktype == isc_rwlocktype_write ||
4244 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4246 * We update the node's status only
4247 * when we can get write access.
4249 locktype = isc_rwlocktype_write;
4251 if (dns_rbtnode_refcurrent(node)
4255 m = search->rbtdb->common.mctx;
4256 clean_stale_headers(
4259 if (header_prev != NULL)
4263 node->data = header->next;
4264 free_rdataset(search->rbtdb, m,
4267 header->attributes |=
4268 RDATASET_ATTR_STALE;
4270 header_prev = header;
4273 header_prev = header;
4276 if (NONEXISTENT(header) ||
4277 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4278 header_prev = header;
4281 empty_node = ISC_FALSE;
4282 if (header->type == matchtype)
4284 else if (header->type == sigmatchtype)
4286 header_prev = header;
4288 if (found != NULL) {
4289 result = dns_name_concatenate(name, origin,
4291 if (result != ISC_R_SUCCESS)
4293 bind_rdataset(search->rbtdb, node, found,
4295 if (foundsig != NULL)
4296 bind_rdataset(search->rbtdb, node, foundsig,
4298 new_reference(search->rbtdb, node);
4300 result = DNS_R_COVERINGNSEC;
4301 } else if (!empty_node) {
4302 result = ISC_R_NOTFOUND;
4304 result = dns_rbtnodechain_prev(&search->chain, NULL,
4307 NODE_UNLOCK(lock, locktype);
4308 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * cache_find: find method for cache databases.  version must be NULL.
 *
 * The tree is searched under the tree read lock, with
 * cache_zonecut_callback() installed to spot DNAME rdatasets on the way
 * down.  For a partial match the visible code calls, in this order,
 * find_coveringnsec() (only when DNS_DBFIND_COVERINGNSEC is set),
 * setup_delegation() (when a zonecut was recorded by the callback) and
 * find_deepest_zonecut().
 *
 * For an exact match the node headers are scanned under the node lock for
 * the requested type, a CNAME (unless type is KEY or NSEC), the matching
 * RRSIG, a negative cache entry, and NS plus RRSIG NS.  An answer whose
 * trust is additional, glue or pending is treated as not found unless the
 * corresponding DNS_DBFIND_*OK option is set; in that case an NS rdataset
 * at the node yields DNS_R_DELEGATION.
 *
 * Result codes assigned in the visible lines: DNS_R_DELEGATION,
 * DNS_R_NCACHENXDOMAIN, DNS_R_NCACHENXRRSET, DNS_R_CNAME, ISC_R_SUCCESS,
 * plus whatever the helper functions return.
 *
 * NOTE(review): many short lines (braces, gotos, labels, simple
 * assignments) are absent from this listing, so the exact control flow
 * between the visible statements cannot be confirmed from here.
 */
4313 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4314 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4315 dns_dbnode_t **nodep, dns_name_t *foundname,
4316 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4318 dns_rbtnode_t *node = NULL;
4319 isc_result_t result;
4320 rbtdb_search_t search;
4321 isc_boolean_t cname_ok = ISC_TRUE;
4322 isc_boolean_t empty_node;
4324 isc_rwlocktype_t locktype;
4325 rdatasetheader_t *header, *header_prev, *header_next;
4326 rdatasetheader_t *found, *nsheader;
4327 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4328 rdatasetheader_t *update, *updatesig;
4329 rbtdb_rdatatype_t sigtype, negtype;
4333 search.rbtdb = (dns_rbtdb_t *)db;
4335 REQUIRE(VALID_RBTDB(search.rbtdb));
4336 REQUIRE(version == NULL);
4339 isc_stdtime_get(&now);
4341 search.rbtversion = NULL;
4343 search.options = options;
4344 search.copy_name = ISC_FALSE;
4345 search.need_cleanup = ISC_FALSE;
4346 search.wild = ISC_FALSE;
4347 search.zonecut = NULL;
4348 dns_fixedname_init(&search.zonecut_name);
4349 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4354 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4357 * Search down from the root of the tree. If, while going down, we
4358 * encounter a callback node, cache_zonecut_callback() will search the
4359 * rdatasets at the zone cut for a DNAME rdataset.
4361 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4362 &search.chain, DNS_RBTFIND_EMPTYDATA,
4363 cache_zonecut_callback, &search);
4365 if (result == DNS_R_PARTIALMATCH) {
4366 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4367 result = find_coveringnsec(&search, nodep, now,
4368 foundname, rdataset,
4370 if (result == DNS_R_COVERINGNSEC)
4373 if (search.zonecut != NULL) {
4374 result = setup_delegation(&search, nodep, foundname,
4375 rdataset, sigrdataset);
4379 result = find_deepest_zonecut(&search, node, nodep,
4380 foundname, rdataset,
4384 } else if (result != ISC_R_SUCCESS)
4388 * Certain DNSSEC types are not subject to CNAME matching
4389 * (RFC4035, section 2.5 and RFC3007).
4391 * We don't check for RRSIG, because we don't store RRSIG records
4394 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4395 cname_ok = ISC_FALSE;
4398 * We now go looking for rdata...
4401 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4402 locktype = isc_rwlocktype_read;
4403 NODE_LOCK(lock, locktype);
/* Header type codes for the RRSIG of type and for a negative entry of type. */
4407 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4408 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4412 empty_node = ISC_TRUE;
4414 for (header = node->data; header != NULL; header = header_next) {
4415 header_next = header->next;
4416 if (header->rdh_ttl <= now) {
4418 * This rdataset is stale. If no one else is using the
4419 * node, we can clean it up right now, otherwise we
4420 * mark it as stale, and the node as dirty, so it will
4421 * get cleaned up later.
4423 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4424 (locktype == isc_rwlocktype_write ||
4425 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4427 * We update the node's status only when we
4428 * can get write access.
4430 locktype = isc_rwlocktype_write;
4432 if (dns_rbtnode_refcurrent(node) == 0) {
4435 mctx = search.rbtdb->common.mctx;
4436 clean_stale_headers(search.rbtdb, mctx,
4438 if (header_prev != NULL)
4442 node->data = header->next;
4443 free_rdataset(search.rbtdb, mctx,
4446 header->attributes |=
4447 RDATASET_ATTR_STALE;
4449 header_prev = header;
4452 header_prev = header;
4453 } else if (EXISTS(header)) {
4455 * We now know that there is at least one active
4456 * non-stale rdataset at this node.
4458 empty_node = ISC_FALSE;
4461 * If we found a type we were looking for, remember
4464 if (header->type == type ||
4465 (type == dns_rdatatype_any &&
4466 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4467 (cname_ok && header->type ==
4468 dns_rdatatype_cname)) {
4470 * We've found the answer.
4473 if (header->type == dns_rdatatype_cname &&
4477 * If we've already got the CNAME RRSIG,
4478 * use it, otherwise change sigtype
4479 * so that we find it.
4481 if (cnamesig != NULL)
4482 foundsig = cnamesig;
4485 RBTDB_RDATATYPE_SIGCNAME;
4486 foundsig = cnamesig;
4488 } else if (header->type == sigtype) {
4490 * We've found the RRSIG rdataset for our
4491 * target type. Remember it.
4494 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4495 header->type == negtype) {
4497 * We've found a negative cache entry.
4500 } else if (header->type == dns_rdatatype_ns) {
4502 * Remember a NS rdataset even if we're
4503 * not specifically looking for it, because
4504 * we might need it later.
4507 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4509 * If we need the NS rdataset, we'll also
4510 * need its signature.
4513 } else if (cname_ok &&
4514 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4516 * If we get a CNAME match, we'll also need
4521 header_prev = header;
4523 header_prev = header;
4528 * We have an exact match for the name, but there are no
4529 * extant rdatasets. That means that this node doesn't
4530 * meaningfully exist, and that we really have a partial match.
4532 NODE_UNLOCK(lock, locktype);
4537 * If we didn't find what we were looking for...
4539 if (found == NULL ||
4540 (DNS_TRUST_ADDITIONAL(found->trust) &&
4541 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4542 (found->trust == dns_trust_glue &&
4543 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4544 (DNS_TRUST_PENDING(found->trust) &&
4545 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4547 * If there is an NS rdataset at this node, then this is the
4550 if (nsheader != NULL) {
4551 if (nodep != NULL) {
4552 new_reference(search.rbtdb, node);
4553 INSIST(!ISC_LINK_LINKED(node, deadlink));
4556 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4558 if (need_headerupdate(nsheader, search.now))
4560 if (nssig != NULL) {
4561 bind_rdataset(search.rbtdb, node, nssig,
4562 search.now, sigrdataset);
4563 if (need_headerupdate(nssig, search.now))
4566 result = DNS_R_DELEGATION;
4571 * Go find the deepest zone cut.
4573 NODE_UNLOCK(lock, locktype);
4578 * We found what we were looking for, or we found a CNAME.
4581 if (nodep != NULL) {
4582 new_reference(search.rbtdb, node);
4583 INSIST(!ISC_LINK_LINKED(node, deadlink));
4587 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4589 * We found a negative cache entry.
4591 if (NXDOMAIN(found))
4592 result = DNS_R_NCACHENXDOMAIN;
4594 result = DNS_R_NCACHENXRRSET;
4595 } else if (type != found->type &&
4596 type != dns_rdatatype_any &&
4597 found->type == dns_rdatatype_cname) {
4599 * We weren't doing an ANY query and we found a CNAME instead
4600 * of the type we were looking for, so we need to indicate
4601 * that result to the caller.
4603 result = DNS_R_CNAME;
4606 * An ordinary successful query!
4608 result = ISC_R_SUCCESS;
4611 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4612 result == DNS_R_NCACHENXRRSET) {
4613 bind_rdataset(search.rbtdb, node, found, search.now,
4615 if (need_headerupdate(found, search.now))
4617 if (foundsig != NULL) {
4618 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4620 if (need_headerupdate(foundsig, search.now))
4621 updatesig = foundsig;
/*
 * Header updates are done under the write lock: a read lock is dropped and
 * re-taken for writing, and need_headerupdate() is checked again afterwards.
 */
4626 if ((update != NULL || updatesig != NULL) &&
4627 locktype != isc_rwlocktype_write) {
4628 NODE_UNLOCK(lock, locktype);
4629 NODE_LOCK(lock, isc_rwlocktype_write);
4630 locktype = isc_rwlocktype_write;
4632 if (update != NULL && need_headerupdate(update, search.now))
4633 update_header(search.rbtdb, update, search.now);
4634 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4635 update_header(search.rbtdb, updatesig, search.now);
4637 NODE_UNLOCK(lock, locktype);
4640 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4643 * If we found a zonecut but aren't going to use it, we have to
4646 if (search.need_cleanup) {
4647 node = search.zonecut;
4648 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4650 NODE_LOCK(lock, isc_rwlocktype_read);
4651 decrement_reference(search.rbtdb, node, 0,
4652 isc_rwlocktype_read, isc_rwlocktype_none,
4654 NODE_UNLOCK(lock, isc_rwlocktype_read);
4657 dns_rbtnodechain_reset(&search.chain);
/*
 * cache_findzonecut: findzonecut method for cache databases.  name is looked
 * up under the tree read lock (DNS_DBFIND_NOEXACT is translated into
 * DNS_RBTFIND_NOEXACT), and the NS rdataset found at the node, plus
 * RRSIG NS when present, is bound to rdataset and sigrdataset.  A partial
 * match is handed to find_deepest_zonecut().  No callback is passed to
 * dns_rbt_findnode(), and search.need_cleanup is asserted to be false once
 * the tree lock is released.  At the end DNS_R_DELEGATION is converted to
 * ISC_R_SUCCESS.
 * NOTE(review): the branch taken after the "No NS records here" unlock is
 * not visible in this listing.
 */
4663 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4664 isc_stdtime_t now, dns_dbnode_t **nodep,
4665 dns_name_t *foundname,
4666 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4668 dns_rbtnode_t *node = NULL;
4670 isc_result_t result;
4671 rbtdb_search_t search;
4672 rdatasetheader_t *header, *header_prev, *header_next;
4673 rdatasetheader_t *found, *foundsig;
4674 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4675 isc_rwlocktype_t locktype;
4677 search.rbtdb = (dns_rbtdb_t *)db;
4679 REQUIRE(VALID_RBTDB(search.rbtdb));
4682 isc_stdtime_get(&now);
4684 search.rbtversion = NULL;
4686 search.options = options;
4687 search.copy_name = ISC_FALSE;
4688 search.need_cleanup = ISC_FALSE;
4689 search.wild = ISC_FALSE;
4690 search.zonecut = NULL;
4691 dns_fixedname_init(&search.zonecut_name);
4692 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4695 if ((options & DNS_DBFIND_NOEXACT) != 0)
4696 rbtoptions |= DNS_RBTFIND_NOEXACT;
4698 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4701 * Search down from the root of the tree.
4703 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4704 &search.chain, rbtoptions, NULL, &search);
4706 if (result == DNS_R_PARTIALMATCH) {
4708 result = find_deepest_zonecut(&search, node, nodep, foundname,
4709 rdataset, sigrdataset);
4711 } else if (result != ISC_R_SUCCESS)
4715 * We now go looking for an NS rdataset at the node.
4718 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4719 locktype = isc_rwlocktype_read;
4720 NODE_LOCK(lock, locktype);
4725 for (header = node->data; header != NULL; header = header_next) {
4726 header_next = header->next;
4727 if (header->rdh_ttl <= now) {
4729 * This rdataset is stale. If no one else is using the
4730 * node, we can clean it up right now, otherwise we
4731 * mark it as stale, and the node as dirty, so it will
4732 * get cleaned up later.
4734 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4735 (locktype == isc_rwlocktype_write ||
4736 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4738 * We update the node's status only when we
4739 * can get write access.
4741 locktype = isc_rwlocktype_write;
4743 if (dns_rbtnode_refcurrent(node) == 0) {
4746 mctx = search.rbtdb->common.mctx;
4747 clean_stale_headers(search.rbtdb, mctx,
4749 if (header_prev != NULL)
4753 node->data = header->next;
4754 free_rdataset(search.rbtdb, mctx,
4757 header->attributes |=
4758 RDATASET_ATTR_STALE;
4760 header_prev = header;
4763 header_prev = header;
4764 } else if (EXISTS(header)) {
4766 * If we found a type we were looking for, remember
4769 if (header->type == dns_rdatatype_ns) {
4771 * Remember a NS rdataset even if we're
4772 * not specifically looking for it, because
4773 * we might need it later.
4776 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4778 * If we need the NS rdataset, we'll also
4779 * need its signature.
4783 header_prev = header;
4785 header_prev = header;
4788 if (found == NULL) {
4790 * No NS records here.
4792 NODE_UNLOCK(lock, locktype);
4796 if (nodep != NULL) {
4797 new_reference(search.rbtdb, node);
4798 INSIST(!ISC_LINK_LINKED(node, deadlink));
4802 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4803 if (foundsig != NULL)
4804 bind_rdataset(search.rbtdb, node, foundsig, search.now,
/*
 * update_header() is called only with the write lock held; a read lock is
 * released and re-acquired for writing, then the need is checked again.
 */
4807 if (need_headerupdate(found, search.now) ||
4808 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
4809 if (locktype != isc_rwlocktype_write) {
4810 NODE_UNLOCK(lock, locktype);
4811 NODE_LOCK(lock, isc_rwlocktype_write);
4812 locktype = isc_rwlocktype_write;
4814 if (need_headerupdate(found, search.now))
4815 update_header(search.rbtdb, found, search.now);
4816 if (foundsig != NULL &&
4817 need_headerupdate(foundsig, search.now)) {
4818 update_header(search.rbtdb, foundsig, search.now);
4822 NODE_UNLOCK(lock, locktype);
4825 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4827 INSIST(!search.need_cleanup);
4829 dns_rbtnodechain_reset(&search.chain);
4831 if (result == DNS_R_DELEGATION)
4832 result = ISC_R_SUCCESS;
/*
 * attachnode: take an additional reference on source.  targetp must be
 * non-NULL and must point to NULL.  The node reference count is
 * incremented between NODE_STRONGLOCK and NODE_STRONGUNLOCK on the lock
 * bucket selected by node->locknum.
 * NOTE(review): the store into *targetp is not visible in this listing.
 */
4838 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4839 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4840 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4843 REQUIRE(VALID_RBTDB(rbtdb));
4844 REQUIRE(targetp != NULL && *targetp == NULL);
4846 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4847 dns_rbtnode_refincrement(node, &refs);
4849 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * detachnode: release the reference held through *targetp, which must be
 * non-NULL.  The reference is dropped with decrement_reference() under the
 * node read lock.  If that call returns true and the lock bucket has no
 * references left while it is marked exiting, inactive is set.  The
 * database lock is then taken for writing; if rbtdb->active is 0 at that
 * point, free_rbtdb() is called after a debug log message naming the
 * origin (or <UNKNOWN> when the origin name is not dynamic).
 * NOTE(review): the lines that clear *targetp and adjust rbtdb->active are
 * not visible in this listing.
 */
4855 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4856 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4857 dns_rbtnode_t *node;
4858 isc_boolean_t want_free = ISC_FALSE;
4859 isc_boolean_t inactive = ISC_FALSE;
4860 rbtdb_nodelock_t *nodelock;
4862 REQUIRE(VALID_RBTDB(rbtdb));
4863 REQUIRE(targetp != NULL && *targetp != NULL);
4865 node = (dns_rbtnode_t *)(*targetp);
4866 nodelock = &rbtdb->node_locks[node->locknum];
4868 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4870 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4871 isc_rwlocktype_none, ISC_FALSE)) {
4872 if (isc_refcount_current(&nodelock->references) == 0 &&
4873 nodelock->exiting) {
4874 inactive = ISC_TRUE;
4878 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4883 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4885 if (rbtdb->active == 0)
4886 want_free = ISC_TRUE;
4887 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4889 char buf[DNS_NAME_FORMATSIZE];
4890 if (dns_name_dynamic(&rbtdb->common.origin))
4891 dns_name_format(&rbtdb->common.origin, buf,
4894 strcpy(buf, "<UNKNOWN>");
4895 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4896 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4897 "calling free_rbtdb(%s)", buf);
4898 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * expirenode: mark expired rdatasets at node as stale.  The caller must
 * hold a tree lock.  Under the node write lock every top-level header is
 * visited:
 *   - a header whose TTL is at or before now - RBTDB_VIRTUAL gets
 *     RDATASET_ATTR_STALE;
 *   - otherwise, when force_expire is set, a header that is not RETAIN()
 *     has its TTL set to 0 and is marked stale.
 * force_expire is computed only when rbtdb->overmem is set: it is true for
 * a node with down == NULL when a random value is a multiple of 4.
 * Logging at debug level 2 reports each decision.  The function always
 * returns ISC_R_SUCCESS.
 */
4904 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4905 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4906 dns_rbtnode_t *rbtnode = node;
4907 rdatasetheader_t *header;
4908 isc_boolean_t force_expire = ISC_FALSE;
4910 * These are the category and module used by the cache cleaner.
4912 isc_boolean_t log = ISC_FALSE;
4913 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4914 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4915 int level = ISC_LOG_DEBUG(2);
4916 char printname[DNS_NAME_FORMATSIZE];
4918 REQUIRE(VALID_RBTDB(rbtdb));
4921 * Caller must hold a tree lock.
4925 isc_stdtime_get(&now);
4927 if (rbtdb->overmem) {
4930 isc_random_get(&val);
4932 * XXXDCL Could stand to have a better policy, like LRU.
4934 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4937 * Note that 'log' can be true IFF rbtdb->overmem is also true.
4938 * rbtdb->overmem can currently only be true for cache
4939 * databases -- hence all of the "overmem cache" log strings.
4941 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4943 isc_log_write(dns_lctx, category, module, level,
4944 "overmem cache: %s %s",
4945 force_expire ? "FORCE" : "check",
4946 dns_rbt_formatnodename(rbtnode,
4948 sizeof(printname)));
4952 * We may not need write access, but this code path is not performance
4953 * sensitive, so it should be okay to always lock as a writer.
4955 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4956 isc_rwlocktype_write);
/* Only TTLs and attributes are changed here; no header is unlinked. */
4958 for (header = rbtnode->data; header != NULL; header = header->next)
4959 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4961 * We don't check if refcurrent(rbtnode) == 0 and try
4962 * to free like we do in cache_find(), because
4963 * refcurrent(rbtnode) must be non-zero. This is so
4964 * because 'node' is an argument to the function.
4966 header->attributes |= RDATASET_ATTR_STALE;
4969 isc_log_write(dns_lctx, category, module,
4970 level, "overmem cache: stale %s",
4972 } else if (force_expire) {
4973 if (! RETAIN(header)) {
4974 set_ttl(rbtdb, header, 0);
4975 header->attributes |= RDATASET_ATTR_STALE;
4978 isc_log_write(dns_lctx, category, module,
4979 level, "overmem cache: "
4980 "reprieve by RETAIN() %s",
4983 } else if (rbtdb->overmem && log)
4984 isc_log_write(dns_lctx, category, module, level,
4985 "overmem cache: saved %s", printname);
4987 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4988 isc_rwlocktype_write);
4990 return (ISC_R_SUCCESS);
/*
 * overmem: record the over-memory state passed by the caller.  The flag is
 * stored in rbtdb->overmem only when IS_CACHE(rbtdb) holds; for any other
 * database the visible code does nothing.
 */
4994 overmem(dns_db_t *db, isc_boolean_t overmem) {
4995 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4997 if (IS_CACHE(rbtdb))
4998 rbtdb->overmem = overmem;
/*
 * printnode: write a debugging description of node to out while holding
 * the node read lock.  The first line gives the node address, its current
 * reference count and a third value labelled locknum.  For a node with
 * data, each top-level header is visited through ->next and each of those
 * is followed through ->down, printing the type once and then serial,
 * ttl, trust and attributes for the chain entries.  A node without data is
 * reported as (empty).
 * NOTE(review): some fprintf arguments are on lines missing from this
 * listing, so the full format/argument pairing cannot be checked here.
 */
5002 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5003 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5004 dns_rbtnode_t *rbtnode = node;
5005 isc_boolean_t first;
5007 REQUIRE(VALID_RBTDB(rbtdb));
5009 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5010 isc_rwlocktype_read);
5012 fprintf(out, "node %p, %u references, locknum = %u\n",
5013 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5015 if (rbtnode->data != NULL) {
5016 rdatasetheader_t *current, *top_next;
/* top_next is saved because current is advanced down the chain below. */
5018 for (current = rbtnode->data; current != NULL;
5019 current = top_next) {
5020 top_next = current->next;
5022 fprintf(out, "\ttype %u", current->type);
5028 "\tserial = %lu, ttl = %u, "
5029 "trust = %u, attributes = %u, "
5031 (unsigned long)current->serial,
5034 current->attributes,
5036 current = current->down;
5037 } while (current != NULL);
5040 fprintf(out, "(empty)\n");
5042 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5043 isc_rwlocktype_read);
/*
 * createiterator: allocate and initialise a database iterator for db and
 * store it in *iteratorp.  Returns ISC_R_NOMEMORY when the allocation
 * fails and ISC_R_SUCCESS otherwise.  The new iterator attaches to db,
 * starts paused with no tree lock held, has no current node, and records
 * the DNS_DB_RELATIVENAMES, DNS_DB_NSEC3ONLY and DNS_DB_NONSEC3 options.
 * Two node chains are initialised; current points at the NSEC3 chain when
 * nsec3only is set, and the other visible assignment selects the main
 * chain.
 */
5047 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5049 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5050 rbtdb_dbiterator_t *rbtdbiter;
5052 REQUIRE(VALID_RBTDB(rbtdb));
5054 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5055 if (rbtdbiter == NULL)
5056 return (ISC_R_NOMEMORY);
5058 rbtdbiter->common.methods = &dbiterator_methods;
/* The db pointer is cleared first, then set by dns_db_attach(). */
5059 rbtdbiter->common.db = NULL;
5060 dns_db_attach(db, &rbtdbiter->common.db);
5061 rbtdbiter->common.relative_names =
5062 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5063 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5064 rbtdbiter->common.cleaning = ISC_FALSE;
5065 rbtdbiter->paused = ISC_TRUE;
5066 rbtdbiter->tree_locked = isc_rwlocktype_none;
5067 rbtdbiter->result = ISC_R_SUCCESS;
5068 dns_fixedname_init(&rbtdbiter->name);
5069 dns_fixedname_init(&rbtdbiter->origin);
5070 rbtdbiter->node = NULL;
5071 rbtdbiter->delete = 0;
5072 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5073 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5074 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5075 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5076 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5077 if (rbtdbiter->nsec3only)
5078 rbtdbiter->current = &rbtdbiter->nsec3chain;
5080 rbtdbiter->current = &rbtdbiter->chain;
5082 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5084 return (ISC_R_SUCCESS);
/*
 * zone_findrdataset: findrdataset method for zone databases.  Looks at node
 * for the rdataset identified by type and covers as seen at the serial of
 * version; when version is NULL the current version is opened here and
 * closed again before returning.  type must not be dns_rdatatype_any.
 * The scan runs under the node read lock.  For each top-level header the
 * down chain is followed to pick the entry for this serial, and entries
 * that are NONEXISTENT are not used.  A match is bound to rdataset and, if
 * an RRSIG for type was found, to sigrdataset.  Returns ISC_R_NOTFOUND or
 * ISC_R_SUCCESS.
 * NOTE(review): part of the serial test and the assignments to found and
 * foundsig are on lines missing from this listing.
 */
5088 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5089 dns_rdatatype_t type, dns_rdatatype_t covers,
5090 isc_stdtime_t now, dns_rdataset_t *rdataset,
5091 dns_rdataset_t *sigrdataset)
5093 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5094 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5095 rdatasetheader_t *header, *header_next, *found, *foundsig;
5096 rbtdb_serial_t serial;
5097 rbtdb_version_t *rbtversion = version;
5098 isc_boolean_t close_version = ISC_FALSE;
5099 rbtdb_rdatatype_t matchtype, sigmatchtype;
5101 REQUIRE(VALID_RBTDB(rbtdb));
5102 REQUIRE(type != dns_rdatatype_any);
5104 if (rbtversion == NULL) {
5105 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5106 close_version = ISC_TRUE;
5108 serial = rbtversion->serial;
5111 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5112 isc_rwlocktype_read);
5116 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5118 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
/* header_next is saved because header is advanced down the chain below. */
5122 for (header = rbtnode->data; header != NULL; header = header_next) {
5123 header_next = header->next;
5125 if (header->serial <= serial &&
5128 * Is this a "this rdataset doesn't
5131 if (NONEXISTENT(header))
5135 header = header->down;
5136 } while (header != NULL);
5137 if (header != NULL) {
5139 * We have an active, extant rdataset. If it's a
5140 * type we're looking for, remember it.
5142 if (header->type == matchtype) {
5144 if (foundsig != NULL)
5146 } else if (header->type == sigmatchtype) {
5153 if (found != NULL) {
5154 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5155 if (foundsig != NULL)
5156 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5160 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5161 isc_rwlocktype_read);
5164 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5168 return (ISC_R_NOTFOUND);
5170 return (ISC_R_SUCCESS);
/*
 * cache_findrdataset: findrdataset method for cache databases.  Looks at
 * node for the rdataset identified by type and covers, for a negative
 * cache entry for type (or RBTDB_RDATATYPE_NCACHEANY), and for the RRSIG
 * of type.  type must not be dns_rdatatype_any.  The scan runs under the
 * node lock; expired headers are only marked stale here, never freed, and
 * only when write access is held or can be obtained.  A match is bound to
 * rdataset and, if a signature was found, to sigrdataset.  The visible
 * results are ISC_R_NOTFOUND, DNS_R_NCACHENXDOMAIN and
 * DNS_R_NCACHENXRRSET, with result initialised to ISC_R_SUCCESS.
 * NOTE(review): the assignments to found and foundsig and the final return
 * are on lines missing from this listing.
 */
5174 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5175 dns_rdatatype_t type, dns_rdatatype_t covers,
5176 isc_stdtime_t now, dns_rdataset_t *rdataset,
5177 dns_rdataset_t *sigrdataset)
5179 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5180 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5181 rdatasetheader_t *header, *header_next, *found, *foundsig;
5182 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5183 isc_result_t result;
5185 isc_rwlocktype_t locktype;
5187 REQUIRE(VALID_RBTDB(rbtdb));
5188 REQUIRE(type != dns_rdatatype_any);
5192 result = ISC_R_SUCCESS;
5195 isc_stdtime_get(&now);
5197 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5198 locktype = isc_rwlocktype_read;
5199 NODE_LOCK(lock, locktype);
5203 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5204 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5206 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5210 for (header = rbtnode->data; header != NULL; header = header_next) {
5211 header_next = header->next;
5212 if (header->rdh_ttl <= now) {
5213 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5214 (locktype == isc_rwlocktype_write ||
5215 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5217 * We update the node's status only when we
5218 * can get write access.
5220 locktype = isc_rwlocktype_write;
5223 * We don't check if refcurrent(rbtnode) == 0
5224 * and try to free like we do in cache_find(),
5225 * because refcurrent(rbtnode) must be
5226 * non-zero. This is so because 'node' is an
5227 * argument to the function.
5229 header->attributes |= RDATASET_ATTR_STALE;
5232 } else if (EXISTS(header)) {
5233 if (header->type == matchtype)
5235 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5236 header->type == negtype)
5238 else if (header->type == sigmatchtype)
5242 if (found != NULL) {
5243 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5244 if (foundsig != NULL)
5245 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5249 NODE_UNLOCK(lock, locktype);
5252 return (ISC_R_NOTFOUND);
/* A header whose base type is 0 is a negative cache entry. */
5254 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5256 * We found a negative cache entry.
5258 if (NXDOMAIN(found))
5259 result = DNS_R_NCACHENXDOMAIN;
5261 result = DNS_R_NCACHENXRRSET;
/*
 * Create an rdataset iterator for 'node' and return it in '*iteratorp'.
 *
 * The iterator is allocated from rbtdb->common.mctx; ISC_R_NOMEMORY is
 * returned if that fails.  For a database without DNS_DBATTR_CACHE the
 * version is resolved first (a NULL 'version' is replaced through
 * &rbtversion, otherwise the reference count of the version is
 * incremented).  The common iterator fields record 'db', 'node', the
 * version and 'now'.
 *
 * A reference to the node is taken under NODE_STRONGLOCK and the
 * iterator starts with no current header.  Returns ISC_R_SUCCESS.
 *
 * NOTE(review): the call that fills in a NULL version and the condition
 * guarding isc_stdtime_get() are not visible in this listing.
 */
5268 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5269 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5271 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5272 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5273 rbtdb_version_t *rbtversion = version;
5274 rbtdb_rdatasetiter_t *iterator;
5277 REQUIRE(VALID_RBTDB(rbtdb));
5279 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5280 if (iterator == NULL)
5281 return (ISC_R_NOMEMORY);
5283 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5285 if (rbtversion == NULL)
5287 (dns_dbversion_t **) (void *)(&rbtversion));
5291 isc_refcount_increment(&rbtversion->references,
5297 isc_stdtime_get(&now);
5301 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5302 iterator->common.methods = &rdatasetiter_methods;
5303 iterator->common.db = db;
5304 iterator->common.node = node;
5305 iterator->common.version = (dns_dbversion_t *)rbtversion;
5306 iterator->common.now = now;
5308 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5310 dns_rbtnode_refincrement(rbtnode, &refs);
5313 iterator->current = NULL;
5315 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5317 *iteratorp = (dns_rdatasetiter_t *)iterator;
5319 return (ISC_R_SUCCESS);
/*
 * Check whether 'node' holds both a CNAME rdataset and "other data" that
 * are active as of version 'serial'.
 *
 * Every top-level header of the node is visited.  For each one the
 * "down" chain is followed to find a header whose serial is not newer
 * than 'serial'; a header flagged NONEXISTENT does not count.  Types
 * KEY, SIG, NSEC and RRSIG are allowed next to a CNAME and are therefore
 * not treated as other data.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably ISC_TRUE is returned once both 'cname' and 'other_data'
 * are set -- confirm.
 */
5322 static isc_boolean_t
5323 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5324 rdatasetheader_t *header, *header_next;
5325 isc_boolean_t cname, other_data;
5326 dns_rdatatype_t rdtype;
5329 * The caller must hold the node lock.
5333 * Look for CNAME and "other data" rdatasets active in our version.
5336 other_data = ISC_FALSE;
5337 for (header = node->data; header != NULL; header = header_next) {
5338 header_next = header->next;
5339 if (header->type == dns_rdatatype_cname) {
5341 * Look for an active extant CNAME.
5344 if (header->serial <= serial &&
5347 * Is this a "this rdataset doesn't
5350 if (NONEXISTENT(header))
5354 header = header->down;
5355 } while (header != NULL);
5360 * Look for active extant "other data".
5362 * "Other data" is any rdataset whose type is not
5363 * KEY, NSEC, SIG or RRSIG.
5365 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5366 if (rdtype != dns_rdatatype_key &&
5367 rdtype != dns_rdatatype_sig &&
5368 rdtype != dns_rdatatype_nsec &&
5369 rdtype != dns_rdatatype_rrsig) {
5371 * Is it active and extant?
5374 if (header->serial <= serial &&
5377 * Is this a "this rdataset
5378 * doesn't exist" record?
5380 if (NONEXISTENT(header))
5384 header = header->down;
5385 } while (header != NULL);
5387 other_data = ISC_TRUE;
5392 if (cname && other_data)
/*
 * Insert 'newheader' into the heap rbtdb->heaps[idx].
 *
 * Only valid for a non-cache database, and only for a header that is
 * not already in a heap (heap_index == 0) and not on a list.
 *
 * NOTE(review): what happens to 'result' after isc_heap_insert() is not
 * visible in this listing.
 */
5399 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5400 isc_result_t result;
5402 INSIST(!IS_CACHE(rbtdb));
5403 INSIST(newheader->heap_index == 0);
5404 INSIST(!ISC_LINK_LINKED(newheader, link));
5406 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * Add 'newheader' to 'rbtnode', replacing or merging with an existing
 * rdataset of the same type.  The caller must hold the node lock.
 *
 * Arguments as used below:
 *   rbtversion     version being modified; NULL selects the cache-style
 *                  handling (trust comparison, expiry of stale and
 *                  negative entries via set_ttl(..., 0) plus
 *                  RDATASET_ATTR_STALE).
 *   options        DNS_DBADD_MERGE requires a version; DNS_DBADD_FORCE
 *                  makes the new data count as dns_trust_ultimate;
 *                  DNS_DBADD_EXACT and DNS_DBADD_EXACTTTL constrain a
 *                  merge.
 *   loading        when set, no changed record is created and a
 *                  replaced header is freed immediately.
 *   addedrdataset  if non-NULL, bound to the header that ends up
 *                  representing the type.
 *   now            time used for TTL comparisons and binding.
 *
 * Ownership: on the early returns visible here 'newheader' has already
 * been released with free_rdataset(); otherwise it is linked into the
 * node.
 *
 * Results visible here: ISC_R_NOMEMORY (no changed record could be
 * added), DNS_R_UNCHANGED, DNS_R_CNAMEANDOTHER and ISC_R_SUCCESS.
 * NOTE(review): the return taken when a merge fails is not visible in
 * this listing.
 */
5411 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5412 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5413 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5415 rbtdb_changed_t *changed = NULL;
5416 rdatasetheader_t *topheader, *topheader_prev, *header;
5417 unsigned char *merged;
5418 isc_result_t result;
5419 isc_boolean_t header_nx;
5420 isc_boolean_t newheader_nx;
5421 isc_boolean_t merge;
5422 dns_rdatatype_t rdtype, covers;
5423 rbtdb_rdatatype_t negtype;
5428 * Add an rdatasetheader_t to a node.
5432 * Caller must be holding the node lock.
5435 if ((options & DNS_DBADD_MERGE) != 0) {
5436 REQUIRE(rbtversion != NULL);
5441 if ((options & DNS_DBADD_FORCE) != 0)
5442 trust = dns_trust_ultimate;
5444 trust = newheader->trust;
5446 if (rbtversion != NULL && !loading) {
5448 * We always add a changed record, even if no changes end up
5449 * being made to this node, because it's harmless and
5450 * simplifies the code.
5452 changed = add_changed(rbtdb, rbtversion, rbtnode);
5453 if (changed == NULL) {
5454 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5455 return (ISC_R_NOMEMORY);
5459 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5460 topheader_prev = NULL;
5463 if (rbtversion == NULL && !newheader_nx) {
5464 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5467 * We're adding a negative cache entry.
5469 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5470 if (covers == dns_rdatatype_any) {
5472 * We're adding an negative cache entry
5473 * which covers all types (NXDOMAIN,
5474 * NODATA(QTYPE=ANY)).
5476 * We make all other data stale so that the
5477 * only rdataset that can be found at this
5478 * node is the negative cache entry.
5480 for (topheader = rbtnode->data;
5482 topheader = topheader->next) {
5483 set_ttl(rbtdb, topheader, 0);
5484 topheader->attributes |=
5485 RDATASET_ATTR_STALE;
5490 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5493 * We're adding something that isn't a
5494 * negative cache entry. Look for an extant
5495 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5498 for (topheader = rbtnode->data;
5500 topheader = topheader->next) {
5501 if (topheader->type ==
5502 RBTDB_RDATATYPE_NCACHEANY)
5505 if (topheader != NULL && EXISTS(topheader) &&
5506 topheader->rdh_ttl > now) {
5510 if (trust < topheader->trust) {
5512 * The NXDOMAIN/NODATA(QTYPE=ANY)
5515 free_rdataset(rbtdb,
5518 if (addedrdataset != NULL)
5519 bind_rdataset(rbtdb, rbtnode,
5522 return (DNS_R_UNCHANGED);
5525 * The new rdataset is better. Expire the
5526 * NXDOMAIN/NODATA(QTYPE=ANY).
5528 set_ttl(rbtdb, topheader, 0);
5529 topheader->attributes |= RDATASET_ATTR_STALE;
5534 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5538 for (topheader = rbtnode->data;
5540 topheader = topheader->next) {
5541 if (topheader->type == newheader->type ||
5542 topheader->type == negtype)
5544 topheader_prev = topheader;
5549 * If header isn't NULL, we've found the right type. There may be
5550 * IGNORE rdatasets between the top of the chain and the first real
5551 * data. We skip over them.
5554 while (header != NULL && IGNORE(header))
5555 header = header->down;
5556 if (header != NULL) {
5557 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5560 * Deleting an already non-existent rdataset has no effect.
5562 if (header_nx && newheader_nx) {
5563 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5564 return (DNS_R_UNCHANGED);
5568 * Trying to add an rdataset with lower trust to a cache DB
5569 * has no effect, provided that the cache data isn't stale.
5571 if (rbtversion == NULL && trust < header->trust &&
5572 (header->rdh_ttl > now || header_nx)) {
5573 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5574 if (addedrdataset != NULL)
5575 bind_rdataset(rbtdb, rbtnode, header, now,
5577 return (DNS_R_UNCHANGED);
5581 * Don't merge if a nonexistent rdataset is involved.
5583 if (merge && (header_nx || newheader_nx))
5587 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5588 * that is the union of 'newheader' and 'header'.
5591 unsigned int flags = 0;
5592 INSIST(rbtversion->serial >= header->serial);
5594 result = ISC_R_SUCCESS;
5596 if ((options & DNS_DBADD_EXACT) != 0)
5597 flags |= DNS_RDATASLAB_EXACT;
5598 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5599 newheader->rdh_ttl != header->rdh_ttl)
5600 result = DNS_R_NOTEXACT;
5601 else if (newheader->rdh_ttl != header->rdh_ttl)
5602 flags |= DNS_RDATASLAB_FORCE;
5603 if (result == ISC_R_SUCCESS)
5604 result = dns_rdataslab_merge(
5605 (unsigned char *)header,
5606 (unsigned char *)newheader,
5607 (unsigned int)(sizeof(*newheader)),
5609 rbtdb->common.rdclass,
5610 (dns_rdatatype_t)header->type,
5612 if (result == ISC_R_SUCCESS) {
5614 * If 'header' has the same serial number as
5615 * we do, we could clean it up now if we knew
5616 * that our caller had no references to it.
5617 * We don't know this, however, so we leave it
5618 * alone. It will get cleaned up when
5619 * clean_zone_node() runs.
5621 free_rdataset(rbtdb, rbtdb->common.mctx,
5623 newheader = (rdatasetheader_t *)merged;
5624 if (loading && RESIGN(newheader) &&
5626 header->resign < newheader->resign)
5627 newheader->resign = header->resign;
5629 free_rdataset(rbtdb, rbtdb->common.mctx,
5635 * Don't replace existing NS, A and AAAA RRsets
5636 * in the cache if they are already exist. This
5637 * prevents named being locked to old servers.
5638 * Don't lower trust of existing record if the
5641 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5642 header->type == dns_rdatatype_ns &&
5643 !header_nx && !newheader_nx &&
5644 header->trust >= newheader->trust &&
5645 dns_rdataslab_equalx((unsigned char *)header,
5646 (unsigned char *)newheader,
5647 (unsigned int)(sizeof(*newheader)),
5648 rbtdb->common.rdclass,
5649 (dns_rdatatype_t)header->type)) {
5651 * Honour the new ttl if it is less than the
5654 if (header->rdh_ttl > newheader->rdh_ttl)
5655 set_ttl(rbtdb, header, newheader->rdh_ttl);
5656 if (header->noqname == NULL &&
5657 newheader->noqname != NULL) {
5658 header->noqname = newheader->noqname;
5659 newheader->noqname = NULL;
5661 if (header->closest == NULL &&
5662 newheader->closest != NULL) {
5663 header->closest = newheader->closest;
5664 newheader->closest = NULL;
5666 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5667 if (addedrdataset != NULL)
5668 bind_rdataset(rbtdb, rbtnode, header, now,
5670 return (ISC_R_SUCCESS);
/*
 * Same treatment for unexpired A and AAAA rdatasets in a cache, this
 * time compared with dns_rdataslab_equal().
 */
5672 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5673 (header->type == dns_rdatatype_a ||
5674 header->type == dns_rdatatype_aaaa) &&
5675 !header_nx && !newheader_nx &&
5676 header->trust >= newheader->trust &&
5677 dns_rdataslab_equal((unsigned char *)header,
5678 (unsigned char *)newheader,
5679 (unsigned int)(sizeof(*newheader)))) {
5681 * Honour the new ttl if it is less than the
5684 if (header->rdh_ttl > newheader->rdh_ttl)
5685 set_ttl(rbtdb, header, newheader->rdh_ttl);
5686 if (header->noqname == NULL &&
5687 newheader->noqname != NULL) {
5688 header->noqname = newheader->noqname;
5689 newheader->noqname = NULL;
5691 if (header->closest == NULL &&
5692 newheader->closest != NULL) {
5693 header->closest = newheader->closest;
5694 newheader->closest = NULL;
5696 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5697 if (addedrdataset != NULL)
5698 bind_rdataset(rbtdb, rbtnode, header, now,
5700 return (ISC_R_SUCCESS);
5702 INSIST(rbtversion == NULL ||
5703 rbtversion->serial >= topheader->serial);
5704 if (topheader_prev != NULL)
5705 topheader_prev->next = newheader;
5707 rbtnode->data = newheader;
5708 newheader->next = topheader->next;
5711 * There are no other references to 'header' when
5712 * loading, so we MAY clean up 'header' now.
5713 * Since we don't generate changed records when
5714 * loading, we MUST clean up 'header' now.
5716 newheader->down = NULL;
5717 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5719 newheader->down = topheader;
5720 topheader->next = newheader;
5722 if (changed != NULL)
5723 changed->dirty = ISC_TRUE;
5724 if (rbtversion == NULL) {
5725 set_ttl(rbtdb, header, 0);
5726 header->attributes |= RDATASET_ATTR_STALE;
/*
 * Register the new header under the lock number of its node: a cache
 * header goes on rbtdb->rdatasets[idx] and into rbtdb->heaps[idx]; a
 * non-cache header flagged RESIGN goes through resign_insert().
 */
5728 idx = newheader->node->locknum;
5729 if (IS_CACHE(rbtdb)) {
5730 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5733 * XXXMLG We don't check the return value
5734 * here. If it fails, we will not do TTL
5735 * based expiry on this node. However, we
5736 * will do it on the LRU side, so memory
5737 * will not leak... for long.
5739 isc_heap_insert(rbtdb->heaps[idx], newheader);
5740 } else if (RESIGN(newheader))
5741 resign_insert(rbtdb, idx, newheader);
5745 * No non-IGNORED rdatasets of the given type exist at
5750 * If we're trying to delete the type, don't bother.
5753 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5754 return (DNS_R_UNCHANGED);
5757 if (topheader != NULL) {
5759 * We have an list of rdatasets of the given type,
5760 * but they're all marked IGNORE. We simply insert
5761 * the new rdataset at the head of the list.
5763 * Ignored rdatasets cannot occur during loading, so
5767 INSIST(rbtversion == NULL ||
5768 rbtversion->serial >= topheader->serial);
5769 if (topheader_prev != NULL)
5770 topheader_prev->next = newheader;
5772 rbtnode->data = newheader;
5773 newheader->next = topheader->next;
5774 newheader->down = topheader;
5775 topheader->next = newheader;
5777 if (changed != NULL)
5778 changed->dirty = ISC_TRUE;
5781 * No rdatasets of the given type exist at the node.
5783 newheader->next = rbtnode->data;
5784 newheader->down = NULL;
5785 rbtnode->data = newheader;
5787 idx = newheader->node->locknum;
5788 if (IS_CACHE(rbtdb)) {
5789 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5791 isc_heap_insert(rbtdb->heaps[idx], newheader);
5792 } else if (RESIGN(newheader)) {
5793 resign_insert(rbtdb, idx, newheader);
5798 * Check if the node now contains CNAME and other data.
5800 if (rbtversion != NULL &&
5801 cname_and_other_data(rbtnode, rbtversion->serial))
5802 return (DNS_R_CNAMEANDOTHER);
5804 if (addedrdataset != NULL)
5805 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5807 return (ISC_R_SUCCESS);
/*
 * Decide whether 'type' at 'node' is a delegation type for this
 * database.
 *
 * For a cache only DNAME is tested.  Otherwise the test matches DNAME,
 * and NS when 'node' is not the origin node or the database is a stub.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably ISC_TRUE is returned when a test matches -- confirm.
 */
5810 static inline isc_boolean_t
5811 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5812 rbtdb_rdatatype_t type)
5814 if (IS_CACHE(rbtdb)) {
5815 if (type == dns_rdatatype_dname)
5819 } else if (type == dns_rdatatype_dname ||
5820 (type == dns_rdatatype_ns &&
5821 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * Copy the "noqname" data attached to 'rdataset' into a newly allocated
 * struct noqname and store it in newheader->noqname.
 *
 * dns_rdataset_getnoqname() supplies the name and the two rdatasets
 * ('neg' and 'negsig'); it is required to succeed.  The name is
 * duplicated and each rdataset is converted to a slab, all from
 * rbtdb->common.mctx.
 *
 * Returns ISC_R_SUCCESS once the structure is attached.  On failure
 * 'result' holds ISC_R_NOMEMORY or the error of the failing call; the
 * tail of the function disassociates both rdatasets and releases the
 * partial structure with free_noqname().
 * NOTE(review): the jumps to that tail and its return are not visible
 * in this listing.
 */
5826 static inline isc_result_t
5827 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5828 dns_rdataset_t *rdataset)
5830 struct noqname *noqname;
5831 isc_mem_t *mctx = rbtdb->common.mctx;
5833 dns_rdataset_t neg, negsig;
5834 isc_result_t result;
5837 dns_name_init(&name, NULL);
5838 dns_rdataset_init(&neg);
5839 dns_rdataset_init(&negsig);
5841 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5842 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5844 noqname = isc_mem_get(mctx, sizeof(*noqname));
5845 if (noqname == NULL) {
5846 result = ISC_R_NOMEMORY;
5849 dns_name_init(&noqname->name, NULL);
5850 noqname->neg = NULL;
5851 noqname->negsig = NULL;
5852 noqname->type = neg.type;
5853 result = dns_name_dup(&name, mctx, &noqname->name);
5854 if (result != ISC_R_SUCCESS)
5856 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5857 if (result != ISC_R_SUCCESS)
5859 noqname->neg = r.base;
5860 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5861 if (result != ISC_R_SUCCESS)
5863 noqname->negsig = r.base;
5864 dns_rdataset_disassociate(&neg);
5865 dns_rdataset_disassociate(&negsig);
5866 newheader->noqname = noqname;
5867 return (ISC_R_SUCCESS);
5870 dns_rdataset_disassociate(&neg);
5871 dns_rdataset_disassociate(&negsig);
5872 free_noqname(mctx, &noqname);
/*
 * Copy the "closest" data attached to 'rdataset' into a newly allocated
 * struct noqname and store it in newheader->closest.
 *
 * Mirrors addnoqname(), with dns_rdataset_getclosest() as the source:
 * that call is required to succeed, the name is duplicated and both
 * rdatasets ('neg' and 'negsig') are converted to slabs allocated from
 * rbtdb->common.mctx.
 *
 * Returns ISC_R_SUCCESS once the structure is attached.  On failure
 * 'result' holds ISC_R_NOMEMORY or the error of the failing call; the
 * tail of the function disassociates both rdatasets and releases the
 * partial structure with free_noqname().
 * NOTE(review): the jumps to that tail and its return are not visible
 * in this listing.
 */
5876 static inline isc_result_t
5877 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5878 dns_rdataset_t *rdataset)
5880 struct noqname *closest;
5881 isc_mem_t *mctx = rbtdb->common.mctx;
5883 dns_rdataset_t neg, negsig;
5884 isc_result_t result;
5887 dns_name_init(&name, NULL);
5888 dns_rdataset_init(&neg);
5889 dns_rdataset_init(&negsig);
5891 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5892 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5894 closest = isc_mem_get(mctx, sizeof(*closest));
5895 if (closest == NULL) {
5896 result = ISC_R_NOMEMORY;
5899 dns_name_init(&closest->name, NULL);
5900 closest->neg = NULL;
5901 closest->negsig = NULL;
5902 closest->type = neg.type;
5903 result = dns_name_dup(&name, mctx, &closest->name);
5904 if (result != ISC_R_SUCCESS)
5906 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5907 if (result != ISC_R_SUCCESS)
5909 closest->neg = r.base;
5910 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5911 if (result != ISC_R_SUCCESS)
5913 closest->negsig = r.base;
5914 dns_rdataset_disassociate(&neg);
5915 dns_rdataset_disassociate(&negsig);
5916 newheader->closest = closest;
5917 return (ISC_R_SUCCESS);
5920 dns_rdataset_disassociate(&neg);
5921 dns_rdataset_disassociate(&negsig);
5922 free_noqname(mctx, &closest);
/*
 * Forward declaration: addrdataset() and subtractrdataset() compare
 * rbtdb->common.methods against &zone_methods before applying their
 * NSEC3 node checks.
 */
5926 static dns_dbmethods_t zone_methods;
/*
 * Add 'rdataset' to 'node'.
 *
 * A slab copy of 'rdataset' is built with room for a leading
 * rdatasetheader_t, the header fields are filled in (stored TTL is
 * rdataset->ttl + now, trust and type/covers come from 'rdataset'), and
 * the header is handed to add() while holding the node write lock.
 *
 * With a version the header takes the serial of that version and
 * carries over the RESIGN attribute; without one the serial is 1 and the
 * NXDOMAIN, OPTOUT, NOQNAME and CLOSEST attributes of 'rdataset' are
 * transferred (a failing addnoqname()/addclosest() frees the header).
 *
 * Locking: the tree write lock is taken first when the type is a
 * delegation type or when a cache is over its memory limit; in the
 * latter case it is dropped again before add() unless the type is
 * delegating.  On success for a delegating type the find_callback bit
 * of the node is set.
 *
 * For a zone database the REQUIRE checks that NSEC3-related rdatasets
 * and NSEC3 nodes go together.
 *
 * NOTE(review): the last call passes 'version', which is NULL on that
 * path, to iszonesecure(), whereas subtractrdataset() and
 * deleterdataset() pass rbtdb->current_version -- confirm these are
 * equivalent.  The final return is not visible in this listing.
 */
5929 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5930 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5931 dns_rdataset_t *addedrdataset)
5933 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5934 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5935 rbtdb_version_t *rbtversion = version;
5936 isc_region_t region;
5937 rdatasetheader_t *newheader;
5938 rdatasetheader_t *header;
5939 isc_result_t result;
5940 isc_boolean_t delegating;
5941 isc_boolean_t tree_locked = ISC_FALSE;
5943 REQUIRE(VALID_RBTDB(rbtdb));
5945 if (rbtdb->common.methods == &zone_methods)
5946 REQUIRE(((rbtnode->nsec3 &&
5947 (rdataset->type == dns_rdatatype_nsec3 ||
5948 rdataset->covers == dns_rdatatype_nsec3)) ||
5950 rdataset->type != dns_rdatatype_nsec3 &&
5951 rdataset->covers != dns_rdatatype_nsec3)));
5953 if (rbtversion == NULL) {
5955 isc_stdtime_get(&now);
5959 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5961 sizeof(rdatasetheader_t));
5962 if (result != ISC_R_SUCCESS)
5965 newheader = (rdatasetheader_t *)region.base;
5966 init_rdataset(rbtdb, newheader);
5967 set_ttl(rbtdb, newheader, rdataset->ttl + now);
5968 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5970 newheader->attributes = 0;
5971 newheader->noqname = NULL;
5972 newheader->closest = NULL;
5973 newheader->count = init_count++;
5974 newheader->trust = rdataset->trust;
5975 newheader->additional_auth = NULL;
5976 newheader->additional_glue = NULL;
5977 newheader->last_used = now;
5978 newheader->node = rbtnode;
5979 if (rbtversion != NULL) {
5980 newheader->serial = rbtversion->serial;
5983 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5984 newheader->attributes |= RDATASET_ATTR_RESIGN;
5985 newheader->resign = rdataset->resign;
5987 newheader->resign = 0;
5989 newheader->serial = 1;
5990 newheader->resign = 0;
5991 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5992 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
5993 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
5994 newheader->attributes |= RDATASET_ATTR_OPTOUT;
5995 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
5996 result = addnoqname(rbtdb, newheader, rdataset);
5997 if (result != ISC_R_SUCCESS) {
5998 free_rdataset(rbtdb, rbtdb->common.mctx,
6003 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6004 result = addclosest(rbtdb, newheader, rdataset);
6005 if (result != ISC_R_SUCCESS) {
6006 free_rdataset(rbtdb, rbtdb->common.mctx,
6014 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6015 * just DNAME for the cache), then we need to set the callback bit
6018 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6019 delegating = ISC_TRUE;
6021 delegating = ISC_FALSE;
6024 * If we're adding a delegation type or the DB is a cache in an overmem
6025 * state, hold an exclusive lock on the tree. In the latter case
6026 * the lock does not necessarily have to be acquired but it will help
6027 * purge stale entries more effectively.
6029 if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6030 tree_locked = ISC_TRUE;
6031 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6034 if (IS_CACHE(rbtdb) && rbtdb->overmem)
6035 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6037 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6038 isc_rwlocktype_write);
/* Count the new header in the rrset statistics when they are enabled. */
6040 if (rbtdb->rrsetstats != NULL) {
6041 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6042 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6045 if (IS_CACHE(rbtdb)) {
6047 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
/*
 * Expire element 1 of the heap for this lock number if its TTL is at
 * least RBTDB_VIRTUAL in the past.
 */
6049 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6050 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6051 expire_header(rbtdb, header, tree_locked);
6054 * If we've been holding a write lock on the tree just for
6055 * cleaning, we can release it now. However, we still need the
6058 if (tree_locked && !delegating) {
6059 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6060 tree_locked = ISC_FALSE;
6064 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6065 addedrdataset, now);
6066 if (result == ISC_R_SUCCESS && delegating)
6067 rbtnode->find_callback = 1;
6069 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6070 isc_rwlocktype_write);
6073 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6076 * Update the zone's secure status. If version is non-NULL
6077 * this is deferred until closeversion() is called.
6079 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6080 iszonesecure(db, version, rbtdb->origin_node);
/*
 * Remove the rdata in 'rdataset' from the rdataset of the same type at
 * 'node' in 'version'.
 *
 * A slab copy of 'rdataset' is built and, under the node write lock,
 * compared against the first non-IGNORE header of that type.  If that
 * header exists, the difference is computed with
 * dns_rdataslab_subtract():
 *   - ISC_R_SUCCESS: the resulting slab becomes the new header, with
 *     its serial reset to the serial of the version and its additional
 *     info pointers cleared;
 *   - DNS_R_NXRRSET: nothing would remain, so a fresh header flagged
 *     RDATASET_ATTR_NONEXISTENT is used instead;
 *   - any other result: the copy is freed.
 * The new header is then linked in front of the old top header and the
 * changed record is marked dirty.
 *
 * With DNS_DBSUB_EXACT a TTL mismatch yields DNS_R_NOTEXACT.  When no
 * existing rdataset is found the result is DNS_R_NOTEXACT if
 * DNS_DBSUB_EXACT is set, DNS_R_UNCHANGED otherwise.  ISC_R_NOMEMORY is
 * returned if no changed record can be added.
 *
 * On success 'newrdataset', if non-NULL, is bound to the new header.
 *
 * NOTE(review): rbtversion->serial is read unconditionally, yet the
 * final secure-status update tests version == NULL -- confirm whether a
 * NULL version can reach this function.  The jumps to the unlock path
 * and the final return are not visible in this listing.
 */
6086 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6087 dns_rdataset_t *rdataset, unsigned int options,
6088 dns_rdataset_t *newrdataset)
6090 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6091 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6092 rbtdb_version_t *rbtversion = version;
6093 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6094 unsigned char *subresult;
6095 isc_region_t region;
6096 isc_result_t result;
6097 rbtdb_changed_t *changed;
6099 REQUIRE(VALID_RBTDB(rbtdb));
6101 if (rbtdb->common.methods == &zone_methods)
6102 REQUIRE(((rbtnode->nsec3 &&
6103 (rdataset->type == dns_rdatatype_nsec3 ||
6104 rdataset->covers == dns_rdatatype_nsec3)) ||
6106 rdataset->type != dns_rdatatype_nsec3 &&
6107 rdataset->covers != dns_rdatatype_nsec3)));
6109 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6111 sizeof(rdatasetheader_t));
6112 if (result != ISC_R_SUCCESS)
6114 newheader = (rdatasetheader_t *)region.base;
6115 init_rdataset(rbtdb, newheader);
6116 set_ttl(rbtdb, newheader, rdataset->ttl);
6117 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6119 newheader->attributes = 0;
6120 newheader->serial = rbtversion->serial;
6121 newheader->trust = 0;
6122 newheader->noqname = NULL;
6123 newheader->closest = NULL;
6124 newheader->count = init_count++;
6125 newheader->additional_auth = NULL;
6126 newheader->additional_glue = NULL;
6127 newheader->last_used = 0;
6128 newheader->node = rbtnode;
6129 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6130 newheader->attributes |= RDATASET_ATTR_RESIGN;
6131 newheader->resign = rdataset->resign;
6133 newheader->resign = 0;
/* Everything from here on touches the node and runs under its write lock. */
6135 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6136 isc_rwlocktype_write);
6138 changed = add_changed(rbtdb, rbtversion, rbtnode);
6139 if (changed == NULL) {
6140 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6141 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6142 isc_rwlocktype_write);
6143 return (ISC_R_NOMEMORY);
6146 topheader_prev = NULL;
6147 for (topheader = rbtnode->data;
6149 topheader = topheader->next) {
6150 if (topheader->type == newheader->type)
6152 topheader_prev = topheader;
6155 * If header isn't NULL, we've found the right type. There may be
6156 * IGNORE rdatasets between the top of the chain and the first real
6157 * data. We skip over them.
6160 while (header != NULL && IGNORE(header))
6161 header = header->down;
6162 if (header != NULL && EXISTS(header)) {
6163 unsigned int flags = 0;
6165 result = ISC_R_SUCCESS;
6166 if ((options & DNS_DBSUB_EXACT) != 0) {
6167 flags |= DNS_RDATASLAB_EXACT;
6168 if (newheader->rdh_ttl != header->rdh_ttl)
6169 result = DNS_R_NOTEXACT;
6171 if (result == ISC_R_SUCCESS)
6172 result = dns_rdataslab_subtract(
6173 (unsigned char *)header,
6174 (unsigned char *)newheader,
6175 (unsigned int)(sizeof(*newheader)),
6177 rbtdb->common.rdclass,
6178 (dns_rdatatype_t)header->type,
6180 if (result == ISC_R_SUCCESS) {
6181 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6182 newheader = (rdatasetheader_t *)subresult;
6183 init_rdataset(rbtdb, newheader);
6185 * We have to set the serial since the rdataslab
6186 * subtraction routine copies the reserved portion of
6187 * header, not newheader.
6189 newheader->serial = rbtversion->serial;
6191 * XXXJT: dns_rdataslab_subtract() copied the pointers
6192 * to additional info. We need to clear these fields
6193 * to avoid having duplicated references.
6195 newheader->additional_auth = NULL;
6196 newheader->additional_glue = NULL;
6197 } else if (result == DNS_R_NXRRSET) {
6199 * This subtraction would remove all of the rdata;
6200 * add a nonexistent header instead.
6202 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6203 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6204 if (newheader == NULL) {
6205 result = ISC_R_NOMEMORY;
6208 set_ttl(rbtdb, newheader, 0);
6209 newheader->type = topheader->type;
6210 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6211 newheader->trust = 0;
6212 newheader->serial = rbtversion->serial;
6213 newheader->noqname = NULL;
6214 newheader->closest = NULL;
6215 newheader->count = 0;
6216 newheader->additional_auth = NULL;
6217 newheader->additional_glue = NULL;
6218 newheader->node = rbtnode;
6219 newheader->resign = 0;
6220 newheader->last_used = 0;
6222 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6227 * If we're here, we want to link newheader in front of
6230 INSIST(rbtversion->serial >= topheader->serial);
6231 if (topheader_prev != NULL)
6232 topheader_prev->next = newheader;
6234 rbtnode->data = newheader;
6235 newheader->next = topheader->next;
6236 newheader->down = topheader;
6237 topheader->next = newheader;
6239 changed->dirty = ISC_TRUE;
6242 * The rdataset doesn't exist, so we don't need to do anything
6243 * to satisfy the deletion request.
6245 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6246 if ((options & DNS_DBSUB_EXACT) != 0)
6247 result = DNS_R_NOTEXACT;
6249 result = DNS_R_UNCHANGED;
6252 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6253 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6256 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6257 isc_rwlocktype_write);
6260 * Update the zone's secure status. If version is non-NULL
6261 * this is deferred until closeversion() is called.
6263 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6264 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * Delete the rdataset of 'type'/'covers' at 'node'.
 *
 * Deletion is expressed as an addition: a header flagged
 * RDATASET_ATTR_NONEXISTENT with TTL 0 is created and passed to add()
 * with DNS_DBADD_FORCE while holding the node write lock.  The header
 * takes the serial of 'version', or 0 when no version is given.
 *
 * Returns ISC_R_NOTIMPLEMENTED for type ANY and for RRSIG with
 * covers == 0, ISC_R_NOMEMORY if the header cannot be allocated;
 * otherwise 'result' is whatever add() produced.
 * NOTE(review): the final return is not visible in this listing.
 */
6270 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6271 dns_rdatatype_t type, dns_rdatatype_t covers)
6273 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6274 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6275 rbtdb_version_t *rbtversion = version;
6276 isc_result_t result;
6277 rdatasetheader_t *newheader;
6279 REQUIRE(VALID_RBTDB(rbtdb));
6281 if (type == dns_rdatatype_any)
6282 return (ISC_R_NOTIMPLEMENTED);
6283 if (type == dns_rdatatype_rrsig && covers == 0)
6284 return (ISC_R_NOTIMPLEMENTED);
/* Build the placeholder header that marks the type as nonexistent. */
6286 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6287 if (newheader == NULL)
6288 return (ISC_R_NOMEMORY);
6289 set_ttl(rbtdb, newheader, 0);
6290 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6291 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6292 newheader->trust = 0;
6293 newheader->noqname = NULL;
6294 newheader->closest = NULL;
6295 newheader->additional_auth = NULL;
6296 newheader->additional_glue = NULL;
6297 if (rbtversion != NULL)
6298 newheader->serial = rbtversion->serial;
6300 newheader->serial = 0;
6301 newheader->count = 0;
6302 newheader->last_used = 0;
6303 newheader->node = rbtnode;
6305 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6306 isc_rwlocktype_write);
6308 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6309 ISC_FALSE, NULL, 0);
6311 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6312 isc_rwlocktype_write);
6315 * Update the zone's secure status. If version is non-NULL
6316 * this is deferred until closeversion() is called.
6318 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6319 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * Load-time add callback (handed out by beginload()): add 'rdataset' at
 * 'name' to the database referenced by the rbtdb_load_t in 'arg'.
 *
 * Checks performed before anything is stored:
 *   - SOA away from the origin of a non-cache database:
 *     DNS_R_NOTZONETOP;
 *   - NS at a wildcard name: DNS_R_INVALIDNS;
 *   - NSEC3 at a wildcard name: DNS_R_INVALIDNSEC3.
 *
 * NSEC3 rdatasets and rdatasets covering NSEC3 are added to
 * rbtdb->nsec3, everything else to rbtdb->tree.  A node that did not
 * exist before gets its locknum from node->hashval (DNS_RBT_USEHASH) or
 * dns_name_hash(), modulo node_lock_count.
 *
 * The slab built from 'rdataset' gets serial 1 and a TTL of
 * rdataset->ttl + loadctx->now, and is passed to add() with
 * DNS_DBADD_MERGE and loading set.  DNS_R_UNCHANGED from add() is mapped
 * to ISC_R_SUCCESS; on success for a delegating type the find_callback
 * bit of the node is set.
 *
 * NOTE(review): several early returns and the final return are not
 * visible in this listing.
 */
6325 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6326 rbtdb_load_t *loadctx = arg;
6327 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6328 dns_rbtnode_t *node;
6329 isc_result_t result;
6330 isc_region_t region;
6331 rdatasetheader_t *newheader;
6334 * This routine does no node locking. See comments in
6335 * 'load' below for more information on loading and
6341 * SOA records are only allowed at top of zone.
6343 if (rdataset->type == dns_rdatatype_soa &&
6344 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6345 return (DNS_R_NOTZONETOP);
6347 if (rdataset->type != dns_rdatatype_nsec3 &&
6348 rdataset->covers != dns_rdatatype_nsec3)
6349 add_empty_wildcards(rbtdb, name);
6351 if (dns_name_iswildcard(name)) {
6353 * NS record owners cannot legally be wild cards.
6355 if (rdataset->type == dns_rdatatype_ns)
6356 return (DNS_R_INVALIDNS);
6358 * NSEC3 record owners cannot legally be wild cards.
6360 if (rdataset->type == dns_rdatatype_nsec3)
6361 return (DNS_R_INVALIDNSEC3);
6362 result = add_wildcard_magic(rbtdb, name);
6363 if (result != ISC_R_SUCCESS)
6368 if (rdataset->type == dns_rdatatype_nsec3 ||
6369 rdataset->covers == dns_rdatatype_nsec3) {
6370 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6371 if (result == ISC_R_SUCCESS)
6374 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6375 if (result == ISC_R_SUCCESS)
6378 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6380 if (result != ISC_R_EXISTS) {
6381 dns_name_t foundname;
6382 dns_name_init(&foundname, NULL);
6383 dns_rbt_namefromnode(node, &foundname);
6384 #ifdef DNS_RBT_USEHASH
6385 node->locknum = node->hashval % rbtdb->node_lock_count;
6387 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6388 rbtdb->node_lock_count;
6392 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6394 sizeof(rdatasetheader_t));
6395 if (result != ISC_R_SUCCESS)
6397 newheader = (rdatasetheader_t *)region.base;
6398 init_rdataset(rbtdb, newheader);
6399 set_ttl(rbtdb, newheader,
6400 rdataset->ttl + loadctx->now); /* XXX overflow check */
6401 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6403 newheader->attributes = 0;
6404 newheader->trust = rdataset->trust;
6405 newheader->serial = 1;
6406 newheader->noqname = NULL;
6407 newheader->closest = NULL;
6408 newheader->count = init_count++;
6409 newheader->additional_auth = NULL;
6410 newheader->additional_glue = NULL;
6411 newheader->last_used = 0;
6412 newheader->node = node;
6413 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6414 newheader->attributes |= RDATASET_ATTR_RESIGN;
6415 newheader->resign = rdataset->resign;
6417 newheader->resign = 0;
6419 result = add(rbtdb, node, rbtdb->current_version, newheader,
6420 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6421 if (result == ISC_R_SUCCESS &&
6422 delegating_type(rbtdb, node, rdataset->type))
6423 node->find_callback = 1;
6424 else if (result == DNS_R_UNCHANGED)
6425 result = ISC_R_SUCCESS;
/*
 * Start loading the database.
 *
 * Allocates an rbtdb_load_t from rbtdb->common.mctx, sets
 * RBTDB_ATTR_LOADING while holding the database write lock, and hands
 * back loading_addrdataset() through 'addp'.  For a cache the load
 * context records the current time.
 *
 * Returns ISC_R_NOMEMORY if the context cannot be allocated, otherwise
 * ISC_R_SUCCESS.
 * NOTE(review): the store into 'dbloadp' and the value the attribute
 * REQUIRE compares against are not visible in this listing.
 */
6431 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6432 rbtdb_load_t *loadctx;
6435 rbtdb = (dns_rbtdb_t *)db;
6437 REQUIRE(VALID_RBTDB(rbtdb));
6439 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6440 if (loadctx == NULL)
6441 return (ISC_R_NOMEMORY);
6443 loadctx->rbtdb = rbtdb;
6444 if (IS_CACHE(rbtdb))
6445 isc_stdtime_get(&loadctx->now);
6449 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6451 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6453 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6455 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6457 *addp = loading_addrdataset;
6460 return (ISC_R_SUCCESS);
/*
 * Finish loading the database.
 *
 * Requires that the database is marked RBTDB_ATTR_LOADING and not yet
 * RBTDB_ATTR_LOADED, and that the load context belongs to this
 * database.  Under the database write lock the LOADING bit is cleared
 * and LOADED is set.  For a non-cache database the secure status is then
 * recomputed with iszonesecure() on the current version.  The load
 * context is released and ISC_R_SUCCESS is returned.
 *
 * NOTE(review): how 'loadctx' is obtained from 'dbloadp' is not visible
 * in this listing.
 */
6464 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6465 rbtdb_load_t *loadctx;
6466 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6468 REQUIRE(VALID_RBTDB(rbtdb));
6469 REQUIRE(dbloadp != NULL);
6471 REQUIRE(loadctx->rbtdb == rbtdb);
6473 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6475 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6476 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6478 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6479 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6481 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6484 * If there's a KEY rdataset at the zone origin containing a
6485 * zone key, we consider the zone secure.
6487 if (! IS_CACHE(rbtdb))
6488 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6492 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6494 return (ISC_R_SUCCESS);
/*
 * dump: write 'version' of 'db' to 'filename' in 'masterformat'.
 *
 * Thin wrapper: delegates to dns_master_dump2() using the database's
 * memory context and dns_master_style_default, and returns its result.
 */
6498 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6499 dns_masterformat_t masterformat) {
6502 rbtdb = (dns_rbtdb_t *)db;
6504 REQUIRE(VALID_RBTDB(rbtdb));
6506 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6507 &dns_master_style_default,
6508 filename, masterformat));
/*
 * delete_callback: deleter handed to dns_rbt_create() together with the
 * rbtdb as 'arg'.
 *
 * Frees every rdataset header on a chain linked through ->next with
 * free_rdataset(), holding the write lock of the node-lock bucket taken
 * from the first header's node for the whole walk.
 *
 * NOTE(review): the statement that sets 'current' is not visible in this
 * listing; presumably it starts at 'data' -- confirm.  Note also that
 * current->node is dereferenced before the loop's NULL test.
 */
6512 delete_callback(void *data, void *arg) {
6513 dns_rbtdb_t *rbtdb = arg;
6514 rdatasetheader_t *current, *next;
6515 unsigned int locknum;
6518 locknum = current->node->locknum;
6519 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6520 while (current != NULL) {
/* Save the successor before the current header is freed. */
6521 next = current->next;
6522 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6525 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/*
 * issecure: evaluate whether the current version's 'secure' state is
 * exactly dns_db_secure.  The state is read under the tree read lock.
 */
6528 static isc_boolean_t
6529 issecure(dns_db_t *db) {
6531 isc_boolean_t secure;
6533 rbtdb = (dns_rbtdb_t *)db;
6535 REQUIRE(VALID_RBTDB(rbtdb));
6537 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6538 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6539 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * isdnssec: evaluate whether the current version's 'secure' state is
 * anything other than dns_db_insecure.  The state is read under the tree
 * read lock.
 */
6544 static isc_boolean_t
6545 isdnssec(dns_db_t *db) {
6547 isc_boolean_t dnssec;
6549 rbtdb = (dns_rbtdb_t *)db;
6551 REQUIRE(VALID_RBTDB(rbtdb));
6553 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6554 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6555 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * nodecount: read the number of nodes in the main tree
 * (dns_rbt_nodecount() on rbtdb->tree) under the tree read lock.
 * Only rbtdb->tree is consulted here, not rbtdb->nsec3.
 */
6561 nodecount(dns_db_t *db) {
6565 rbtdb = (dns_rbtdb_t *)db;
6567 REQUIRE(VALID_RBTDB(rbtdb));
6569 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6570 count = dns_rbt_nodecount(rbtdb->tree);
6571 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * settask: replace the task associated with the database.
 *
 * Under the database write lock any previously attached task is detached
 * before 'task' is attached to rbtdb->task.
 */
6577 settask(dns_db_t *db, isc_task_t *task) {
6580 rbtdb = (dns_rbtdb_t *)db;
6582 REQUIRE(VALID_RBTDB(rbtdb));
6584 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6585 if (rbtdb->task != NULL)
6586 isc_task_detach(&rbtdb->task);
/*
 * NOTE(review): one line is missing from the listing just before the
 * attach; confirm whether the attach is guarded by a NULL check on 'task'.
 */
6588 isc_task_attach(task, &rbtdb->task);
6589 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * ispersistent: reports whether the database is persistent.
 *
 * NOTE(review): the body of this function is not visible in this listing,
 * so the value it returns must be confirmed against the full source.
 */
6592 static isc_boolean_t
6593 ispersistent(dns_db_t *db) {
/*
 * getoriginnode: hand the database's origin node back through 'nodep'.
 *
 * 'nodep' must be non-NULL and point to NULL.  When an origin node exists
 * a new reference is taken on it while holding that node's lock and the
 * node is stored in *nodep.  When there is none, the database is asserted
 * to be a cache and the result becomes ISC_R_NOTFOUND.
 */
6599 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6600 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6601 dns_rbtnode_t *onode;
6602 isc_result_t result = ISC_R_SUCCESS;
6604 REQUIRE(VALID_RBTDB(rbtdb));
6605 REQUIRE(nodep != NULL && *nodep == NULL);
6607 /* Note that the access to origin_node doesn't require a DB lock */
6608 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6609 if (onode != NULL) {
/* The reference count is changed under the node's own lock bucket. */
6610 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6611 new_reference(rbtdb, onode);
6612 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6614 *nodep = rbtdb->origin_node;
/*
 * NOTE(review): the branch separator is not visible in this listing; the
 * two statements below presumably form the "no origin node" branch.
 */
6616 INSIST(IS_CACHE(rbtdb));
6617 result = ISC_R_NOTFOUND;
/*
 * getnsec3parameters: copy the NSEC3 parameters of 'version' (or of the
 * current version when 'version' is NULL) into the caller's outputs.
 *
 * The lookup runs under the tree read lock.  'result' starts out as
 * ISC_R_NOTFOUND and only becomes ISC_R_SUCCESS when the version has
 * NSEC3 parameters (havensec3).  'salt' is only written when both 'salt'
 * and 'salt_length' are supplied, in which case *salt_length must be at
 * least the stored salt length on entry (REQUIRE); *salt_length and
 * *iterations are written when their pointers are non-NULL.
 */
6624 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6625 isc_uint8_t *flags, isc_uint16_t *iterations,
6626 unsigned char *salt, size_t *salt_length)
6629 isc_result_t result = ISC_R_NOTFOUND;
6630 rbtdb_version_t *rbtversion = version;
6632 rbtdb = (dns_rbtdb_t *)db;
6634 REQUIRE(VALID_RBTDB(rbtdb));
6636 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6638 if (rbtversion == NULL)
6639 rbtversion = rbtdb->current_version;
6641 if (rbtversion->havensec3) {
/*
 * NOTE(review): a line is missing from the listing before each of the
 * 'hash' and 'flags' stores; confirm whether they are NULL-guarded like
 * the other outputs.
 */
6643 *hash = rbtversion->hash;
6644 if (salt != NULL && salt_length != NULL) {
/* The caller's buffer must be able to hold the stored salt. */
6645 REQUIRE(*salt_length >= rbtversion->salt_length);
6646 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6648 if (salt_length != NULL)
6649 *salt_length = rbtversion->salt_length;
6650 if (iterations != NULL)
6651 *iterations = rbtversion->iterations;
6653 *flags = rbtversion->flags;
6654 result = ISC_R_SUCCESS;
6656 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * setsigningtime: set the re-sign time of the header behind 'rdataset'
 * and keep the per-lock-bucket heap consistent with it.
 *
 * Only valid for non-cache databases.  Everything happens under the write
 * lock of the header's node-lock bucket:
 *   - a header already in the heap (heap_index != 0) is either removed
 *     from it or repositioned with isc_heap_increased()/
 *     isc_heap_decreased(), depending on how 'resign' compares with the
 *     previous value;
 *   - a header not in the heap that gets a non-zero 'resign' is flagged
 *     RDATASET_ATTR_RESIGN and inserted with resign_insert(), whose result
 *     becomes this function's result.
 */
6662 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6663 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6664 isc_stdtime_t oldresign;
6665 isc_result_t result = ISC_R_SUCCESS;
6666 rdatasetheader_t *header;
6668 REQUIRE(VALID_RBTDB(rbtdb));
6669 REQUIRE(!IS_CACHE(rbtdb));
6670 REQUIRE(rdataset != NULL);
/*
 * NOTE(review): a line following this assignment is missing from the
 * listing; confirm whether the pointer is adjusted before it is used as a
 * header.
 */
6672 header = rdataset->private3;
6675 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6676 isc_rwlocktype_write);
/* Remember the old value: it decides the direction of the heap fix-up. */
6678 oldresign = header->resign;
6679 header->resign = resign;
6680 if (header->heap_index != 0) {
6681 INSIST(RESIGN(header));
/*
 * NOTE(review): the condition guarding the removal below is on a line not
 * shown in this listing (presumably resign == 0) -- confirm.
 */
6683 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6684 header->heap_index);
6685 header->heap_index = 0;
6686 } else if (resign < oldresign)
6687 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6688 header->heap_index);
6690 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6691 header->heap_index);
6692 } else if (resign && header->heap_index == 0) {
/* Not yet tracked: mark for re-signing and add it to the heap. */
6693 header->attributes |= RDATASET_ATTR_RESIGN;
6694 result = resign_insert(rbtdb, header->node->locknum, header);
6696 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6697 isc_rwlocktype_write);
/*
 * getsigningtime: find the header with the earliest re-sign time and bind
 * it to 'rdataset'.
 *
 * With the database read lock held, the element at index 1 of every
 * per-lock-bucket heap is examined.  Candidates are compared with
 * isc_serial_lt() on their 'resign' values.  The read lock of the bucket
 * owning the best candidate so far stays held; when a better candidate is
 * found the previously held bucket lock is released, otherwise the bucket
 * just examined is released.
 *
 * The winning header is bound with bind_rdataset(), its node's full name
 * is written to 'foundname' when that is non-NULL, the remaining bucket
 * lock is dropped and 'result' is set to ISC_R_SUCCESS.  'result' starts
 * out as ISC_R_NOTFOUND.
 */
6702 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6703 dns_name_t *foundname)
6705 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6706 rdatasetheader_t *header = NULL, *this;
6708 isc_result_t result = ISC_R_NOTFOUND;
6709 unsigned int locknum;
6711 REQUIRE(VALID_RBTDB(rbtdb));
6713 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6715 for (i = 0; i < rbtdb->node_lock_count; i++) {
6716 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6717 this = isc_heap_element(rbtdb->heaps[i], 1);
/*
 * NOTE(review): the test guarding this unlock is not visible in the
 * listing; presumably it handles an empty heap for bucket i -- confirm.
 */
6719 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6720 isc_rwlocktype_read);
6725 else if (isc_serial_lt(this->resign, header->resign)) {
/* New best candidate: release the bucket of the previous one. */
6726 locknum = header->node->locknum;
6727 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6728 isc_rwlocktype_read);
6731 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6732 isc_rwlocktype_read);
6738 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6740 if (foundname != NULL)
6741 dns_rbt_fullnamefromnode(header->node, foundname);
/* Release the bucket lock that was kept for the winning header. */
6743 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6744 isc_rwlocktype_read);
6746 result = ISC_R_SUCCESS;
6749 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
/*
 * resigned: record that the rdataset has been re-signed in the version
 * being written.
 *
 * 'version' must be the database's future version and must be a writer.
 * Under the database write lock and the node's bucket write lock, a
 * reference is taken on the node, the header is removed from the bucket's
 * heap (heap_index reset to 0) and appended to the version's
 * resigned_list.
 */
6755 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6757 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6758 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6759 dns_rbtnode_t *node;
6760 rdatasetheader_t *header;
6762 REQUIRE(VALID_RBTDB(rbtdb));
6763 REQUIRE(rdataset != NULL);
6764 REQUIRE(rbtdb->future_version == rbtversion);
6765 REQUIRE(rbtversion->writer);
6767 node = rdataset->private2;
/*
 * NOTE(review): a line following this assignment is missing from the
 * listing; confirm whether the pointer is adjusted before it is used as a
 * header.
 */
6768 header = rdataset->private3;
/* Lock order used here: database lock first, then the node bucket. */
6771 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6772 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6773 isc_rwlocktype_write);
6775 * Delete from heap and save to re-signed list so that it can
6776 * be restored if we backout of this change.
6778 new_reference(rbtdb, node);
6779 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6780 header->heap_index = 0;
6781 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6783 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6784 isc_rwlocktype_write);
6785 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * getrrsetstats: return the database's rrset statistics object
 * (rbtdb->rrsetstats).  Calling this on a non-cache database violates a
 * REQUIRE.
 */
6788 static dns_stats_t *
6789 getrrsetstats(dns_db_t *db) {
6790 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6792 REQUIRE(VALID_RBTDB(rbtdb));
6793 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6795 return (rbtdb->rrsetstats);
/*
 * Method table that the create routine installs in common.methods for
 * databases that are not caches (the stub case is visible there
 * explicitly).  The initialiser entries are not visible in this listing.
 */
6798 static dns_dbmethods_t zone_methods = {
/*
 * Method table that the create routine installs in common.methods when
 * the database type is dns_dbtype_cache.  The initialiser entries are not
 * visible in this listing.
 */
6837 static dns_dbmethods_t cache_methods = {
/*
 * Database creation routine (the line carrying the function name is not
 * visible in this listing; the name is selected by DNS_RBTDB_VERSION64).
 *
 * Allocates and initialises a dns_rbtdb_t for 'origin' with the given
 * 'type' and 'rdclass' and returns it through 'dbp'.
 *
 * Construction order: the structure itself, the database lock, the tree
 * lock, the node-lock array, (cache only) rrset statistics and per-bucket
 * rdataset lists, the per-bucket heaps, the dead-node lists, the
 * individual node locks, the mctx attachment, the origin name, the two
 * red-black trees, (non-cache only) the origin node, the reference
 * counter and finally the initial version.
 *
 * Failures before the mctx is attached unwind through the cleanup code at
 * the end of the function, in reverse order of acquisition; failures
 * after that point release everything through free_rbtdb().
 *
 * Visible failure results: ISC_R_NOMEMORY, ISC_R_RANGE, or the result of
 * the initialisation call that failed.  ISC_R_SUCCESS on success.
 */
6877 #ifdef DNS_RBTDB_VERSION64
6882 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6883 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6884 void *driverarg, dns_db_t **dbp)
6887 isc_result_t result;
6890 isc_boolean_t (*sooner)(void *, void *);
6892 /* Keep the compiler happy. */
6897 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6899 return (ISC_R_NOMEMORY);
6901 memset(rbtdb, '\0', sizeof(*rbtdb));
6902 dns_name_init(&rbtdb->common.origin, NULL);
6903 rbtdb->common.attributes = 0;
/* The database type selects the method table and the common attribute. */
6904 if (type == dns_dbtype_cache) {
6905 rbtdb->common.methods = &cache_methods;
6906 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6907 } else if (type == dns_dbtype_stub) {
6908 rbtdb->common.methods = &zone_methods;
6909 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6911 rbtdb->common.methods = &zone_methods;
6912 rbtdb->common.rdclass = rdclass;
/* Stays NULL until isc_mem_attach() further down. */
6913 rbtdb->common.mctx = NULL;
6915 result = RBTDB_INITLOCK(&rbtdb->lock);
6916 if (result != ISC_R_SUCCESS)
6919 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6920 if (result != ISC_R_SUCCESS)
6924 * Initialize node_lock_count in a generic way to support future
6925 * extension which allows the user to specify this value on creation.
6926 * Note that when specified for a cache DB it must be larger than 1
6927 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6929 if (rbtdb->node_lock_count == 0) {
6930 if (IS_CACHE(rbtdb))
6931 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6933 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6934 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6935 result = ISC_R_RANGE;
6936 goto cleanup_tree_lock;
/* The bucket number must fit in DNS_RBT_LOCKLENGTH bits. */
6938 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6939 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6940 sizeof(rbtdb_nodelock_t));
6941 if (rbtdb->node_locks == NULL) {
6942 result = ISC_R_NOMEMORY;
6943 goto cleanup_tree_lock;
/* Cache only: rrset statistics and one rdataset list per lock bucket. */
6946 rbtdb->rrsetstats = NULL;
6947 if (IS_CACHE(rbtdb)) {
6948 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6949 if (result != ISC_R_SUCCESS)
6950 goto cleanup_node_locks;
6951 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6952 sizeof(rdatasetheaderlist_t));
6953 if (rbtdb->rdatasets == NULL) {
6954 result = ISC_R_NOMEMORY;
6955 goto cleanup_rrsetstats;
6957 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6958 ISC_LIST_INIT(rbtdb->rdatasets[i]);
6960 rbtdb->rdatasets = NULL;
/*
 * One heap per lock bucket.  All slots are set to NULL first so that the
 * cleanup code can tell which heaps were actually created.
 */
6965 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6966 sizeof(isc_heap_t *));
6967 if (rbtdb->heaps == NULL) {
6968 result = ISC_R_NOMEMORY;
6969 goto cleanup_rdatasets;
6971 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6972 rbtdb->heaps[i] = NULL;
/* Heap ordering: ttl_sooner for caches, resign_sooner otherwise. */
6973 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6974 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6975 result = isc_heap_create(mctx, sooner, set_index, 0,
6977 if (result != ISC_R_SUCCESS)
6982 * Create deadnode lists.
6984 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6985 sizeof(rbtnodelist_t));
6986 if (rbtdb->deadnodes == NULL) {
6987 result = ISC_R_NOMEMORY;
6990 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6991 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6993 rbtdb->active = rbtdb->node_lock_count;
/*
 * Initialise each bucket's lock and reference counter.  If the counter
 * cannot be initialised the lock just created is destroyed again, and on
 * any failure the buckets that were already set up are torn down before
 * unwinding.
 */
6995 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
6996 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
6997 if (result == ISC_R_SUCCESS) {
6998 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
6999 if (result != ISC_R_SUCCESS)
7000 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7002 if (result != ISC_R_SUCCESS) {
7004 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7005 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7006 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7008 goto cleanup_deadnodes;
7010 rbtdb->node_locks[i].exiting = ISC_FALSE;
7014 * Attach to the mctx. The database will persist so long as there
7015 * are references to it, and attaching to the mctx ensures that our
7016 * mctx won't disappear out from under us.
7018 isc_mem_attach(mctx, &rbtdb->common.mctx);
7021 * Must be initialized before free_rbtdb() is called.
7023 isc_ondestroy_init(&rbtdb->common.ondest);
/* From here on, failures are unwound with free_rbtdb(). */
7026 * Make a copy of the origin name.
7028 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7029 if (result != ISC_R_SUCCESS) {
7030 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7035 * Make the Red-Black Trees.
7037 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7038 if (result != ISC_R_SUCCESS) {
7039 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7043 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7044 if (result != ISC_R_SUCCESS) {
7045 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7050 * In order to set the node callback bit correctly in zone databases,
7051 * we need to know if the node has the origin name of the zone.
7052 * In loading_addrdataset() we could simply compare the new name
7053 * to the origin name, but this is expensive. Also, we don't know the
7054 * node name in addrdataset(), so we need another way of knowing the
7057 * We now explicitly create a node for the zone's origin, and then
7058 * we simply remember the node's address. This is safe, because
7059 * the top-of-zone node can never be deleted, nor can its address
7062 if (!IS_CACHE(rbtdb)) {
7063 rbtdb->origin_node = NULL;
7064 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7065 &rbtdb->origin_node);
7066 if (result != ISC_R_SUCCESS) {
/* The tree was created just above, so the name cannot already exist. */
7067 INSIST(result != ISC_R_EXISTS);
7068 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7071 rbtdb->origin_node->nsec3 = 0;
7073 * We need to give the origin node the right locknum.
7075 dns_name_init(&name, NULL);
7076 dns_rbt_namefromnode(rbtdb->origin_node, &name);
/* Bucket = hash modulo bucket count; the hash source depends on the build. */
7077 #ifdef DNS_RBT_USEHASH
7078 rbtdb->origin_node->locknum =
7079 rbtdb->origin_node->hashval %
7080 rbtdb->node_lock_count;
7082 rbtdb->origin_node->locknum =
7083 dns_name_hash(&name, ISC_TRUE) %
7084 rbtdb->node_lock_count;
7089 * Misc. Initialization.
7091 result = isc_refcount_init(&rbtdb->references, 1);
7092 if (result != ISC_R_SUCCESS) {
7093 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7096 rbtdb->attributes = 0;
7097 rbtdb->overmem = ISC_FALSE;
7101 * Version Initialization.
7103 rbtdb->current_serial = 1;
7104 rbtdb->least_serial = 1;
7105 rbtdb->next_serial = 2;
7106 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7107 if (rbtdb->current_version == NULL) {
/* Drop the initial reference before the counter is destroyed. */
7108 isc_refcount_decrement(&rbtdb->references, NULL);
7109 isc_refcount_destroy(&rbtdb->references);
7110 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7111 return (ISC_R_NOMEMORY);
/* The initial version starts insecure with no NSEC3 parameters. */
7113 rbtdb->current_version->secure = dns_db_insecure;
7114 rbtdb->current_version->havensec3 = ISC_FALSE;
7115 rbtdb->current_version->flags = 0;
7116 rbtdb->current_version->iterations = 0;
7117 rbtdb->current_version->hash = 0;
7118 rbtdb->current_version->salt_length = 0;
7119 memset(rbtdb->current_version->salt, 0,
7120 sizeof(rbtdb->current_version->salt));
7121 rbtdb->future_version = NULL;
7122 ISC_LIST_INIT(rbtdb->open_versions);
7124 * Keep the current version in the open list so that list operation
7125 * won't happen in normal lookup operations.
7127 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
/* The magic numbers are set last, once the object is fully built. */
7129 rbtdb->common.magic = DNS_DB_MAGIC;
7130 rbtdb->common.impmagic = RBTDB_MAGIC;
7132 *dbp = (dns_db_t *)rbtdb;
7134 return (ISC_R_SUCCESS);
/*
 * Error unwinding for failures that occur before the mctx is attached.
 * The goto labels are not visible in this listing; resources are released
 * in reverse order of acquisition.
 */
7137 isc_mem_put(mctx, rbtdb->deadnodes,
7138 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7141 if (rbtdb->heaps != NULL) {
7142 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7143 if (rbtdb->heaps[i] != NULL)
7144 isc_heap_destroy(&rbtdb->heaps[i]);
7145 isc_mem_put(mctx, rbtdb->heaps,
7146 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7150 if (rbtdb->rdatasets != NULL)
7151 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7152 sizeof(rdatasetheaderlist_t));
7154 if (rbtdb->rrsetstats != NULL)
7155 dns_stats_detach(&rbtdb->rrsetstats);
7158 isc_mem_put(mctx, rbtdb->node_locks,
7159 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7162 isc_rwlock_destroy(&rbtdb->tree_lock);
7165 RBTDB_DESTROYLOCK(&rbtdb->lock);
7168 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7174 * Slabbed Rdataset Methods
/*
 * rdataset_disassociate: release the node reference held by 'rdataset'.
 * private1 carries the database and private2 the node; the reference is
 * dropped with detachnode().
 */
7178 rdataset_disassociate(dns_rdataset_t *rdataset) {
7179 dns_db_t *db = rdataset->private1;
7180 dns_dbnode_t *node = rdataset->private2;
7182 detachnode(db, &node);
/*
 * rdataset_first: position the rdataset cursor on the first rdata of the
 * slab in private3.
 *
 * The slab starts with a 16-bit big-endian record count.  The cursor is
 * kept in private5 and the remaining-record counter in privateuint4.
 *
 * Returns ISC_R_NOMORE (with private5 set to NULL) on the early-exit
 * path, whose condition is not visible in this listing -- presumably an
 * empty slab, confirm; ISC_R_SUCCESS otherwise.
 */
7186 rdataset_first(dns_rdataset_t *rdataset) {
7187 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7190 count = raw[0] * 256 + raw[1];
7192 rdataset->private5 = NULL;
7193 return (ISC_R_NOMORE);
/*
 * Without LOADORDER the cursor is moved past the count and the offset
 * table (4 bytes per record) so that it points at the first record.
 */
7196 #if DNS_RDATASET_FIXED
7197 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7198 raw += 2 + (4 * count);
7204 * The privateuint4 field is the number of rdata beyond the
7205 * cursor position, so we decrement the total count by one
7206 * before storing it.
7208 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7209 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7210 * to the first entry in the offset table.
7213 rdataset->privateuint4 = count;
7214 rdataset->private5 = raw;
7216 return (ISC_R_SUCCESS);
/*
 * rdataset_next: advance the rdataset cursor by one rdata.
 *
 * privateuint4 holds the number of rdata remaining beyond the cursor and
 * private5 the cursor itself.  Returns ISC_R_NOMORE on the early-exit
 * path (its condition is not visible in this listing -- presumably no
 * rdata remain, confirm) and ISC_R_SUCCESS after advancing.
 */
7220 rdataset_next(dns_rdataset_t *rdataset) {
7222 unsigned int length;
7223 unsigned char *raw; /* RDATASLAB */
7225 count = rdataset->privateuint4;
7227 return (ISC_R_NOMORE);
7229 rdataset->privateuint4 = count;
7232 * Skip forward one record (length + 4) or one offset (4).
7234 raw = rdataset->private5;
7235 #if DNS_RDATASET_FIXED
7236 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
/* Record length is a 16-bit big-endian value at the cursor. */
7238 length = raw[0] * 256 + raw[1];
7240 #if DNS_RDATASET_FIXED
7242 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7244 rdataset->private5 = raw + 2; /* length(2) */
7247 return (ISC_R_SUCCESS);
/*
 * rdataset_current: fill 'rdata' from the record at the cursor.
 *
 * The cursor (private5) must be non-NULL.  In DNS_RDATASET_FIXED builds
 * with DNS_RDATASETATTR_LOADORDER set, the cursor points at a 32-bit
 * big-endian offset that is resolved against the slab base in private3.
 * The record length is a 16-bit big-endian value.  For RRSIG rdatasets
 * the DNS_RDATASLAB_OFFLINE bit of the first data byte is reflected as
 * DNS_RDATA_OFFLINE in rdata->flags.
 */
7251 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7252 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7253 #if DNS_RDATASET_FIXED
7254 unsigned int offset;
7256 unsigned int length;
7258 unsigned int flags = 0;
7260 REQUIRE(raw != NULL);
7263 * Find the start of the record if not already in private5
7264 * then skip the length and order fields.
7266 #if DNS_RDATASET_FIXED
7267 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7268 offset = (raw[0] << 24) + (raw[1] << 16) +
7269 (raw[2] << 8) + raw[3];
7270 raw = rdataset->private3;
7274 length = raw[0] * 256 + raw[1];
7275 #if DNS_RDATASET_FIXED
7280 if (rdataset->type == dns_rdatatype_rrsig) {
7281 if (*raw & DNS_RDATASLAB_OFFLINE)
7282 flags |= DNS_RDATA_OFFLINE;
7288 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
/* OR the flag in after dns_rdata_fromregion() has filled 'rdata'. */
7289 rdata->flags |= flags;
/*
 * rdataset_clone: make 'target' refer to the same data as 'source'.
 *
 * Takes an additional reference on the source's node via attachnode()
 * and resets the target's iterator state (privateuint4 and private5).
 *
 * NOTE(review): the statement that copies the source into the target is
 * not visible in this listing -- confirm against the full source.
 */
7293 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7294 dns_db_t *db = source->private1;
7295 dns_dbnode_t *node = source->private2;
7296 dns_dbnode_t *cloned_node = NULL;
7298 attachnode(db, node, &cloned_node);
7302 * Reset iterator state.
7304 target->privateuint4 = 0;
7305 target->private5 = NULL;
/*
 * rdataset_count: read the number of rdata in the slab held in private3.
 * The count is the 16-bit big-endian value in the slab's first two bytes.
 */
7309 rdataset_count(dns_rdataset_t *rdataset) {
7310 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7313 count = raw[0] * 256 + raw[1];
/*
 * rdataset_getnoqname: expose the "no qname" proof stored with 'rdataset'.
 *
 * The proof record is taken from rdataset->private6.  'nsec' is set up to
 * present its 'neg' slab and 'nsecsig' its 'negsig' slab (as RRSIGs
 * covering the proof's type); both inherit ttl, trust, database and node
 * from 'rdataset', and a node reference is taken with attachnode() for
 * each of them.  The proof's owner name is cloned into 'name'.
 *
 * Returns ISC_R_SUCCESS.
 *
 * Fix: the second group of assignments used to clear nsec->private6 and
 * nsec->private7 a second time, leaving the same fields of 'nsecsig'
 * untouched.  They now clear the 'nsecsig' fields.
 */
7319 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7320 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7322 dns_db_t *db = rdataset->private1;
7323 dns_dbnode_t *node = rdataset->private2;
7324 dns_dbnode_t *cloned_node;
7325 struct noqname *noqname = rdataset->private6;
/* First reference: owned by 'nsec'. */
7328 attachnode(db, node, &cloned_node);
7329 nsec->methods = &rdataset_methods;
7330 nsec->rdclass = db->rdclass;
7331 nsec->type = noqname->type;
7333 nsec->ttl = rdataset->ttl;
7334 nsec->trust = rdataset->trust;
7335 nsec->private1 = rdataset->private1;
7336 nsec->private2 = rdataset->private2;
7337 nsec->private3 = noqname->neg;
7338 nsec->privateuint4 = 0;
7339 nsec->private5 = NULL;
7340 nsec->private6 = NULL;
7341 nsec->private7 = NULL;
/* Second reference: owned by 'nsecsig'. */
7344 attachnode(db, node, &cloned_node);
7345 nsecsig->methods = &rdataset_methods;
7346 nsecsig->rdclass = db->rdclass;
7347 nsecsig->type = dns_rdatatype_rrsig;
7348 nsecsig->covers = noqname->type;
7349 nsecsig->ttl = rdataset->ttl;
7350 nsecsig->trust = rdataset->trust;
7351 nsecsig->private1 = rdataset->private1;
7352 nsecsig->private2 = rdataset->private2;
7353 nsecsig->private3 = noqname->negsig;
7354 nsecsig->privateuint4 = 0;
7355 nsecsig->private5 = NULL;
7356 nsecsig->private6 = NULL;
7357 nsecsig->private7 = NULL;
7359 dns_name_clone(&noqname->name, name);
7361 return (ISC_R_SUCCESS);
/*
 * rdataset_getclosest: expose the "closest encloser" proof stored with
 * 'rdataset'.
 *
 * The proof record is taken from rdataset->private7.  'nsec' is set up to
 * present its 'neg' slab and 'nsecsig' its 'negsig' slab (as RRSIGs
 * covering the proof's type); both inherit ttl, trust, database and node
 * from 'rdataset', and a node reference is taken with attachnode() for
 * each of them.  The proof's owner name is cloned into 'name'.
 *
 * Returns ISC_R_SUCCESS.
 *
 * Fix: the second group of assignments used to clear nsec->private6 and
 * nsec->private7 a second time, leaving the same fields of 'nsecsig'
 * untouched.  They now clear the 'nsecsig' fields.
 */
7365 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7366 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7368 dns_db_t *db = rdataset->private1;
7369 dns_dbnode_t *node = rdataset->private2;
7370 dns_dbnode_t *cloned_node;
7371 struct noqname *closest = rdataset->private7;
/* First reference: owned by 'nsec'. */
7374 attachnode(db, node, &cloned_node);
7375 nsec->methods = &rdataset_methods;
7376 nsec->rdclass = db->rdclass;
7377 nsec->type = closest->type;
7379 nsec->ttl = rdataset->ttl;
7380 nsec->trust = rdataset->trust;
7381 nsec->private1 = rdataset->private1;
7382 nsec->private2 = rdataset->private2;
7383 nsec->private3 = closest->neg;
7384 nsec->privateuint4 = 0;
7385 nsec->private5 = NULL;
7386 nsec->private6 = NULL;
7387 nsec->private7 = NULL;
/* Second reference: owned by 'nsecsig'. */
7390 attachnode(db, node, &cloned_node);
7391 nsecsig->methods = &rdataset_methods;
7392 nsecsig->rdclass = db->rdclass;
7393 nsecsig->type = dns_rdatatype_rrsig;
7394 nsecsig->covers = closest->type;
7395 nsecsig->ttl = rdataset->ttl;
7396 nsecsig->trust = rdataset->trust;
7397 nsecsig->private1 = rdataset->private1;
7398 nsecsig->private2 = rdataset->private2;
7399 nsecsig->private3 = closest->negsig;
7400 nsecsig->privateuint4 = 0;
7401 nsecsig->private5 = NULL;
7402 nsecsig->private6 = NULL;
7403 nsecsig->private7 = NULL;
7405 dns_name_clone(&closest->name, name);
7407 return (ISC_R_SUCCESS);
7411 * Rdataset Iterator Methods
/*
 * rdatasetiter_destroy: release an rdataset iterator.
 *
 * Closes the iterator's version if it holds one (without committing),
 * detaches from its node, and returns the iterator's memory to the
 * database's memory context.
 */
7415 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7416 rbtdb_rdatasetiter_t *rbtiterator;
7418 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7420 if (rbtiterator->common.version != NULL)
7421 closeversion(rbtiterator->common.db,
7422 &rbtiterator->common.version, ISC_FALSE);
7423 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7424 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7425 sizeof(*rbtiterator));
/*
 * rdatasetiter_first: position the iterator on the first usable rdataset
 * header of its node.
 *
 * For a cache database the iterator's 'now' is used for the expiry test;
 * the version's serial is used for the visibility test.  Under the node's
 * bucket read lock the top-level headers are scanned through ->next and,
 * for each of them, the chain of older headers through ->down.  A header
 * qualifies when its serial is not newer than 'serial' and it is not
 * marked IGNORE; a qualifying header that is NONEXISTENT or expired is
 * rejected.  The outcome is stored in rbtiterator->current.
 *
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS; the test choosing between them
 * is not visible in this listing (presumably whether a header was found).
 */
7431 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7432 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7433 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7434 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7435 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7436 rdatasetheader_t *header, *top_next;
7437 rbtdb_serial_t serial;
7440 if (IS_CACHE(rbtdb)) {
7442 now = rbtiterator->common.now;
7444 serial = rbtversion->serial;
7448 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7449 isc_rwlocktype_read);
7451 for (header = rbtnode->data; header != NULL; header = top_next) {
/* Saved up front because 'header' is moved down the version chain below. */
7452 top_next = header->next;
7454 if (header->serial <= serial && !IGNORE(header)) {
7456 * Is this a "this rdataset doesn't exist"
7457 * record? Or is it too old in the cache?
7459 * Note: unlike everywhere else, we
7460 * check for now > header->rdh_ttl instead
7461 * of now >= header->rdh_ttl. This allows
7462 * ANY and RRSIG queries for 0 TTL
7463 * rdatasets to work.
7465 if (NONEXISTENT(header) ||
7466 (now != 0 && now > header->rdh_ttl))
7470 header = header->down;
7471 } while (header != NULL);
7476 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7477 isc_rwlocktype_read);
7479 rbtiterator->current = header;
7482 return (ISC_R_NOMORE);
7484 return (ISC_R_SUCCESS);
/*
 * rdatasetiter_next: advance the iterator to the next usable rdataset
 * header of its node.
 *
 * Starts from the header after rbtiterator->current and applies the same
 * visibility and expiry tests as rdatasetiter_first().  In addition,
 * top-level headers whose type equals the current header's type or its
 * "negative" counterpart ('negtype') are skipped.  The scan runs under
 * the node's bucket read lock and the outcome is stored back in
 * rbtiterator->current.
 *
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS; the tests choosing between them
 * are not visible in this listing (presumably whether a header is
 * available).
 */
7488 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7489 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7490 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7491 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7492 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7493 rdatasetheader_t *header, *top_next;
7494 rbtdb_serial_t serial;
7496 rbtdb_rdatatype_t type, negtype;
7497 dns_rdatatype_t rdtype, covers;
7499 header = rbtiterator->current;
7501 return (ISC_R_NOMORE);
7503 if (IS_CACHE(rbtdb)) {
7505 now = rbtiterator->common.now;
7507 serial = rbtversion->serial;
7511 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7512 isc_rwlocktype_read);
/*
 * Work out the type of the current header and the matching negative type.
 * NOTE(review): the condition selecting between the two 'negtype'
 * computations below is not visible in this listing.
 */
7514 type = header->type;
7515 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7517 covers = RBTDB_RDATATYPE_EXT(header->type);
7518 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7520 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7521 for (header = header->next; header != NULL; header = top_next) {
7522 top_next = header->next;
7524 * If not walking back up the down list.
7526 if (header->type != type && header->type != negtype) {
7528 if (header->serial <= serial &&
7531 * Is this a "this rdataset doesn't
7534 * Note: unlike everywhere else, we
7535 * check for now > header->ttl instead
7536 * of now >= header->ttl. This allows
7537 * ANY and RRSIG queries for 0 TTL
7538 * rdatasets to work.
7540 if ((header->attributes &
7541 RDATASET_ATTR_NONEXISTENT) != 0 ||
7542 (now != 0 && now > header->rdh_ttl))
7546 header = header->down;
7547 } while (header != NULL);
7553 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7554 isc_rwlocktype_read);
7556 rbtiterator->current = header;
7559 return (ISC_R_NOMORE);
7561 return (ISC_R_SUCCESS);
/*
 * rdatasetiter_current: bind the header the iterator currently points at
 * to 'rdataset'.
 *
 * The iterator must be positioned (current != NULL).  bind_rdataset() is
 * called under the node's bucket read lock with the iterator's 'now'.
 */
7565 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7566 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7567 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7568 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7569 rdatasetheader_t *header;
7571 header = rbtiterator->current;
7572 REQUIRE(header != NULL);
7574 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7575 isc_rwlocktype_read);
7577 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7580 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7581 isc_rwlocktype_read);
7586 * Database Iterator Methods
/*
 * reference_iter_node: call reactivate_node() on the iterator's current
 * node, passing the iterator's tree-lock state.  The iterator must hold
 * the tree lock in some mode (INSIST).
 *
 * NOTE(review): lines between the declarations and the INSIST are missing
 * from this listing; presumably they return early when there is no
 * current node -- confirm.
 */
7590 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7591 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7592 dns_rbtnode_t *node = rbtdbiter->node;
7597 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7598 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
/*
 * dereference_iter_node: drop the iterator's reference on its current
 * node and forget the node.
 *
 * decrement_reference() is called with the node's bucket read lock held
 * and is told the iterator's tree-lock state; afterwards rbtdbiter->node
 * is set to NULL.
 *
 * NOTE(review): lines before the lock lookup are missing from this
 * listing; presumably they return early when there is no current node --
 * confirm.
 */
7602 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7603 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7604 dns_rbtnode_t *node = rbtdbiter->node;
7610 lock = &rbtdb->node_locks[node->locknum].lock;
7611 NODE_LOCK(lock, isc_rwlocktype_read);
7612 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7613 rbtdbiter->tree_locked, ISC_FALSE);
7614 NODE_UNLOCK(lock, isc_rwlocktype_read);
7616 rbtdbiter->node = NULL;
/*
 * flush_deletions: release the references on the nodes queued in
 * rbtdbiter->deletions.
 *
 * Nothing is done when the queue is empty.  Otherwise the tree write lock
 * is needed: a tree read lock held by the iterator is dropped first, the
 * write lock is taken, every queued node has decrement_reference() called
 * on it under its bucket read lock, and the queue length is reset to 0.
 * On the way out the write lock is released and the iterator's previous
 * lock state is restored: read-locked again if it was read-locked before,
 * unlocked otherwise.
 */
7620 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7621 dns_rbtnode_t *node;
7622 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7623 isc_boolean_t was_read_locked = ISC_FALSE;
7627 if (rbtdbiter->delete != 0) {
7629 * Note that "%d node of %d in tree" can report things like
7630 * "flush_deletions: 59 nodes of 41 in tree". This means
7631 * That some nodes appear on the deletions list more than
7632 * once. Only the last occurence will actually be deleted.
7634 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7635 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7636 "flush_deletions: %d nodes of %d in tree",
7638 dns_rbt_nodecount(rbtdb->tree));
/* Swap a held read lock for the write lock (not an atomic upgrade). */
7640 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7641 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7642 was_read_locked = ISC_TRUE;
7644 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7645 rbtdbiter->tree_locked = isc_rwlocktype_write;
7647 for (i = 0; i < rbtdbiter->delete; i++) {
7648 node = rbtdbiter->deletions[i];
7649 lock = &rbtdb->node_locks[node->locknum].lock;
7651 NODE_LOCK(lock, isc_rwlocktype_read);
7652 decrement_reference(rbtdb, node, 0,
7653 isc_rwlocktype_read,
7654 rbtdbiter->tree_locked, ISC_FALSE);
7655 NODE_UNLOCK(lock, isc_rwlocktype_read);
7658 rbtdbiter->delete = 0;
/* Restore the tree-lock state the iterator had on entry. */
7660 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7661 if (was_read_locked) {
7662 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7663 rbtdbiter->tree_locked = isc_rwlocktype_read;
7666 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * resume_iteration: resume a paused database iterator.
 *
 * The iterator must be paused and must not hold the tree lock.  The tree
 * read lock is taken, recorded in tree_locked, and the paused flag is
 * cleared.
 */
7672 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7673 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7675 REQUIRE(rbtdbiter->paused);
7676 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7678 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7679 rbtdbiter->tree_locked = isc_rwlocktype_read;
7681 rbtdbiter->paused = ISC_FALSE;
/*
 * dbiterator_destroy: tear down a database iterator.
 *
 * Releases a held tree read lock, drops the reference on the current
 * node, flushes pending deletions, detaches from the database, resets
 * both node chains and frees the iterator.
 */
7685 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7686 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7687 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7688 dns_db_t *db = NULL;
7690 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7691 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7692 rbtdbiter->tree_locked = isc_rwlocktype_none;
7694 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7696 dereference_iter_node(rbtdbiter);
7698 flush_deletions(rbtdbiter);
/*
 * A temporary reference in 'db' is taken before the iterator's own
 * reference is detached, so that db->mctx can still be used to free the
 * iterator below.
 */
7700 dns_db_attach(rbtdbiter->common.db, &db);
7701 dns_db_detach(&rbtdbiter->common.db);
7703 dns_rbtnodechain_reset(&rbtdbiter->chain);
7704 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7705 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * dbiterator_first: move the iterator to the first node.
 *
 * A sticky result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Otherwise a paused iterator is resumed, the current node is
 * released and both chains are reset.  With 'nsec3only' only the NSEC3
 * tree is used; otherwise the main tree is tried first and, unless
 * 'nonsec3' is set, the NSEC3 tree is tried when the main tree yields
 * ISC_R_NOTFOUND.
 *
 * When a first node exists it becomes the current node, 'new_origin' is
 * set and the node is referenced.  ISC_R_NOTFOUND is the only other
 * outcome allowed (INSIST) and is turned into ISC_R_NOMORE.  The outcome
 * is stored in rbtdbiter->result.
 */
7712 dbiterator_first(dns_dbiterator_t *iterator) {
7713 isc_result_t result;
7714 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7715 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7716 dns_name_t *name, *origin;
7718 if (rbtdbiter->result != ISC_R_SUCCESS &&
7719 rbtdbiter->result != ISC_R_NOMORE)
7720 return (rbtdbiter->result);
7722 if (rbtdbiter->paused)
7723 resume_iteration(rbtdbiter);
7725 dereference_iter_node(rbtdbiter);
7727 name = dns_fixedname_name(&rbtdbiter->name);
7728 origin = dns_fixedname_name(&rbtdbiter->origin);
7729 dns_rbtnodechain_reset(&rbtdbiter->chain);
7730 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7732 if (rbtdbiter->nsec3only) {
7733 rbtdbiter->current = &rbtdbiter->nsec3chain;
7734 result = dns_rbtnodechain_first(rbtdbiter->current,
7735 rbtdb->nsec3, name, origin);
7737 rbtdbiter->current = &rbtdbiter->chain;
7738 result = dns_rbtnodechain_first(rbtdbiter->current,
7739 rbtdb->tree, name, origin);
/* Main tree empty: fall back to the NSEC3 tree unless it is excluded. */
7740 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7741 rbtdbiter->current = &rbtdbiter->nsec3chain;
7742 result = dns_rbtnodechain_first(rbtdbiter->current,
7747 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7748 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7749 NULL, &rbtdbiter->node);
7750 if (result == ISC_R_SUCCESS) {
7751 rbtdbiter->new_origin = ISC_TRUE;
7752 reference_iter_node(rbtdbiter);
7755 INSIST(result == ISC_R_NOTFOUND);
7756 result = ISC_R_NOMORE; /* The tree is empty. */
7759 rbtdbiter->result = result;
/*
 * dbiterator_last: move the iterator to the last node.
 *
 * A sticky result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Otherwise a paused iterator is resumed, the current node is
 * released and both chains are reset.  'result' starts as ISC_R_NOTFOUND;
 * the NSEC3 tree is consulted when 'nsec3only' is set and 'nonsec3' is
 * not, and the main tree is consulted when 'nsec3only' is not set and
 * nothing has been found yet.
 *
 * When a last node exists it becomes the current node, 'new_origin' is
 * set and the node is referenced.  ISC_R_NOTFOUND is the only other
 * outcome allowed (INSIST) and is turned into ISC_R_NOMORE.  The outcome
 * is stored in rbtdbiter->result.
 */
7765 dbiterator_last(dns_dbiterator_t *iterator) {
7766 isc_result_t result;
7767 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7768 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7769 dns_name_t *name, *origin;
7771 if (rbtdbiter->result != ISC_R_SUCCESS &&
7772 rbtdbiter->result != ISC_R_NOMORE)
7773 return (rbtdbiter->result);
7775 if (rbtdbiter->paused)
7776 resume_iteration(rbtdbiter);
7778 dereference_iter_node(rbtdbiter);
7780 name = dns_fixedname_name(&rbtdbiter->name);
7781 origin = dns_fixedname_name(&rbtdbiter->origin);
7782 dns_rbtnodechain_reset(&rbtdbiter->chain);
7783 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7785 result = ISC_R_NOTFOUND;
/*
 * NOTE(review): as written, a default iterator (neither flag set) never
 * consults the NSEC3 tree here, unlike dbiterator_first(); confirm that
 * this asymmetry is intended.
 */
7786 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7787 rbtdbiter->current = &rbtdbiter->nsec3chain;
7788 result = dns_rbtnodechain_last(rbtdbiter->current,
7789 rbtdb->nsec3, name, origin);
7791 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7792 rbtdbiter->current = &rbtdbiter->chain;
7793 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7796 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7797 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7798 NULL, &rbtdbiter->node);
7799 if (result == ISC_R_SUCCESS) {
7800 rbtdbiter->new_origin = ISC_TRUE;
7801 reference_iter_node(rbtdbiter);
7804 INSIST(result == ISC_R_NOTFOUND);
7805 result = ISC_R_NOMORE; /* The tree is empty. */
7808 rbtdbiter->result = result;
/*
 * Position the iterator at the node matching "name".
 *
 * A stored iterator result other than ISC_R_SUCCESS, ISC_R_NOTFOUND or
 * ISC_R_NOMORE is returned unchanged.  Otherwise a paused iterator is
 * resumed, the currently held node is released and both node chains are
 * reset before the lookup.
 *
 * Tree selection: an NSEC3-only iterator searches rbtdb->nsec3, a
 * no-NSEC3 iterator searches rbtdb->tree, and an unrestricted iterator
 * searches rbtdb->tree first and retries in rbtdb->nsec3 when the main
 * tree reports DNS_R_PARTIALMATCH, switching to the NSEC3 chain only if
 * that second lookup succeeds.  All lookups pass DNS_RBTFIND_EMPTYDATA.
 */
7814 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7815 isc_result_t result;
7816 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7817 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7818 dns_name_t *iname, *origin;
7820 if (rbtdbiter->result != ISC_R_SUCCESS &&
7821 rbtdbiter->result != ISC_R_NOTFOUND &&
7822 rbtdbiter->result != ISC_R_NOMORE)
7823 return (rbtdbiter->result);
7825 if (rbtdbiter->paused)
7826 resume_iteration(rbtdbiter);
/* Release the node held from the previous position before moving. */
7828 dereference_iter_node(rbtdbiter);
7830 iname = dns_fixedname_name(&rbtdbiter->name);
7831 origin = dns_fixedname_name(&rbtdbiter->origin);
7832 dns_rbtnodechain_reset(&rbtdbiter->chain);
7833 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7835 if (rbtdbiter->nsec3only) {
7836 rbtdbiter->current = &rbtdbiter->nsec3chain;
7837 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7840 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7841 } else if (rbtdbiter->nonsec3) {
7842 rbtdbiter->current = &rbtdbiter->chain;
7843 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7846 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7849 * Stay on main chain if not found on either chain.
7851 rbtdbiter->current = &rbtdbiter->chain;
7852 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7855 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7856 if (result == DNS_R_PARTIALMATCH) {
/* Second lookup uses a local node so the main-tree state is kept on failure. */
7857 dns_rbtnode_t *node = NULL;
7858 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7859 &node, &rbtdbiter->nsec3chain,
7860 DNS_RBTFIND_EMPTYDATA,
7862 if (result == ISC_R_SUCCESS) {
7863 rbtdbiter->node = node;
7864 rbtdbiter->current = &rbtdbiter->nsec3chain;
/*
 * NOTE(review): two result-handling sequences follow.  The first maps
 * DNS_R_PARTIALMATCH to ISC_R_NOTFOUND with no node; the second records
 * ISC_R_SUCCESS for DNS_R_PARTIALMATCH.  Presumably a preprocessor
 * conditional selects exactly one of them -- confirm.
 */
7870 if (result == ISC_R_SUCCESS) {
7871 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7873 if (result == ISC_R_SUCCESS) {
7874 rbtdbiter->new_origin = ISC_TRUE;
7875 reference_iter_node(rbtdbiter);
7877 } else if (result == DNS_R_PARTIALMATCH) {
7878 result = ISC_R_NOTFOUND;
7879 rbtdbiter->node = NULL;
7882 rbtdbiter->result = result;
7884 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7885 isc_result_t tresult;
7886 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7888 if (tresult == ISC_R_SUCCESS) {
7889 rbtdbiter->new_origin = ISC_TRUE;
7890 reference_iter_node(rbtdbiter);
7893 rbtdbiter->node = NULL;
7896 rbtdbiter->node = NULL;
7898 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7899 ISC_R_SUCCESS : result;
/*
 * Move the iterator to the previous node.
 *
 * Requires that the iterator currently holds a node.  A stored result
 * other than ISC_R_SUCCESS is returned unchanged.  When the current chain
 * is exhausted (ISC_R_NOMORE) and an unrestricted iterator is on the NSEC3
 * chain, iteration continues from the last node of the main tree; an
 * empty main tree (ISC_R_NOTFOUND) is reported as ISC_R_NOMORE.
 * The outcome is recorded in rbtdbiter->result.
 */
7906 dbiterator_prev(dns_dbiterator_t *iterator) {
7907 isc_result_t result;
7908 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7909 dns_name_t *name, *origin;
7910 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7912 REQUIRE(rbtdbiter->node != NULL);
7914 if (rbtdbiter->result != ISC_R_SUCCESS)
7915 return (rbtdbiter->result);
7917 if (rbtdbiter->paused)
7918 resume_iteration(rbtdbiter);
7920 name = dns_fixedname_name(&rbtdbiter->name);
7921 origin = dns_fixedname_name(&rbtdbiter->origin);
7922 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
/* Cross from the NSEC3 chain back to the main chain when both are iterated. */
7923 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7924 !rbtdbiter->nonsec3 &&
7925 &rbtdbiter->nsec3chain == rbtdbiter->current) {
7926 rbtdbiter->current = &rbtdbiter->chain;
7927 dns_rbtnodechain_reset(rbtdbiter->current);
7928 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7930 if (result == ISC_R_NOTFOUND)
7931 result = ISC_R_NOMORE;
/* The old node is released whether or not a new one was found. */
7934 dereference_iter_node(rbtdbiter);
7936 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
/* Remember whether this step changed the origin. */
7937 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7938 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7939 NULL, &rbtdbiter->node);
7942 if (result == ISC_R_SUCCESS)
7943 reference_iter_node(rbtdbiter);
7945 rbtdbiter->result = result;
/*
 * Move the iterator to the next node.
 *
 * Requires that the iterator currently holds a node.  A stored result
 * other than ISC_R_SUCCESS is returned unchanged.  When the current chain
 * is exhausted (ISC_R_NOMORE) and an unrestricted iterator is on the main
 * chain, iteration continues from the first node of the NSEC3 tree; an
 * empty NSEC3 tree (ISC_R_NOTFOUND) is reported as ISC_R_NOMORE.
 * The outcome is recorded in rbtdbiter->result.
 */
7951 dbiterator_next(dns_dbiterator_t *iterator) {
7952 isc_result_t result;
7953 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7954 dns_name_t *name, *origin;
7955 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7957 REQUIRE(rbtdbiter->node != NULL);
7959 if (rbtdbiter->result != ISC_R_SUCCESS)
7960 return (rbtdbiter->result);
7962 if (rbtdbiter->paused)
7963 resume_iteration(rbtdbiter);
7965 name = dns_fixedname_name(&rbtdbiter->name);
7966 origin = dns_fixedname_name(&rbtdbiter->origin);
7967 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
/* Cross from the main chain to the NSEC3 chain when both are iterated. */
7968 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7969 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
7970 rbtdbiter->current = &rbtdbiter->nsec3chain;
7971 dns_rbtnodechain_reset(rbtdbiter->current);
7972 result = dns_rbtnodechain_first(rbtdbiter->current,
7973 rbtdb->nsec3, name, origin);
7974 if (result == ISC_R_NOTFOUND)
7975 result = ISC_R_NOMORE;
/* The old node is released whether or not a new one was found. */
7978 dereference_iter_node(rbtdbiter);
7980 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
/* Remember whether this step changed the origin. */
7981 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7982 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7983 NULL, &rbtdbiter->node);
7985 if (result == ISC_R_SUCCESS)
7986 reference_iter_node(rbtdbiter);
7988 rbtdbiter->result = result;
/*
 * Return the node the iterator is positioned on, with a new reference.
 *
 * Requires that the stored iterator result is ISC_R_SUCCESS and that a
 * node is held.  The node name is built with dns_name_concatenate() from
 * the stored node name and origin; DNS_R_NEWORIGIN is reported when the
 * iterator uses relative names and the origin changed at this position.
 * The node is stored in *nodep after its reference count has been raised
 * under the node lock.
 *
 * When the iterator is in cleaning mode and the result is ISC_R_SUCCESS,
 * the node is also expired via expirenode(); a node without a down
 * pointer is queued in rbtdbiter->deletions with an extra reference, and
 * a full deletion batch is flushed first.
 */
7994 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
7997 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7998 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7999 dns_rbtnode_t *node = rbtdbiter->node;
8000 isc_result_t result;
8001 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8002 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8004 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8005 REQUIRE(rbtdbiter->node != NULL);
8007 if (rbtdbiter->paused)
8008 resume_iteration(rbtdbiter);
8011 if (rbtdbiter->common.relative_names)
8013 result = dns_name_concatenate(nodename, origin, name, NULL);
8014 if (result != ISC_R_SUCCESS)
8016 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8017 result = DNS_R_NEWORIGIN;
8019 result = ISC_R_SUCCESS;
/* Take the caller reference while holding the lock of the node bucket. */
8021 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8022 new_reference(rbtdb, node);
8023 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8025 *nodep = rbtdbiter->node;
/* Cleaning mode: expire the node and queue it for batched deletion. */
8027 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8028 isc_result_t expire_result;
8031 * If the deletion array is full, flush it before trying
8032 * to expire the current node. The current node can't
8033 * fully deleted while the iteration cursor is still on it.
8035 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8036 flush_deletions(rbtdbiter);
8038 expire_result = expirenode(iterator->db, *nodep, 0);
8041 * expirenode() currently always returns success.
8043 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8046 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8047 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8048 dns_rbtnode_refincrement(node, &refs);
8050 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause iteration so that the tree lock is not held between calls.
 *
 * A stored iterator result other than ISC_R_SUCCESS or ISC_R_NOMORE is
 * returned unchanged, and pausing an already paused iterator is a no-op
 * that returns ISC_R_SUCCESS.  Otherwise the iterator is marked paused,
 * the tree read lock is released if it is held, and pending node
 * deletions are flushed.
 */
8058 dbiterator_pause(dns_dbiterator_t *iterator) {
8059 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8060 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8062 if (rbtdbiter->result != ISC_R_SUCCESS &&
8063 rbtdbiter->result != ISC_R_NOMORE)
8064 return (rbtdbiter->result);
8066 if (rbtdbiter->paused)
8067 return (ISC_R_SUCCESS);
8069 rbtdbiter->paused = ISC_TRUE;
/* The iterator only ever holds the tree lock in read mode. */
8071 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8072 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8073 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8074 rbtdbiter->tree_locked = isc_rwlocktype_none;
8077 flush_deletions(rbtdbiter);
8079 return (ISC_R_SUCCESS);
/*
 * Copy the origin stored in the iterator into "name".
 *
 * Returns the stored iterator result unchanged when it is not
 * ISC_R_SUCCESS; otherwise returns the result of dns_name_copy().
 */
8083 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8084 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8085 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8087 if (rbtdbiter->result != ISC_R_SUCCESS)
8088 return (rbtdbiter->result);
8090 return (dns_name_copy(origin, name, NULL));
8094 * Additional cache routines.
/*
 * Look up the additional-section cache entry for the current rdata of
 * "rdataset" and fetch its contents through dns_acache_getentry().
 *
 * The slot index is derived from the rdata count stored in the first two
 * bytes of the slab and the number of rdata not yet iterated
 * (rdataset->privateuint4).  The per-header array is selected by "type"
 * and read under the node read lock.
 *
 * Returns ISC_R_NOTFOUND when there is no array for the type or the slot
 * is empty; otherwise the result of dns_acache_getentry().  A query miss
 * is counted on the acache for an empty slot, and for a missing array
 * unless the type is dns_rdatasetadditional_fromcache.
 */
8097 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8098 dns_rdatatype_t qtype, dns_acache_t *acache,
8099 dns_zone_t **zonep, dns_db_t **dbp,
8100 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8101 dns_name_t *fname, dns_message_t *msg,
8104 dns_rbtdb_t *rbtdb = rdataset->private1;
8105 dns_rbtnode_t *rbtnode = rdataset->private2;
8106 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8107 unsigned int current_count = rdataset->privateuint4;
8109 rdatasetheader_t *header;
8110 nodelock_t *nodelock;
8111 unsigned int total_count;
8112 acachectl_t *acarray;
8113 dns_acacheentry_t *entry;
8114 isc_result_t result;
8116 UNUSED(qtype); /* we do not use this value at least for now */
/* The rdataset header sits immediately before the raw slab data. */
8119 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* The first two slab bytes hold the rdata count, high byte first. */
8121 total_count = raw[0] * 256 + raw[1];
8122 INSIST(total_count > current_count);
8123 count = total_count - current_count - 1;
8127 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8128 NODE_LOCK(nodelock, isc_rwlocktype_read);
8131 case dns_rdatasetadditional_fromauth:
8132 acarray = header->additional_auth;
8134 case dns_rdatasetadditional_fromcache:
8137 case dns_rdatasetadditional_fromglue:
8138 acarray = header->additional_glue;
8144 if (acarray == NULL) {
8145 if (type != dns_rdatasetadditional_fromcache)
8146 dns_acache_countquerymiss(acache);
8147 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8148 return (ISC_R_NOTFOUND);
8151 if (acarray[count].entry == NULL) {
8152 dns_acache_countquerymiss(acache);
8153 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8154 return (ISC_R_NOTFOUND);
/* Hold a reference on the entry so it can be used after the node lock is dropped. */
8158 dns_acache_attachentry(acarray[count].entry, &entry);
8160 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8162 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8163 nodep, fname, msg, now);
8165 dns_acache_detachentry(&entry);
/*
 * Callback registered with the acache for entries created by
 * rdataset_setadditional().
 *
 * Under the node write lock, the array matching cbarg->type is looked up
 * and, if the slot at cbarg->count still refers to "entry", the slot is
 * cleared.  The callback argument is released with isc_mem_put(), the
 * entry is detached, and the node and database references taken from the
 * callback argument are dropped after the lock is released.
 */
8171 acache_callback(dns_acacheentry_t *entry, void **arg) {
8173 dns_rbtnode_t *rbtnode;
8174 nodelock_t *nodelock;
8175 acachectl_t *acarray = NULL;
8176 acache_cbarg_t *cbarg;
8179 REQUIRE(arg != NULL);
8183 * The caller must hold the entry lock.
8186 rbtdb = (dns_rbtdb_t *)cbarg->db;
8187 rbtnode = (dns_rbtnode_t *)cbarg->node;
8189 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8190 NODE_LOCK(nodelock, isc_rwlocktype_write);
8192 switch (cbarg->type) {
8193 case dns_rdatasetadditional_fromauth:
8194 acarray = cbarg->header->additional_auth;
8196 case dns_rdatasetadditional_fromglue:
8197 acarray = cbarg->header->additional_glue;
8203 count = cbarg->count;
/* Only clear the slot if it still points at the entry being called back. */
8204 if (acarray != NULL && acarray[count].entry == entry) {
8205 acarray[count].entry = NULL;
8206 INSIST(acarray[count].cbarg == cbarg);
8207 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8208 acarray[count].cbarg = NULL;
8210 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8212 dns_acache_detachentry(&entry);
8214 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* Release the node and database references after dropping the node lock. */
8216 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8217 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an acache entry and release its callback argument.
 *
 * Requires non-NULL "mctx" and "entry", and a non-NULL "cbargp" pointing
 * at a non-NULL callback argument.  The entry is cancelled with
 * dns_acache_cancelentry(), the node and database references held by the
 * callback argument are detached, and the argument itself is returned to
 * "mctx".
 */
8223 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8224 acache_cbarg_t **cbargp)
8226 acache_cbarg_t *cbarg;
8228 REQUIRE(mctx != NULL);
8229 REQUIRE(entry != NULL);
8230 REQUIRE(cbargp != NULL && *cbargp != NULL);
8234 dns_acache_cancelentry(entry);
8235 dns_db_detachnode(cbarg->db, &cbarg->node);
8236 dns_db_detach(&cbarg->db);
8238 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Install an additional-section cache entry for the current rdata of
 * "rdataset".
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache; ISC_R_SUCCESS
 * is returned immediately.  Otherwise a callback argument is allocated
 * (ISC_R_NOMEMORY on failure) holding new references to the database and
 * node, an acache entry is created with acache_callback() as its callback
 * and filled in with dns_acache_setentry(), and the entry is stored in
 * the per-header array selected by "type" under the node write lock.
 * The array is allocated with one slot per rdata on first use.
 *
 * An entry already present in the slot is replaced; the old entry is
 * cancelled and detached only after the node lock has been released.
 */
8244 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8245 dns_rdatatype_t qtype, dns_acache_t *acache,
8246 dns_zone_t *zone, dns_db_t *db,
8247 dns_dbversion_t *version, dns_dbnode_t *node,
8250 dns_rbtdb_t *rbtdb = rdataset->private1;
8251 dns_rbtnode_t *rbtnode = rdataset->private2;
8252 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8253 unsigned int current_count = rdataset->privateuint4;
8254 rdatasetheader_t *header;
8255 unsigned int total_count, count;
8256 nodelock_t *nodelock;
8257 isc_result_t result;
8258 acachectl_t *acarray;
8259 dns_acacheentry_t *newentry, *oldentry = NULL;
8260 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8264 if (type == dns_rdatasetadditional_fromcache)
8265 return (ISC_R_SUCCESS);
/* The rdataset header sits immediately before the raw slab data. */
8267 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* The first two slab bytes hold the rdata count, high byte first. */
8269 total_count = raw[0] * 256 + raw[1];
8270 INSIST(total_count > current_count);
8271 count = total_count - current_count - 1; /* should be private data */
8273 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8274 if (newcbarg == NULL)
8275 return (ISC_R_NOMEMORY);
/* The callback argument keeps its own references to the db and node. */
8276 newcbarg->type = type;
8277 newcbarg->count = count;
8278 newcbarg->header = header;
8279 newcbarg->db = NULL;
8280 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8281 newcbarg->node = NULL;
8282 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8285 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8286 acache_callback, newcbarg, &newentry);
8287 if (result != ISC_R_SUCCESS)
8289 /* Set cache data in the new entry. */
8290 result = dns_acache_setentry(acache, newentry, zone, db,
8291 version, node, fname);
8292 if (result != ISC_R_SUCCESS)
8295 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8296 NODE_LOCK(nodelock, isc_rwlocktype_write);
8300 case dns_rdatasetadditional_fromauth:
8301 acarray = header->additional_auth;
8303 case dns_rdatasetadditional_fromglue:
8304 acarray = header->additional_glue;
/* First use for this header and type: allocate and clear the slot array. */
8310 if (acarray == NULL) {
8313 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8314 sizeof(acachectl_t));
8316 if (acarray == NULL) {
8317 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8321 for (i = 0; i < total_count; i++) {
8322 acarray[i].entry = NULL;
8323 acarray[i].cbarg = NULL;
/* Publish the array pointer in the header field matching the type. */
8327 case dns_rdatasetadditional_fromauth:
8328 header->additional_auth = acarray;
8330 case dns_rdatasetadditional_fromglue:
8331 header->additional_glue = acarray;
8337 if (acarray[count].entry != NULL) {
8339 * Swap the entry. Delay cleaning-up the old entry since
8340 * it would require a node lock.
8342 oldentry = acarray[count].entry;
8343 INSIST(acarray[count].cbarg != NULL);
8344 oldcbarg = acarray[count].cbarg;
8346 acarray[count].entry = newentry;
8347 acarray[count].cbarg = newcbarg;
8349 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8351 if (oldentry != NULL) {
8352 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8353 dns_acache_detachentry(&oldentry);
8356 return (ISC_R_SUCCESS);
/*
 * NOTE(review): the statements below release the new entry and callback
 * argument; presumably this is the failure path reached from the error
 * checks above -- confirm.
 */
8359 if (newcbarg != NULL) {
8360 if (newentry != NULL) {
8361 acache_cancelentry(rbtdb->common.mctx, newentry,
8363 dns_acache_detachentry(&newentry);
8365 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8366 dns_db_detach(&newcbarg->db);
8367 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Remove the additional-section cache entry for the current rdata of
 * "rdataset".
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache, so that type
 * returns ISC_R_SUCCESS immediately.  Otherwise the slot is located in
 * the per-header array selected by "type" and cleared under the node
 * write lock; the entry is cancelled and detached after the lock has
 * been released.
 *
 * Returns ISC_R_NOTFOUND when there is no array for the type or the slot
 * is empty, and ISC_R_SUCCESS otherwise.
 */
8376 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8377 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8379 dns_rbtdb_t *rbtdb = rdataset->private1;
8380 dns_rbtnode_t *rbtnode = rdataset->private2;
8381 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8382 unsigned int current_count = rdataset->privateuint4;
8383 rdatasetheader_t *header;
8384 nodelock_t *nodelock;
8385 unsigned int total_count, count;
8386 acachectl_t *acarray;
8387 dns_acacheentry_t *entry;
8388 acache_cbarg_t *cbarg;
8390 UNUSED(qtype); /* we do not use this value at least for now */
8393 if (type == dns_rdatasetadditional_fromcache)
8394 return (ISC_R_SUCCESS);
/* The rdataset header sits immediately before the raw slab data. */
8396 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* The first two slab bytes hold the rdata count, high byte first. */
8398 total_count = raw[0] * 256 + raw[1];
8399 INSIST(total_count > current_count);
8400 count = total_count - current_count - 1;
8405 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8406 NODE_LOCK(nodelock, isc_rwlocktype_write);
8409 case dns_rdatasetadditional_fromauth:
8410 acarray = header->additional_auth;
8412 case dns_rdatasetadditional_fromglue:
8413 acarray = header->additional_glue;
8419 if (acarray == NULL) {
8420 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8421 return (ISC_R_NOTFOUND);
8424 entry = acarray[count].entry;
8425 if (entry == NULL) {
8426 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8427 return (ISC_R_NOTFOUND);
/* Empty the slot under the lock; the entry itself is cleaned up afterwards. */
8430 acarray[count].entry = NULL;
8431 cbarg = acarray[count].cbarg;
8432 acarray[count].cbarg = NULL;
8434 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8436 if (entry != NULL) {
8438 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8439 dns_acache_detachentry(&entry);
8442 return (ISC_R_SUCCESS);
8446 * Routines for LRU-based cache management.
8450 * See if a given cache entry that is being reused needs to be updated
8451 * in the LRU-list. From the LRU management point of view, this function is
8452 * expected to return true for almost all cases. When used with threads,
8453 * however, this may cause a non-negligible performance penalty because a
8454 * writer lock will have to be acquired before updating the list.
8455 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8456 * function returns true if the entry has not been updated for some period of
8457 * time. We differentiate the NS or glue address case and the others since
8458 * experiments have shown that the former tends to be accessed relatively
8459 * infrequently and the cost of a cache miss is higher (e.g., a missing NS record
8460 * may cause external queries at a higher level zone, involving more
8463 * Caller must hold the node (read or write) lock.
/*
 * Decide whether "header" is due for an LRU refresh at time "now".
 * The decision uses the header attribute bits and, when rate limiting is
 * compiled in, the age of header->last_used relative to "now".
 */
8465 static inline isc_boolean_t
8466 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
/* Check the NONEXISTENT and STALE attribute bits first. */
8467 if ((header->attributes &
8468 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
/* The age thresholds below apply only when DNS_RBTDB_LIMITLRUUPDATE is non-zero. */
8471 #if DNS_RBTDB_LIMITLRUUPDATE
8472 if (header->type == dns_rdatatype_ns ||
8473 (header->trust == dns_trust_glue &&
8474 (header->type == dns_rdatatype_a ||
8475 header->type == dns_rdatatype_aaaa))) {
8477 * Glue records are updated if at least 60 seconds have passed
8478 * since the previous update time.
8480 return (header->last_used + 60 <= now);
8483 /* Other records are updated if 5 minutes have passed. */
8484 return (header->last_used + 300 <= now);
8493 * Update the timestamp of a given cache entry and move it to the head
8494 * of the corresponding LRU list.
8496 * Caller must hold the node (write) lock.
8498 * Note that we do NOT touch the heap here, as the TTL has not changed.
/*
 * Stamp "header" with the time "now" and move it to the front of the
 * rdataset list belonging to the lock bucket of its node.
 */
8501 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
/* Only meaningful for cache databases. */
8504 INSIST(IS_CACHE(rbtdb));
8506 /* To be checked: can we really assume this? XXXMLG */
8507 INSIST(ISC_LINK_LINKED(header, link));
/* Unlink and re-insert at the head so the list stays ordered by recency. */
8509 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8510 header->last_used = now;
8511 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8515 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8516 * under an overmem condition. To recover from this condition quickly, up to
8517 * 2 entries will be purged. This process is triggered while adding a new
8518 * entry, and we specifically avoid purging entries in the same LRU bucket as
8519 * the one to which the new entry will belong. Otherwise, we might purge
8520 * entries of the same name of different RR types while adding RRsets from a
8521 * single response (consider the case where we're adding A and AAAA glue records
8522 * of the same NS name).
/*
 * Expire cache entries from lock buckets other than "locknum_start".
 *
 * Buckets are visited in increasing order starting just after
 * "locknum_start" and wrapping around; the scan stops when it returns to
 * "locknum_start" or when purgecount is no longer positive.  Each bucket
 * is processed under its node write lock.  "tree_locked" is passed
 * through to expire_header().
 */
8525 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8526 isc_stdtime_t now, isc_boolean_t tree_locked)
8528 rdatasetheader_t *header, *header_prev;
8529 unsigned int locknum;
8532 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8533 locknum != locknum_start && purgecount > 0;
8534 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8535 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8536 isc_rwlocktype_write);
/* First candidate: the top element of the heap for this bucket, if its TTL has passed. */
8538 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8539 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8540 expire_header(rbtdb, header, tree_locked);
/* Then walk the bucket list from its tail; the previous link is saved before unlinking. */
8544 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8545 header != NULL && purgecount > 0;
8546 header = header_prev) {
8547 header_prev = ISC_LIST_PREV(header, link);
8549 * Unlink the entry at this point to avoid checking it
8550 * again even if it's currently used someone else and
8551 * cannot be purged at this moment. This entry won't be
8552 * referenced any more (so unlinking is safe) since the
8553 * TTL was reset to 0.
8555 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8557 expire_header(rbtdb, header, tree_locked);
8561 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8562 isc_rwlocktype_write);
8567 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8568 isc_boolean_t tree_locked)
8570 set_ttl(rbtdb, header, 0);
8571 header->attributes |= RDATASET_ATTR_STALE;
8572 header->node->dirty = 1;
8575 * Caller must hold the node (write) lock.
8578 if (dns_rbtnode_refcurrent(header->node) == 0) {
8580 * If no one else is using the node, we can clean it up now.
8581 * We first need to gain a new reference to the node to meet a
8582 * requirement of decrement_reference().
8584 new_reference(rbtdb, header->node);
8585 decrement_reference(rbtdb, header->node, 0,
8586 isc_rwlocktype_write,
8587 tree_locked ? isc_rwlocktype_write :
8588 isc_rwlocktype_none, ISC_FALSE);