2 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.270.12.6 2009/05/06 23:34:30 jinmei Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
65 #include <dns/zonekey.h>
67 #ifdef DNS_RBTDB_VERSION64
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
80 * Note that "impmagic" is not the first four bytes of the struct, so
81 * ISC_MAGIC_VALID cannot be used.
83 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
84 (rbtdb)->common.impmagic == RBTDB_MAGIC)
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t rbtdb_serial_t;
89 * Make casting easier in symbolic debuggers by using different names
90 * for the 64 bit version.
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
96 typedef isc_uint32_t rbtdb_serial_t;
99 typedef isc_uint32_t rbtdb_rdatatype_t;
101 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
102 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
103 #define RBTDB_RDATATYPE_VALUE(b, e) ((rbtdb_rdatatype_t)((e) << 16) | (b))
105 #define RBTDB_RDATATYPE_SIGNSEC \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
119 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120 * Using rwlock is effective with regard to lookup performance only when
121 * it is implemented in an efficient way.
122 * Otherwise, it is generally wise to stick to the simple locking since rwlock
123 * would require more memory or can even make lookups slower due to its own
124 * overhead (when it internally calls mutex locks).
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
129 #define DNS_RBTDB_USERWLOCK 0
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
138 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t) LOCK(l)
141 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
145 * Since node locking is sensitive to both performance and memory footprint,
146 * we need some trick here. If we have both high-performance rwlock and
147 * high performance and small-memory reference counters, we use rwlock for
148 * node lock and isc_refcount for node references. In this case, we don't have
149 * to protect the access to the counters by locks.
150 * Otherwise, we simply use ordinary mutex lock for node locking, and use
151 * simple integers as reference counters which are protected by the lock.
152 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153 * NODE_UNLOCK. In some other cases, however, we need to protect reference
154 * counters first and then protect other parts of a node as read-only data.
155 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156 * provided for these special cases. When we can use the efficient backend
157 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159 * section including the access to the reference counter.
160 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161 * section is also protected by NODE_STRONGLOCK().
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
166 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t) RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
172 #define NODE_STRONGLOCK(l) ((void)0)
173 #define NODE_STRONGUNLOCK(l) ((void)0)
174 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
178 typedef isc_mutex_t nodelock_t;
180 #define NODE_INITLOCK(l) isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
182 #define NODE_LOCK(l, t) LOCK(l)
183 #define NODE_UNLOCK(l, t) UNLOCK(l)
184 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
186 #define NODE_STRONGLOCK(l) LOCK(l)
187 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t) ((void)0)
189 #define NODE_WEAKUNLOCK(l, t) ((void)0)
190 #define NODE_WEAKDOWNGRADE(l) ((void)0)
194 * Whether to rate-limit updating the LRU to avoid possible thread contention.
195 * Our performance measurement has shown the cost is marginal, so it's defined
196 * to be 0 by default either with or without threads.
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 * Allow clients with a virtual time of up to 5 minutes in the past to see
204 * records that would otherwise have expired.
206 #define RBTDB_VIRTUAL 300
212 dns_rdatatype_t type;
215 typedef struct acachectl acachectl_t;
217 typedef struct rdatasetheader {
219 * Locked by the owning node's lock.
221 rbtdb_serial_t serial;
223 rbtdb_rdatatype_t type;
224 isc_uint16_t attributes;
226 struct noqname *noqname;
227 struct noqname *closest;
229 * We don't use the LIST macros, because the LIST structure has
230 * both head and tail pointers, and is doubly linked.
233 struct rdatasetheader *next;
235 * If this is the top header for an rdataset, 'next' points
236 * to the top header for the next rdataset (i.e., the next type).
237 * Otherwise, it points up to the header whose down pointer points
241 struct rdatasetheader *down;
243 * Points to the header for the next older version of
249 * Monotonically increased every time this rdataset is bound so that
250 * it is used as the base of the starting point in DNS responses
251 * when the "cyclic" rrset-order is required. Since the ordering
252 * should not be so crucial, no lock is set for the counter for
253 * performance reasons.
256 acachectl_t *additional_auth;
257 acachectl_t *additional_glue;
260 isc_stdtime_t last_used;
261 ISC_LINK(struct rdatasetheader) lru_link;
263 * Used for LRU-based cache management. We should probably make
264 * these cache-DB specific. We might also make it a pointer and
265 * ensure only the top header has a valid link to save memory.
266 * The linked-list is locked by the rbtdb->lrulock.
270 * It's possible this should not be here anymore, but instead
271 * referenced from the bucket's heap directly.
276 unsigned int heap_index;
278 * Used for TTL-based cache cleaning.
280 isc_stdtime_t resign;
283 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
284 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
286 #define RDATASET_ATTR_NONEXISTENT 0x0001
287 #define RDATASET_ATTR_STALE 0x0002
288 #define RDATASET_ATTR_IGNORE 0x0004
289 #define RDATASET_ATTR_RETAIN 0x0008
290 #define RDATASET_ATTR_NXDOMAIN 0x0010
291 #define RDATASET_ATTR_RESIGN 0x0020
292 #define RDATASET_ATTR_STATCOUNT 0x0040
293 #define RDATASET_ATTR_OPTOUT 0x0080
295 typedef struct acache_cbarg {
296 dns_rdatasetadditional_t type;
300 rdatasetheader_t *header;
304 dns_acacheentry_t *entry;
305 acache_cbarg_t *cbarg;
310 * When the cache will pre-expire data (due to memory low or other
311 * situations) before the rdataset's TTL has expired, it MUST
312 * respect the RETAIN bit and not expire the data until its TTL is
316 #undef IGNORE /* WIN32 winbase.h defines this. */
318 #define EXISTS(header) \
319 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
320 #define NONEXISTENT(header) \
321 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
322 #define IGNORE(header) \
323 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
324 #define RETAIN(header) \
325 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
326 #define NXDOMAIN(header) \
327 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
328 #define RESIGN(header) \
329 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
330 #define OPTOUT(header) \
331 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
333 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
336 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
337 * There is a tradeoff issue about configuring this value: if this is too
338 * small, it may cause heavier contention between threads; if this is too large,
339 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
340 * The default value should work well for most environments, but this can
341 * also be configurable at compilation time via the
342 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
343 * 1 due to the assumption of overmem_purge().
345 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
346 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
347 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
349 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
352 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
353 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
357 /* Protected in the refcount routines. */
358 isc_refcount_t references;
359 /* Locked by lock. */
360 isc_boolean_t exiting;
363 typedef struct rbtdb_changed {
364 dns_rbtnode_t * node;
366 ISC_LINK(struct rbtdb_changed) link;
369 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
377 typedef struct rbtdb_version {
379 rbtdb_serial_t serial;
381 * Protected in the refcount routines.
382 * XXXJT: should we change the lock policy based on the refcount
385 isc_refcount_t references;
386 /* Locked by database lock. */
387 isc_boolean_t writer;
388 isc_boolean_t commit_ok;
389 rbtdb_changedlist_t changed_list;
390 rdatasetheaderlist_t resigned_list;
391 ISC_LINK(struct rbtdb_version) link;
392 dns_db_secure_t secure;
393 isc_boolean_t havensec3;
394 /* NSEC3 parameters */
397 isc_uint16_t iterations;
398 isc_uint8_t salt_length;
399 unsigned char salt[NSEC3_MAX_HASH_LENGTH];
402 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
407 #if DNS_RBTDB_USERWLOCK
412 isc_rwlock_t tree_lock;
413 unsigned int node_lock_count;
414 rbtdb_nodelock_t * node_locks;
415 dns_rbtnode_t * origin_node;
416 dns_stats_t * rrsetstats; /* cache DB only */
417 /* Locked by lock. */
419 isc_refcount_t references;
420 unsigned int attributes;
421 rbtdb_serial_t current_serial;
422 rbtdb_serial_t least_serial;
423 rbtdb_serial_t next_serial;
424 rbtdb_version_t * current_version;
425 rbtdb_version_t * future_version;
426 rbtdb_versionlist_t open_versions;
427 isc_boolean_t overmem;
429 dns_dbnode_t *soanode;
430 dns_dbnode_t *nsnode;
433 * This is a linked list used to implement the LRU cache. There will
434 * be node_lock_count linked lists here. Nodes in bucket 1 will be
435 * placed on the linked list rdatasets[1].
437 rdatasetheaderlist_t *rdatasets;
440 * Temporary storage for stale cache nodes and dynamically deleted
441 * nodes that await being cleaned up.
443 rbtnodelist_t *deadnodes;
446 * Heaps. Each of these is used for TTL based expiry.
450 /* Locked by tree_lock. */
455 unsigned int quantum;
458 #define RBTDB_ATTR_LOADED 0x01
459 #define RBTDB_ATTR_LOADING 0x02
466 rbtdb_version_t * rbtversion;
467 rbtdb_serial_t serial;
468 unsigned int options;
469 dns_rbtnodechain_t chain;
470 isc_boolean_t copy_name;
471 isc_boolean_t need_cleanup;
473 dns_rbtnode_t * zonecut;
474 rdatasetheader_t * zonecut_rdataset;
475 rdatasetheader_t * zonecut_sigrdataset;
476 dns_fixedname_t zonecut_name;
488 static void rdataset_disassociate(dns_rdataset_t *rdataset);
489 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
490 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
491 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
492 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
493 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
494 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
497 dns_rdataset_t *negsig);
498 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
501 dns_rdataset_t *negsig);
502 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
503 dns_rdatasetadditional_t type,
504 dns_rdatatype_t qtype,
505 dns_acache_t *acache,
508 dns_dbversion_t **versionp,
509 dns_dbnode_t **nodep,
513 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
514 dns_rdatasetadditional_t type,
515 dns_rdatatype_t qtype,
516 dns_acache_t *acache,
519 dns_dbversion_t *version,
522 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
523 dns_rdataset_t *rdataset,
524 dns_rdatasetadditional_t type,
525 dns_rdatatype_t qtype);
526 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
528 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
530 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
531 isc_boolean_t tree_locked);
532 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
533 isc_stdtime_t now, isc_boolean_t tree_locked);
534 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
535 rdatasetheader_t *newheader);
536 static void prune_tree(isc_task_t *task, isc_event_t *event);
538 static dns_rdatasetmethods_t rdataset_methods = {
539 rdataset_disassociate,
549 rdataset_getadditional,
550 rdataset_setadditional,
551 rdataset_putadditional
554 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
555 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
556 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
557 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
558 dns_rdataset_t *rdataset);
560 static dns_rdatasetitermethods_t rdatasetiter_methods = {
561 rdatasetiter_destroy,
567 typedef struct rbtdb_rdatasetiter {
568 dns_rdatasetiter_t common;
569 rdatasetheader_t * current;
570 } rbtdb_rdatasetiter_t;
572 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
573 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
574 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
575 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
577 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
578 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
579 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
580 dns_dbnode_t **nodep,
582 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
583 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
586 static dns_dbiteratormethods_t dbiterator_methods = {
598 #define DELETION_BATCH_MAX 64
601 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
603 typedef struct rbtdb_dbiterator {
604 dns_dbiterator_t common;
605 isc_boolean_t paused;
606 isc_boolean_t new_origin;
607 isc_rwlocktype_t tree_locked;
609 dns_fixedname_t name;
610 dns_fixedname_t origin;
611 dns_rbtnodechain_t chain;
612 dns_rbtnodechain_t nsec3chain;
613 dns_rbtnodechain_t *current;
615 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
617 isc_boolean_t nsec3only;
618 isc_boolean_t nonsec3;
619 } rbtdb_dbiterator_t;
622 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
623 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
625 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
627 static void overmem(dns_db_t *db, isc_boolean_t overmem);
628 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
629 isc_boolean_t *nsec3createflag);
632 * 'init_count' is used to initialize 'newheader->count' which in turn
633 * is used to determine where in the cycle rrset-order cyclic starts.
634 * We don't lock this as we don't care about simultaneous updates.
637 * Both init_count and header->count can be ISC_UINT32_MAX.
638 * The count on the returned rdataset however can't be as
639 * that indicates that the database does not implement cyclic
642 static unsigned int init_count;
647 * If a routine is going to lock more than one lock in this module, then
648 * the locking must be done in the following order:
652 * Node Lock (Only one from the set may be locked at one time by
657 * Failure to follow this hierarchy can result in deadlock.
663 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * Attach to the database 'source'.
 *
 * The handle is validated with VALID_RBTDB() and one more reference is
 * taken on rbtdb->references. The previous count is not requested
 * (NULL is passed as the second argument).
 */
672 attach(dns_db_t *source, dns_db_t **targetp) {
673 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
675 REQUIRE(VALID_RBTDB(rbtdb));
677 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * Task event handler that hands control back to free_rbtdb().
 *
 * The database is taken from event->ev_arg. free_rbtdb() is called with
 * logging enabled (ISC_TRUE) and with 'event' itself, so the event is
 * owned by free_rbtdb() from this point on.
 */
683 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
684 dns_rbtdb_t *rbtdb = event->ev_arg;
688 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * Adjust the rrset statistics counter that corresponds to 'header'.
 *
 * The statistics type is DNS_RDATASTATSTYPE_VALUE(base, statattributes):
 *   - NXDOMAIN headers carry the NXDOMAIN attribute (base stays 0);
 *   - headers whose base rdatatype is 0 carry the NXRRSET attribute and
 *     are counted under their extended type;
 *   - presumably all remaining headers are counted under their base type
 *     (the branch keyword in front of that assignment is not visible
 *     here -- confirm).
 *
 * Both the increment and the decrement call on rbtdb->rrsetstats are
 * present; presumably 'increment' selects between them (the test is not
 * visible here -- confirm).
 *
 * Must only be called for cache databases; this is enforced by INSIST.
 */
692 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
693 isc_boolean_t increment)
695 dns_rdatastatstype_t statattributes = 0;
696 dns_rdatastatstype_t base = 0;
697 dns_rdatastatstype_t type;
699 /* At the moment we count statistics only for cache DB */
700 INSIST(IS_CACHE(rbtdb));
702 if (NXDOMAIN(header))
703 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
704 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
705 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
706 base = RBTDB_RDATATYPE_EXT(header->type);
708 base = RBTDB_RDATATYPE_BASE(header->type);
710 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
712 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
714 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * Store 'newttl' in header->rdh_ttl and keep the TTL heap consistent.
 *
 * The previous TTL is remembered in 'oldttl' before it is overwritten.
 * Heap maintenance applies only when all of the following hold:
 *   - the database is a cache (IS_CACHE);
 *   - the header is on a heap (heap_index != 0) and the TTL changed;
 *   - rbtdb->heaps and the heap for the bucket of the header node
 *     (index header->node->locknum) both exist.
 * In that case the heap position of the header is fixed up with
 * isc_heap_increased() or isc_heap_decreased(); which call is used for
 * which direction of change is decided by a test not visible here.
 */
718 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
723 oldttl = header->rdh_ttl;
724 header->rdh_ttl = newttl;
726 if (!IS_CACHE(rbtdb))
730 * It's possible the rbtdb is not a cache. If this is the case,
731 * we will not have a heap, and we move on. If we do, though,
732 * we might need to adjust things.
734 if (header->heap_index == 0 || newttl == oldttl)
736 idx = header->node->locknum;
737 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
739 heap = rbtdb->heaps[idx];
742 isc_heap_increased(heap, header->heap_index);
744 isc_heap_decreased(heap, header->heap_index);
748 * These functions allow the heap code to rank the priority of each
749 * element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * Heap ordering callback keyed on TTL.
 *
 * 'v1' and 'v2' are rdatasetheader_t pointers passed as void *. The
 * comparison is h1->rdh_ttl < h2->rdh_ttl (strictly less).
 */
752 ttl_sooner(void *v1, void *v2) {
753 rdatasetheader_t *h1 = v1;
754 rdatasetheader_t *h2 = v2;
756 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * Heap ordering callback keyed on re-signing time.
 *
 * 'v1' and 'v2' are rdatasetheader_t pointers passed as void *. The
 * comparison is h1->resign < h2->resign (strictly less).
 */
762 resign_sooner(void *v1, void *v2) {
763 rdatasetheader_t *h1 = v1;
764 rdatasetheader_t *h2 = v2;
766 if (h1->resign < h2->resign)
772 * This function sets the heap index into the header.
/*
 * Heap index callback: 'what' is an rdatasetheader_t passed as void *,
 * and 'index' is written to its heap_index field. Elsewhere in this
 * file a heap_index of 0 is treated as "not on a heap".
 */
775 set_index(void *what, unsigned int index) {
776 rdatasetheader_t *h = what;
778 h->heap_index = index;
782 * Work out how many nodes can be deleted in the time between two
783 * requests to the nameserver. Smooth the resulting number and use it
784 * as an estimate for the number of nodes to be deleted in the next
/*
 * Compute the next deletion quantum from the previous one.
 *
 * 'old' is the quantum used for the pass that just finished and 'start'
 * is the time that pass began. The visible steps are:
 *   - interval = 1000000 / pps, the time between two packets in
 *     microseconds, with pps read from the global dns_pps;
 *   - usecs = microseconds elapsed since 'start';
 *   - new = old * interval / usecs, i.e. the quantum scaled so that one
 *     pass takes about one packet interval;
 *   - new is then smoothed as (new + 3 * old) / 4 and logged at debug
 *     level 1.
 * Guards and clamping between these steps (for example for a zero pps
 * or a zero elapsed time) are not visible here.
 *
 * NOTE(review): the log call formats 'new' with %d; its declaration is
 * not visible here -- confirm the specifier matches the declared type.
 */
788 adjust_quantum(unsigned int old, isc_time_t *start) {
789 unsigned int pps = dns_pps; /* packets per second */
790 unsigned int interval;
799 interval = 1000000 / pps; /* interval in usec */
802 usecs = isc_time_microdiff(&end, start);
805 * We were unable to measure the amount of time taken.
806 * Double the nodes deleted next time.
813 new = old * interval;
814 new /= (unsigned int)usecs;
821 new = (new + old * 3) / 4;
823 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
824 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * Tear down 'rbtdb' and release everything it owns.
 *
 * Parameters:
 *   rbtdb - the database being destroyed.
 *   log   - ISC_TRUE at both call sites in this file; presumably gates
 *           the "done free_rbtdb" debug message (the test is not
 *           visible here -- confirm).
 *   event - NULL when called from maybe_free_rbtdb(), or the event
 *           delivered to free_rbtdb_callback().
 *
 * Visible sequence:
 *   1. For class IN cache databases, overmem() is called with the
 *      value (isc_boolean_t)-1.
 *   2. There must be no future version. If a current version exists,
 *      its reference is dropped, it is unlinked from open_versions and
 *      its memory is returned.
 *   3. Every per-bucket dead node list is emptied.
 *   4. The main tree and then the NSEC3 tree are destroyed with
 *      dns_rbt_destroy2(). The quantum is 100 when a task is attached
 *      and 0 otherwise. On ISC_R_QUOTA the quantum is re-tuned with
 *      adjust_quantum() and a DNS_EVENT_FREESTORAGE event is sent to
 *      rbtdb->task; presumably the function then returns and is
 *      re-entered through free_rbtdb_callback() -- confirm.
 *   5. Once both trees are gone: the event is freed, the origin name,
 *      node locks, LRU lists, dead node lists, heaps, statistics,
 *      node lock array, tree lock, reference counter, task and
 *      database lock are released, both magic numbers are cleared,
 *      the object memory is returned and ondestroy listeners are
 *      notified.
 */
830 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
832 isc_ondestroy_t ondest;
834 char buf[DNS_NAME_FORMATSIZE];
837 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
838 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
840 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
841 REQUIRE(rbtdb->future_version == NULL);
843 if (rbtdb->current_version != NULL) {
846 isc_refcount_decrement(&rbtdb->current_version->references,
849 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
850 isc_refcount_destroy(&rbtdb->current_version->references);
851 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
852 sizeof(rbtdb_version_t));
856 * We assume the number of remaining dead nodes is reasonably small;
857 * the overhead of unlinking all nodes here should be negligible.
859 for (i = 0; i < rbtdb->node_lock_count; i++) {
862 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
863 while (node != NULL) {
864 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
865 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* A quantum of 0 is used when there is no task to continue on. */
870 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
/* Main tree: may stop early with ISC_R_QUOTA when a quantum is set. */
872 if (rbtdb->tree != NULL) {
873 isc_time_now(&start);
874 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
875 if (result == ISC_R_QUOTA) {
876 INSIST(rbtdb->task != NULL);
877 if (rbtdb->quantum != 0)
878 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
881 event = isc_event_allocate(rbtdb->common.mctx,
883 DNS_EVENT_FREESTORAGE,
886 sizeof(isc_event_t));
889 isc_task_send(rbtdb->task, &event);
892 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
/* NSEC3 tree: same scheme as for the main tree above. */
895 if (rbtdb->nsec3 != NULL) {
896 isc_time_now(&start);
897 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
898 if (result == ISC_R_QUOTA) {
899 INSIST(rbtdb->task != NULL);
900 if (rbtdb->quantum != 0)
901 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
904 event = isc_event_allocate(rbtdb->common.mctx,
906 DNS_EVENT_FREESTORAGE,
909 sizeof(isc_event_t));
912 isc_task_send(rbtdb->task, &event);
915 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
919 isc_event_free(&event);
921 if (dns_name_dynamic(&rbtdb->common.origin))
922 dns_name_format(&rbtdb->common.origin, buf,
925 strcpy(buf, "<UNKNOWN>");
926 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
927 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
928 "done free_rbtdb(%s)", buf);
930 if (dns_name_dynamic(&rbtdb->common.origin))
931 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
932 for (i = 0; i < rbtdb->node_lock_count; i++) {
933 isc_refcount_destroy(&rbtdb->node_locks[i].references);
934 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
938 * Clean up LRU / re-signing order lists.
940 if (rbtdb->rdatasets != NULL) {
941 for (i = 0; i < rbtdb->node_lock_count; i++)
942 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
943 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
944 rbtdb->node_lock_count *
945 sizeof(rdatasetheaderlist_t));
948 * Clean up dead node buckets.
950 if (rbtdb->deadnodes != NULL) {
951 for (i = 0; i < rbtdb->node_lock_count; i++)
952 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
953 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
954 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
957 * Clean up heap objects.
959 if (rbtdb->heaps != NULL) {
960 for (i = 0; i < rbtdb->node_lock_count; i++)
961 isc_heap_destroy(&rbtdb->heaps[i]);
962 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
963 rbtdb->node_lock_count *
964 sizeof(isc_heap_t *));
967 if (rbtdb->rrsetstats != NULL)
968 dns_stats_detach(&rbtdb->rrsetstats);
970 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
971 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
972 isc_rwlock_destroy(&rbtdb->tree_lock);
973 isc_refcount_destroy(&rbtdb->references);
974 if (rbtdb->task != NULL)
975 isc_task_detach(&rbtdb->task);
977 RBTDB_DESTROYLOCK(&rbtdb->lock);
/* Invalidate the object before its memory is returned. */
978 rbtdb->common.magic = 0;
979 rbtdb->common.impmagic = 0;
/* Copy ondest out first: rbtdb itself is freed on the next line. */
980 ondest = rbtdb->common.ondest;
981 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
982 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * Begin shutdown of 'rbtdb' and free it if nothing is using it any more.
 * Called from detach().
 *
 * Visible sequence:
 *   - the cached soanode and nsnode references are detached if set;
 *   - every node lock bucket is flagged as exiting under its node lock
 *     (write mode);
 *   - the reference count of each bucket is read with
 *     isc_refcount_current(); presumably buckets with no references
 *     are counted in 'inactive' (the comparison is not visible here
 *     -- confirm);
 *   - if any bucket was inactive, rbtdb->active is reduced by that
 *     number under the database lock, and want_free is set when it
 *     reaches 0;
 *   - the database name (or "<UNKNOWN>") is logged at debug level 1
 *     and free_rbtdb() is called with a NULL event; presumably this
 *     happens only when want_free is set -- confirm.
 */
986 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
987 isc_boolean_t want_free = ISC_FALSE;
989 unsigned int inactive = 0;
991 /* XXX check for open versions here */
993 if (rbtdb->soanode != NULL)
994 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
995 if (rbtdb->nsnode != NULL)
996 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
999 * Even though there are no external direct references, there still
1000 * may be nodes in use.
1002 for (i = 0; i < rbtdb->node_lock_count; i++) {
1003 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1004 rbtdb->node_locks[i].exiting = ISC_TRUE;
1005 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1006 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1012 if (inactive != 0) {
1013 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1014 rbtdb->active -= inactive;
1015 if (rbtdb->active == 0)
1016 want_free = ISC_TRUE;
1017 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1019 char buf[DNS_NAME_FORMATSIZE];
1020 if (dns_name_dynamic(&rbtdb->common.origin))
1021 dns_name_format(&rbtdb->common.origin, buf,
1024 strcpy(buf, "<UNKNOWN>");
1025 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1026 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1027 "calling free_rbtdb(%s)", buf);
1028 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Detach from the database referenced by '*dbp'.
 *
 * One reference is dropped from rbtdb->references and the resulting
 * count is stored in 'refs'. maybe_free_rbtdb() is then called;
 * presumably only when the count has reached zero (the test is not
 * visible here -- confirm).
 */
1034 detach(dns_db_t **dbp) {
1035 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1038 REQUIRE(VALID_RBTDB(rbtdb));
1040 isc_refcount_decrement(&rbtdb->references, &refs);
1043 maybe_free_rbtdb(rbtdb);
/*
 * Return the current version of 'db' in '*versionp'.
 *
 * rbtdb->current_version is read and a reference is taken on it while
 * the database lock is held in read mode, so the pointer and its
 * reference are obtained under the same lock.
 */
1049 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1050 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1051 rbtdb_version_t *version;
1054 REQUIRE(VALID_RBTDB(rbtdb));
1056 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1057 version = rbtdb->current_version;
1058 isc_refcount_increment(&version->references, &refs);
1059 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1061 *versionp = (dns_dbversion_t *)version;
/*
 * Allocate and initialize a version object from 'mctx'.
 *
 * Parameters:
 *   serial     - stored in version->serial.
 *   references - initial value for the version reference counter.
 *   writer     - stored in version->writer.
 *
 * commit_ok starts as ISC_FALSE and the changed list, resigned list and
 * list link are initialized. If the reference counter cannot be
 * initialized the memory is returned to 'mctx'. The caller in this file
 * (newversion) treats a NULL result as an out-of-memory condition.
 * The secure and NSEC3 related fields are not set by the visible code.
 */
1064 static inline rbtdb_version_t *
1065 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1066 unsigned int references, isc_boolean_t writer)
1068 isc_result_t result;
1069 rbtdb_version_t *version;
1071 version = isc_mem_get(mctx, sizeof(*version));
1072 if (version == NULL)
1074 version->serial = serial;
1075 result = isc_refcount_init(&version->references, references);
1076 if (result != ISC_R_SUCCESS) {
1077 isc_mem_put(mctx, version, sizeof(*version));
1080 version->writer = writer;
1081 version->commit_ok = ISC_FALSE;
1082 ISC_LIST_INIT(version->changed_list);
1083 ISC_LIST_INIT(version->resigned_list);
1084 ISC_LINK_INIT(version, link);
/*
 * Open a new version of 'db' and return it in '*versionp'.
 *
 * Requirements: 'db' is a valid rbtdb, '*versionp' is NULL on entry and
 * no future version is already outstanding.
 *
 * Under the database write lock a version is allocated with serial
 * rbtdb->next_serial and one reference. On success it is marked
 * commit_ok, inherits 'secure' and 'havensec3' from the current
 * version, and either copies the NSEC3 parameters (flags, iterations,
 * hash, salt) from the current version or zeroes them. next_serial is
 * then advanced and the new version is recorded as
 * rbtdb->future_version.
 *
 * Returns ISC_R_NOMEMORY when the allocation failed and ISC_R_SUCCESS
 * otherwise.
 */
1090 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1091 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1092 rbtdb_version_t *version;
1094 REQUIRE(VALID_RBTDB(rbtdb));
1095 REQUIRE(versionp != NULL && *versionp == NULL);
1096 REQUIRE(rbtdb->future_version == NULL);
1098 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1099 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1100 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1102 if (version != NULL) {
1103 version->commit_ok = ISC_TRUE;
1104 version->secure = rbtdb->current_version->secure;
1105 version->havensec3 = rbtdb->current_version->havensec3;
1106 if (version->havensec3) {
1107 version->flags = rbtdb->current_version->flags;
1108 version->iterations =
1109 rbtdb->current_version->iterations;
1110 version->hash = rbtdb->current_version->hash;
1111 version->salt_length =
1112 rbtdb->current_version->salt_length;
1113 memcpy(version->salt, rbtdb->current_version->salt,
1114 version->salt_length);
1117 version->iterations = 0;
1119 version->salt_length = 0;
1120 memset(version->salt, 0, sizeof(version->salt));
1122 rbtdb->next_serial++;
1123 rbtdb->future_version = version;
1125 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1127 if (version == NULL)
1128 return (ISC_R_NOMEMORY);
1130 *versionp = version;
1132 return (ISC_R_SUCCESS);
/*
 * Attach to the version 'source' of 'db' and return it in '*targetp'.
 *
 * One reference is taken on the version reference counter; the
 * resulting count is stored in 'refs'. No database lock is taken by
 * the visible code.
 */
1136 attachversion(dns_db_t *db, dns_dbversion_t *source,
1137 dns_dbversion_t **targetp)
1139 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1140 rbtdb_version_t *rbtversion = source;
1143 REQUIRE(VALID_RBTDB(rbtdb));
1145 isc_refcount_increment(&rbtversion->references, &refs);
1148 *targetp = rbtversion;
/*
 * Record 'node' on the changed list of the writer version 'version'.
 *
 * The rbtdb_changed_t record is allocated before the database write
 * lock is taken. With the lock held, 'version' must be a writer. When
 * the allocation succeeded, a reference is taken on 'node' and the
 * record is appended to version->changed_list with dirty set to
 * ISC_FALSE. version->commit_ok is cleared; presumably only on the
 * allocation-failure path (the branch keyword is not visible here --
 * confirm).
 */
1151 static rbtdb_changed_t *
1152 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1153 dns_rbtnode_t *node)
1155 rbtdb_changed_t *changed;
1159 * Caller must be holding the node lock if its reference must be
1160 * protected by the lock.
1163 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1165 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1167 REQUIRE(version->writer);
1169 if (changed != NULL) {
1170 dns_rbtnode_refincrement(node, &refs);
1172 changed->node = node;
1173 changed->dirty = ISC_FALSE;
1174 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1176 version->commit_ok = ISC_FALSE;
1178 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Release an additional-cache control array that belongs to 'header'.
 *
 * The number of elements is not stored with the array. It is read from
 * the rdataslab that immediately follows the header structure: the
 * first two bytes form a 16-bit count, high byte first. Every element
 * must already have NULL 'entry' and 'cbarg' pointers (checked with
 * INSIST) before count * sizeof(acachectl_t) bytes are returned to
 * 'mctx'.
 */
1184 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1189 unsigned char *raw; /* RDATASLAB */
1192 * The caller must be holding the corresponding node lock.
1198 raw = (unsigned char *)header + sizeof(*header);
1199 count = raw[0] * 256 + raw[1];
1202 * Sanity check: since an additional cache entry has a reference to
1203 * the original DB node (in the callback arg), there should be no
1204 * acache entries when the node can be freed.
1206 for (i = 0; i < count; i++)
1207 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1209 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * Release the noqname structure referenced by '*noqname'.
 *
 * The name is freed if it was dynamically allocated, the 'neg' and
 * 'negsig' rdataslabs are freed if present (their sizes are obtained
 * from dns_rdataslab_size() with a reserve length of 0), and finally
 * the structure itself is returned to 'mctx'.
 */
1213 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1215 if (dns_name_dynamic(&(*noqname)->name))
1216 dns_name_free(&(*noqname)->name, mctx);
1217 if ((*noqname)->neg != NULL)
1218 isc_mem_put(mctx, (*noqname)->neg,
1219 dns_rdataslab_size((*noqname)->neg, 0));
1220 if ((*noqname)->negsig != NULL)
1221 isc_mem_put(mctx, (*noqname)->negsig,
1222 dns_rdataslab_size((*noqname)->negsig, 0));
1223 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * Initialize the bookkeeping fields of the rdataset header 'h'.
 *
 * The visible code initializes the LRU list link. For class IN cache
 * databases the header address is also printed to stderr; presumably
 * this trace is compiled in only for debugging (any preprocessor guard
 * is not visible here -- confirm).
 */
1228 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1230 ISC_LINK_INIT(h, lru_link);
1234 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1235 fprintf(stderr, "initialized header: %p\n", h);
/*
 * Allocate a bare rdataset header from 'mctx' and initialize it with
 * init_rdataset().
 *
 * Only sizeof(*h) bytes are allocated, i.e. the header without any
 * rdata following it. For class IN cache databases the address is
 * printed to stderr; presumably a debugging trace whose preprocessor
 * guard is not visible here -- confirm.
 */
1241 static inline rdatasetheader_t *
1242 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1244 rdatasetheader_t *h;
1246 h = isc_mem_get(mctx, sizeof(*h));
1251 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1252 fprintf(stderr, "allocated header: %p\n", h);
1254 init_rdataset(rbtdb, h);
/*
 * Release the rdataset header 'rdataset' and everything attached to it.
 *
 * Visible sequence:
 *   - if the header exists and carries RDATASET_ATTR_STATCOUNT, the
 *     rrset statistics are decremented;
 *   - the header is removed from the LRU list and from the heap of its
 *     bucket (index rdataset->node->locknum), and heap_index is reset;
 *   - the noqname and closest proofs are freed if present;
 *   - both additional-cache arrays are passed to free_acachearray();
 *   - the memory is returned: sizeof(*rdataset) for NONEXISTENT
 *     headers, otherwise the size reported by dns_rdataslab_size().
 */
1259 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1264 if (EXISTS(rdataset) &&
1265 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1266 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1269 idx = rdataset->node->locknum;
1270 if (ISC_LINK_LINKED(rdataset, lru_link))
1271 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, lru_link);
1272 if (rdataset->heap_index != 0)
1273 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1274 rdataset->heap_index = 0;
1276 if (rdataset->noqname != NULL)
1277 free_noqname(mctx, &rdataset->noqname);
1278 if (rdataset->closest != NULL)
1279 free_noqname(mctx, &rdataset->closest);
1281 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1282 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1284 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1285 size = sizeof(*rdataset);
1287 size = dns_rdataslab_size((unsigned char *)rdataset,
1289 isc_mem_put(mctx, rdataset, size);
/*
 * Mark the rdatasets at 'node' that were created in version 'serial' so
 * that they are ignored from now on.
 *
 * Both the top-level headers (linked via 'next') and the older versions
 * hanging off each of them (linked via 'down') are examined.
 * NOTE(review): 'make_dirty' records that something was marked;
 * presumably it is used to flag the node as dirty afterwards -- confirm.
 */
1293 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1294 rdatasetheader_t *header, *dcurrent;
1295 isc_boolean_t make_dirty = ISC_FALSE;
1298 * Caller must hold the node lock.
1302 * We set the IGNORE attribute on rdatasets with serial number
1303 * 'serial'. When the reference count goes to zero, these rdatasets
1304 * will be cleaned up; until that time, they will be ignored.
1306 for (header = node->data; header != NULL; header = header->next) {
1307 if (header->serial == serial) {
1308 header->attributes |= RDATASET_ATTR_IGNORE;
1309 make_dirty = ISC_TRUE;
/* Apply the same test to every older version below this header. */
1311 for (dcurrent = header->down;
1313 dcurrent = dcurrent->down) {
1314 if (dcurrent->serial == serial) {
1315 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1316 make_dirty = ISC_TRUE;
/*
 * Free every header on the 'down' chain beneath 'top'; 'top' itself is
 * not freed here.
 */
1325 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1327 rdatasetheader_t *d, *down_next;
1329 for (d = top->down; d != NULL; d = down_next) {
/* Fetch the successor first: 'd' is gone after free_rdataset(). */
1330 down_next = d->down;
1331 free_rdataset(rbtdb, mctx, d);
/*
 * Clean up the rdatasets of a cache node.
 *
 * For every top-level header at 'node' the chain of older headers below
 * it is freed; the top-level header itself is additionally unlinked from
 * the node and freed when it is NONEXISTENT or STALE.
 */
1337 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1338 rdatasetheader_t *current, *top_prev, *top_next;
1339 isc_mem_t *mctx = rbtdb->common.mctx;
1342 * Caller must be holding the node lock.
1346 for (current = node->data; current != NULL; current = top_next) {
/* Remember the successor now, since 'current' may be freed below. */
1347 top_next = current->next;
1348 clean_stale_headers(rbtdb, mctx, current);
1350 * If current is nonexistent or stale, we can clean it up.
1352 if ((current->attributes &
1353 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
/* Unlink from the predecessor, or from the node when 'current' is the head. */
1354 if (top_prev != NULL)
1355 top_prev->next = current->next;
1357 node->data = current->next;
1358 free_rdataset(rbtdb, mctx, current);
/*
 * Clean up the rdatasets of a zone node, given the least serial number
 * still in use ('least_serial', which must be non-zero).
 *
 * For every top-level header the visible code works in these steps:
 *   1. remove entries from its 'down' chain that duplicate the serial of
 *      their parent (the second half of that condition is described by
 *      the existing comment as the IGNORE attribute);
 *   2. if the top-level header itself is IGNOREd, either drop it (no
 *      older version) or promote the next older version in its place;
 *   3. find the first older version with a serial below 'least_serial'
 *      and free it together with everything older;
 *   4. note in 'still_dirty' whether older versions remain, and delete
 *      the top-level header if it is NONEXISTENT.
 * NOTE(review): 'still_dirty' is presumably written back to the node's
 * dirty flag at the end -- confirm.
 */
1366 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1367 rbtdb_serial_t least_serial)
1369 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1370 rdatasetheader_t *top_prev, *top_next;
1371 isc_mem_t *mctx = rbtdb->common.mctx;
1372 isc_boolean_t still_dirty = ISC_FALSE;
1375 * Caller must be holding the node lock.
1377 REQUIRE(least_serial != 0);
1380 for (current = node->data; current != NULL; current = top_next) {
1381 top_next = current->next;
1384 * First, we clean up any instances of multiple rdatasets
1385 * with the same serial number, or that have the IGNORE
1389 for (dcurrent = current->down;
1391 dcurrent = down_next) {
1392 down_next = dcurrent->down;
/* Serials must not increase while walking down the chain. */
1393 INSIST(dcurrent->serial <= dparent->serial);
1394 if (dcurrent->serial == dparent->serial ||
/* Splice 'dcurrent' out of the down chain before freeing it. */
1396 if (down_next != NULL)
1397 down_next->next = dparent;
1398 dparent->down = down_next;
1399 free_rdataset(rbtdb, mctx, dcurrent);
1405 * We've now eliminated all IGNORE datasets with the possible
1406 * exception of current, which we now check.
1408 if (IGNORE(current)) {
1409 down_next = current->down;
1410 if (down_next == NULL) {
1411 if (top_prev != NULL)
1412 top_prev->next = current->next;
1414 node->data = current->next;
1415 free_rdataset(rbtdb, mctx, current);
1417 * current no longer exists, so we can
1418 * just continue with the loop.
1423 * Pull up current->down, making it the new
1426 if (top_prev != NULL)
1427 top_prev->next = down_next;
1429 node->data = down_next;
1430 down_next->next = top_next;
1431 free_rdataset(rbtdb, mctx, current);
1432 current = down_next;
1437 * We now try to find the first down node less than the
1441 for (dcurrent = current->down;
1443 dcurrent = down_next) {
1444 down_next = dcurrent->down;
1445 if (dcurrent->serial < least_serial)
1451 * If there is a such an rdataset, delete it and any older
1454 if (dcurrent != NULL) {
1456 down_next = dcurrent->down;
1457 INSIST(dcurrent->serial <= least_serial);
1458 free_rdataset(rbtdb, mctx, dcurrent);
1459 dcurrent = down_next;
1460 } while (dcurrent != NULL);
/* Everything below 'dparent' has been freed, so terminate the chain there. */
1461 dparent->down = NULL;
1465 * Note. The serial number of 'current' might be less than
1466 * least_serial too, but we cannot delete it because it is
1467 * the most recent version, unless it is a NONEXISTENT
1470 if (current->down != NULL) {
1471 still_dirty = ISC_TRUE;
1475 * If this is a NONEXISTENT rdataset, we can delete it.
1477 if (NONEXISTENT(current)) {
1478 if (top_prev != NULL)
1479 top_prev->next = current->next;
1481 node->data = current->next;
1482 free_rdataset(rbtdb, mctx, current);
1492 * Clean up dead nodes. These are nodes which have no references, and
1493 * have no data. They are dead but we could not or chose not to delete
1494 * them when we deleted all the data at that node because we did not want
1495 * to wait for the tree write lock.
1497 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * Take nodes off the dead-node list of lock bucket 'bucketnum' and delete
 * them from their tree.  The loop is bounded by 'count' (initially 10) as
 * well as by the list becoming empty.  A failed deletion is logged as a
 * warning and does not stop the loop.
 */
1500 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1501 dns_rbtnode_t *node;
1502 isc_result_t result;
1503 int count = 10; /* XXXJT: should be adjustable */
1505 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1506 while (node != NULL && count > 0) {
1507 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1510 * Since we're holding a tree write lock, it should be
1511 * impossible for this node to be referenced by others.
1513 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1514 node->data == NULL);
1516 INSIST(!ISC_LINK_LINKED(node, deadlink));
/*
 * The node is deleted from either the NSEC3 tree or the main tree.
 * NOTE(review): presumably chosen by the node's nsec3 flag -- confirm.
 */
1518 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1521 result = dns_rbt_deletenode(rbtdb->tree, node,
1523 if (result != ISC_R_SUCCESS)
1524 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1525 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1526 "cleanup_dead_nodes: "
1527 "dns_rbt_deletenode: %s",
1528 isc_result_totext(result));
/* Continue with whatever is now at the head of the list. */
1529 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1535 * Caller must be holding the node lock if its reference must be protected
/*
 * Add one reference to 'node'.  When this is the node's first reference,
 * the reference counter of the node's lock bucket is incremented as well,
 * so the bucket counter tracks the number of referenced nodes in it.
 */
1539 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1540 unsigned int lockrefs, noderefs;
1541 isc_refcount_t *lockref;
/* The "0" variants are used because the counter may legitimately be zero here. */
1543 dns_rbtnode_refincrement0(node, &noderefs);
1544 if (noderefs == 1) { /* this is the first reference to the node */
1545 lockref = &rbtdb->node_locks[node->locknum].references;
1546 isc_refcount_increment0(lockref, &lockrefs);
1547 INSIST(lockrefs != 0);
1549 INSIST(noderefs != 0);
1553 * This function is assumed to be called when a node is newly referenced
1554 * and can be in the deadnode list. In that case the node must be retrieved
1555 * from the list because it is going to be used. In addition, if the caller
1556 * happens to hold a write lock on the tree, it's a good chance to purge dead
1558 * Note: while a new reference is gained in multiple places, there are only very
1559 * few cases where the node can be in the deadnode list (only empty nodes can
1560 * have been added to the list).
/*
 * Gain a reference to 'node' and make sure it is no longer on its
 * bucket's dead-node list.
 *
 * 'treelocktype' is the kind of tree lock the caller holds; when it is a
 * write lock, cleanup_dead_nodes() is also run for the node's bucket.
 * The cheap check is done under the weak read lock first, and the weak
 * write lock is taken only when 'need_relock' says there is work to do.
 */
1563 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1564 isc_rwlocktype_t treelocktype)
1566 isc_boolean_t need_relock = ISC_FALSE;
1568 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1569 new_reference(rbtdb, node);
/* Probe under the read lock: is there anything that needs the write lock? */
1571 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1572 isc_rwlocktype_read);
1573 if (ISC_LINK_LINKED(node, deadlink))
1574 need_relock = ISC_TRUE;
1575 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1576 treelocktype == isc_rwlocktype_write)
1577 need_relock = ISC_TRUE;
1578 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1579 isc_rwlocktype_read);
/*
 * Re-check under the write lock, since the state may have changed while
 * no weak lock was held.
 * NOTE(review): presumably this section runs only when 'need_relock' is
 * set -- confirm.
 */
1581 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1582 isc_rwlocktype_write);
1583 if (ISC_LINK_LINKED(node, deadlink))
1584 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1586 if (treelocktype == isc_rwlocktype_write)
1587 cleanup_dead_nodes(rbtdb, node->locknum);
1588 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1589 isc_rwlocktype_write);
1592 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1596 * Caller must be holding the node lock; either the "strong", read or write
1597 * lock. Note that the lock must be held even when node references are
1598 * atomically modified; in that case the decrement operation itself does not
1599 * have to be protected, but we must avoid a race condition where multiple
1600 * threads are decreasing the reference to zero simultaneously and at least
1601 * one of them is going to free the node.
1602 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * Drop one reference to 'node' and, where possible, clean up or delete
 * the node.
 *
 * Parameters:
 *   least_serial  least serial still in use; 0 means the caller does not
 *                 know it and it is read from the database under its lock.
 *   nlock         kind of node lock the caller holds; a read lock is
 *                 temporarily replaced by a write lock for the slow path
 *                 and downgraded again before returning.
 *   tlock         kind of tree lock the caller holds; unless it is a write
 *                 lock, a non-blocking attempt is made to obtain one.
 *   pruning       set when called from the pruning event handler; it
 *                 suppresses dispatching another pruning event.
 *
 * Returns ISC_TRUE when no reference to the node remains: in the fast
 * path this is 'nrefs == 0', in the slow path it is 'no_reference'.
 */
1605 static isc_boolean_t
1606 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1607 rbtdb_serial_t least_serial,
1608 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1609 isc_boolean_t pruning)
1611 isc_result_t result;
1612 isc_boolean_t write_locked;
1613 rbtdb_nodelock_t *nodelock;
1614 unsigned int refs, nrefs;
1615 int bucket = node->locknum;
1616 isc_boolean_t no_reference;
1618 nodelock = &rbtdb->node_locks[bucket];
1620 /* Handle easy and typical case first. */
1621 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1622 dns_rbtnode_refdecrement(node, &nrefs);
1623 INSIST((int)nrefs >= 0);
1625 isc_refcount_decrement(&nodelock->references, &refs);
1626 INSIST((int)refs >= 0);
1628 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1631 /* Upgrade the lock? */
1632 if (nlock == isc_rwlocktype_read) {
/* The read lock is released before the write lock is taken, so this is not atomic. */
1633 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1634 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1636 dns_rbtnode_refdecrement(node, &nrefs);
1637 INSIST((int)nrefs >= 0);
1639 /* Restore the lock? */
1640 if (nlock == isc_rwlocktype_read)
1641 NODE_WEAKDOWNGRADE(&nodelock->lock);
/* A dirty node with no references left can have its rdatasets cleaned now. */
1645 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1646 if (IS_CACHE(rbtdb))
1647 clean_cache_node(rbtdb, node);
1649 if (least_serial == 0) {
1651 * Caller doesn't know the least serial.
1654 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1655 least_serial = rbtdb->least_serial;
1656 RBTDB_UNLOCK(&rbtdb->lock,
1657 isc_rwlocktype_read);
1659 clean_zone_node(rbtdb, node, least_serial);
1663 isc_refcount_decrement(&nodelock->references, &refs);
1664 INSIST((int)refs >= 0);
1667 * XXXDCL should this only be done for cache zones?
/* A node that still has data or a down pointer is kept in the tree. */
1669 if (node->data != NULL || node->down != NULL) {
1670 /* Restore the lock? */
1671 if (nlock == isc_rwlocktype_read)
1672 NODE_WEAKDOWNGRADE(&nodelock->lock);
1677 * Attempt to switch to a write lock on the tree. If this fails,
1678 * we will add this node to a linked list of nodes in this locking
1679 * bucket which we will free later.
1681 if (tlock != isc_rwlocktype_write) {
1683 * Locking hierarchy notwithstanding, we don't need to free
1684 * the node lock before acquiring the tree write lock because
1685 * we only do a trylock.
1687 if (tlock == isc_rwlocktype_read)
1688 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1690 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1691 isc_rwlocktype_write);
/* Only success or "busy" are acceptable outcomes of the non-blocking attempt. */
1692 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1693 result == ISC_R_LOCKBUSY);
1695 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1697 write_locked = ISC_TRUE;
1699 no_reference = ISC_TRUE;
1700 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1702 * We can now delete the node if the reference counter is
1703 * zero. This should be typically the case, but a different
1704 * thread may still gain a (new) reference just before the
1705 * current thread locks the tree (e.g., in findnode()).
1709 * If this node is the only one in the level it's in, deleting
1710 * this node may recursively make its parent the only node in
1711 * the parent level; if so, and if no one is currently using
1712 * the parent node, this is almost the only opportunity to
1713 * clean it up. But the recursive cleanup is not that trivial
1714 * since the child and parent may be in different lock buckets,
1715 * which would cause a lock order reversal problem. To avoid
1716 * the trouble, we'll dispatch a separate event for batch
1717 * cleaning. We need to check whether we're deleting the node
1718 * as a result of pruning to avoid infinite dispatching.
1719 * Note: pruning happens only when a task has been set for the
1720 * rbtdb. If the user of the rbtdb chooses not to set a task,
1721 * it's their responsibility to purge stale leaves (e.g. by
1722 * periodic walk-through).
1724 if (!pruning && node->parent != NULL &&
1725 node->parent->down == node && node->left == NULL &&
1726 node->right == NULL && rbtdb->task != NULL) {
1730 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1733 sizeof(isc_event_t));
/*
 * The queued event keeps the node and the database alive: the node gets
 * a fresh reference and the database is attached before the event is
 * sent, so the caller is told that a reference remains.
 */
1735 new_reference(rbtdb, node);
1737 attach((dns_db_t *)rbtdb, &db);
1739 isc_task_send(rbtdb->task, &ev);
1740 no_reference = ISC_FALSE;
1743 * XXX: this is a weird situation. We could
1744 * ignore this error case, but then the stale
1745 * node will unlikely be purged except via a
1746 * rare condition such as manual cleanup. So
1747 * we queue it in the deadnodes list, hoping
1748 * the memory shortage is temporary and the node
1749 * will be deleted later.
1751 isc_log_write(dns_lctx,
1752 DNS_LOGCATEGORY_DATABASE,
1753 DNS_LOGMODULE_CACHE,
1755 "decrement_reference: failed to "
1756 "allocate pruning event");
1757 INSIST(!ISC_LINK_LINKED(node, deadlink));
1758 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
/* Format the node name only when the debug message would actually be logged. */
1762 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1763 char printname[DNS_NAME_FORMATSIZE];
1765 isc_log_write(dns_lctx,
1766 DNS_LOGCATEGORY_DATABASE,
1767 DNS_LOGMODULE_CACHE,
1769 "decrement_reference: "
1770 "delete from rbt: %p %s",
1772 dns_rbt_formatnodename(node,
1774 sizeof(printname)));
1777 INSIST(!ISC_LINK_LINKED(node, deadlink));
/*
 * The node is deleted from either the NSEC3 tree or the main tree.
 * NOTE(review): presumably chosen by the node's nsec3 flag -- confirm.
 */
1779 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1782 result = dns_rbt_deletenode(rbtdb->tree, node,
1784 if (result != ISC_R_SUCCESS) {
1785 isc_log_write(dns_lctx,
1786 DNS_LOGCATEGORY_DATABASE,
1787 DNS_LOGMODULE_CACHE,
1789 "decrement_reference: "
1790 "dns_rbt_deletenode: %s",
1791 isc_result_totext(result));
/* Otherwise an unreferenced node is parked on the bucket's dead-node list. */
1794 } else if (dns_rbtnode_refcurrent(node) == 0) {
1795 INSIST(!ISC_LINK_LINKED(node, deadlink));
1796 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1798 no_reference = ISC_FALSE;
1800 /* Restore the lock? */
1801 if (nlock == isc_rwlocktype_read)
1802 NODE_WEAKDOWNGRADE(&nodelock->lock);
1805 * Relock a read lock, or unlock the write lock if no lock was held.
1807 if (tlock == isc_rwlocktype_none)
1809 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1811 if (tlock == isc_rwlocktype_read)
1813 isc_rwlock_downgrade(&rbtdb->tree_lock);
1815 return (no_reference);
1819 * Prune the tree by recursively cleaning-up single leaves. In the worst
1820 * case, the number of iterations is the number of tree levels, which is at
1821 * most the maximum number of domain name labels, i.e., 127. In practice, this
1822 * should be much smaller (only a few times), and even the worst case would be
1823 * acceptable for a single event.
/*
 * Task event handler that removes a node from the tree and then walks up
 * through parents that were left without a down child.
 *
 * The database is taken from the event's sender field and the starting
 * node from its argument; the event is freed before any work is done.
 * The whole walk runs under the tree write lock, holding one node bucket
 * write lock at a time.  The database is detached at the end.
 */
1826 prune_tree(isc_task_t *task, isc_event_t *event) {
1827 dns_rbtdb_t *rbtdb = event->ev_sender;
1828 dns_rbtnode_t *node = event->ev_arg;
1829 dns_rbtnode_t *parent;
1830 unsigned int locknum;
1834 isc_event_free(&event);
1836 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1837 locknum = node->locknum;
1838 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/* Save the parent first: 'node' may be deleted by decrement_reference(). */
1840 parent = node->parent;
1841 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1842 isc_rwlocktype_write, ISC_TRUE);
1844 if (parent != NULL && parent->down == NULL) {
1846 * node was the only down child of the parent and has
1847 * just been removed. We'll then need to examine the
1848 * parent. Keep the lock if possible; otherwise,
1849 * release the old lock and acquire one for the parent.
1851 if (parent->locknum != locknum) {
1852 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1853 isc_rwlocktype_write);
1854 locknum = parent->locknum;
1855 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1856 isc_rwlocktype_write);
1860 * We need to gain a reference to the node before
1861 * decrementing it in the next iteration. In addition,
1862 * if the node is in the dead-nodes list, extract it
1863 * from the list beforehand as we do in
1864 * reactivate_node().
1866 new_reference(rbtdb, parent);
1867 if (ISC_LINK_LINKED(parent, deadlink)) {
1868 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1875 } while (node != NULL);
/* 'locknum' always names the bucket that is currently locked. */
1876 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1877 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1879 detach((dns_db_t **)&rbtdb);
/*
 * Make 'version' the least open version of 'rbtdb': its serial becomes
 * the database's least serial, and its list of changed records is moved
 * to '*cleanup_list' (the version's own list is left empty).
 */
1883 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1884 rbtdb_changedlist_t *cleanup_list)
1887 * Caller must be holding the database lock.
1890 rbtdb->least_serial = version->serial;
1891 *cleanup_list = version->changed_list;
1892 ISC_LIST_INIT(version->changed_list);
/*
 * Move every changed record of 'version' that is not dirty from the
 * version's changed list to '*cleanup_list'; dirty records stay on the
 * version.
 */
1896 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1897 rbtdb_changed_t *changed, *next_changed;
1900 * If the changed record is dirty, then
1901 * an update created multiple versions of
1902 * a given rdataset. We keep this list
1903 * until we're the least open version, at
1904 * which point it's safe to get rid of any
1907 * If the changed record isn't dirty, then
1908 * we don't need it anymore since we're
1909 * committing and not rolling back.
1911 * The caller must be holding the database lock.
1913 for (changed = HEAD(version->changed_list);
1915 changed = next_changed) {
/* Fetch the successor before the record is possibly moved to another list. */
1916 next_changed = NEXT(changed, link);
1917 if (!changed->dirty) {
1918 UNLINK(version->changed_list,
1920 APPEND(*cleanup_list,
/*
 * Work out the DNSSEC status of 'version' from the records at 'origin'
 * and store it in version->secure.
 *
 * The DNSKEY rdataset is searched for a zone key, the NSEC rdataset and
 * its signature are examined, and setnsec3parameters() is consulted for
 * NSEC3.  The final classification is:
 *   dns_db_secure    version->havensec3, or NSEC present without the
 *                    bit recorded in 'hasoptbit';
 *   dns_db_partial   'hasoptbit' or an NSEC3 chain flagged as being
 *                    created;
 *   dns_db_insecure  otherwise.
 */
1927 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1928 dns_rdataset_t keyset;
1929 dns_rdataset_t nsecset, signsecset;
1930 dns_rdata_t rdata = DNS_RDATA_INIT;
1931 isc_boolean_t haszonekey = ISC_FALSE;
1932 isc_boolean_t hasnsec = ISC_FALSE;
1933 isc_boolean_t hasoptbit = ISC_FALSE;
1934 isc_boolean_t nsec3createflag = ISC_FALSE;
1935 isc_result_t result;
1937 dns_rdataset_init(&keyset);
1938 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1939 0, 0, &keyset, NULL);
1940 if (result == ISC_R_SUCCESS) {
1941 dns_rdata_t keyrdata = DNS_RDATA_INIT;
/* Walk the DNSKEY rdataset looking for a zone key. */
1942 result = dns_rdataset_first(&keyset);
1943 while (result == ISC_R_SUCCESS) {
1944 dns_rdataset_current(&keyset, &keyrdata);
1945 if (dns_zonekey_iszonekey(&keyrdata)) {
1946 haszonekey = ISC_TRUE;
1949 result = dns_rdataset_next(&keyset);
1951 dns_rdataset_disassociate(&keyset);
/*
 * NOTE(review): presumably this is the early exit taken when no zone key
 * was found -- confirm.
 */
1954 version->secure = dns_db_insecure;
1955 version->havensec3 = ISC_FALSE;
1959 dns_rdataset_init(&nsecset);
1960 dns_rdataset_init(&signsecset);
1961 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1962 0, 0, &nsecset, &signsecset);
1963 if (result == ISC_R_SUCCESS) {
/* The NSEC rdataset is only inspected when a signature came back with it. */
1964 if (dns_rdataset_isassociated(&signsecset)) {
1966 result = dns_rdataset_first(&nsecset);
1967 if (result == ISC_R_SUCCESS) {
1968 dns_rdataset_current(&nsecset, &rdata);
1969 hasoptbit = dns_nsec_typepresent(&rdata,
1972 dns_rdataset_disassociate(&signsecset);
1974 dns_rdataset_disassociate(&nsecset);
1977 setnsec3parameters(db, version, &nsec3createflag);
1980 * Do we have a valid NSEC/NSEC3 chain?
1982 if (version->havensec3 || (hasnsec && !hasoptbit))
1983 version->secure = dns_db_secure;
1985 * Do we have a NSEC/NSEC3 chain under creation?
1987 else if (hasoptbit || nsec3createflag)
1988 version->secure = dns_db_partial;
1990 version->secure = dns_db_insecure;
1994 * Walk the origin node looking for NSEC3PARAM records.
1995 * Cache the nsec3 parameters.
/*
 * Search the rdataset headers at the database's origin node for the
 * NSEC3PARAM rdataset visible in 'version' and copy usable parameters
 * (hash, salt, iterations, flags) into 'version', setting
 * version->havensec3.  '*nsec3createflag' is set to ISC_TRUE when a
 * record carrying DNS_NSEC3FLAG_CREATE is seen.
 *
 * The tree read lock and the origin node's read lock are held for the
 * duration of the scan.
 */
1998 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1999 isc_boolean_t *nsec3createflag)
2001 dns_rbtnode_t *node;
2002 dns_rdata_nsec3param_t nsec3param;
2003 dns_rdata_t rdata = DNS_RDATA_INIT;
2004 isc_region_t region;
2005 isc_result_t result;
2006 rdatasetheader_t *header, *header_next;
2007 unsigned char *raw; /* RDATASLAB */
2008 unsigned int count, length;
2009 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2011 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2012 version->havensec3 = ISC_FALSE;
2013 node = rbtdb->origin_node;
2014 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2015 isc_rwlocktype_read);
2016 for (header = node->data;
2018 header = header_next) {
2019 header_next = header->next;
/*
 * Walk down the version chain to the newest header that is not newer
 * than 'version'.
 */
2021 if (header->serial <= version->serial &&
2023 if (NONEXISTENT(header))
2027 header = header->down;
2028 } while (header != NULL);
2030 if (header != NULL &&
2031 header->type == dns_rdatatype_nsec3param) {
2033 * Find A NSEC3PARAM with a supported algorithm.
/* The slab follows the header; its first 16 bits hold the record count. */
2035 raw = (unsigned char *)header + sizeof(*header);
2036 count = raw[0] * 256 + raw[1]; /* count */
2037 #if DNS_RDATASET_FIXED
2038 raw += count * 4 + 2;
2042 while (count-- > 0U) {
/* Each record is prefixed by its 16-bit length. */
2043 length = raw[0] * 256 + raw[1];
2044 #if DNS_RDATASET_FIXED
2050 region.length = length;
2052 dns_rdata_fromregion(&rdata,
2053 rbtdb->common.rdclass,
2054 dns_rdatatype_nsec3param,
2056 result = dns_rdata_tostruct(&rdata,
2059 INSIST(result == ISC_R_SUCCESS);
2060 dns_rdata_reset(&rdata);
/* The unknown test algorithm is let through; other unsupported hashes are not. */
2062 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2063 !dns_nsec3_supportedhash(nsec3param.hash))
2066 #ifdef RFC5155_STRICT
2067 if (nsec3param.flags != 0)
2070 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2072 *nsec3createflag = ISC_TRUE;
2073 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
/* The salt must fit the fixed-size buffer in the version structure. */
2078 INSIST(nsec3param.salt_length <=
2079 sizeof(version->salt));
2080 memcpy(version->salt, nsec3param.salt,
2081 nsec3param.salt_length);
2082 version->hash = nsec3param.hash;
2083 version->salt_length = nsec3param.salt_length;
2084 version->iterations = nsec3param.iterations;
2085 version->flags = nsec3param.flags;
2086 version->havensec3 = ISC_TRUE;
2088 * Look for a better algorithm than the
2089 * unknown test algorithm.
2091 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2097 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2098 isc_rwlocktype_read);
2099 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Release the caller's reference to the version '*versionp' of 'db'.
 *
 * When other references remain, only the counter is decremented (plus a
 * sanity check that the version is not a writer).  When the last
 * reference is dropped, the work is done under the database write lock:
 *   - a writer version is either committed, becoming the database's
 *     current version, or rolled back, depending on 'commit';
 *   - a read-only version that is not the current version is scheduled
 *     for destruction, and its pending changed records go to the new
 *     least version's cleanup list or to the next newer open version.
 * After the database lock is released, the zone's secure status is
 * recomputed for committed writes to non-cache databases, re-signed
 * headers are processed, and every node on the cleanup list is
 * dereferenced under the tree write lock.
 */
2103 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2104 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2105 rbtdb_version_t *version, *cleanup_version, *least_greater;
2106 isc_boolean_t rollback = ISC_FALSE;
2107 rbtdb_changedlist_t cleanup_list;
2108 rdatasetheaderlist_t resigned_list;
2109 rbtdb_changed_t *changed, *next_changed;
2110 rbtdb_serial_t serial, least_serial;
2111 dns_rbtnode_t *rbtnode;
2113 rdatasetheader_t *header;
2114 isc_boolean_t writer;
2116 REQUIRE(VALID_RBTDB(rbtdb));
2117 version = (rbtdb_version_t *)*versionp;
2119 cleanup_version = NULL;
2120 ISC_LIST_INIT(cleanup_list);
2121 ISC_LIST_INIT(resigned_list);
2123 isc_refcount_decrement(&version->references, &refs);
2124 if (refs > 0) { /* typical and easy case first */
2126 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2127 INSIST(!version->writer);
2128 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
/* Last reference gone: the remaining bookkeeping needs the database write lock. */
2133 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
/* Snapshot these now; 'version->writer' is cleared on commit below. */
2134 serial = version->serial;
2135 writer = version->writer;
2136 if (version->writer) {
2139 rbtdb_version_t *cur_version;
2141 INSIST(version->commit_ok);
2142 INSIST(version == rbtdb->future_version);
2144 * The current version is going to be replaced.
2145 * Release the (likely last) reference to it from the
2146 * DB itself and unlink it from the open list.
2148 cur_version = rbtdb->current_version;
2149 isc_refcount_decrement(&cur_version->references,
2152 if (cur_version->serial == rbtdb->least_serial)
2153 INSIST(EMPTY(cur_version->changed_list));
2154 UNLINK(rbtdb->open_versions,
2157 if (EMPTY(rbtdb->open_versions)) {
2159 * We're going to become the least open
2162 make_least_version(rbtdb, version,
2166 * Some other open version is the
2167 * least version. We can't cleanup
2168 * records that were changed in this
2169 * version because the older versions
2170 * may still be in use by an open
2173 * We can, however, discard the
2174 * changed records for things that
2175 * we've added that didn't exist in
2178 cleanup_nondirty(version, &cleanup_list);
2181 * If the (soon to be former) current version
2182 * isn't being used by anyone, we can clean
2186 cleanup_version = cur_version;
2187 APPENDLIST(version->changed_list,
2188 cleanup_version->changed_list,
2192 * Become the current version.
2194 version->writer = ISC_FALSE;
2195 rbtdb->current_version = version;
2196 rbtdb->current_serial = version->serial;
2197 rbtdb->future_version = NULL;
2200 * Keep the current version in the open list, and
2201 * gain a reference for the DB itself (see the DB
2202 * creation function below). This must be the only
2203 * case where we need to increment the counter from
2204 * zero and need to use isc_refcount_increment0().
2206 isc_refcount_increment0(&version->references,
2208 INSIST(cur_ref == 1);
2209 PREPEND(rbtdb->open_versions,
2210 rbtdb->current_version, link);
/* Take over the version's list of re-signed headers for processing below. */
2211 resigned_list = version->resigned_list;
2212 ISC_LIST_INIT(version->resigned_list);
2215 * We're rolling back this transaction.
2217 cleanup_list = version->changed_list;
2218 ISC_LIST_INIT(version->changed_list);
2219 resigned_list = version->resigned_list;
2220 ISC_LIST_INIT(version->resigned_list);
2221 rollback = ISC_TRUE;
2222 cleanup_version = version;
2223 rbtdb->future_version = NULL;
2226 if (version != rbtdb->current_version) {
2228 * There are no external or internal references
2229 * to this version and it can be cleaned up.
2231 cleanup_version = version;
2234 * Find the version with the least serial
2235 * number greater than ours.
2237 least_greater = PREV(version, link);
2238 if (least_greater == NULL)
2239 least_greater = rbtdb->current_version;
2241 INSIST(version->serial < least_greater->serial);
2243 * Is this the least open version?
2245 if (version->serial == rbtdb->least_serial) {
2247 * Yes. Install the new least open
2250 make_least_version(rbtdb,
2255 * Add any unexecuted cleanups to
2256 * those of the least greater version.
2258 APPENDLIST(least_greater->changed_list,
2259 version->changed_list,
2262 } else if (version->serial == rbtdb->least_serial)
2263 INSIST(EMPTY(version->changed_list));
2264 UNLINK(rbtdb->open_versions, version, link);
/* Capture the least serial while the database lock is still held. */
2266 least_serial = rbtdb->least_serial;
2267 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2270 * Update the zone's secure status.
2272 if (writer && commit && !IS_CACHE(rbtdb))
2273 iszonesecure(db, version, rbtdb->origin_node);
2275 if (cleanup_version != NULL) {
/* All changed records must have been moved elsewhere before the version is freed. */
2276 INSIST(EMPTY(cleanup_version->changed_list));
2277 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2278 sizeof(*cleanup_version));
2282 * Commit/rollback re-signed headers.
2284 for (header = HEAD(resigned_list);
2286 header = HEAD(resigned_list)) {
2287 ISC_LIST_UNLINK(resigned_list, header, lru_link);
/*
 * NOTE(review): presumably the re-insertion below happens only when
 * rolling back -- confirm.
 */
2290 lock = &rbtdb->node_locks[header->node->locknum].lock;
2291 NODE_LOCK(lock, isc_rwlocktype_write);
2292 resign_insert(rbtdb, header->node->locknum, header);
2293 NODE_UNLOCK(lock, isc_rwlocktype_write);
2295 decrement_reference(rbtdb, header->node, least_serial,
2296 isc_rwlocktype_write, isc_rwlocktype_none,
2300 if (!EMPTY(cleanup_list)) {
2302 * We acquire a tree write lock here in order to make sure
2303 * that stale nodes will be removed in decrement_reference().
2304 * If we didn't have the lock, those nodes could miss the
2305 * chance to be removed until the server stops. The write lock
2306 * is expensive, but this event should be rare enough to justify
2309 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2310 for (changed = HEAD(cleanup_list);
2312 changed = next_changed) {
/* Fetch the successor first: 'changed' is freed at the end of the iteration. */
2315 next_changed = NEXT(changed, link);
2316 rbtnode = changed->node;
2317 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2319 NODE_LOCK(lock, isc_rwlocktype_write);
2321 * This is a good opportunity to purge any dead nodes,
2324 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
/*
 * NOTE(review): presumably rollback_node() is called only when
 * 'rollback' is set -- confirm.
 */
2327 rollback_node(rbtnode, serial);
2328 decrement_reference(rbtdb, rbtnode, least_serial,
2329 isc_rwlocktype_write,
2330 isc_rwlocktype_write, ISC_FALSE);
2332 NODE_UNLOCK(lock, isc_rwlocktype_write);
2334 isc_mem_put(rbtdb->common.mctx, changed,
2337 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2345 * Add the necessary magic for the wildcard name 'name'
2346 * to be found in 'rbtdb'.
2348 * In order for wildcard matching to work correctly in
2349 * zone_find(), we must ensure that a node for the wildcarding
2350 * level exists in the database, and has its 'find_callback'
2351 * and 'wild' bits set.
2353 * E.g. if the wildcard name is "*.sub.example." then we
2354 * must ensure that "sub.example." exists and is marked as
/*
 * Make sure the node one label above the wildcard name 'name' exists in
 * the main tree and mark it with 'find_callback'.
 *
 * An already existing node (ISC_R_EXISTS) is accepted; any other failure
 * of dns_rbt_addnode() is treated as an error.  Returns ISC_R_SUCCESS
 * once the node has been marked.
 */
2358 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2359 isc_result_t result;
2360 dns_name_t foundname;
2361 dns_offsets_t offsets;
2363 dns_rbtnode_t *node = NULL;
2365 dns_name_init(&foundname, offsets);
2366 n = dns_name_countlabels(name);
/* Take the label sequence that starts at label 1, i.e. skips the first label. */
2369 dns_name_getlabelsequence(name, 1, n, &foundname);
2370 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2371 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2374 node->find_callback = 1;
2376 return (ISC_R_SUCCESS);
/*
 * Examine trailing label sequences of 'name' and, for each one that is
 * itself a wildcard name, apply add_wildcard_magic() and make sure a
 * node for it exists in the main tree.
 *
 * 'l' is the label count of the database origin.
 * NOTE(review): presumably 'l' bounds the loop so that only names below
 * the origin are examined -- confirm.
 * Returns ISC_R_SUCCESS when every step succeeded; an existing node
 * (ISC_R_EXISTS) is not an error.
 */
2380 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2381 isc_result_t result;
2382 dns_name_t foundname;
2383 dns_offsets_t offsets;
2384 unsigned int n, l, i;
2386 dns_name_init(&foundname, offsets);
2387 n = dns_name_countlabels(name);
2388 l = dns_name_countlabels(&rbtdb->common.origin);
2391 dns_rbtnode_t *node = NULL; /* dummy */
/* 'foundname' is the last 'i' labels of 'name'. */
2392 dns_name_getlabelsequence(name, n - i, i, &foundname);
2393 if (dns_name_iswildcard(&foundname)) {
2394 result = add_wildcard_magic(rbtdb, &foundname);
2395 if (result != ISC_R_SUCCESS)
2397 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2399 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2405 return (ISC_R_SUCCESS);
/*
 * Find the node for 'name' in the main tree of 'db', optionally creating
 * it, and return it in '*nodep' with a new reference.
 *
 * The lookup runs under the tree read lock.  If it fails, the read lock
 * is dropped, a partial match is reported as ISC_R_NOTFOUND, and the
 * creation path re-locks the tree for writing and adds the node.
 * NOTE(review): presumably the creation path is taken only when 'create'
 * is true -- confirm.
 * A newly added node gets its lock bucket number from a hash of its
 * name, and wildcard bookkeeping is applied.  On success the node is
 * reactivated and ISC_R_SUCCESS is returned.
 */
2409 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2410 dns_dbnode_t **nodep)
2412 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2413 dns_rbtnode_t *node = NULL;
2414 dns_name_t nodename;
2415 isc_result_t result;
2416 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2418 REQUIRE(VALID_RBTDB(rbtdb));
2420 dns_name_init(&nodename, NULL);
2421 RWLOCK(&rbtdb->tree_lock, locktype);
2422 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2423 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2424 if (result != ISC_R_SUCCESS) {
2425 RWUNLOCK(&rbtdb->tree_lock, locktype);
2427 if (result == DNS_R_PARTIALMATCH)
2428 result = ISC_R_NOTFOUND;
2432 * It would be nice to try to upgrade the lock instead of
2433 * unlocking then relocking.
2435 locktype = isc_rwlocktype_write;
2436 RWLOCK(&rbtdb->tree_lock, locktype);
/*
 * Another thread may have added the node while no tree lock was held;
 * that case shows up as ISC_R_EXISTS and is accepted below.
 */
2438 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2439 if (result == ISC_R_SUCCESS) {
2440 dns_rbt_namefromnode(node, &nodename);
/* Assign the new node to a lock bucket based on a hash of its name. */
2441 #ifdef DNS_RBT_USEHASH
2442 node->locknum = node->hashval % rbtdb->node_lock_count;
2444 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2445 rbtdb->node_lock_count;
/* The result of add_empty_wildcards() is not checked here. */
2448 add_empty_wildcards(rbtdb, name);
2450 if (dns_name_iswildcard(name)) {
2451 result = add_wildcard_magic(rbtdb, name);
2452 if (result != ISC_R_SUCCESS) {
2453 RWUNLOCK(&rbtdb->tree_lock, locktype);
2457 } else if (result != ISC_R_EXISTS) {
2458 RWUNLOCK(&rbtdb->tree_lock, locktype);
/* 'locktype' tells reactivate_node() whether the tree write lock is held. */
2462 reactivate_node(rbtdb, node, locktype);
2463 RWUNLOCK(&rbtdb->tree_lock, locktype);
2465 *nodep = (dns_dbnode_t *)node;
2467 return (ISC_R_SUCCESS);
/*
 * Find the node for 'name' in the NSEC3 tree of 'db', optionally
 * creating it, and return it in '*nodep' with a new reference.
 *
 * The lookup runs under the tree read lock.  If it fails, the read lock
 * is dropped, a partial match is reported as ISC_R_NOTFOUND, and the
 * creation path re-locks the tree for writing and adds the node.
 * NOTE(review): presumably the creation path is taken only when 'create'
 * is true -- confirm.
 * A newly added node gets its lock bucket number from a hash of its
 * name.  Unlike findnode(), the reference is taken directly with
 * new_reference() under the node's strong lock, and no wildcard handling
 * is done.
 */
2471 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2472 dns_dbnode_t **nodep)
2474 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2475 dns_rbtnode_t *node = NULL;
2476 dns_name_t nodename;
2477 isc_result_t result;
2478 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2480 REQUIRE(VALID_RBTDB(rbtdb));
2482 dns_name_init(&nodename, NULL);
2483 RWLOCK(&rbtdb->tree_lock, locktype);
2484 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2485 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2486 if (result != ISC_R_SUCCESS) {
2487 RWUNLOCK(&rbtdb->tree_lock, locktype);
2489 if (result == DNS_R_PARTIALMATCH)
2490 result = ISC_R_NOTFOUND;
2494 * It would be nice to try to upgrade the lock instead of
2495 * unlocking then relocking.
2497 locktype = isc_rwlocktype_write;
2498 RWLOCK(&rbtdb->tree_lock, locktype);
/*
 * Another thread may have added the node while no tree lock was held;
 * that case shows up as ISC_R_EXISTS and is accepted below.
 */
2500 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2501 if (result == ISC_R_SUCCESS) {
2502 dns_rbt_namefromnode(node, &nodename);
/* Assign the new node to a lock bucket based on a hash of its name. */
2503 #ifdef DNS_RBT_USEHASH
2504 node->locknum = node->hashval % rbtdb->node_lock_count;
2506 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2507 rbtdb->node_lock_count;
2510 } else if (result != ISC_R_EXISTS) {
2511 RWUNLOCK(&rbtdb->tree_lock, locktype);
/* Every node in the NSEC3 tree is expected to carry the nsec3 flag. */
2515 INSIST(node->nsec3);
2516 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2517 new_reference(rbtdb, node);
2518 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2519 RWUNLOCK(&rbtdb->tree_lock, locktype);
2521 *nodep = (dns_dbnode_t *)node;
2523 return (ISC_R_SUCCESS);
/*
 * Node callback handed to dns_rbt_findnode() by zone_find().  It scans the
 * rdataset headers at node for an NS, DNAME or RRSIG DNAME entry that is
 * visible at search->serial and, when one is found, records node as the
 * zone cut in the search block (arg).
 *
 * Only the topmost cut is kept: if search->zonecut is already set the
 * function returns DNS_R_CONTINUE straight away.  An NS rdataset is only
 * considered when node is not the origin node or the database is a stub.
 * DNAME takes precedence over NS when both are present.
 *
 * When a cut is recorded, a reference is taken on node, need_cleanup is
 * set and search->wild is cleared.  Without DNS_DBFIND_GLUEOK the result
 * becomes DNS_R_PARTIALMATCH; otherwise name is copied into
 * search->zonecut_name and copy_name is set so the caller can report it.
 * When no cut is found, a wild node is remembered in search->wild unless
 * DNS_DBFIND_NOWILD was given.
 *
 * The node lock is held for reading while the headers are examined.
 */
2527 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2528 rbtdb_search_t *search = arg;
2529 rdatasetheader_t *header, *header_next;
2530 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2531 rdatasetheader_t *found;
2532 isc_result_t result;
2533 dns_rbtnode_t *onode;
2536 * We only want to remember the topmost zone cut, since it's the one
2537 * that counts, so we'll just continue if we've already found a
2540 if (search->zonecut != NULL)
2541 return (DNS_R_CONTINUE);
2544 result = DNS_R_CONTINUE;
2545 onode = search->rbtdb->origin_node;
2547 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2548 isc_rwlocktype_read);
2551 * Look for an NS or DNAME rdataset active in our version.
2554 dname_header = NULL;
2555 sigdname_header = NULL;
2556 for (header = node->data; header != NULL; header = header_next) {
2557 header_next = header->next;
2558 if (header->type == dns_rdatatype_ns ||
2559 header->type == dns_rdatatype_dname ||
2560 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2562 if (header->serial <= search->serial &&
2565 * Is this a "this rdataset doesn't
2568 if (NONEXISTENT(header))
2572 header = header->down;
2573 } while (header != NULL);
2574 if (header != NULL) {
2575 if (header->type == dns_rdatatype_dname)
2576 dname_header = header;
2577 else if (header->type ==
2578 RBTDB_RDATATYPE_SIGDNAME)
2579 sigdname_header = header;
2580 else if (node != onode ||
2581 IS_STUB(search->rbtdb)) {
2583 * We've found an NS rdataset that
2584 * isn't at the origin node. We check
2585 * that they're not at the origin node,
2586 * because otherwise we'd erroneously
2587 * treat the zone top as if it were
2597 * Did we find anything?
2599 if (dname_header != NULL) {
2601 * Note that DNAME has precedence over NS if both exist.
2603 found = dname_header;
2604 search->zonecut_sigrdataset = sigdname_header;
2605 } else if (ns_header != NULL) {
2607 search->zonecut_sigrdataset = NULL;
2610 if (found != NULL) {
2612 * We increment the reference count on node to ensure that
2613 * search->zonecut_rdataset will still be valid later.
2615 new_reference(search->rbtdb, node);
2616 search->zonecut = node;
2617 search->zonecut_rdataset = found;
2618 search->need_cleanup = ISC_TRUE;
2620 * Since we've found a zonecut, anything beneath it is
2621 * glue and is not subject to wildcard matching, so we
2622 * may clear search->wild.
2624 search->wild = ISC_FALSE;
2625 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2627 * If the caller does not want to find glue, then
2628 * this is the best answer and the search should
2631 result = DNS_R_PARTIALMATCH;
2636 * The search will continue beneath the zone cut.
2637 * This may or may not be the best match. In case it
2638 * is, we need to remember the node name.
2640 zcname = dns_fixedname_name(&search->zonecut_name);
2641 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2643 search->copy_name = ISC_TRUE;
2647 * There is no zonecut at this node which is active in this
2650 * If this is a "wild" node and the caller hasn't disabled
2651 * wildcard matching, remember that we've seen a wild node
2652 * in case we need to go searching for wildcard matches
2655 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2656 search->wild = ISC_TRUE;
2659 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2660 isc_rwlocktype_read);
/*
 * Associate rdataset with the rdataslab stored directly after header, and
 * take a new reference on node on behalf of the rdataset.
 *
 * A NULL rdataset is tested for first; nothing below that test touches
 * rdataset before the INSIST that it is disassociated (methods == NULL).
 *
 * Fields filled in: methods, rdclass (from the database), type and covers
 * (split out of header->type), ttl (header->rdh_ttl - now), trust, the
 * private1..private7 slots (database, node, raw slab, iterator state,
 * noqname and closest proofs) and resign.  The NXDOMAIN, OPTOUT, NOQNAME,
 * CLOSEST and RESIGN attribute bits are ORed in where applicable.
 *
 * rdataset->count is taken from header->count, which is post-incremented
 * here; a value of ISC_UINT32_MAX is mapped to 0.  As the comment in the
 * body explains, that increment is done with only the node read lock held.
 */
2666 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2667 rdatasetheader_t *header, isc_stdtime_t now,
2668 dns_rdataset_t *rdataset)
2670 unsigned char *raw; /* RDATASLAB */
2673 * Caller must be holding the node reader lock.
2674 * XXXJT: technically, we need a writer lock, since we'll increment
2675 * the header count below. However, since the actual counter value
2676 * doesn't matter, we prioritize performance here. (We may want to
2677 * use atomic increment when available).
2680 if (rdataset == NULL)
2683 new_reference(rbtdb, node);
2685 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2687 rdataset->methods = &rdataset_methods;
2688 rdataset->rdclass = rbtdb->common.rdclass;
2689 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2690 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2691 rdataset->ttl = header->rdh_ttl - now;
2692 rdataset->trust = header->trust;
2693 if (NXDOMAIN(header))
2694 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2696 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2697 rdataset->private1 = rbtdb;
2698 rdataset->private2 = node;
2699 raw = (unsigned char *)header + sizeof(*header);
2700 rdataset->private3 = raw;
2701 rdataset->count = header->count++;
2702 if (rdataset->count == ISC_UINT32_MAX)
2703 rdataset->count = 0;
2706 * Reset iterator state.
2708 rdataset->privateuint4 = 0;
2709 rdataset->private5 = NULL;
2712 * Add noqname proof.
2714 rdataset->private6 = header->noqname;
2715 if (rdataset->private6 != NULL)
2716 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2717 rdataset->private7 = header->closest;
2718 if (rdataset->private7 != NULL)
2719 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2722 * Copy out re-signing information.
2724 if (RESIGN(header)) {
2725 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2726 rdataset->resign = header->resign;
2728 rdataset->resign = 0;
/*
 * Turn the zone cut remembered in the search block into the answer that
 * is handed back to the caller of the find operation.
 *
 * When foundname is given and search->copy_name is set, the saved
 * zonecut_name is copied into foundname before anything else so that a
 * failed copy leaves nothing to undo.  When nodep is given, the reference
 * already held by the search block is reused and need_cleanup is cleared.
 * When rdataset is given, the zone cut rdataset (and its signature, if
 * both sigrdataset and search->zonecut_sigrdataset are non-NULL) is bound
 * under the node read lock.
 *
 * Returns DNS_R_DNAME when the zone cut rdataset is a DNAME and
 * DNS_R_DELEGATION otherwise.
 */
2731 static inline isc_result_t
2732 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2733 dns_name_t *foundname, dns_rdataset_t *rdataset,
2734 dns_rdataset_t *sigrdataset)
2736 isc_result_t result;
2738 rbtdb_rdatatype_t type;
2739 dns_rbtnode_t *node;
2742 * The caller MUST NOT be holding any node locks.
2745 node = search->zonecut;
2746 type = search->zonecut_rdataset->type;
2749 * If we have to set foundname, we do it before anything else.
2750 * If we were to set foundname after we had set nodep or bound the
2751 * rdataset, then we'd have to undo that work if dns_name_copy()
2752 * failed. By setting foundname first, there's nothing to undo if
2755 if (foundname != NULL && search->copy_name) {
2756 zcname = dns_fixedname_name(&search->zonecut_name);
2757 result = dns_name_copy(zcname, foundname, NULL);
2758 if (result != ISC_R_SUCCESS)
2761 if (nodep != NULL) {
2763 * Note that we don't have to increment the node's reference
2764 * count here because we're going to use the reference we
2765 * already have in the search block.
2768 search->need_cleanup = ISC_FALSE;
2770 if (rdataset != NULL) {
2771 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2772 isc_rwlocktype_read);
2773 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2774 search->now, rdataset);
2775 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2776 bind_rdataset(search->rbtdb, node,
2777 search->zonecut_sigrdataset,
2778 search->now, sigrdataset);
2779 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2780 isc_rwlocktype_read);
2783 if (type == dns_rdatatype_dname)
2784 return (DNS_R_DNAME);
2785 return (DNS_R_DELEGATION);
/*
 * Decide whether data of the given type found at node, owned by name, may
 * be treated as glue for the zone cut recorded in search.
 *
 * Type filter: NS is only acceptable at the zone cut node itself; apart
 * from NS, only A, AAAA and A6 are acceptable glue types.
 *
 * For acceptable types the raw rdataslab that follows
 * search->zonecut_rdataset is read directly: the record count and record
 * length are decoded as big-endian 16-bit values, a region covering the
 * record is turned into a name with dns_name_fromregion(), and that name
 * is compared with name using dns_name_compare().
 *
 * NOTE(review): presumably valid becomes ISC_TRUE on the first NS target
 * equal to name and is the return value -- TODO confirm.
 */
2788 static inline isc_boolean_t
2789 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2790 dns_rbtnode_t *node)
2792 unsigned char *raw; /* RDATASLAB */
2793 unsigned int count, size;
2795 isc_boolean_t valid = ISC_FALSE;
2796 dns_offsets_t offsets;
2797 isc_region_t region;
2798 rdatasetheader_t *header;
2801 * No additional locking is required.
2805 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2806 * if it occurs at a zone cut, but is not valid below it.
2808 if (type == dns_rdatatype_ns) {
2809 if (node != search->zonecut) {
2812 } else if (type != dns_rdatatype_a &&
2813 type != dns_rdatatype_aaaa &&
2814 type != dns_rdatatype_a6) {
2818 header = search->zonecut_rdataset;
2819 raw = (unsigned char *)header + sizeof(*header);
2820 count = raw[0] * 256 + raw[1];
2821 #if DNS_RDATASET_FIXED
2822 raw += 2 + (4 * count);
2829 size = raw[0] * 256 + raw[1];
2830 #if DNS_RDATASET_FIXED
2836 region.length = size;
2839 * XXX Until we have rdata structures, we have no choice but
2840 * to directly access the rdata format.
2842 dns_name_init(&ns_name, offsets);
2843 dns_name_fromregion(&ns_name, &region);
2844 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * Advance chain with dns_rbtnodechain_next() and, at each node reached,
 * scan its headers under the node read lock for one that is visible at
 * search->serial, not ignored and existing.  The name of the node the walk
 * ends on (prefix + origin) is assembled into a local fixed name and
 * tested against name with dns_name_issubdomain().
 *
 * The chain passed in is modified by the walk, so callers that still need
 * their position pass a copy.
 *
 * NOTE(review): presumably the walk stops at the first node with such a
 * header and ISC_TRUE is returned when that name lies below name, i.e.
 * name is an empty non-terminal -- TODO confirm the loop exit, the
 * assignment to answer and the return.
 */
2853 static inline isc_boolean_t
2854 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2857 dns_fixedname_t fnext;
2858 dns_fixedname_t forigin;
2863 dns_rbtnode_t *node;
2864 isc_result_t result;
2865 isc_boolean_t answer = ISC_FALSE;
2866 rdatasetheader_t *header;
2868 rbtdb = search->rbtdb;
2870 dns_name_init(&prefix, NULL);
2871 dns_fixedname_init(&fnext);
2872 next = dns_fixedname_name(&fnext);
2873 dns_fixedname_init(&forigin);
2874 origin = dns_fixedname_name(&forigin);
2876 result = dns_rbtnodechain_next(chain, NULL, NULL);
2877 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2879 result = dns_rbtnodechain_current(chain, &prefix,
2881 if (result != ISC_R_SUCCESS)
2883 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2884 isc_rwlocktype_read);
2885 for (header = node->data;
2887 header = header->next) {
2888 if (header->serial <= search->serial &&
2889 !IGNORE(header) && EXISTS(header))
2892 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2893 isc_rwlocktype_read);
2896 result = dns_rbtnodechain_next(chain, NULL, NULL);
2898 if (result == ISC_R_SUCCESS)
2899 result = dns_name_concatenate(&prefix, origin, next, NULL);
2900 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * Check whether qname sits at or below an empty node between the wildcard
 * wname and the names that surround the search position.  (The spelling of
 * the function name is historical; find_wildcard() calls it by this name.)
 *
 * Works on a private copy of search->chain.  The copy is first walked
 * backwards with dns_rbtnodechain_prev() and then forwards with
 * dns_rbtnodechain_next(); at each node the headers are scanned under the
 * node read lock for one that is visible at search->serial, not ignored
 * and existing.  The names reached are assembled into prev and next; if
 * either cannot be built, check_prev or check_next is cleared.
 *
 * A clone of qname is then shortened one leading label at a time until it
 * equals wname with its leading (wildcard) label removed.  At each step
 * prev and next are tested with dns_name_issubdomain() against the
 * shortened name.
 *
 * NOTE(review): presumably answer becomes ISC_TRUE on the first such
 * match and is the return value -- TODO confirm.
 */
2905 static inline isc_boolean_t
2906 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2907 dns_fixedname_t fnext;
2908 dns_fixedname_t forigin;
2909 dns_fixedname_t fprev;
2917 dns_rbtnode_t *node;
2918 dns_rbtnodechain_t chain;
2919 isc_boolean_t check_next = ISC_TRUE;
2920 isc_boolean_t check_prev = ISC_TRUE;
2921 isc_boolean_t answer = ISC_FALSE;
2922 isc_result_t result;
2923 rdatasetheader_t *header;
2926 rbtdb = search->rbtdb;
2928 dns_name_init(&name, NULL);
2929 dns_name_init(&tname, NULL);
2930 dns_name_init(&rname, NULL);
2931 dns_fixedname_init(&fnext);
2932 next = dns_fixedname_name(&fnext);
2933 dns_fixedname_init(&fprev);
2934 prev = dns_fixedname_name(&fprev);
2935 dns_fixedname_init(&forigin);
2936 origin = dns_fixedname_name(&forigin);
2939 * Find if qname is at or below a empty node.
2940 * Use our own copy of the chain.
2943 chain = search->chain;
2946 result = dns_rbtnodechain_current(&chain, &name,
2948 if (result != ISC_R_SUCCESS)
2950 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2951 isc_rwlocktype_read);
2952 for (header = node->data;
2954 header = header->next) {
2955 if (header->serial <= search->serial &&
2956 !IGNORE(header) && EXISTS(header))
2959 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2960 isc_rwlocktype_read);
2963 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2964 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2965 if (result == ISC_R_SUCCESS)
2966 result = dns_name_concatenate(&name, origin, prev, NULL);
2967 if (result != ISC_R_SUCCESS)
2968 check_prev = ISC_FALSE;
2970 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2971 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2973 result = dns_rbtnodechain_current(&chain, &name,
2975 if (result != ISC_R_SUCCESS)
2977 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2978 isc_rwlocktype_read);
2979 for (header = node->data;
2981 header = header->next) {
2982 if (header->serial <= search->serial &&
2983 !IGNORE(header) && EXISTS(header))
2986 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2987 isc_rwlocktype_read);
2990 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2992 if (result == ISC_R_SUCCESS)
2993 result = dns_name_concatenate(&name, origin, next, NULL);
2994 if (result != ISC_R_SUCCESS)
2995 check_next = ISC_FALSE;
2997 dns_name_clone(qname, &rname);
3000 * Remove the wildcard label to find the terminal name.
3002 n = dns_name_countlabels(wname);
3003 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3006 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3007 (check_next && dns_name_issubdomain(next, &rname))) {
3012 * Remove the left hand label.
3014 n = dns_name_countlabels(&rname);
3015 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3016 } while (!dns_name_equal(&rname, &tname));
/*
 * Search the ancestor levels recorded in search->chain for a wildcard that
 * is active in the version being searched.
 *
 * Starting from search->chain.level_matches, each level node is examined
 * under its node read lock to see whether it has a header that is visible
 * at search->serial, not ignored and existing.  For the level being
 * checked, the wildcard name is built by prepending dns_wildcardname to
 * the level node name and appending the names of the enclosing levels,
 * and that name is looked up in rbtdb->tree with DNS_RBTFIND_EMPTYDATA.
 *
 * A wildcard node counts as a match when it has an active header or when
 * activeempty() reports true for it; the match is rejected with
 * ISC_R_NOTFOUND when activeemtpynode() reports true for qname.  Lookup
 * results other than ISC_R_NOTFOUND and DNS_R_PARTIALMATCH are treated as
 * errors.  If the level node itself is active, wildcards at higher levels
 * cannot apply and the result is ISC_R_NOTFOUND.
 *
 * The result starts out as ISC_R_NOTFOUND.
 *
 * NOTE(review): presumably the matching wildcard node is stored through
 * nodep on success -- TODO confirm.
 */
3020 static inline isc_result_t
3021 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3025 dns_rbtnode_t *node, *level_node, *wnode;
3026 rdatasetheader_t *header;
3027 isc_result_t result = ISC_R_NOTFOUND;
3030 dns_fixedname_t fwname;
3032 isc_boolean_t done, wild, active;
3033 dns_rbtnodechain_t wchain;
3036 * Caller must be holding the tree lock and MUST NOT be holding
3041 * Examine each ancestor level. If the level's wild bit
3042 * is set, then construct the corresponding wildcard name and
3043 * search for it. If the wildcard node exists, and is active in
3044 * this version, we're done. If not, then we next check to see
3045 * if the ancestor is active in this version. If so, then there
3046 * can be no possible wildcard match and again we're done. If not,
3047 * continue the search.
3050 rbtdb = search->rbtdb;
3051 i = search->chain.level_matches;
3055 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3056 isc_rwlocktype_read);
3059 * First we try to figure out if this node is active in
3060 * the search's version. We do this now, even though we
3061 * may not need the information, because it simplifies the
3062 * locking and code flow.
3064 for (header = node->data;
3066 header = header->next) {
3067 if (header->serial <= search->serial &&
3068 !IGNORE(header) && EXISTS(header))
3081 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3082 isc_rwlocktype_read);
3086 * Construct the wildcard name for this level.
3088 dns_name_init(&name, NULL);
3089 dns_rbt_namefromnode(node, &name);
3090 dns_fixedname_init(&fwname);
3091 wname = dns_fixedname_name(&fwname);
3092 result = dns_name_concatenate(dns_wildcardname, &name,
3095 while (result == ISC_R_SUCCESS && j != 0) {
3097 level_node = search->chain.levels[j];
3098 dns_name_init(&name, NULL);
3099 dns_rbt_namefromnode(level_node, &name);
3100 result = dns_name_concatenate(wname,
3105 if (result != ISC_R_SUCCESS)
3109 dns_rbtnodechain_init(&wchain, NULL);
3110 result = dns_rbt_findnode(rbtdb->tree, wname,
3111 NULL, &wnode, &wchain,
3112 DNS_RBTFIND_EMPTYDATA,
3114 if (result == ISC_R_SUCCESS) {
3118 * We have found the wildcard node. If it
3119 * is active in the search's version, we're
3122 lock = &rbtdb->node_locks[wnode->locknum].lock;
3123 NODE_LOCK(lock, isc_rwlocktype_read);
3124 for (header = wnode->data;
3126 header = header->next) {
3127 if (header->serial <= search->serial &&
3128 !IGNORE(header) && EXISTS(header))
3131 NODE_UNLOCK(lock, isc_rwlocktype_read);
3132 if (header != NULL ||
3133 activeempty(search, &wchain, wname)) {
3134 if (activeemtpynode(search, qname,
3136 return (ISC_R_NOTFOUND);
3139 * The wildcard node is active!
3141 * Note: result is still ISC_R_SUCCESS
3142 * so we don't have to set it.
3147 } else if (result != ISC_R_NOTFOUND &&
3148 result != DNS_R_PARTIALMATCH) {
3150 * An error has occurred. Bail out.
3158 * The level node is active. Any wildcarding
3159 * present at higher levels has no
3160 * effect and we're done.
3162 result = ISC_R_NOTFOUND;
3168 node = search->chain.levels[i];
/*
 * Check whether an NSEC3 rdataset carries the NSEC3 parameters of the
 * version being searched.
 *
 * header must be of type NSEC3 (REQUIRE).  The raw rdataslab that follows
 * header is read directly: the record count and each record length are
 * decoded as big-endian 16-bit values, each record is wrapped in a region,
 * converted with dns_rdata_fromregion() and dns_rdata_tostruct(), and its
 * hash, iterations, salt length and salt bytes are compared with those in
 * search->rbtversion.  The rdata is reset before the next record is read.
 *
 * NOTE(review): presumably returns ISC_TRUE on the first record whose
 * parameters all match and ISC_FALSE when none does -- TODO confirm.
 */
3176 static isc_boolean_t
3177 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3179 dns_rdata_t rdata = DNS_RDATA_INIT;
3180 dns_rdata_nsec3_t nsec3;
3181 unsigned char *raw; /* RDATASLAB */
3182 unsigned int rdlen, count;
3183 isc_region_t region;
3184 isc_result_t result;
3186 REQUIRE(header->type == dns_rdatatype_nsec3);
3188 raw = (unsigned char *)header + sizeof(*header);
3189 count = raw[0] * 256 + raw[1]; /* count */
3190 #if DNS_RDATASET_FIXED
3191 raw += count * 4 + 2;
3195 while (count-- > 0) {
3196 rdlen = raw[0] * 256 + raw[1];
3197 #if DNS_RDATASET_FIXED
3203 region.length = rdlen;
3204 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3205 dns_rdatatype_nsec3, &region);
3207 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3208 INSIST(result == ISC_R_SUCCESS);
3209 if (nsec3.hash == search->rbtversion->hash &&
3210 nsec3.iterations == search->rbtversion->iterations &&
3211 nsec3.salt_length == search->rbtversion->salt_length &&
3212 memcmp(nsec3.salt, search->rbtversion->salt,
3213 nsec3.salt_length) == 0)
3215 dns_rdata_reset(&rdata);
/*
 * Walk backwards from the current position of search->chain to find the
 * closest preceding node that holds the denial-of-existence record for
 * this search, and bind it for the caller.
 *
 * When tree is the NSEC3 tree of the database the record type sought is
 * NSEC3 with signature type RBTDB_RDATATYPE_SIGNSEC3; otherwise it is NSEC
 * with RBTDB_RDATATYPE_SIGNSEC.  A signature is required only when secure
 * equals dns_db_secure.
 *
 * Each node is examined under its node read lock.  Headers newer than
 * search->serial are skipped by following the down chain, and headers
 * marked nonexistent do not count.  Per node the outcome is:
 *   - an NSEC3 found while the version has NSEC3 but the parameters do not
 *     match (matchparams): treat the node as empty and step back;
 *   - record found, and signature found or not needed: build the owner
 *     name from name and origin, take a node reference if nodep is given
 *     and bind the rdataset(s);
 *   - active node with neither record nor signature: treat as empty and
 *     step back;
 *   - only one of the two present: DNS_R_BADDB;
 *   - inactive node: step back.
 *
 * If the walk runs out with ISC_R_NOMORE and wrapping is enabled, the
 * chain is repositioned with dns_rbtnodechain_last().  A final
 * ISC_R_NOMORE is reported as DNS_R_BADDB.
 */
3220 static inline isc_result_t
3221 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3222 dns_name_t *foundname, dns_rdataset_t *rdataset,
3223 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3224 dns_db_secure_t secure)
3226 dns_rbtnode_t *node;
3227 rdatasetheader_t *header, *header_next, *found, *foundsig;
3228 isc_boolean_t empty_node;
3229 isc_result_t result;
3230 dns_fixedname_t fname, forigin;
3231 dns_name_t *name, *origin;
3232 dns_rdatatype_t type;
3233 rbtdb_rdatatype_t sigtype;
3234 isc_boolean_t wraps;
3235 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3237 if (tree == search->rbtdb->nsec3) {
3238 type = dns_rdatatype_nsec3;
3239 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3242 type = dns_rdatatype_nsec;
3243 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3250 dns_fixedname_init(&fname);
3251 name = dns_fixedname_name(&fname);
3252 dns_fixedname_init(&forigin);
3253 origin = dns_fixedname_name(&forigin);
3254 result = dns_rbtnodechain_current(&search->chain, name,
3256 if (result != ISC_R_SUCCESS)
3258 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3259 isc_rwlocktype_read);
3262 empty_node = ISC_TRUE;
3263 for (header = node->data;
3265 header = header_next) {
3266 header_next = header->next;
3268 * Look for an active, extant NSEC or RRSIG NSEC.
3271 if (header->serial <= search->serial &&
3274 * Is this a "this rdataset doesn't
3277 if (NONEXISTENT(header))
3281 header = header->down;
3282 } while (header != NULL);
3283 if (header != NULL) {
3285 * We now know that there is at least one
3286 * active rdataset at this node.
3288 empty_node = ISC_FALSE;
3289 if (header->type == type) {
3291 if (foundsig != NULL)
3293 } else if (header->type == sigtype) {
3301 if (found != NULL && search->rbtversion->havensec3 &&
3302 found->type == dns_rdatatype_nsec3 &&
3303 !matchparams(found, search)) {
3304 empty_node = ISC_TRUE;
3307 result = dns_rbtnodechain_prev(&search->chain,
3309 } else if (found != NULL &&
3310 (foundsig != NULL || !need_sig))
3313 * We've found the right NSEC/NSEC3 record.
3315 * Note: for this to really be the right
3316 * NSEC record, it's essential that the NSEC
3317 * records of any nodes obscured by a zone
3318 * cut have been removed; we assume this is
3321 result = dns_name_concatenate(name, origin,
3323 if (result == ISC_R_SUCCESS) {
3324 if (nodep != NULL) {
3325 new_reference(search->rbtdb,
3329 bind_rdataset(search->rbtdb, node,
3332 if (foundsig != NULL)
3333 bind_rdataset(search->rbtdb,
3339 } else if (found == NULL && foundsig == NULL) {
3341 * This node is active, but has no NSEC or
3342 * RRSIG NSEC. That means it's glue or
3343 * other obscured zone data that isn't
3344 * relevant for our search. Treat the
3345 * node as if it were empty and keep looking.
3347 empty_node = ISC_TRUE;
3348 result = dns_rbtnodechain_prev(&search->chain,
3352 * We found an active node, but either the
3353 * NSEC or the RRSIG NSEC is missing. This
3356 result = DNS_R_BADDB;
3360 * This node isn't active. We've got to keep
3363 result = dns_rbtnodechain_prev(&search->chain, NULL,
3366 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3367 isc_rwlocktype_read);
3368 } while (empty_node && result == ISC_R_SUCCESS);
3370 if (result == ISC_R_NOMORE && wraps) {
3371 result = dns_rbtnodechain_last(&search->chain, tree,
3373 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3380 * If the result is ISC_R_NOMORE, then we got to the beginning of
3381 * the database and didn't find a NSEC record. This shouldn't
3384 if (result == ISC_R_NOMORE)
3385 result = DNS_R_BADDB;
/*
 * Find implementation for zone databases: locate the best answer for name
 * and type in the given version.
 *
 * Setup: if version is NULL the current version is attached and closed
 * again before returning.  The now argument is not used for the lookup,
 * and 0 is passed as now when rdatasets are bound.  A search block is
 * initialised and the tree lock is taken for reading.  The tree searched
 * is the NSEC3 tree when DNS_DBFIND_FORCENSEC3 is set.
 *
 * Tree walk: dns_rbt_findnode() is called with zone_zonecut_callback so
 * that zone cuts above the name are noticed.  On DNS_R_PARTIALMATCH the
 * outcomes visible here are, in order: a delegation built with
 * setup_delegation() when a zone cut was recorded; a wildcard match from
 * find_wildcard(); otherwise a negative answer, DNS_R_EMPTYNAME when
 * activeempty() reports an empty non-terminal and DNS_R_NXDOMAIN when it
 * does not, with find_closest_nsec() consulted first when the version is
 * secure without NSEC3 or an NSEC or NSEC3 lookup is forced.
 *
 * Node scan: with the node read lock held, the headers are walked looking
 * for the requested type, its RRSIG, a CNAME (unless CNAME matching is
 * disabled: below a zone cut, or for KEY and NSEC queries), and the NSEC
 * and RRSIG NSEC records that may be needed for a negative answer.  An NS
 * rdataset at a possible zone cut turns the node into the zone cut.  NSEC3
 * records whose parameters do not match the version are ignored.
 *
 * Results visible in the code below: DNS_R_NXRRSET, DNS_R_BADDB,
 * DNS_R_EMPTYWILD, DNS_R_CNAME, DNS_R_ZONECUT, DNS_R_GLUE, ISC_R_SUCCESS,
 * and whatever setup_delegation() or find_closest_nsec() return.  With
 * DNS_DBFIND_VALIDATEGLUE, glue rejected by valid_glue() is replaced by
 * the delegation.  DNS_NAMEATTR_WILDCARD is set on foundname on the
 * wildcard paths.
 *
 * Cleanup: the tree lock is released, a zone cut reference that ended up
 * unused (need_cleanup) is dropped under the node lock, and the node chain
 * is reset.
 */
3391 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3392 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3393 dns_dbnode_t **nodep, dns_name_t *foundname,
3394 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3396 dns_rbtnode_t *node = NULL;
3397 isc_result_t result;
3398 rbtdb_search_t search;
3399 isc_boolean_t cname_ok = ISC_TRUE;
3400 isc_boolean_t close_version = ISC_FALSE;
3401 isc_boolean_t maybe_zonecut = ISC_FALSE;
3402 isc_boolean_t at_zonecut = ISC_FALSE;
3404 isc_boolean_t empty_node;
3405 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3406 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3407 rbtdb_rdatatype_t sigtype;
3408 isc_boolean_t active;
3409 dns_rbtnodechain_t chain;
3413 search.rbtdb = (dns_rbtdb_t *)db;
3415 REQUIRE(VALID_RBTDB(search.rbtdb));
3418 * We don't care about 'now'.
3423 * If the caller didn't supply a version, attach to the current
3426 if (version == NULL) {
3427 currentversion(db, &version);
3428 close_version = ISC_TRUE;
3431 search.rbtversion = version;
3432 search.serial = search.rbtversion->serial;
3433 search.options = options;
3434 search.copy_name = ISC_FALSE;
3435 search.need_cleanup = ISC_FALSE;
3436 search.wild = ISC_FALSE;
3437 search.zonecut = NULL;
3438 dns_fixedname_init(&search.zonecut_name);
3439 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3443 * 'wild' will be true iff. we've matched a wildcard.
3447 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3450 * Search down from the root of the tree. If, while going down, we
3451 * encounter a callback node, zone_zonecut_callback() will search the
3452 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3454 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3456 result = dns_rbt_findnode(tree, name, foundname, &node,
3457 &search.chain, DNS_RBTFIND_EMPTYDATA,
3458 zone_zonecut_callback, &search);
3460 if (result == DNS_R_PARTIALMATCH) {
3462 if (search.zonecut != NULL) {
3463 result = setup_delegation(&search, nodep, foundname,
3464 rdataset, sigrdataset);
3470 * At least one of the levels in the search chain
3471 * potentially has a wildcard. For each such level,
3472 * we must see if there's a matching wildcard active
3473 * in the current version.
3475 result = find_wildcard(&search, &node, name);
3476 if (result == ISC_R_SUCCESS) {
3477 result = dns_name_copy(name, foundname, NULL);
3478 if (result != ISC_R_SUCCESS)
3483 else if (result != ISC_R_NOTFOUND)
3487 chain = search.chain;
3488 active = activeempty(&search, &chain, name);
3491 * If we're here, then the name does not exist, is not
3492 * beneath a zonecut, and there's no matching wildcard.
3494 if ((search.rbtversion->secure == dns_db_secure &&
3495 !search.rbtversion->havensec3) ||
3496 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3497 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3499 result = find_closest_nsec(&search, nodep, foundname,
3500 rdataset, sigrdataset, tree,
3501 search.rbtversion->secure);
3502 if (result == ISC_R_SUCCESS)
3503 result = active ? DNS_R_EMPTYNAME :
3506 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3508 } else if (result != ISC_R_SUCCESS)
3513 * We have found a node whose name is the desired name, or we
3514 * have matched a wildcard.
3517 if (search.zonecut != NULL) {
3519 * If we're beneath a zone cut, we don't want to look for
3520 * CNAMEs because they're not legitimate zone glue.
3522 cname_ok = ISC_FALSE;
3525 * The node may be a zone cut itself. If it might be one,
3526 * make sure we check for it later.
3528 if (node->find_callback &&
3529 (node != search.rbtdb->origin_node ||
3530 IS_STUB(search.rbtdb)) &&
3531 !dns_rdatatype_atparent(type))
3532 maybe_zonecut = ISC_TRUE;
3536 * Certain DNSSEC types are not subject to CNAME matching
3537 * (RFC4035, section 2.5 and RFC3007).
3539 * We don't check for RRSIG, because we don't store RRSIG records
3542 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3543 cname_ok = ISC_FALSE;
3546 * We now go looking for rdata...
3549 NODE_LOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3550 isc_rwlocktype_read);
3554 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3558 empty_node = ISC_TRUE;
3559 for (header = node->data; header != NULL; header = header_next) {
3560 header_next = header->next;
3562 * Look for an active, extant rdataset.
3565 if (header->serial <= search.serial &&
3568 * Is this a "this rdataset doesn't
3571 if (NONEXISTENT(header))
3575 header = header->down;
3576 } while (header != NULL);
3577 if (header != NULL) {
3579 * We now know that there is at least one active
3580 * rdataset at this node.
3582 empty_node = ISC_FALSE;
3585 * Do special zone cut handling, if requested.
3587 if (maybe_zonecut &&
3588 header->type == dns_rdatatype_ns) {
3590 * We increment the reference count on node to
3591 * ensure that search->zonecut_rdataset will
3592 * still be valid later.
3594 new_reference(search.rbtdb, node);
3595 search.zonecut = node;
3596 search.zonecut_rdataset = header;
3597 search.zonecut_sigrdataset = NULL;
3598 search.need_cleanup = ISC_TRUE;
3599 maybe_zonecut = ISC_FALSE;
3600 at_zonecut = ISC_TRUE;
3602 * It is not clear if KEY should still be
3603 * allowed at the parent side of the zone
3604 * cut or not. It is needed for RFC3007
3605 * validated updates.
3607 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3608 && type != dns_rdatatype_nsec
3609 && type != dns_rdatatype_key) {
3611 * Glue is not OK, but any answer we
3612 * could return would be glue. Return
3618 if (found != NULL && foundsig != NULL)
3624 * If the NSEC3 record doesn't match the chain
3625 * we are using behave as if it isn't here.
3627 if (header->type == dns_rdatatype_nsec3 &&
3628 !matchparams(header, &search))
3631 * If we found a type we were looking for,
3634 if (header->type == type ||
3635 type == dns_rdatatype_any ||
3636 (header->type == dns_rdatatype_cname &&
3639 * We've found the answer!
3642 if (header->type == dns_rdatatype_cname &&
3645 * We may be finding a CNAME instead
3646 * of the desired type.
3648 * If we've already got the CNAME RRSIG,
3649 * use it, otherwise change sigtype
3650 * so that we find it.
3652 if (cnamesig != NULL)
3653 foundsig = cnamesig;
3656 RBTDB_RDATATYPE_SIGCNAME;
3659 * If we've got all we need, end the search.
3661 if (!maybe_zonecut && foundsig != NULL)
3663 } else if (header->type == sigtype) {
3665 * We've found the RRSIG rdataset for our
3666 * target type. Remember it.
3670 * If we've got all we need, end the search.
3672 if (!maybe_zonecut && found != NULL)
3674 } else if (header->type == dns_rdatatype_nsec &&
3675 !search.rbtversion->havensec3) {
3677 * Remember a NSEC rdataset even if we're
3678 * not specifically looking for it, because
3679 * we might need it later.
3681 nsecheader = header;
3682 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3683 !search.rbtversion->havensec3) {
3685 * If we need the NSEC rdataset, we'll also
3686 * need its signature.
3689 } else if (cname_ok &&
3690 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3692 * If we get a CNAME match, we'll also need
3702 * We have an exact match for the name, but there are no
3703 * active rdatasets in the desired version. That means that
3704 * this node doesn't exist in the desired version, and that
3705 * we really have a partial match.
3708 lock = &search.rbtdb->node_locks[node->locknum].lock;
3709 NODE_UNLOCK(lock, isc_rwlocktype_read);
3715 * If we didn't find what we were looking for...
3717 if (found == NULL) {
3718 if (search.zonecut != NULL) {
3720 * We were trying to find glue at a node beneath a
3721 * zone cut, but didn't.
3723 * Return the delegation.
3725 lock = &search.rbtdb->node_locks[node->locknum].lock;
3726 NODE_UNLOCK(lock, isc_rwlocktype_read);
3727 result = setup_delegation(&search, nodep, foundname,
3728 rdataset, sigrdataset);
3732 * The desired type doesn't exist.
3734 result = DNS_R_NXRRSET;
3735 if (search.rbtversion->secure == dns_db_secure &&
3736 !search.rbtversion->havensec3 &&
3737 (nsecheader == NULL || nsecsig == NULL)) {
3739 * The zone is secure but there's no NSEC,
3740 * or the NSEC has no signature!
3743 result = DNS_R_BADDB;
3747 lock = &search.rbtdb->node_locks[node->locknum].lock;
3748 NODE_UNLOCK(lock, isc_rwlocktype_read);
3749 result = find_closest_nsec(&search, nodep, foundname,
3750 rdataset, sigrdataset,
3752 search.rbtversion->secure);
3753 if (result == ISC_R_SUCCESS)
3754 result = DNS_R_EMPTYWILD;
3757 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3761 * There's no NSEC record, and we were told
3764 result = DNS_R_BADDB;
3767 if (nodep != NULL) {
3768 new_reference(search.rbtdb, node);
3771 if ((search.rbtversion->secure == dns_db_secure &&
3772 !search.rbtversion->havensec3) ||
3773 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3775 bind_rdataset(search.rbtdb, node, nsecheader,
3777 if (nsecsig != NULL)
3778 bind_rdataset(search.rbtdb, node,
3779 nsecsig, 0, sigrdataset);
3782 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3787 * We found what we were looking for, or we found a CNAME.
3790 if (type != found->type &&
3791 type != dns_rdatatype_any &&
3792 found->type == dns_rdatatype_cname) {
3794 * We weren't doing an ANY query and we found a CNAME instead
3795 * of the type we were looking for, so we need to indicate
3796 * that result to the caller.
3798 result = DNS_R_CNAME;
3799 } else if (search.zonecut != NULL) {
3801 * If we're beneath a zone cut, we must indicate that the
3802 * result is glue, unless we're actually at the zone cut
3803 * and the type is NSEC or KEY.
3805 if (search.zonecut == node) {
3807 * It is not clear if KEY should still be
3808 * allowed at the parent side of the zone
3809 * cut or not. It is needed for RFC3007
3810 * validated updates.
3812 if (type == dns_rdatatype_nsec ||
3813 type == dns_rdatatype_nsec3 ||
3814 type == dns_rdatatype_key)
3815 result = ISC_R_SUCCESS;
3816 else if (type == dns_rdatatype_any)
3817 result = DNS_R_ZONECUT;
3819 result = DNS_R_GLUE;
3821 result = DNS_R_GLUE;
3823 * We might have found data that isn't glue, but was occluded
3824 * by a dynamic update. If the caller cares about this, they
3825 * will have told us to validate glue.
3827 * XXX We should cache the glue validity state!
3829 if (result == DNS_R_GLUE &&
3830 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3831 !valid_glue(&search, foundname, type, node)) {
3832 lock = &search.rbtdb->node_locks[node->locknum].lock;
3833 NODE_UNLOCK(lock, isc_rwlocktype_read);
3834 result = setup_delegation(&search, nodep, foundname,
3835 rdataset, sigrdataset);
3840 * An ordinary successful query!
3842 result = ISC_R_SUCCESS;
3845 if (nodep != NULL) {
3847 new_reference(search.rbtdb, node);
3849 search.need_cleanup = ISC_FALSE;
3853 if (type != dns_rdatatype_any) {
3854 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3855 if (foundsig != NULL)
3856 bind_rdataset(search.rbtdb, node, foundsig, 0,
3861 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3864 NODE_UNLOCK(&(search.rbtdb->node_locks[node->locknum].lock),
3865 isc_rwlocktype_read);
3868 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3871 * If we found a zonecut but aren't going to use it, we have to
3874 if (search.need_cleanup) {
3875 node = search.zonecut;
3876 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3878 NODE_LOCK(lock, isc_rwlocktype_read);
3879 decrement_reference(search.rbtdb, node, 0,
3880 isc_rwlocktype_read, isc_rwlocktype_none,
3882 NODE_UNLOCK(lock, isc_rwlocktype_read);
3886 closeversion(db, &version, ISC_FALSE);
3888 dns_rbtnodechain_reset(&search.chain);
/*
 * zone_findzonecut: zone-cut lookup entry point that is not meant to be
 * reached (presumably the zone-database counterpart of
 * cache_findzonecut() -- confirm against the method table).
 *
 * The statements visible here mark 'sigrdataset' as unused, raise
 * FATAL_ERROR with the message "zone_findzonecut() called!", and then
 * return ISC_R_NOTIMPLEMENTED.
 *
 * NOTE(review): this listing omits short source lines (the return-type
 * line, the braces, and presumably similar unused-parameter markers for
 * the remaining arguments) -- verify against the complete file.
 */
3894 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3895 isc_stdtime_t now, dns_dbnode_t **nodep,
3896 dns_name_t *foundname,
3897 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3906 UNUSED(sigrdataset);
3908 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3910 return (ISC_R_NOTIMPLEMENTED);
/*
 * cache_zonecut_callback: node callback handed to dns_rbt_findnode() by
 * cache_find().  'arg' is the rbtdb_search_t for the lookup in progress
 * and must not yet have a zonecut recorded (REQUIRE below).
 *
 * The node is locked for reading and its header list (node->data) is
 * scanned for a DNAME header and an RRSIG DNAME header.  Headers whose
 * rdh_ttl is <= search->now take the stale path instead.
 *
 * When a usable DNAME header is found, a node reference is taken, the
 * node and headers are stored in 'search', need_cleanup is set, and
 * 'result' becomes DNS_R_PARTIALMATCH; otherwise 'result' becomes
 * DNS_R_CONTINUE.
 *
 * NOTE(review): the listing omits short lines (braces, else keywords,
 * the final return); presumably 'result' is what gets returned -- confirm.
 */
3914 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3915 rbtdb_search_t *search = arg;
3916 rdatasetheader_t *header, *header_prev, *header_next;
3917 rdatasetheader_t *dname_header, *sigdname_header;
3918 isc_result_t result;
3920 isc_rwlocktype_t locktype;
3924 REQUIRE(search->zonecut == NULL);
3927 * Keep compiler silent.
3931 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3932 locktype = isc_rwlocktype_read;
3933 NODE_LOCK(lock, locktype);
3936 * Look for a DNAME or RRSIG DNAME rdataset.
3938 dname_header = NULL;
3939 sigdname_header = NULL;
/* header_next is saved first because the stale path may free 'header'. */
3941 for (header = node->data; header != NULL; header = header_next) {
3942 header_next = header->next;
3943 if (header->rdh_ttl <= search->now) {
3945 * This rdataset is stale. If no one else is
3946 * using the node, we can clean it up right
3947 * now, otherwise we mark it as stale, and
3948 * the node as dirty, so it will get cleaned
3951 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3952 (locktype == isc_rwlocktype_write ||
3953 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3955 * We update the node's status only when we
3956 * can get write access; otherwise, we leave
3957 * others to this work. Periodical cleaning
3958 * will eventually take the job as the last
3960 * We won't downgrade the lock, since other
3961 * rdatasets are probably stale, too.
3963 locktype = isc_rwlocktype_write;
3965 if (dns_rbtnode_refcurrent(node) == 0) {
3969 * header->down can be non-NULL if the
3970 * refcount has just decremented to 0
3971 * but decrement_reference() has not
3972 * performed clean_cache_node(), in
3973 * which case we need to purge the
3974 * stale headers first.
3976 mctx = search->rbtdb->common.mctx;
3977 clean_stale_headers(search->rbtdb,
3980 if (header_prev != NULL)
3984 node->data = header->next;
3985 free_rdataset(search->rbtdb, mctx,
3988 header->attributes |=
3989 RDATASET_ATTR_STALE;
3991 header_prev = header;
3994 header_prev = header;
3995 } else if (header->type == dns_rdatatype_dname &&
3997 dname_header = header;
3998 header_prev = header;
3999 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4001 sigdname_header = header;
4002 header_prev = header;
4004 header_prev = header;
/*
 * A DNAME is used only if its trust is not pending, unless the caller
 * set DNS_DBFIND_PENDINGOK in the search options.
 */
4007 if (dname_header != NULL &&
4008 (!DNS_TRUST_PENDING(dname_header->trust) ||
4009 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4011 * We increment the reference count on node to ensure that
4012 * search->zonecut_rdataset will still be valid later.
4014 new_reference(search->rbtdb, node);
4015 INSIST(!ISC_LINK_LINKED(node, deadlink));
4016 search->zonecut = node;
4017 search->zonecut_rdataset = dname_header;
4018 search->zonecut_sigrdataset = sigdname_header;
4019 search->need_cleanup = ISC_TRUE;
4020 result = DNS_R_PARTIALMATCH;
4022 result = DNS_R_CONTINUE;
/* Unlock with whichever lock type is currently held (read or write). */
4024 NODE_UNLOCK(lock, locktype);
/*
 * find_deepest_zonecut: starting at 'node', look for an NS header (and
 * its RRSIG NS) in the node's header list, moving on to
 * search->chain.levels[i] when nothing was found and i > 0.
 *
 * 'result' starts as ISC_R_NOTFOUND.  When an NS header is found:
 *   - 'foundname' (if non-NULL) is built first, from the node name plus
 *     names taken from the chain levels;
 *   - 'result' becomes DNS_R_DELEGATION;
 *   - a node reference is taken when 'nodep' is non-NULL;
 *   - 'rdataset' / 'sigrdataset' are bound via bind_rdataset();
 *   - headers for which need_headerupdate() holds are passed to
 *     update_header() under a write lock.
 *
 * The caller must hold the tree lock (stated in the body comment).
 *
 * NOTE(review): the listing omits short lines (loop construct, braces,
 * assignments to found/foundsig, the return) -- confirm details against
 * the complete file.
 */
4029 static inline isc_result_t
4030 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4031 dns_dbnode_t **nodep, dns_name_t *foundname,
4032 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4035 dns_rbtnode_t *level_node;
4036 rdatasetheader_t *header, *header_prev, *header_next;
4037 rdatasetheader_t *found, *foundsig;
4038 isc_result_t result = ISC_R_NOTFOUND;
4043 isc_rwlocktype_t locktype;
4046 * Caller must be holding the tree lock.
4049 rbtdb = search->rbtdb;
4050 i = search->chain.level_matches;
4053 locktype = isc_rwlocktype_read;
4054 lock = &rbtdb->node_locks[node->locknum].lock;
4055 NODE_LOCK(lock, locktype);
4058 * Look for NS and RRSIG NS rdatasets.
4063 for (header = node->data;
4065 header = header_next) {
4066 header_next = header->next;
4067 if (header->rdh_ttl <= search->now) {
4069 * This rdataset is stale. If no one else is
4070 * using the node, we can clean it up right
4071 * now, otherwise we mark it as stale, and
4072 * the node as dirty, so it will get cleaned
4075 if ((header->rdh_ttl <= search->now -
4077 (locktype == isc_rwlocktype_write ||
4078 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4080 * We update the node's status only
4081 * when we can get write access.
4083 locktype = isc_rwlocktype_write;
4085 if (dns_rbtnode_refcurrent(node)
4089 m = search->rbtdb->common.mctx;
4090 clean_stale_headers(
4093 if (header_prev != NULL)
4099 free_rdataset(rbtdb, m,
4102 header->attributes |=
4103 RDATASET_ATTR_STALE;
4105 header_prev = header;
4108 header_prev = header;
4109 } else if (EXISTS(header)) {
4111 * We've found an extant rdataset. See if
4112 * we're interested in it.
4114 if (header->type == dns_rdatatype_ns) {
4116 if (foundsig != NULL)
4118 } else if (header->type ==
4119 RBTDB_RDATATYPE_SIGNS) {
4124 header_prev = header;
4126 header_prev = header;
4129 if (found != NULL) {
4131 * If we have to set foundname, we do it before
4132 * anything else. If we were to set foundname after
4133 * we had set nodep or bound the rdataset, then we'd
4134 * have to undo that work if dns_name_concatenate()
4135 * failed. By setting foundname first, there's
4136 * nothing to undo if we have trouble.
4138 if (foundname != NULL) {
4139 dns_name_init(&name, NULL);
4140 dns_rbt_namefromnode(node, &name);
4141 result = dns_name_copy(&name, foundname, NULL);
/* Extend foundname with level names while that keeps succeeding. */
4142 while (result == ISC_R_SUCCESS && i > 0) {
4144 level_node = search->chain.levels[i];
4145 dns_name_init(&name, NULL);
4146 dns_rbt_namefromnode(level_node,
4149 dns_name_concatenate(foundname,
4154 if (result != ISC_R_SUCCESS) {
4159 result = DNS_R_DELEGATION;
4160 if (nodep != NULL) {
4161 new_reference(search->rbtdb, node);
4164 bind_rdataset(search->rbtdb, node, found, search->now,
4166 if (foundsig != NULL)
4167 bind_rdataset(search->rbtdb, node, foundsig,
4168 search->now, sigrdataset);
/*
 * Header updates are done under a write lock: a read lock is released
 * and re-acquired as a write lock, and need_headerupdate() is then
 * re-evaluated for each header before update_header() is called.
 */
4169 if (need_headerupdate(found, search->now) ||
4170 (foundsig != NULL &&
4171 need_headerupdate(foundsig, search->now))) {
4172 if (locktype != isc_rwlocktype_write) {
4173 NODE_UNLOCK(lock, locktype);
4174 NODE_LOCK(lock, isc_rwlocktype_write);
4175 locktype = isc_rwlocktype_write;
4177 if (need_headerupdate(found, search->now))
4178 update_header(search->rbtdb, found,
4180 if (foundsig != NULL &&
4181 need_headerupdate(foundsig, search->now)) {
4182 update_header(search->rbtdb, foundsig,
4189 NODE_UNLOCK(lock, locktype);
/* Nothing found here and levels remain: continue with a chain level node. */
4191 if (found == NULL && i > 0) {
4193 node = search->chain.levels[i];
/*
 * find_coveringnsec: look for an NSEC header (and its RRSIG NSEC) at the
 * node the search chain currently points to, stepping to the previous
 * chain node while the node examined turns out to be empty.
 *
 * Per node examined:
 *   - the current name/origin are fetched with dns_rbtnodechain_current();
 *   - the node is locked and its headers are scanned; headers with
 *     rdh_ttl <= now take the stale path; headers that are NONEXISTENT
 *     or whose base type is 0 are passed over; any other header clears
 *     'empty_node';
 *   - if an NSEC header was found, name and origin are concatenated, the
 *     rdatasets are bound, a node reference is taken and 'result' is set
 *     to DNS_R_COVERINGNSEC;
 *   - otherwise a non-empty node yields ISC_R_NOTFOUND, and an empty
 *     node advances the chain with dns_rbtnodechain_prev().
 *
 * The loop repeats while 'empty_node' is true and 'result' is
 * ISC_R_SUCCESS.
 *
 * NOTE(review): the listing omits short lines (do/braces, found/foundsig
 * assignments, early-exit statements, the return) -- confirm against the
 * complete file.
 */
4203 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4204 isc_stdtime_t now, dns_name_t *foundname,
4205 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4207 dns_rbtnode_t *node;
4208 rdatasetheader_t *header, *header_next, *header_prev;
4209 rdatasetheader_t *found, *foundsig;
4210 isc_boolean_t empty_node;
4211 isc_result_t result;
4212 dns_fixedname_t fname, forigin;
4213 dns_name_t *name, *origin;
4214 rbtdb_rdatatype_t matchtype, sigmatchtype;
4216 isc_rwlocktype_t locktype;
/* Internal type values for NSEC and for RRSIG covering NSEC. */
4218 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4219 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4220 dns_rdatatype_nsec);
4224 dns_fixedname_init(&fname);
4225 name = dns_fixedname_name(&fname);
4226 dns_fixedname_init(&forigin);
4227 origin = dns_fixedname_name(&forigin);
4228 result = dns_rbtnodechain_current(&search->chain, name,
4230 if (result != ISC_R_SUCCESS)
4232 locktype = isc_rwlocktype_read;
4233 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4234 NODE_LOCK(lock, locktype);
4237 empty_node = ISC_TRUE;
4239 for (header = node->data;
4241 header = header_next) {
4242 header_next = header->next;
4243 if (header->rdh_ttl <= now) {
4245 * This rdataset is stale. If no one else is
4246 * using the node, we can clean it up right
4247 * now, otherwise we mark it as stale, and the
4248 * node as dirty, so it will get cleaned up
4251 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4252 (locktype == isc_rwlocktype_write ||
4253 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4255 * We update the node's status only
4256 * when we can get write access.
4258 locktype = isc_rwlocktype_write;
4260 if (dns_rbtnode_refcurrent(node)
4264 m = search->rbtdb->common.mctx;
4265 clean_stale_headers(
4268 if (header_prev != NULL)
4272 node->data = header->next;
4273 free_rdataset(search->rbtdb, m,
4276 header->attributes |=
4277 RDATASET_ATTR_STALE;
4279 header_prev = header;
4282 header_prev = header;
/* Nonexistent headers and those with base type 0 do not count as data. */
4285 if (NONEXISTENT(header) ||
4286 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4287 header_prev = header;
4290 empty_node = ISC_FALSE;
4291 if (header->type == matchtype)
4293 else if (header->type == sigmatchtype)
4295 header_prev = header;
4297 if (found != NULL) {
4298 result = dns_name_concatenate(name, origin,
4300 if (result != ISC_R_SUCCESS)
4302 bind_rdataset(search->rbtdb, node, found,
4304 if (foundsig != NULL)
4305 bind_rdataset(search->rbtdb, node, foundsig,
4307 new_reference(search->rbtdb, node);
4309 result = DNS_R_COVERINGNSEC;
4310 } else if (!empty_node) {
4311 result = ISC_R_NOTFOUND;
4313 result = dns_rbtnodechain_prev(&search->chain, NULL,
4316 NODE_UNLOCK(lock, locktype);
4317 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * cache_find: look up 'name' / 'type' in a cache database.
 *
 * Preconditions visible here: 'db' must be a valid rbtdb and 'version'
 * must be NULL.
 *
 * Outline of the visible logic:
 *   1. Initialise 'search', take the tree lock for reading, and call
 *      dns_rbt_findnode() with cache_zonecut_callback as the callback.
 *   2. On DNS_R_PARTIALMATCH: if DNS_DBFIND_COVERINGNSEC is set, try
 *      find_coveringnsec(); if the callback recorded a zonecut, use
 *      setup_delegation(); otherwise call find_deepest_zonecut().
 *   3. On an exact match: lock the node and scan its headers.  Stale
 *      headers (rdh_ttl <= now) take the clean-up path.  Existing
 *      headers are checked for the answer, its RRSIG, a negative cache
 *      entry, NS, RRSIG NS and RRSIG CNAME.
 *   4. If no usable answer was found (none, glue without
 *      DNS_DBFIND_GLUEOK, or pending without DNS_DBFIND_PENDINGOK), an
 *      NS header at this node gives DNS_R_DELEGATION; otherwise the
 *      deepest zone cut is searched for.
 *   5. Otherwise 'result' is DNS_R_NCACHENXDOMAIN, DNS_R_NCACHENXRRSET,
 *      DNS_R_CNAME or ISC_R_SUCCESS, and the rdatasets are bound.
 *   6. Headers needing an update are updated under a write lock; then
 *      the locks are released, an unused zonecut reference is dropped,
 *      and the chain is reset.
 *
 * NOTE(review): the listing omits short lines (braces, else keywords,
 * gotos/labels, several assignments, the return) -- confirm control flow
 * against the complete file.
 */
4322 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4323 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4324 dns_dbnode_t **nodep, dns_name_t *foundname,
4325 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4327 dns_rbtnode_t *node = NULL;
4328 isc_result_t result;
4329 rbtdb_search_t search;
4330 isc_boolean_t cname_ok = ISC_TRUE;
4331 isc_boolean_t empty_node;
4333 isc_rwlocktype_t locktype;
4334 rdatasetheader_t *header, *header_prev, *header_next;
4335 rdatasetheader_t *found, *nsheader;
4336 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4337 rdatasetheader_t *update, *updatesig;
4338 rbtdb_rdatatype_t sigtype, negtype;
4342 search.rbtdb = (dns_rbtdb_t *)db;
4344 REQUIRE(VALID_RBTDB(search.rbtdb));
4345 REQUIRE(version == NULL);
/* NOTE(review): presumably guarded by a "now == 0" test not shown here. */
4348 isc_stdtime_get(&now);
4350 search.rbtversion = NULL;
4352 search.options = options;
4353 search.copy_name = ISC_FALSE;
4354 search.need_cleanup = ISC_FALSE;
4355 search.wild = ISC_FALSE;
4356 search.zonecut = NULL;
4357 dns_fixedname_init(&search.zonecut_name);
4358 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4363 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4366 * Search down from the root of the tree. If, while going down, we
4367 * encounter a callback node, cache_zonecut_callback() will search the
4368 * rdatasets at the zone cut for a DNAME rdataset.
4370 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4371 &search.chain, DNS_RBTFIND_EMPTYDATA,
4372 cache_zonecut_callback, &search);
4374 if (result == DNS_R_PARTIALMATCH) {
4375 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4376 result = find_coveringnsec(&search, nodep, now,
4377 foundname, rdataset,
4379 if (result == DNS_R_COVERINGNSEC)
4382 if (search.zonecut != NULL) {
4383 result = setup_delegation(&search, nodep, foundname,
4384 rdataset, sigrdataset);
4388 result = find_deepest_zonecut(&search, node, nodep,
4389 foundname, rdataset,
4393 } else if (result != ISC_R_SUCCESS)
4397 * Certain DNSSEC types are not subject to CNAME matching
4398 * (RFC4035, section 2.5 and RFC3007).
4400 * We don't check for RRSIG, because we don't store RRSIG records
4403 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4404 cname_ok = ISC_FALSE;
4407 * We now go looking for rdata...
4410 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4411 locktype = isc_rwlocktype_read;
4412 NODE_LOCK(lock, locktype);
/*
 * sigtype: internal type value for the RRSIG covering 'type'.
 * negtype: internal type value with base type 0 and 'type' as the second
 * argument; it is matched below as a negative cache entry.
 */
4416 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4417 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4421 empty_node = ISC_TRUE;
4423 for (header = node->data; header != NULL; header = header_next) {
4424 header_next = header->next;
4425 if (header->rdh_ttl <= now) {
4427 * This rdataset is stale. If no one else is using the
4428 * node, we can clean it up right now, otherwise we
4429 * mark it as stale, and the node as dirty, so it will
4430 * get cleaned up later.
4432 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4433 (locktype == isc_rwlocktype_write ||
4434 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4436 * We update the node's status only when we
4437 * can get write access.
4439 locktype = isc_rwlocktype_write;
4441 if (dns_rbtnode_refcurrent(node) == 0) {
4444 mctx = search.rbtdb->common.mctx;
4445 clean_stale_headers(search.rbtdb, mctx,
4447 if (header_prev != NULL)
4451 node->data = header->next;
4452 free_rdataset(search.rbtdb, mctx,
4455 header->attributes |=
4456 RDATASET_ATTR_STALE;
4458 header_prev = header;
4461 header_prev = header;
4462 } else if (EXISTS(header)) {
4464 * We now know that there is at least one active
4465 * non-stale rdataset at this node.
4467 empty_node = ISC_FALSE;
4470 * If we found a type we were looking for, remember
4473 if (header->type == type ||
4474 (type == dns_rdatatype_any &&
4475 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4476 (cname_ok && header->type ==
4477 dns_rdatatype_cname)) {
4479 * We've found the answer.
4482 if (header->type == dns_rdatatype_cname &&
4486 * If we've already got the CNAME RRSIG,
4487 * use it, otherwise change sigtype
4488 * so that we find it.
4490 if (cnamesig != NULL)
4491 foundsig = cnamesig;
4494 RBTDB_RDATATYPE_SIGCNAME;
4495 foundsig = cnamesig;
4497 } else if (header->type == sigtype) {
4499 * We've found the RRSIG rdataset for our
4500 * target type. Remember it.
4503 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4504 header->type == negtype) {
4506 * We've found a negative cache entry.
4509 } else if (header->type == dns_rdatatype_ns) {
4511 * Remember a NS rdataset even if we're
4512 * not specifically looking for it, because
4513 * we might need it later.
4516 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4518 * If we need the NS rdataset, we'll also
4519 * need its signature.
4522 } else if (cname_ok &&
4523 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4525 * If we get a CNAME match, we'll also need
4530 header_prev = header;
4532 header_prev = header;
4537 * We have an exact match for the name, but there are no
4538 * extant rdatasets. That means that this node doesn't
4539 * meaningfully exist, and that we really have a partial match.
4541 NODE_UNLOCK(lock, locktype);
4546 * If we didn't find what we were looking for...
/*
 * An answer is unusable when it is missing, when it is glue and the
 * caller did not pass DNS_DBFIND_GLUEOK, or when its trust is pending
 * and the caller did not pass DNS_DBFIND_PENDINGOK.
 */
4548 if (found == NULL ||
4549 (found->trust == dns_trust_glue &&
4550 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4551 (DNS_TRUST_PENDING(found->trust) &&
4552 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4554 * If there is an NS rdataset at this node, then this is the
4557 if (nsheader != NULL) {
4558 if (nodep != NULL) {
4559 new_reference(search.rbtdb, node);
4560 INSIST(!ISC_LINK_LINKED(node, deadlink));
4563 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4565 if (need_headerupdate(nsheader, search.now))
4567 if (nssig != NULL) {
4568 bind_rdataset(search.rbtdb, node, nssig,
4569 search.now, sigrdataset);
4570 if (need_headerupdate(nssig, search.now))
4573 result = DNS_R_DELEGATION;
4578 * Go find the deepest zone cut.
4580 NODE_UNLOCK(lock, locktype);
4585 * We found what we were looking for, or we found a CNAME.
4588 if (nodep != NULL) {
4589 new_reference(search.rbtdb, node);
4590 INSIST(!ISC_LINK_LINKED(node, deadlink));
4594 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4596 * We found a negative cache entry.
4598 if (NXDOMAIN(found))
4599 result = DNS_R_NCACHENXDOMAIN;
4601 result = DNS_R_NCACHENXRRSET;
4602 } else if (type != found->type &&
4603 type != dns_rdatatype_any &&
4604 found->type == dns_rdatatype_cname) {
4606 * We weren't doing an ANY query and we found a CNAME instead
4607 * of the type we were looking for, so we need to indicate
4608 * that result to the caller.
4610 result = DNS_R_CNAME;
4613 * An ordinary successful query!
4615 result = ISC_R_SUCCESS;
/*
 * Rdatasets are bound for non-ANY queries, and also for ANY queries that
 * ended in one of the two negative-cache results.
 */
4618 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4619 result == DNS_R_NCACHENXRRSET) {
4620 bind_rdataset(search.rbtdb, node, found, search.now,
4622 if (need_headerupdate(found, search.now))
4624 if (foundsig != NULL) {
4625 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4627 if (need_headerupdate(foundsig, search.now))
4628 updatesig = foundsig;
/*
 * Pending header updates are applied under a write lock.  The lock is
 * released and re-taken if only a read lock is held, and
 * need_headerupdate() is re-checked before each update_header() call.
 */
4633 if ((update != NULL || updatesig != NULL) &&
4634 locktype != isc_rwlocktype_write) {
4635 NODE_UNLOCK(lock, locktype);
4636 NODE_LOCK(lock, isc_rwlocktype_write);
4637 locktype = isc_rwlocktype_write;
4639 if (update != NULL && need_headerupdate(update, search.now))
4640 update_header(search.rbtdb, update, search.now);
4641 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4642 update_header(search.rbtdb, updatesig, search.now);
4644 NODE_UNLOCK(lock, locktype);
4647 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4650 * If we found a zonecut but aren't going to use it, we have to
4653 if (search.need_cleanup) {
4654 node = search.zonecut;
4655 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4657 NODE_LOCK(lock, isc_rwlocktype_read);
4658 decrement_reference(search.rbtdb, node, 0,
4659 isc_rwlocktype_read, isc_rwlocktype_none,
4661 NODE_UNLOCK(lock, isc_rwlocktype_read);
4664 dns_rbtnodechain_reset(&search.chain);
/*
 * cache_findzonecut: find an NS rdataset for 'name' in a cache database.
 *
 * Visible logic:
 *   - 'db' must be a valid rbtdb; 'search' is initialised and the tree
 *     lock is taken for reading.
 *   - DNS_DBFIND_NOEXACT in 'options' adds DNS_RBTFIND_NOEXACT to the
 *     dns_rbt_findnode() options (DNS_RBTFIND_EMPTYDATA is always set).
 *     No node callback is passed.
 *   - On DNS_R_PARTIALMATCH the work is handed to find_deepest_zonecut().
 *   - On an exact match the node's headers are scanned under the node
 *     lock for NS and RRSIG NS; headers with rdh_ttl <= now take the
 *     stale path.
 *   - If an NS header was found, a node reference is taken (when 'nodep'
 *     is non-NULL), the rdatasets are bound, and header updates are
 *     applied under a write lock.
 *   - Before returning, DNS_R_DELEGATION is converted to ISC_R_SUCCESS.
 *
 * NOTE(review): the listing omits short lines (braces, gotos/labels,
 * found/foundsig assignments, the return, and what happens when no NS
 * header is found) -- confirm against the complete file.
 */
4670 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4671 isc_stdtime_t now, dns_dbnode_t **nodep,
4672 dns_name_t *foundname,
4673 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4675 dns_rbtnode_t *node = NULL;
4677 isc_result_t result;
4678 rbtdb_search_t search;
4679 rdatasetheader_t *header, *header_prev, *header_next;
4680 rdatasetheader_t *found, *foundsig;
4681 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4682 isc_rwlocktype_t locktype;
4684 search.rbtdb = (dns_rbtdb_t *)db;
4686 REQUIRE(VALID_RBTDB(search.rbtdb));
/* NOTE(review): presumably guarded by a "now == 0" test not shown here. */
4689 isc_stdtime_get(&now);
4691 search.rbtversion = NULL;
4693 search.options = options;
4694 search.copy_name = ISC_FALSE;
4695 search.need_cleanup = ISC_FALSE;
4696 search.wild = ISC_FALSE;
4697 search.zonecut = NULL;
4698 dns_fixedname_init(&search.zonecut_name);
4699 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4702 if ((options & DNS_DBFIND_NOEXACT) != 0)
4703 rbtoptions |= DNS_RBTFIND_NOEXACT;
4705 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4708 * Search down from the root of the tree.
4710 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4711 &search.chain, rbtoptions, NULL, &search);
4713 if (result == DNS_R_PARTIALMATCH) {
4715 result = find_deepest_zonecut(&search, node, nodep, foundname,
4716 rdataset, sigrdataset);
4718 } else if (result != ISC_R_SUCCESS)
4722 * We now go looking for an NS rdataset at the node.
4725 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4726 locktype = isc_rwlocktype_read;
4727 NODE_LOCK(lock, locktype);
4732 for (header = node->data; header != NULL; header = header_next) {
4733 header_next = header->next;
4734 if (header->rdh_ttl <= now) {
4736 * This rdataset is stale. If no one else is using the
4737 * node, we can clean it up right now, otherwise we
4738 * mark it as stale, and the node as dirty, so it will
4739 * get cleaned up later.
4741 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4742 (locktype == isc_rwlocktype_write ||
4743 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4745 * We update the node's status only when we
4746 * can get write access.
4748 locktype = isc_rwlocktype_write;
4750 if (dns_rbtnode_refcurrent(node) == 0) {
4753 mctx = search.rbtdb->common.mctx;
4754 clean_stale_headers(search.rbtdb, mctx,
4756 if (header_prev != NULL)
4760 node->data = header->next;
4761 free_rdataset(search.rbtdb, mctx,
4764 header->attributes |=
4765 RDATASET_ATTR_STALE;
4767 header_prev = header;
4770 header_prev = header;
4771 } else if (EXISTS(header)) {
4773 * If we found a type we were looking for, remember
4776 if (header->type == dns_rdatatype_ns) {
4778 * Remember a NS rdataset even if we're
4779 * not specifically looking for it, because
4780 * we might need it later.
4783 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4785 * If we need the NS rdataset, we'll also
4786 * need its signature.
4790 header_prev = header;
4792 header_prev = header;
4795 if (found == NULL) {
4797 * No NS records here.
4799 NODE_UNLOCK(lock, locktype);
4803 if (nodep != NULL) {
4804 new_reference(search.rbtdb, node);
4805 INSIST(!ISC_LINK_LINKED(node, deadlink));
4809 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4810 if (foundsig != NULL)
4811 bind_rdataset(search.rbtdb, node, foundsig, search.now,
/*
 * Header updates are applied under a write lock; the lock is released
 * and re-taken when only a read lock is held.
 */
4814 if (need_headerupdate(found, search.now) ||
4815 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
4816 if (locktype != isc_rwlocktype_write) {
4817 NODE_UNLOCK(lock, locktype);
4818 NODE_LOCK(lock, isc_rwlocktype_write);
4819 locktype = isc_rwlocktype_write;
4821 if (need_headerupdate(found, search.now))
4822 update_header(search.rbtdb, found, search.now);
4823 if (foundsig != NULL &&
4824 need_headerupdate(foundsig, search.now)) {
4825 update_header(search.rbtdb, foundsig, search.now);
4829 NODE_UNLOCK(lock, locktype);
4832 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
/* No callback was used for this search, so no zonecut reference can be pending. */
4834 INSIST(!search.need_cleanup);
4836 dns_rbtnodechain_reset(&search.chain);
/* Callers of this function see a delegation as plain success. */
4838 if (result == DNS_R_DELEGATION)
4839 result = ISC_R_SUCCESS;
/*
 * attachnode: take an additional reference on database node 'source'.
 *
 * Requires a valid rbtdb and a non-NULL 'targetp' whose target is NULL.
 * The node reference count is incremented with
 * dns_rbtnode_refincrement() while holding the node's lock via
 * NODE_STRONGLOCK / NODE_STRONGUNLOCK.
 *
 * NOTE(review): the listing omits short lines; presumably *targetp is
 * set to 'source' afterwards -- confirm against the complete file.
 */
4845 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4846 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4847 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4850 REQUIRE(VALID_RBTDB(rbtdb));
4851 REQUIRE(targetp != NULL && *targetp == NULL);
4853 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4854 dns_rbtnode_refincrement(node, &refs);
4856 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * detachnode: release the reference held through *targetp.
 *
 * Requires a valid rbtdb and a non-NULL *targetp.  Under the node lock
 * (read), decrement_reference() is called.  If it returns true and the
 * node-lock bucket has no references left while it is marked 'exiting',
 * 'inactive' is set.
 *
 * Further down, with the database lock held for writing,
 * rbtdb->active == 0 sets 'want_free'.  The final visible section logs
 * "calling free_rbtdb(%s)" with the origin name (or "<UNKNOWN>") and
 * calls free_rbtdb().
 *
 * NOTE(review): the listing omits short lines (the tests on 'inactive'
 * and 'want_free', the decrement of rbtdb->active, clearing *targetp)
 * -- confirm against the complete file.
 */
4862 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4863 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4864 dns_rbtnode_t *node;
4865 isc_boolean_t want_free = ISC_FALSE;
4866 isc_boolean_t inactive = ISC_FALSE;
4867 rbtdb_nodelock_t *nodelock;
4869 REQUIRE(VALID_RBTDB(rbtdb));
4870 REQUIRE(targetp != NULL && *targetp != NULL);
4872 node = (dns_rbtnode_t *)(*targetp);
4873 nodelock = &rbtdb->node_locks[node->locknum];
4875 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4877 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4878 isc_rwlocktype_none, ISC_FALSE)) {
4879 if (isc_refcount_current(&nodelock->references) == 0 &&
4880 nodelock->exiting) {
4881 inactive = ISC_TRUE;
4885 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4890 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4892 if (rbtdb->active == 0)
4893 want_free = ISC_TRUE;
4894 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/* buf holds the origin name for the log message, or "<UNKNOWN>". */
4896 char buf[DNS_NAME_FORMATSIZE];
4897 if (dns_name_dynamic(&rbtdb->common.origin))
4898 dns_name_format(&rbtdb->common.origin, buf,
4901 strcpy(buf, "<UNKNOWN>");
4902 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4903 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4904 "calling free_rbtdb(%s)", buf);
4905 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * expirenode: mark expired headers at 'node' as stale, optionally
 * force-expiring live ones when the database is over its memory limit.
 *
 * Visible logic:
 *   - Requires a valid rbtdb; the caller must hold a tree lock (stated
 *     in the body comment).
 *   - When rbtdb->overmem is set, a random value is drawn and
 *     'force_expire' becomes true only if the node's 'down' pointer is
 *     NULL and the value is a multiple of 4.  Logging is enabled
 *     according to isc_log_wouldlog() at debug level 2.
 *   - With the node locked for writing, each header in the list is
 *     examined.  A header with rdh_ttl <= now - RBTDB_VIRTUAL gets
 *     RDATASET_ATTR_STALE.  Under 'force_expire', a header for which
 *     RETAIN() is false gets TTL 0 via set_ttl() and is marked stale.
 *     The remaining cases only log.
 *   - Returns ISC_R_SUCCESS.
 *
 * NOTE(review): the listing omits short lines (braces, "if (log)"
 * tests, the declaration of 'val', a probable "now == 0" guard) --
 * confirm against the complete file.
 */
4911 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4912 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4913 dns_rbtnode_t *rbtnode = node;
4914 rdatasetheader_t *header;
4915 isc_boolean_t force_expire = ISC_FALSE;
4917 * These are the category and module used by the cache cleaner.
4919 isc_boolean_t log = ISC_FALSE;
4920 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4921 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4922 int level = ISC_LOG_DEBUG(2);
4923 char printname[DNS_NAME_FORMATSIZE];
4925 REQUIRE(VALID_RBTDB(rbtdb));
4928 * Caller must hold a tree lock.
4932 isc_stdtime_get(&now);
4934 if (rbtdb->overmem) {
4937 isc_random_get(&val);
4939 * XXXDCL Could stand to have a better policy, like LRU.
4941 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4944 * Note that 'log' can be true IFF rbtdb->overmem is also true.
4945 * rbtdb->overmem can currently only be true for cache
4946 * databases -- hence all of the "overmem cache" log strings.
4948 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4950 isc_log_write(dns_lctx, category, module, level,
4951 "overmem cache: %s %s",
4952 force_expire ? "FORCE" : "check",
4953 dns_rbt_formatnodename(rbtnode,
4955 sizeof(printname)));
4959 * We may not need write access, but this code path is not performance
4960 * sensitive, so it should be okay to always lock as a writer.
4962 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4963 isc_rwlocktype_write);
4965 for (header = rbtnode->data; header != NULL; header = header->next)
4966 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4968 * We don't check if refcurrent(rbtnode) == 0 and try
4969 * to free like we do in cache_find(), because
4970 * refcurrent(rbtnode) must be non-zero. This is so
4971 * because 'node' is an argument to the function.
4973 header->attributes |= RDATASET_ATTR_STALE;
4976 isc_log_write(dns_lctx, category, module,
4977 level, "overmem cache: stale %s",
4979 } else if (force_expire) {
/* Headers for which RETAIN() holds are spared from forced expiry. */
4980 if (! RETAIN(header)) {
4981 set_ttl(rbtdb, header, 0);
4982 header->attributes |= RDATASET_ATTR_STALE;
4985 isc_log_write(dns_lctx, category, module,
4986 level, "overmem cache: "
4987 "reprieve by RETAIN() %s",
4990 } else if (rbtdb->overmem && log)
4991 isc_log_write(dns_lctx, category, module, level,
4992 "overmem cache: saved %s", printname);
4994 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4995 isc_rwlocktype_write);
4997 return (ISC_R_SUCCESS);
/*
 * overmem: record the caller-supplied over-memory flag in the database.
 * The flag is stored in rbtdb->overmem only when IS_CACHE(rbtdb) holds;
 * for any other database the call has no visible effect.
 */
5001 overmem(dns_db_t *db, isc_boolean_t overmem) {
5002 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5004 if (IS_CACHE(rbtdb))
5005 rbtdb->overmem = overmem;
/*
 * printnode: write a textual dump of 'node' to the stdio stream 'out'.
 *
 * With the node locked for reading, a summary line is printed (node
 * pointer, current reference count, lock number).  If the node has
 * data, every header in the top-level list is visited and, for each
 * one, the chain reached through 'down' is followed.  The format
 * strings show type, serial, ttl, trust and attributes being printed.
 * A node with no data prints "(empty)".
 *
 * NOTE(review): the listing omits short lines (do/braces, some fprintf
 * arguments, the 'first' handling) -- confirm against the complete file.
 */
5009 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5010 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5011 dns_rbtnode_t *rbtnode = node;
5012 isc_boolean_t first;
5014 REQUIRE(VALID_RBTDB(rbtdb));
5016 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5017 isc_rwlocktype_read);
5019 fprintf(out, "node %p, %u references, locknum = %u\n",
5020 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5022 if (rbtnode->data != NULL) {
5023 rdatasetheader_t *current, *top_next;
/* top_next is saved because 'current' is reused to walk the down chain. */
5025 for (current = rbtnode->data; current != NULL;
5026 current = top_next) {
5027 top_next = current->next;
5029 fprintf(out, "\ttype %u", current->type);
5035 "\tserial = %lu, ttl = %u, "
5036 "trust = %u, attributes = %u, "
5038 (unsigned long)current->serial,
5041 current->attributes,
5043 current = current->down;
5044 } while (current != NULL);
5047 fprintf(out, "(empty)\n");
5049 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5050 isc_rwlocktype_read);
/*
 * createiterator: allocate and initialise a database iterator for 'db'.
 *
 * The iterator is allocated from the database memory context.
 * ISC_R_NOMEMORY is returned if that allocation fails.  The iterator
 * attaches to 'db' and starts out paused, with no tree lock held, a
 * result of ISC_R_SUCCESS and no current node.
 *
 * Option bits read from 'options':
 *   DNS_DB_RELATIVENAMES -> common.relative_names
 *   DNS_DB_NSEC3ONLY     -> nsec3only
 *   DNS_DB_NONSEC3       -> nonsec3
 *
 * Two node chains are initialised (chain and nsec3chain).  'current'
 * points at nsec3chain when nsec3only is set; the other assignment shown
 * points it at chain (the else line is not visible in this listing).
 *
 * The new iterator is stored in *iteratorp and ISC_R_SUCCESS is
 * returned.
 */
5054 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5056 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5057 rbtdb_dbiterator_t *rbtdbiter;
5059 REQUIRE(VALID_RBTDB(rbtdb));
5061 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5062 if (rbtdbiter == NULL)
5063 return (ISC_R_NOMEMORY);
5065 rbtdbiter->common.methods = &dbiterator_methods;
/* common.db is cleared first, then set by dns_db_attach(). */
5066 rbtdbiter->common.db = NULL;
5067 dns_db_attach(db, &rbtdbiter->common.db);
5068 rbtdbiter->common.relative_names =
5069 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5070 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5071 rbtdbiter->common.cleaning = ISC_FALSE;
5072 rbtdbiter->paused = ISC_TRUE;
5073 rbtdbiter->tree_locked = isc_rwlocktype_none;
5074 rbtdbiter->result = ISC_R_SUCCESS;
5075 dns_fixedname_init(&rbtdbiter->name);
5076 dns_fixedname_init(&rbtdbiter->origin);
5077 rbtdbiter->node = NULL;
5078 rbtdbiter->delete = 0;
5079 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5080 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5081 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5082 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5083 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5084 if (rbtdbiter->nsec3only)
5085 rbtdbiter->current = &rbtdbiter->nsec3chain;
5087 rbtdbiter->current = &rbtdbiter->chain;
5089 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5091 return (ISC_R_SUCCESS);
/*
 * zone_findrdataset: find the rdataset of 'type' / 'covers' (and the
 * RRSIG covering 'type') at 'node' as seen by 'version'.
 *
 * Visible logic:
 *   - Requires a valid rbtdb and type != dns_rdatatype_any.
 *   - If 'version' is NULL, the current version is opened and marked to
 *     be closed again before returning.
 *   - With the node locked for reading, each top-level header is
 *     visited.  Its 'down' chain is followed until a header with
 *     serial <= the version serial is reached (further conditions are
 *     not visible in this listing).  A NONEXISTENT header is treated
 *     specially.
 *   - A surviving header is compared against the wanted type and the
 *     RRSIG type.
 *   - When something was found, the rdatasets are bound with
 *     bind_rdataset().
 *   - Returns ISC_R_NOTFOUND or ISC_R_SUCCESS.
 *
 * NOTE(review): the listing omits short lines (do/braces, found/foundsig
 * assignments, break statements, the tests selecting the return value)
 * -- confirm against the complete file.
 */
5095 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5096 dns_rdatatype_t type, dns_rdatatype_t covers,
5097 isc_stdtime_t now, dns_rdataset_t *rdataset,
5098 dns_rdataset_t *sigrdataset)
5100 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5101 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5102 rdatasetheader_t *header, *header_next, *found, *foundsig;
5103 rbtdb_serial_t serial;
5104 rbtdb_version_t *rbtversion = version;
5105 isc_boolean_t close_version = ISC_FALSE;
5106 rbtdb_rdatatype_t matchtype, sigmatchtype;
5108 REQUIRE(VALID_RBTDB(rbtdb));
5109 REQUIRE(type != dns_rdatatype_any);
5111 if (rbtversion == NULL) {
5112 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5113 close_version = ISC_TRUE;
5115 serial = rbtversion->serial;
5118 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5119 isc_rwlocktype_read);
5123 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5125 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
/* header_next is saved because 'header' is reused to walk the down chain. */
5129 for (header = rbtnode->data; header != NULL; header = header_next) {
5130 header_next = header->next;
5132 if (header->serial <= serial &&
5135 * Is this a "this rdataset doesn't
5138 if (NONEXISTENT(header))
5142 header = header->down;
5143 } while (header != NULL);
5144 if (header != NULL) {
5146 * We have an active, extant rdataset. If it's a
5147 * type we're looking for, remember it.
5149 if (header->type == matchtype) {
5151 if (foundsig != NULL)
5153 } else if (header->type == sigmatchtype) {
5160 if (found != NULL) {
5161 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5162 if (foundsig != NULL)
5163 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5167 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5168 isc_rwlocktype_read);
5171 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5175 return (ISC_R_NOTFOUND);
5177 return (ISC_R_SUCCESS);
/*
 * cache_findrdataset: find the rdataset of 'type' / 'covers' (and the
 * RRSIG covering 'type') at 'node' in a cache database.
 *
 * Visible logic:
 *   - Requires a valid rbtdb and type != dns_rdatatype_any; 'result'
 *     starts as ISC_R_SUCCESS.
 *   - With the node locked for reading, each header is visited.
 *     Headers with rdh_ttl <= now are expired.  Those at or below
 *     now - RBTDB_VIRTUAL are marked RDATASET_ATTR_STALE, but only when
 *     the write lock is held or can be obtained via NODE_TRYUPGRADE.
 *     They are not freed here.
 *   - Existing headers are matched against the wanted type, the
 *     negative-cache types (RBTDB_RDATATYPE_NCACHEANY or 'negtype') and
 *     the RRSIG type.
 *   - When something was found, the rdatasets are bound with
 *     bind_rdataset().
 *   - ISC_R_NOTFOUND is returned when nothing was found.  A found header
 *     whose base type is 0 is a negative cache entry and sets 'result'
 *     to DNS_R_NCACHENXDOMAIN or DNS_R_NCACHENXRRSET.
 *
 * NOTE(review): the listing omits short lines (braces, found/foundsig
 * assignments, a probable "now == 0" guard, the final return) --
 * confirm against the complete file.
 */
5181 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5182 dns_rdatatype_t type, dns_rdatatype_t covers,
5183 isc_stdtime_t now, dns_rdataset_t *rdataset,
5184 dns_rdataset_t *sigrdataset)
5186 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5187 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5188 rdatasetheader_t *header, *header_next, *found, *foundsig;
5189 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5190 isc_result_t result;
5192 isc_rwlocktype_t locktype;
5194 REQUIRE(VALID_RBTDB(rbtdb));
5195 REQUIRE(type != dns_rdatatype_any);
5199 result = ISC_R_SUCCESS;
5202 isc_stdtime_get(&now);
5204 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5205 locktype = isc_rwlocktype_read;
5206 NODE_LOCK(lock, locktype);
5210 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5211 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5213 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5217 for (header = rbtnode->data; header != NULL; header = header_next) {
5218 header_next = header->next;
5219 if (header->rdh_ttl <= now) {
5220 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5221 (locktype == isc_rwlocktype_write ||
5222 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5224 * We update the node's status only when we
5225 * can get write access.
5227 locktype = isc_rwlocktype_write;
5230 * We don't check if refcurrent(rbtnode) == 0
5231 * and try to free like we do in cache_find(),
5232 * because refcurrent(rbtnode) must be
5233 * non-zero. This is so because 'node' is an
5234 * argument to the function.
5236 header->attributes |= RDATASET_ATTR_STALE;
5239 } else if (EXISTS(header)) {
5240 if (header->type == matchtype)
5242 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5243 header->type == negtype)
5245 else if (header->type == sigmatchtype)
5249 if (found != NULL) {
5250 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5251 if (foundsig != NULL)
5252 bind_rdataset(rbtdb, rbtnode, foundsig, now,
/* Unlock with whichever lock type is currently held (read or write). */
5256 NODE_UNLOCK(lock, locktype);
5259 return (ISC_R_NOTFOUND);
5261 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5263 * We found a negative cache entry.
5265 if (NXDOMAIN(found))
5266 result = DNS_R_NCACHENXDOMAIN;
5268 result = DNS_R_NCACHENXRRSET;
/*
 * allrdatasets: create an rdataset iterator for a node and hand it back
 * through *iteratorp.
 *
 * The iterator is allocated from the memory context of the database and
 * initialised with DNS_RDATASETITER_MAGIC, the rdatasetiter_methods
 * table, db, node, the version and now; its current header starts out
 * NULL.  For a database without DNS_DBATTR_CACHE set, the reference
 * count of a supplied version is incremented.  A reference on the node
 * is taken while holding the strong lock of the node's lock bucket.
 *
 * Returns ISC_R_NOMEMORY when the iterator cannot be allocated,
 * ISC_R_SUCCESS otherwise.
 *
 * NOTE(review): the call made when no version is supplied, the cache
 * branch around isc_stdtime_get() and the declaration of refs are not
 * visible in this listing -- confirm against the complete file.
 */
5275 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5276 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5278 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5279 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5280 rbtdb_version_t *rbtversion = version;
5281 rbtdb_rdatasetiter_t *iterator;
5284 REQUIRE(VALID_RBTDB(rbtdb));
5286 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5287 if (iterator == NULL)
5288 return (ISC_R_NOMEMORY);
5290 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5292 if (rbtversion == NULL)
5294 (dns_dbversion_t **) (void *)(&rbtversion));
5298 isc_refcount_increment(&rbtversion->references,
5304 isc_stdtime_get(&now);
5308 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5309 iterator->common.methods = &rdatasetiter_methods;
5310 iterator->common.db = db;
5311 iterator->common.node = node;
5312 iterator->common.version = (dns_dbversion_t *)rbtversion;
5313 iterator->common.now = now;
5315 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5317 dns_rbtnode_refincrement(rbtnode, &refs);
5320 iterator->current = NULL;
5322 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5324 *iteratorp = (dns_rdatasetiter_t *)iterator;
5326 return (ISC_R_SUCCESS);
5329 static isc_boolean_t
5330 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5331 rdatasetheader_t *header, *header_next;
5332 isc_boolean_t cname, other_data;
5333 dns_rdatatype_t rdtype;
5336 * The caller must hold the node lock.
5340 * Look for CNAME and "other data" rdatasets active in our version.
5343 other_data = ISC_FALSE;
5344 for (header = node->data; header != NULL; header = header_next) {
5345 header_next = header->next;
5346 if (header->type == dns_rdatatype_cname) {
5348 * Look for an active extant CNAME.
5351 if (header->serial <= serial &&
5354 * Is this a "this rdataset doesn't
5357 if (NONEXISTENT(header))
5361 header = header->down;
5362 } while (header != NULL);
5367 * Look for active extant "other data".
5369 * "Other data" is any rdataset whose type is not
5370 * KEY, NSEC, SIG or RRSIG.
5372 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5373 if (rdtype != dns_rdatatype_key &&
5374 rdtype != dns_rdatatype_sig &&
5375 rdtype != dns_rdatatype_nsec &&
5376 rdtype != dns_rdatatype_rrsig) {
5378 * Is it active and extant?
5381 if (header->serial <= serial &&
5384 * Is this a "this rdataset
5385 * doesn't exist" record?
5387 if (NONEXISTENT(header))
5391 header = header->down;
5392 } while (header != NULL);
5394 other_data = ISC_TRUE;
5399 if (cname && other_data)
5406 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5407 isc_result_t result;
5409 INSIST(newheader->heap_index == 0);
5410 INSIST(!ISC_LINK_LINKED(newheader, lru_link));
5411 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * add: link newheader into the rdataset header chains of rbtnode.
 *
 * The caller must be holding the node lock.
 *
 * Parameters, as used by the code below:
 *   rbtversion     version being modified.  NULL selects the cache
 *                  behaviour: negative cache handling, trust comparison
 *                  and TTL based staleness apply only in that case.
 *   newheader      header to add.  It is either linked into the node or
 *                  released with free_rdataset() on the early-return
 *                  paths visible here.
 *   options        DNS_DBADD_MERGE requires a non-NULL version.
 *                  DNS_DBADD_FORCE makes the new data compare as
 *                  dns_trust_ultimate instead of newheader->trust.
 *                  DNS_DBADD_EXACT is passed to the slab merge as
 *                  DNS_RDATASLAB_EXACT; DNS_DBADD_EXACTTTL turns a TTL
 *                  mismatch into DNS_R_NOTEXACT.
 *   loading        when true no changed record is created, and per the
 *                  comment in the body a replaced header is cleaned up
 *                  immediately.
 *   addedrdataset  when non-NULL it is bound to the header that ends up
 *                  representing the type: the existing header when the
 *                  add was rejected or absorbed, newheader otherwise.
 *   now            time used for the TTL comparisons and for binding.
 *
 * Outline:
 *   1. For a versioned, non-loading add, record the node as changed.
 *   2. Cache only: a negative entry covering all types makes every other
 *      header at the node stale; a positive entry is rejected when a
 *      live, more trusted NXDOMAIN/NODATA(QTYPE=ANY) entry exists, and
 *      expires that entry otherwise.
 *   3. Find the chain for the type (or its negative type) and skip
 *      IGNOREd headers.
 *   4. With an existing header: reject no-op deletes and lower-trust
 *      cache data, optionally merge slabs, keep an equal live NS/A/AAAA
 *      cache rdataset (only lowering its TTL and taking over
 *      noqname/closest), otherwise link newheader in place of or on top
 *      of the old chain and register it on the LRU list/heap (cache) or
 *      the resign heap.
 *   5. Without one: link newheader above an all-IGNORE chain or as a new
 *      chain.
 *
 * Returns ISC_R_SUCCESS, DNS_R_UNCHANGED, ISC_R_NOMEMORY (no changed
 * record) or DNS_R_CNAMEANDOTHER (a versioned add left the node with a
 * CNAME and other data).
 *
 * NOTE(review): several statements (declarations of trust, idx and
 * merge, the loop exits, the return after a failed merge) are not
 * visible in this listing -- confirm against the complete file.
 */
5416 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5417 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5418 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5420 rbtdb_changed_t *changed = NULL;
5421 rdatasetheader_t *topheader, *topheader_prev, *header;
5422 unsigned char *merged;
5423 isc_result_t result;
5424 isc_boolean_t header_nx;
5425 isc_boolean_t newheader_nx;
5426 isc_boolean_t merge;
5427 dns_rdatatype_t rdtype, covers;
5428 rbtdb_rdatatype_t negtype;
5433 * Add an rdatasetheader_t to a node.
5437 * Caller must be holding the node lock.
5440 if ((options & DNS_DBADD_MERGE) != 0) {
5441 REQUIRE(rbtversion != NULL);
5446 if ((options & DNS_DBADD_FORCE) != 0)
5447 trust = dns_trust_ultimate;
5449 trust = newheader->trust;
5451 if (rbtversion != NULL && !loading) {
5453 * We always add a changed record, even if no changes end up
5454 * being made to this node, because it's harmless and
5455 * simplifies the code.
5457 changed = add_changed(rbtdb, rbtversion, rbtnode);
5458 if (changed == NULL) {
5459 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5460 return (ISC_R_NOMEMORY);
5464 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5465 topheader_prev = NULL;
5468 if (rbtversion == NULL && !newheader_nx) {
5469 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5472 * We're adding a negative cache entry.
5474 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5475 if (covers == dns_rdatatype_any) {
5477 * We're adding an negative cache entry
5478 * which covers all types (NXDOMAIN,
5479 * NODATA(QTYPE=ANY)).
5481 * We make all other data stale so that the
5482 * only rdataset that can be found at this
5483 * node is the negative cache entry.
5485 for (topheader = rbtnode->data;
5487 topheader = topheader->next) {
5488 set_ttl(rbtdb, topheader, 0);
5489 topheader->attributes |=
5490 RDATASET_ATTR_STALE;
5495 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5498 * We're adding something that isn't a
5499 * negative cache entry. Look for an extant
5500 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5503 for (topheader = rbtnode->data;
5505 topheader = topheader->next) {
5506 if (topheader->type ==
5507 RBTDB_RDATATYPE_NCACHEANY)
5510 if (topheader != NULL && EXISTS(topheader) &&
5511 topheader->rdh_ttl > now) {
5515 if (trust < topheader->trust) {
5517 * The NXDOMAIN/NODATA(QTYPE=ANY)
5520 free_rdataset(rbtdb,
5523 if (addedrdataset != NULL)
5524 bind_rdataset(rbtdb, rbtnode,
5527 return (DNS_R_UNCHANGED);
5530 * The new rdataset is better. Expire the
5531 * NXDOMAIN/NODATA(QTYPE=ANY).
5533 set_ttl(rbtdb, topheader, 0);
5534 topheader->attributes |= RDATASET_ATTR_STALE;
5539 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5543 for (topheader = rbtnode->data;
5545 topheader = topheader->next) {
5546 if (topheader->type == newheader->type ||
5547 topheader->type == negtype)
5549 topheader_prev = topheader;
5554 * If header isn't NULL, we've found the right type. There may be
5555 * IGNORE rdatasets between the top of the chain and the first real
5556 * data. We skip over them.
5559 while (header != NULL && IGNORE(header))
5560 header = header->down;
5561 if (header != NULL) {
5562 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5565 * Deleting an already non-existent rdataset has no effect.
5567 if (header_nx && newheader_nx) {
5568 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5569 return (DNS_R_UNCHANGED);
5573 * Trying to add an rdataset with lower trust to a cache DB
5574 * has no effect, provided that the cache data isn't stale.
5576 if (rbtversion == NULL && trust < header->trust &&
5577 (header->rdh_ttl > now || header_nx)) {
5578 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5579 if (addedrdataset != NULL)
5580 bind_rdataset(rbtdb, rbtnode, header, now,
5582 return (DNS_R_UNCHANGED);
5586 * Don't merge if a nonexistent rdataset is involved.
5588 if (merge && (header_nx || newheader_nx))
5592 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5593 * that is the union of 'newheader' and 'header'.
5596 unsigned int flags = 0;
5597 INSIST(rbtversion->serial >= header->serial);
5599 result = ISC_R_SUCCESS;
5601 if ((options & DNS_DBADD_EXACT) != 0)
5602 flags |= DNS_RDATASLAB_EXACT;
5603 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5604 newheader->rdh_ttl != header->rdh_ttl)
5605 result = DNS_R_NOTEXACT;
5606 else if (newheader->rdh_ttl != header->rdh_ttl)
5607 flags |= DNS_RDATASLAB_FORCE;
5608 if (result == ISC_R_SUCCESS)
5609 result = dns_rdataslab_merge(
5610 (unsigned char *)header,
5611 (unsigned char *)newheader,
5612 (unsigned int)(sizeof(*newheader)),
5614 rbtdb->common.rdclass,
5615 (dns_rdatatype_t)header->type,
5617 if (result == ISC_R_SUCCESS) {
5619 * If 'header' has the same serial number as
5620 * we do, we could clean it up now if we knew
5621 * that our caller had no references to it.
5622 * We don't know this, however, so we leave it
5623 * alone. It will get cleaned up when
5624 * clean_zone_node() runs.
5626 free_rdataset(rbtdb, rbtdb->common.mctx,
5628 newheader = (rdatasetheader_t *)merged;
5629 if (loading && RESIGN(newheader) &&
5631 header->resign < newheader->resign)
5632 newheader->resign = header->resign;
5634 free_rdataset(rbtdb, rbtdb->common.mctx,
5640 * Don't replace existing NS, A and AAAA RRsets
5641 * in the cache if they are already exist. This
5642 * prevents named being locked to old servers.
5643 * Don't lower trust of existing record if the
5646 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5647 header->type == dns_rdatatype_ns &&
5648 !header_nx && !newheader_nx &&
5649 header->trust >= newheader->trust &&
5650 dns_rdataslab_equalx((unsigned char *)header,
5651 (unsigned char *)newheader,
5652 (unsigned int)(sizeof(*newheader)),
5653 rbtdb->common.rdclass,
5654 (dns_rdatatype_t)header->type)) {
5656 * Honour the new ttl if it is less than the
5659 if (header->rdh_ttl > newheader->rdh_ttl)
5660 set_ttl(rbtdb, header, newheader->rdh_ttl);
5661 if (header->noqname == NULL &&
5662 newheader->noqname != NULL) {
5663 header->noqname = newheader->noqname;
5664 newheader->noqname = NULL;
5666 if (header->closest == NULL &&
5667 newheader->closest != NULL) {
5668 header->closest = newheader->closest;
5669 newheader->closest = NULL;
5671 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5672 if (addedrdataset != NULL)
5673 bind_rdataset(rbtdb, rbtnode, header, now,
5675 return (ISC_R_SUCCESS);
5677 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5678 (header->type == dns_rdatatype_a ||
5679 header->type == dns_rdatatype_aaaa) &&
5680 !header_nx && !newheader_nx &&
5681 header->trust >= newheader->trust &&
5682 dns_rdataslab_equal((unsigned char *)header,
5683 (unsigned char *)newheader,
5684 (unsigned int)(sizeof(*newheader)))) {
5686 * Honour the new ttl if it is less than the
5689 if (header->rdh_ttl > newheader->rdh_ttl)
5690 set_ttl(rbtdb, header, newheader->rdh_ttl);
5691 if (header->noqname == NULL &&
5692 newheader->noqname != NULL) {
5693 header->noqname = newheader->noqname;
5694 newheader->noqname = NULL;
5696 if (header->closest == NULL &&
5697 newheader->closest != NULL) {
5698 header->closest = newheader->closest;
5699 newheader->closest = NULL;
5701 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5702 if (addedrdataset != NULL)
5703 bind_rdataset(rbtdb, rbtnode, header, now,
5705 return (ISC_R_SUCCESS);
5707 INSIST(rbtversion == NULL ||
5708 rbtversion->serial >= topheader->serial);
5709 if (topheader_prev != NULL)
5710 topheader_prev->next = newheader;
5712 rbtnode->data = newheader;
5713 newheader->next = topheader->next;
5716 * There are no other references to 'header' when
5717 * loading, so we MAY clean up 'header' now.
5718 * Since we don't generate changed records when
5719 * loading, we MUST clean up 'header' now.
5721 newheader->down = NULL;
5722 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5724 newheader->down = topheader;
5725 topheader->next = newheader;
5727 if (changed != NULL)
5728 changed->dirty = ISC_TRUE;
5729 if (rbtversion == NULL) {
5730 set_ttl(rbtdb, header, 0);
5731 header->attributes |= RDATASET_ATTR_STALE;
5733 idx = newheader->node->locknum;
5734 if (IS_CACHE(rbtdb)) {
5735 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5736 newheader, lru_link);
5738 * XXXMLG We don't check the return value
5739 * here. If it fails, we will not do TTL
5740 * based expiry on this node. However, we
5741 * will do it on the LRU side, so memory
5742 * will not leak... for long.
5744 isc_heap_insert(rbtdb->heaps[idx], newheader);
5745 } else if (RESIGN(newheader))
5746 resign_insert(rbtdb, idx, newheader);
5750 * No non-IGNORED rdatasets of the given type exist at
5755 * If we're trying to delete the type, don't bother.
5758 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5759 return (DNS_R_UNCHANGED);
5762 if (topheader != NULL) {
5764 * We have an list of rdatasets of the given type,
5765 * but they're all marked IGNORE. We simply insert
5766 * the new rdataset at the head of the list.
5768 * Ignored rdatasets cannot occur during loading, so
5772 INSIST(rbtversion == NULL ||
5773 rbtversion->serial >= topheader->serial);
5774 if (topheader_prev != NULL)
5775 topheader_prev->next = newheader;
5777 rbtnode->data = newheader;
5778 newheader->next = topheader->next;
5779 newheader->down = topheader;
5780 topheader->next = newheader;
5782 if (changed != NULL)
5783 changed->dirty = ISC_TRUE;
5786 * No rdatasets of the given type exist at the node.
5788 newheader->next = rbtnode->data;
5789 newheader->down = NULL;
5790 rbtnode->data = newheader;
5792 idx = newheader->node->locknum;
5793 if (IS_CACHE(rbtdb)) {
5794 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5795 newheader, lru_link);
5796 isc_heap_insert(rbtdb->heaps[idx], newheader);
5797 } else if (RESIGN(newheader)) {
5798 resign_insert(rbtdb, idx, newheader);
5803 * Check if the node now contains CNAME and other data.
5805 if (rbtversion != NULL &&
5806 cname_and_other_data(rbtnode, rbtversion->serial))
5807 return (DNS_R_CNAMEANDOTHER);
5809 if (addedrdataset != NULL)
5810 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5812 return (ISC_R_SUCCESS);
5815 static inline isc_boolean_t
5816 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5817 rbtdb_rdatatype_t type)
5819 if (IS_CACHE(rbtdb)) {
5820 if (type == dns_rdatatype_dname)
5824 } else if (type == dns_rdatatype_dname ||
5825 (type == dns_rdatatype_ns &&
5826 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5831 static inline isc_result_t
5832 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5833 dns_rdataset_t *rdataset)
5835 struct noqname *noqname;
5836 isc_mem_t *mctx = rbtdb->common.mctx;
5838 dns_rdataset_t neg, negsig;
5839 isc_result_t result;
5842 dns_name_init(&name, NULL);
5843 dns_rdataset_init(&neg);
5844 dns_rdataset_init(&negsig);
5846 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5847 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5849 noqname = isc_mem_get(mctx, sizeof(*noqname));
5850 if (noqname == NULL) {
5851 result = ISC_R_NOMEMORY;
5854 dns_name_init(&noqname->name, NULL);
5855 noqname->neg = NULL;
5856 noqname->negsig = NULL;
5857 noqname->type = neg.type;
5858 result = dns_name_dup(&name, mctx, &noqname->name);
5859 if (result != ISC_R_SUCCESS)
5861 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5862 if (result != ISC_R_SUCCESS)
5864 noqname->neg = r.base;
5865 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5866 if (result != ISC_R_SUCCESS)
5868 noqname->negsig = r.base;
5869 dns_rdataset_disassociate(&neg);
5870 dns_rdataset_disassociate(&negsig);
5871 newheader->noqname = noqname;
5872 return (ISC_R_SUCCESS);
5875 dns_rdataset_disassociate(&neg);
5876 dns_rdataset_disassociate(&negsig);
5877 free_noqname(mctx, &noqname);
5881 static inline isc_result_t
5882 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5883 dns_rdataset_t *rdataset)
5885 struct noqname *closest;
5886 isc_mem_t *mctx = rbtdb->common.mctx;
5888 dns_rdataset_t neg, negsig;
5889 isc_result_t result;
5892 dns_name_init(&name, NULL);
5893 dns_rdataset_init(&neg);
5894 dns_rdataset_init(&negsig);
5896 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5897 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5899 closest = isc_mem_get(mctx, sizeof(*closest));
5900 if (closest == NULL) {
5901 result = ISC_R_NOMEMORY;
5904 dns_name_init(&closest->name, NULL);
5905 closest->neg = NULL;
5906 closest->negsig = NULL;
5907 closest->type = neg.type;
5908 result = dns_name_dup(&name, mctx, &closest->name);
5909 if (result != ISC_R_SUCCESS)
5911 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5912 if (result != ISC_R_SUCCESS)
5914 closest->neg = r.base;
5915 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5916 if (result != ISC_R_SUCCESS)
5918 closest->negsig = r.base;
5919 dns_rdataset_disassociate(&neg);
5920 dns_rdataset_disassociate(&negsig);
5921 newheader->closest = closest;
5922 return (ISC_R_SUCCESS);
5925 dns_rdataset_disassociate(&neg);
5926 dns_rdataset_disassociate(&negsig);
5927 free_noqname(mctx, &closest);
5931 static dns_dbmethods_t zone_methods;
/*
 * addrdataset: convert rdataset into a slab-backed rdatasetheader_t and
 * add it to node through add().
 *
 * For a zone database (zone_methods) NSEC3 data -- by type or by covered
 * type -- may only be added to NSEC3 nodes and nothing else may be added
 * there (REQUIRE below).
 *
 * Header set-up: the stored TTL is rdataset->ttl + now; trust and the
 * NXDOMAIN, OPTOUT, NOQNAME and CLOSEST attributes are carried over from
 * rdataset (the latter two through addnoqname()/addclosest(), whose
 * failure releases the header).  With a version the serial is taken
 * from it and the RESIGN attribute and resign time are copied;
 * otherwise serial is 1 and resign is 0.
 *
 * Locking and housekeeping, in order:
 *   - the tree write lock is taken when a delegating type is added or
 *     when the database is a cache in overmem state;
 *   - in overmem state overmem_purge() runs before the node lock is
 *     taken for writing;
 *   - rrset statistics are updated when enabled;
 *   - for a cache, dead nodes of this lock bucket are cleaned up and the
 *     top element of the bucket's TTL heap is expired once it is older
 *     than now - RBTDB_VIRTUAL; a tree lock held only for that cleaning
 *     is released before add() is called;
 *   - after a successful add of a delegating type the node's
 *     find_callback bit is set.
 *
 * After unlocking, iszonesecure() is called when the add succeeded, no
 * version was given and the database is not a cache.
 *
 * NOTE(review): the condition guarding isc_stdtime_get(), the early
 * returns after failed conversions and the final return are not visible
 * in this listing -- confirm against the complete file.
 */
5934 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5935 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5936 dns_rdataset_t *addedrdataset)
5938 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5939 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5940 rbtdb_version_t *rbtversion = version;
5941 isc_region_t region;
5942 rdatasetheader_t *newheader;
5943 rdatasetheader_t *header;
5944 isc_result_t result;
5945 isc_boolean_t delegating;
5946 isc_boolean_t tree_locked = ISC_FALSE;
5948 REQUIRE(VALID_RBTDB(rbtdb));
5950 if (rbtdb->common.methods == &zone_methods)
5951 REQUIRE(((rbtnode->nsec3 &&
5952 (rdataset->type == dns_rdatatype_nsec3 ||
5953 rdataset->covers == dns_rdatatype_nsec3)) ||
5955 rdataset->type != dns_rdatatype_nsec3 &&
5956 rdataset->covers != dns_rdatatype_nsec3)));
5958 if (rbtversion == NULL) {
5960 isc_stdtime_get(&now);
5964 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5966 sizeof(rdatasetheader_t));
5967 if (result != ISC_R_SUCCESS)
5970 newheader = (rdatasetheader_t *)region.base;
5971 init_rdataset(rbtdb, newheader);
5972 set_ttl(rbtdb, newheader, rdataset->ttl + now);
5973 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5975 newheader->attributes = 0;
5976 newheader->noqname = NULL;
5977 newheader->closest = NULL;
5978 newheader->count = init_count++;
5979 newheader->trust = rdataset->trust;
5980 newheader->additional_auth = NULL;
5981 newheader->additional_glue = NULL;
5982 newheader->last_used = now;
5983 newheader->node = rbtnode;
5984 if (rbtversion != NULL) {
5985 newheader->serial = rbtversion->serial;
5988 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5989 newheader->attributes |= RDATASET_ATTR_RESIGN;
5990 newheader->resign = rdataset->resign;
5992 newheader->resign = 0;
5994 newheader->serial = 1;
5995 newheader->resign = 0;
5996 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5997 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
5998 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
5999 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6000 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6001 result = addnoqname(rbtdb, newheader, rdataset);
6002 if (result != ISC_R_SUCCESS) {
6003 free_rdataset(rbtdb, rbtdb->common.mctx,
6008 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6009 result = addclosest(rbtdb, newheader, rdataset);
6010 if (result != ISC_R_SUCCESS) {
6011 free_rdataset(rbtdb, rbtdb->common.mctx,
6019 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6020 * just DNAME for the cache), then we need to set the callback bit
6023 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6024 delegating = ISC_TRUE;
6026 delegating = ISC_FALSE;
6029 * If we're adding a delegation type or the DB is a cache in an overmem
6030 * state, hold an exclusive lock on the tree. In the latter case
6031 * the lock does not necessarily have to be acquired but it will help
6032 * purge stale entries more effectively.
6034 if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6035 tree_locked = ISC_TRUE;
6036 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6039 if (IS_CACHE(rbtdb) && rbtdb->overmem)
6040 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6042 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6043 isc_rwlocktype_write);
6045 if (rbtdb->rrsetstats != NULL) {
6046 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6047 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6050 if (IS_CACHE(rbtdb)) {
6052 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6054 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6055 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6056 expire_header(rbtdb, header, tree_locked);
6059 * If we've been holding a write lock on the tree just for
6060 * cleaning, we can release it now. However, we still need the
6063 if (tree_locked && !delegating) {
6064 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6065 tree_locked = ISC_FALSE;
6069 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6070 addedrdataset, now);
6071 if (result == ISC_R_SUCCESS && delegating)
6072 rbtnode->find_callback = 1;
6074 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6075 isc_rwlocktype_write);
6078 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6081 * Update the zone's secure status. If version is non-NULL
6082 * this is deferred until closeversion() is called.
6084 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6085 iszonesecure(db, version, rbtdb->origin_node);
/*
 * subtractrdataset: remove the rdata contained in rdataset from the
 * rdataset of the same type at node, in the given version.
 *
 * The version is dereferenced without a NULL check (serial is read from
 * it), so a version must be supplied.  For a zone database the same
 * NSEC3 node/type pairing as in addrdataset() is required.
 *
 * Steps, all under the node write lock once the header has been built:
 *   1. rdataset is converted to a slab-backed header (TTL taken as is,
 *      trust 0, RESIGN attribute and time copied when set).
 *   2. The node is recorded as changed; failure to do so frees the new
 *      header and returns ISC_R_NOMEMORY.
 *   3. The chain of the same type is located and IGNOREd headers are
 *      skipped.
 *   4. If an existing header is found, dns_rdataslab_subtract() is
 *      applied (with DNS_RDATASLAB_EXACT for DNS_DBSUB_EXACT, which also
 *      turns a TTL mismatch into DNS_R_NOTEXACT):
 *        - success: the subtraction result replaces the new header; its
 *          serial is reset to the version serial and the additional-info
 *          pointers copied by the subtraction are cleared;
 *        - DNS_R_NXRRSET: everything would be removed, so a header
 *          marked RDATASET_ATTR_NONEXISTENT is created instead;
 *        - anything else: the new header is freed.
 *      The resulting header is then linked in front of the old chain and
 *      the changed record is marked dirty.
 *   5. If no existing header is found the new header is freed and the
 *      result is DNS_R_NOTEXACT (DNS_DBSUB_EXACT) or DNS_R_UNCHANGED.
 *
 * On ISC_R_SUCCESS newrdataset, when non-NULL, is bound to the resulting
 * header.  After unlocking, iszonesecure() is called when the operation
 * succeeded, version is NULL and the database is not a cache.
 *
 * NOTE(review): the jump targets used on the failure paths and the final
 * return are not visible in this listing -- confirm against the complete
 * file.
 */
6091 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6092 dns_rdataset_t *rdataset, unsigned int options,
6093 dns_rdataset_t *newrdataset)
6095 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6096 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6097 rbtdb_version_t *rbtversion = version;
6098 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6099 unsigned char *subresult;
6100 isc_region_t region;
6101 isc_result_t result;
6102 rbtdb_changed_t *changed;
6104 REQUIRE(VALID_RBTDB(rbtdb));
6106 if (rbtdb->common.methods == &zone_methods)
6107 REQUIRE(((rbtnode->nsec3 &&
6108 (rdataset->type == dns_rdatatype_nsec3 ||
6109 rdataset->covers == dns_rdatatype_nsec3)) ||
6111 rdataset->type != dns_rdatatype_nsec3 &&
6112 rdataset->covers != dns_rdatatype_nsec3)));
6114 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6116 sizeof(rdatasetheader_t));
6117 if (result != ISC_R_SUCCESS)
6119 newheader = (rdatasetheader_t *)region.base;
6120 init_rdataset(rbtdb, newheader);
6121 set_ttl(rbtdb, newheader, rdataset->ttl);
6122 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6124 newheader->attributes = 0;
6125 newheader->serial = rbtversion->serial;
6126 newheader->trust = 0;
6127 newheader->noqname = NULL;
6128 newheader->closest = NULL;
6129 newheader->count = init_count++;
6130 newheader->additional_auth = NULL;
6131 newheader->additional_glue = NULL;
6132 newheader->last_used = 0;
6133 newheader->node = rbtnode;
6134 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6135 newheader->attributes |= RDATASET_ATTR_RESIGN;
6136 newheader->resign = rdataset->resign;
6138 newheader->resign = 0;
6140 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6141 isc_rwlocktype_write);
6143 changed = add_changed(rbtdb, rbtversion, rbtnode);
6144 if (changed == NULL) {
6145 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6146 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6147 isc_rwlocktype_write);
6148 return (ISC_R_NOMEMORY);
6151 topheader_prev = NULL;
6152 for (topheader = rbtnode->data;
6154 topheader = topheader->next) {
6155 if (topheader->type == newheader->type)
6157 topheader_prev = topheader;
6160 * If header isn't NULL, we've found the right type. There may be
6161 * IGNORE rdatasets between the top of the chain and the first real
6162 * data. We skip over them.
6165 while (header != NULL && IGNORE(header))
6166 header = header->down;
6167 if (header != NULL && EXISTS(header)) {
6168 unsigned int flags = 0;
6170 result = ISC_R_SUCCESS;
6171 if ((options & DNS_DBSUB_EXACT) != 0) {
6172 flags |= DNS_RDATASLAB_EXACT;
6173 if (newheader->rdh_ttl != header->rdh_ttl)
6174 result = DNS_R_NOTEXACT;
6176 if (result == ISC_R_SUCCESS)
6177 result = dns_rdataslab_subtract(
6178 (unsigned char *)header,
6179 (unsigned char *)newheader,
6180 (unsigned int)(sizeof(*newheader)),
6182 rbtdb->common.rdclass,
6183 (dns_rdatatype_t)header->type,
6185 if (result == ISC_R_SUCCESS) {
6186 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6187 newheader = (rdatasetheader_t *)subresult;
6188 init_rdataset(rbtdb, newheader);
6190 * We have to set the serial since the rdataslab
6191 * subtraction routine copies the reserved portion of
6192 * header, not newheader.
6194 newheader->serial = rbtversion->serial;
6196 * XXXJT: dns_rdataslab_subtract() copied the pointers
6197 * to additional info. We need to clear these fields
6198 * to avoid having duplicated references.
6200 newheader->additional_auth = NULL;
6201 newheader->additional_glue = NULL;
6202 } else if (result == DNS_R_NXRRSET) {
6204 * This subtraction would remove all of the rdata;
6205 * add a nonexistent header instead.
6207 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6208 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6209 if (newheader == NULL) {
6210 result = ISC_R_NOMEMORY;
6213 set_ttl(rbtdb, newheader, 0);
6214 newheader->type = topheader->type;
6215 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6216 newheader->trust = 0;
6217 newheader->serial = rbtversion->serial;
6218 newheader->noqname = NULL;
6219 newheader->closest = NULL;
6220 newheader->count = 0;
6221 newheader->additional_auth = NULL;
6222 newheader->additional_glue = NULL;
6223 newheader->node = rbtnode;
6224 newheader->resign = 0;
6225 newheader->last_used = 0;
6227 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6232 * If we're here, we want to link newheader in front of
6235 INSIST(rbtversion->serial >= topheader->serial);
6236 if (topheader_prev != NULL)
6237 topheader_prev->next = newheader;
6239 rbtnode->data = newheader;
6240 newheader->next = topheader->next;
6241 newheader->down = topheader;
6242 topheader->next = newheader;
6244 changed->dirty = ISC_TRUE;
6247 * The rdataset doesn't exist, so we don't need to do anything
6248 * to satisfy the deletion request.
6250 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6251 if ((options & DNS_DBSUB_EXACT) != 0)
6252 result = DNS_R_NOTEXACT;
6254 result = DNS_R_UNCHANGED;
6257 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6258 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6261 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6262 isc_rwlocktype_write);
6265 * Update the zone's secure status. If version is non-NULL
6266 * this is deferred until closeversion() is called.
6268 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6269 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6275 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6276 dns_rdatatype_t type, dns_rdatatype_t covers)
6278 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6279 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6280 rbtdb_version_t *rbtversion = version;
6281 isc_result_t result;
6282 rdatasetheader_t *newheader;
6284 REQUIRE(VALID_RBTDB(rbtdb));
6286 if (type == dns_rdatatype_any)
6287 return (ISC_R_NOTIMPLEMENTED);
6288 if (type == dns_rdatatype_rrsig && covers == 0)
6289 return (ISC_R_NOTIMPLEMENTED);
6291 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6292 if (newheader == NULL)
6293 return (ISC_R_NOMEMORY);
6294 set_ttl(rbtdb, newheader, 0);
6295 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6296 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6297 newheader->trust = 0;
6298 newheader->noqname = NULL;
6299 newheader->closest = NULL;
6300 newheader->additional_auth = NULL;
6301 newheader->additional_glue = NULL;
6302 if (rbtversion != NULL)
6303 newheader->serial = rbtversion->serial;
6305 newheader->serial = 0;
6306 newheader->count = 0;
6307 newheader->last_used = 0;
6308 newheader->node = rbtnode;
6310 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6311 isc_rwlocktype_write);
6313 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6314 ISC_FALSE, NULL, 0);
6316 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6317 isc_rwlocktype_write);
6320 * Update the zone's secure status. If version is non-NULL
6321 * this is deferred until closeversion() is called.
6323 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6324 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * loading_addrdataset: callback installed by beginload(); adds one
 * rdataset owned by name to the database being loaded.  arg is the
 * rbtdb_load_t created by beginload().
 *
 * No node locking is done here (see the note in the body).
 *
 * Checks made before anything is added:
 *   - an SOA in a non-cache database must be at the zone origin,
 *     otherwise DNS_R_NOTZONETOP is returned;
 *   - a wildcard owner name may not hold NS (DNS_R_INVALIDNS) or NSEC3
 *     (DNS_R_INVALIDNSEC3).
 * Empty wildcard nodes are added for data that is not NSEC3 related, and
 * add_wildcard_magic() is applied to wildcard names.
 *
 * NSEC3 data (by type or covered type) goes to the nsec3 tree, all other
 * data to the main tree.  For a node that did not exist before, the lock
 * bucket number is derived from the node hash or the name hash.
 *
 * The rdataset is converted to a slab-backed header with TTL
 * rdataset->ttl + loadctx->now (no overflow check, as flagged in the
 * body), serial 1, and the RESIGN attribute/time copied when set.  It is
 * added with add() using DNS_DBADD_MERGE and loading set to true.  A
 * successful add of a delegating type sets find_callback on the node,
 * and DNS_R_UNCHANGED is mapped to ISC_R_SUCCESS.
 *
 * NOTE(review): the statements executed right after the node is created
 * and the return statements on the failure and final paths are not
 * visible in this listing -- confirm against the complete file.
 */
6330 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6331 rbtdb_load_t *loadctx = arg;
6332 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6333 dns_rbtnode_t *node;
6334 isc_result_t result;
6335 isc_region_t region;
6336 rdatasetheader_t *newheader;
6339 * This routine does no node locking. See comments in
6340 * 'load' below for more information on loading and
6346 * SOA records are only allowed at top of zone.
6348 if (rdataset->type == dns_rdatatype_soa &&
6349 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6350 return (DNS_R_NOTZONETOP);
6352 if (rdataset->type != dns_rdatatype_nsec3 &&
6353 rdataset->covers != dns_rdatatype_nsec3)
6354 add_empty_wildcards(rbtdb, name);
6356 if (dns_name_iswildcard(name)) {
6358 * NS record owners cannot legally be wild cards.
6360 if (rdataset->type == dns_rdatatype_ns)
6361 return (DNS_R_INVALIDNS);
6363 * NSEC3 record owners cannot legally be wild cards.
6365 if (rdataset->type == dns_rdatatype_nsec3)
6366 return (DNS_R_INVALIDNSEC3);
6367 result = add_wildcard_magic(rbtdb, name);
6368 if (result != ISC_R_SUCCESS)
6373 if (rdataset->type == dns_rdatatype_nsec3 ||
6374 rdataset->covers == dns_rdatatype_nsec3) {
6375 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6376 if (result == ISC_R_SUCCESS)
6379 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6380 if (result == ISC_R_SUCCESS)
6383 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6385 if (result != ISC_R_EXISTS) {
6386 dns_name_t foundname;
6387 dns_name_init(&foundname, NULL);
6388 dns_rbt_namefromnode(node, &foundname);
6389 #ifdef DNS_RBT_USEHASH
6390 node->locknum = node->hashval % rbtdb->node_lock_count;
6392 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6393 rbtdb->node_lock_count;
6397 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6399 sizeof(rdatasetheader_t));
6400 if (result != ISC_R_SUCCESS)
6402 newheader = (rdatasetheader_t *)region.base;
6403 init_rdataset(rbtdb, newheader);
6404 set_ttl(rbtdb, newheader,
6405 rdataset->ttl + loadctx->now); /* XXX overflow check */
6406 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6408 newheader->attributes = 0;
6409 newheader->trust = rdataset->trust;
6410 newheader->serial = 1;
6411 newheader->noqname = NULL;
6412 newheader->closest = NULL;
6413 newheader->count = init_count++;
6414 newheader->additional_auth = NULL;
6415 newheader->additional_glue = NULL;
6416 newheader->last_used = 0;
6417 newheader->node = node;
6418 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6419 newheader->attributes |= RDATASET_ATTR_RESIGN;
6420 newheader->resign = rdataset->resign;
6422 newheader->resign = 0;
6424 result = add(rbtdb, node, rbtdb->current_version, newheader,
6425 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6426 if (result == ISC_R_SUCCESS &&
6427 delegating_type(rbtdb, node, rdataset->type))
6428 node->find_callback = 1;
6429 else if (result == DNS_R_UNCHANGED)
6430 result = ISC_R_SUCCESS;
6436 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6437 rbtdb_load_t *loadctx;
6440 rbtdb = (dns_rbtdb_t *)db;
6442 REQUIRE(VALID_RBTDB(rbtdb));
6444 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6445 if (loadctx == NULL)
6446 return (ISC_R_NOMEMORY);
6448 loadctx->rbtdb = rbtdb;
6449 if (IS_CACHE(rbtdb))
6450 isc_stdtime_get(&loadctx->now);
6454 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6456 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6458 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6460 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6462 *addp = loading_addrdataset;
6465 return (ISC_R_SUCCESS);
6469 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6470 rbtdb_load_t *loadctx;
6471 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6473 REQUIRE(VALID_RBTDB(rbtdb));
6474 REQUIRE(dbloadp != NULL);
6476 REQUIRE(loadctx->rbtdb == rbtdb);
6478 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6480 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6481 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6483 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6484 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6486 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6489 * If there's a KEY rdataset at the zone origin containing a
6490 * zone key, we consider the zone secure.
6492 if (! IS_CACHE(rbtdb))
6493 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6497 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6499 return (ISC_R_SUCCESS);
6503 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6504 dns_masterformat_t masterformat) {
6507 rbtdb = (dns_rbtdb_t *)db;
6509 REQUIRE(VALID_RBTDB(rbtdb));
6511 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6512 &dns_master_style_default,
6513 filename, masterformat));
6517 delete_callback(void *data, void *arg) {
6518 dns_rbtdb_t *rbtdb = arg;
6519 rdatasetheader_t *current, *next;
6521 for (current = data; current != NULL; current = next) {
6522 next = current->next;
6523 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6527 static isc_boolean_t
6528 issecure(dns_db_t *db) {
6530 isc_boolean_t secure;
6532 rbtdb = (dns_rbtdb_t *)db;
6534 REQUIRE(VALID_RBTDB(rbtdb));
6536 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6537 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6538 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6543 static isc_boolean_t
6544 isdnssec(dns_db_t *db) {
6546 isc_boolean_t dnssec;
6548 rbtdb = (dns_rbtdb_t *)db;
6550 REQUIRE(VALID_RBTDB(rbtdb));
6552 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6553 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6554 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6560 nodecount(dns_db_t *db) {
6564 rbtdb = (dns_rbtdb_t *)db;
6566 REQUIRE(VALID_RBTDB(rbtdb));
6568 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6569 count = dns_rbt_nodecount(rbtdb->tree);
6570 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6576 settask(dns_db_t *db, isc_task_t *task) {
6579 rbtdb = (dns_rbtdb_t *)db;
6581 REQUIRE(VALID_RBTDB(rbtdb));
6583 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6584 if (rbtdb->task != NULL)
6585 isc_task_detach(&rbtdb->task);
6587 isc_task_attach(task, &rbtdb->task);
6588 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Persistence query for this database implementation; takes the database
 * and yields an isc_boolean_t.
 * NOTE(review): the statements of this function are not present in this
 * listing, so the value it returns must be confirmed against the full file.
 */
6591 static isc_boolean_t
6592 ispersistent(dns_db_t *db) {
6598 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6599 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6600 dns_rbtnode_t *onode;
6601 isc_result_t result = ISC_R_SUCCESS;
6603 REQUIRE(VALID_RBTDB(rbtdb));
6604 REQUIRE(nodep != NULL && *nodep == NULL);
6606 /* Note that the access to origin_node doesn't require a DB lock */
6607 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6608 if (onode != NULL) {
6609 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6610 new_reference(rbtdb, onode);
6611 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6613 *nodep = rbtdb->origin_node;
6615 INSIST(IS_CACHE(rbtdb));
6616 result = ISC_R_NOTFOUND;
6623 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6624 isc_uint8_t *flags, isc_uint16_t *iterations,
6625 unsigned char *salt, size_t *salt_length)
6628 isc_result_t result = ISC_R_NOTFOUND;
6629 rbtdb_version_t *rbtversion = version;
6631 rbtdb = (dns_rbtdb_t *)db;
6633 REQUIRE(VALID_RBTDB(rbtdb));
6635 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6637 if (rbtversion == NULL)
6638 rbtversion = rbtdb->current_version;
6640 if (rbtversion->havensec3) {
6642 *hash = rbtversion->hash;
6643 if (salt != NULL && salt_length != 0) {
6644 REQUIRE(*salt_length > rbtversion->salt_length);
6645 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6647 if (salt_length != NULL)
6648 *salt_length = rbtversion->salt_length;
6649 if (iterations != NULL)
6650 *iterations = rbtversion->iterations;
6652 *flags = rbtversion->flags;
6653 result = ISC_R_SUCCESS;
6655 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6661 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6662 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6663 isc_stdtime_t oldresign;
6664 isc_result_t result = ISC_R_SUCCESS;
6665 rdatasetheader_t *header;
6667 REQUIRE(VALID_RBTDB(rbtdb));
6668 REQUIRE(!IS_CACHE(rbtdb));
6669 REQUIRE(rdataset != NULL);
6671 header = rdataset->private3;
6674 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6675 isc_rwlocktype_write);
6677 oldresign = header->resign;
6678 header->resign = resign;
6679 if (header->heap_index != 0) {
6680 INSIST(RESIGN(header));
6682 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6683 header->heap_index);
6684 header->heap_index = 0;
6685 } else if (resign < oldresign)
6686 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6687 header->heap_index);
6689 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6690 header->heap_index);
6691 } else if (resign && header->heap_index == 0) {
6692 header->attributes |= RDATASET_ATTR_RESIGN;
6693 result = resign_insert(rbtdb, header->node->locknum, header);
6695 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6696 isc_rwlocktype_write);
6701 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6702 dns_name_t *foundname)
6704 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6705 rdatasetheader_t *header = NULL, *this;
6707 isc_result_t result = ISC_R_NOTFOUND;
6709 REQUIRE(VALID_RBTDB(rbtdb));
6711 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6713 for (i = 0; i < rbtdb->node_lock_count; i++) {
6714 this = isc_heap_element(rbtdb->heaps[i], 1);
6719 else if (isc_serial_lt(this->resign, header->resign))
6726 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6727 isc_rwlocktype_read);
6729 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6731 if (foundname != NULL)
6732 dns_rbt_fullnamefromnode(header->node, foundname);
6734 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6735 isc_rwlocktype_read);
6737 result = ISC_R_SUCCESS;
6740 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6746 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6748 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6749 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6750 dns_rbtnode_t *node;
6751 rdatasetheader_t *header;
6753 REQUIRE(VALID_RBTDB(rbtdb));
6754 REQUIRE(rdataset != NULL);
6755 REQUIRE(rbtdb->future_version == rbtversion);
6756 REQUIRE(rbtversion->writer);
6758 node = rdataset->private2;
6759 header = rdataset->private3;
6762 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6763 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6764 isc_rwlocktype_write);
6766 * Delete from heap and save to re-signed list so that it can
6767 * be restored if we backout of this change.
6769 new_reference(rbtdb, node);
6770 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6771 header->heap_index = 0;
6772 ISC_LIST_APPEND(rbtversion->resigned_list, header, lru_link);
6774 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6775 isc_rwlocktype_write);
6776 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6779 static dns_stats_t *
6780 getrrsetstats(dns_db_t *db) {
6781 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6783 REQUIRE(VALID_RBTDB(rbtdb));
6784 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6786 return (rbtdb->rrsetstats);
/*
 * Method table that the create routine installs for stub databases and
 * for every other database type that is not dns_dbtype_cache.
 */
6789 static dns_dbmethods_t zone_methods = {
/*
 * Method table that the create routine installs when the requested
 * database type is dns_dbtype_cache.
 */
6828 static dns_dbmethods_t cache_methods = {
/*
 * Create a red-black-tree database and return it through dbp.
 *
 * Visible steps, in order:
 *   - allocate the dns_rbtdb_t from mctx and zero it;
 *   - choose cache_methods or zone_methods from the requested type and set
 *     the CACHE or STUB attribute accordingly;
 *   - initialise the database lock and the tree rwlock;
 *   - pick the node lock count and allocate the node lock array;
 *   - for a cache, create the rrset statistics and one rdataset list per
 *     lock bucket;
 *   - allocate one heap per lock bucket, ordered by ttl_sooner for a cache
 *     and by resign_sooner otherwise;
 *   - allocate one dead-node list per lock bucket;
 *   - initialise every node lock and its reference counter;
 *   - attach to mctx, copy the origin name, create the main and the nsec3
 *     trees and, for a non-cache database, create the origin node and give
 *     it a lock number;
 *   - initialise the reference count, serial numbers and the first
 *     version, then set the magic numbers and store the result in dbp.
 *
 * Failures before the attach to mctx unwind through the put/destroy
 * statements at the end of the function; later failures go through
 * free_rbtdb().
 *
 * NOTE(review): node_lock_count is tested against 0 directly after the
 * structure was zeroed, so from what is visible here the default-count
 * branch is always the one taken.
 */
6868 #ifdef DNS_RBTDB_VERSION64
6873 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6874 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6875 void *driverarg, dns_db_t **dbp)
6878 isc_result_t result;
6881 isc_boolean_t (*sooner)(void *, void *);
6883 /* Keep the compiler happy. */
6888 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6890 return (ISC_R_NOMEMORY);
6892 memset(rbtdb, '\0', sizeof(*rbtdb));
6893 dns_name_init(&rbtdb->common.origin, NULL);
6894 rbtdb->common.attributes = 0;
6895 if (type == dns_dbtype_cache) {
6896 rbtdb->common.methods = &cache_methods;
6897 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6898 } else if (type == dns_dbtype_stub) {
6899 rbtdb->common.methods = &zone_methods;
6900 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6902 rbtdb->common.methods = &zone_methods;
6903 rbtdb->common.rdclass = rdclass;
6904 rbtdb->common.mctx = NULL;
6906 result = RBTDB_INITLOCK(&rbtdb->lock);
6907 if (result != ISC_R_SUCCESS)
6910 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6911 if (result != ISC_R_SUCCESS)
6915 * Initialize node_lock_count in a generic way to support future
6916 * extension which allows the user to specify this value on creation.
6917 * Note that when specified for a cache DB it must be larger than 1
6918 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6920 if (rbtdb->node_lock_count == 0) {
6921 if (IS_CACHE(rbtdb))
6922 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6924 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6925 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6926 result = ISC_R_RANGE;
6927 goto cleanup_tree_lock;
6929 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6930 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6931 sizeof(rbtdb_nodelock_t));
6932 if (rbtdb->node_locks == NULL) {
6933 result = ISC_R_NOMEMORY;
6934 goto cleanup_tree_lock;
6937 rbtdb->rrsetstats = NULL;
6938 if (IS_CACHE(rbtdb)) {
6939 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6940 if (result != ISC_R_SUCCESS)
6941 goto cleanup_node_locks;
6942 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6943 sizeof(rdatasetheaderlist_t));
6944 if (rbtdb->rdatasets == NULL) {
6945 result = ISC_R_NOMEMORY;
6946 goto cleanup_rrsetstats;
6948 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6949 ISC_LIST_INIT(rbtdb->rdatasets[i]);
6951 rbtdb->rdatasets = NULL;
6956 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6957 sizeof(isc_heap_t *));
6958 if (rbtdb->heaps == NULL) {
6959 result = ISC_R_NOMEMORY;
6960 goto cleanup_rdatasets;
6962 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6963 rbtdb->heaps[i] = NULL;
6964 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6965 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6966 result = isc_heap_create(mctx, sooner, set_index, 0,
6968 if (result != ISC_R_SUCCESS)
6973 * Create deadnode lists.
6975 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6976 sizeof(rbtnodelist_t));
6977 if (rbtdb->deadnodes == NULL) {
6978 result = ISC_R_NOMEMORY;
6981 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6982 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6984 rbtdb->active = rbtdb->node_lock_count;
6986 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
6987 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
6988 if (result == ISC_R_SUCCESS) {
6989 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
6990 if (result != ISC_R_SUCCESS)
6991 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
/*
 * NOTE(review): the unwinding below decrements reference counters that
 * were initialised with 0 a few lines above; confirm that
 * isc_refcount_decrement tolerates a counter that is already zero.
 */
6993 if (result != ISC_R_SUCCESS) {
6995 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
6996 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
6997 isc_refcount_destroy(&rbtdb->node_locks[i].references);
6999 goto cleanup_deadnodes;
7001 rbtdb->node_locks[i].exiting = ISC_FALSE;
7005 * Attach to the mctx. The database will persist so long as there
7006 * are references to it, and attaching to the mctx ensures that our
7007 * mctx won't disappear out from under us.
7009 isc_mem_attach(mctx, &rbtdb->common.mctx);
7012 * Must be initialized before free_rbtdb() is called.
7014 isc_ondestroy_init(&rbtdb->common.ondest);
7017 * Make a copy of the origin name.
7019 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7020 if (result != ISC_R_SUCCESS) {
7021 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7026 * Make the Red-Black Trees.
7028 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7029 if (result != ISC_R_SUCCESS) {
7030 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7034 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7035 if (result != ISC_R_SUCCESS) {
7036 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7041 * In order to set the node callback bit correctly in zone databases,
7042 * we need to know if the node has the origin name of the zone.
7043 * In loading_addrdataset() we could simply compare the new name
7044 * to the origin name, but this is expensive. Also, we don't know the
7045 * node name in addrdataset(), so we need another way of knowing the
7048 * We now explicitly create a node for the zone's origin, and then
7049 * we simply remember the node's address. This is safe, because
7050 * the top-of-zone node can never be deleted, nor can its address
7053 if (!IS_CACHE(rbtdb)) {
7054 rbtdb->origin_node = NULL;
7055 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7056 &rbtdb->origin_node);
7057 if (result != ISC_R_SUCCESS) {
7058 INSIST(result != ISC_R_EXISTS);
7059 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7062 rbtdb->origin_node->nsec3 = 0;
7064 * We need to give the origin node the right locknum.
7066 dns_name_init(&name, NULL);
7067 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7068 #ifdef DNS_RBT_USEHASH
7069 rbtdb->origin_node->locknum =
7070 rbtdb->origin_node->hashval %
7071 rbtdb->node_lock_count;
7073 rbtdb->origin_node->locknum =
7074 dns_name_hash(&name, ISC_TRUE) %
7075 rbtdb->node_lock_count;
7080 * Misc. Initialization.
7082 result = isc_refcount_init(&rbtdb->references, 1);
7083 if (result != ISC_R_SUCCESS) {
7084 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7087 rbtdb->attributes = 0;
7088 rbtdb->overmem = ISC_FALSE;
7092 * Version Initialization.
7094 rbtdb->current_serial = 1;
7095 rbtdb->least_serial = 1;
7096 rbtdb->next_serial = 2;
7097 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7098 if (rbtdb->current_version == NULL) {
7099 isc_refcount_decrement(&rbtdb->references, NULL);
7100 isc_refcount_destroy(&rbtdb->references);
7101 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7102 return (ISC_R_NOMEMORY);
7104 rbtdb->current_version->secure = dns_db_insecure;
7105 rbtdb->current_version->havensec3 = ISC_FALSE;
7106 rbtdb->current_version->flags = 0;
7107 rbtdb->current_version->iterations = 0;
7108 rbtdb->current_version->hash = 0;
7109 rbtdb->current_version->salt_length = 0;
7110 memset(rbtdb->current_version->salt, 0,
7111 sizeof(rbtdb->current_version->salt));
7112 rbtdb->future_version = NULL;
7113 ISC_LIST_INIT(rbtdb->open_versions);
7115 * Keep the current version in the open list so that list operation
7116 * won't happen in normal lookup operations.
7118 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7120 rbtdb->common.magic = DNS_DB_MAGIC;
7121 rbtdb->common.impmagic = RBTDB_MAGIC;
7123 *dbp = (dns_db_t *)rbtdb;
7125 return (ISC_R_SUCCESS);
7128 isc_mem_put(mctx, rbtdb->deadnodes,
7129 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7132 if (rbtdb->heaps != NULL) {
7133 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7134 if (rbtdb->heaps[i] != NULL)
7135 isc_heap_destroy(&rbtdb->heaps[i]);
7136 isc_mem_put(mctx, rbtdb->heaps,
7137 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7141 if (rbtdb->rdatasets != NULL)
7142 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7143 sizeof(rdatasetheaderlist_t));
7145 if (rbtdb->rrsetstats != NULL)
7146 dns_stats_detach(&rbtdb->rrsetstats);
7149 isc_mem_put(mctx, rbtdb->node_locks,
7150 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7153 isc_rwlock_destroy(&rbtdb->tree_lock);
7156 RBTDB_DESTROYLOCK(&rbtdb->lock);
7159 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7165 * Slabbed Rdataset Methods
7169 rdataset_disassociate(dns_rdataset_t *rdataset) {
7170 dns_db_t *db = rdataset->private1;
7171 dns_dbnode_t *node = rdataset->private2;
7173 detachnode(db, &node);
7177 rdataset_first(dns_rdataset_t *rdataset) {
7178 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7181 count = raw[0] * 256 + raw[1];
7183 rdataset->private5 = NULL;
7184 return (ISC_R_NOMORE);
7187 #if DNS_RDATASET_FIXED
7188 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7189 raw += 2 + (4 * count);
7195 * The privateuint4 field is the number of rdata beyond the
7196 * cursor position, so we decrement the total count by one
7197 * before storing it.
7199 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7200 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7201 * to the first entry in the offset table.
7204 rdataset->privateuint4 = count;
7205 rdataset->private5 = raw;
7207 return (ISC_R_SUCCESS);
7211 rdataset_next(dns_rdataset_t *rdataset) {
7213 unsigned int length;
7214 unsigned char *raw; /* RDATASLAB */
7216 count = rdataset->privateuint4;
7218 return (ISC_R_NOMORE);
7220 rdataset->privateuint4 = count;
7223 * Skip forward one record (length + 4) or one offset (4).
7225 raw = rdataset->private5;
7226 #if DNS_RDATASET_FIXED
7227 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7229 length = raw[0] * 256 + raw[1];
7231 #if DNS_RDATASET_FIXED
7233 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7235 rdataset->private5 = raw + 2; /* length(2) */
7238 return (ISC_R_SUCCESS);
7242 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7243 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7244 #if DNS_RDATASET_FIXED
7245 unsigned int offset;
7247 unsigned int length;
7249 unsigned int flags = 0;
7251 REQUIRE(raw != NULL);
7254 * Find the start of the record if not already in private5
7255 * then skip the length and order fields.
7257 #if DNS_RDATASET_FIXED
7258 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7259 offset = (raw[0] << 24) + (raw[1] << 16) +
7260 (raw[2] << 8) + raw[3];
7261 raw = rdataset->private3;
7265 length = raw[0] * 256 + raw[1];
7266 #if DNS_RDATASET_FIXED
7271 if (rdataset->type == dns_rdatatype_rrsig) {
7272 if (*raw & DNS_RDATASLAB_OFFLINE)
7273 flags |= DNS_RDATA_OFFLINE;
7279 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7280 rdata->flags |= flags;
7284 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7285 dns_db_t *db = source->private1;
7286 dns_dbnode_t *node = source->private2;
7287 dns_dbnode_t *cloned_node = NULL;
7289 attachnode(db, node, &cloned_node);
7293 * Reset iterator state.
7295 target->privateuint4 = 0;
7296 target->private5 = NULL;
7300 rdataset_count(dns_rdataset_t *rdataset) {
7301 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7304 count = raw[0] * 256 + raw[1];
7310 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7311 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7313 dns_db_t *db = rdataset->private1;
7314 dns_dbnode_t *node = rdataset->private2;
7315 dns_dbnode_t *cloned_node;
7316 struct noqname *noqname = rdataset->private6;
7319 attachnode(db, node, &cloned_node);
7320 nsec->methods = &rdataset_methods;
7321 nsec->rdclass = db->rdclass;
7322 nsec->type = noqname->type;
7324 nsec->ttl = rdataset->ttl;
7325 nsec->trust = rdataset->trust;
7326 nsec->private1 = rdataset->private1;
7327 nsec->private2 = rdataset->private2;
7328 nsec->private3 = noqname->neg;
7329 nsec->privateuint4 = 0;
7330 nsec->private5 = NULL;
7331 nsec->private6 = NULL;
7332 nsec->private7 = NULL;
7335 attachnode(db, node, &cloned_node);
7336 nsecsig->methods = &rdataset_methods;
7337 nsecsig->rdclass = db->rdclass;
7338 nsecsig->type = dns_rdatatype_rrsig;
7339 nsecsig->covers = noqname->type;
7340 nsecsig->ttl = rdataset->ttl;
7341 nsecsig->trust = rdataset->trust;
7342 nsecsig->private1 = rdataset->private1;
7343 nsecsig->private2 = rdataset->private2;
7344 nsecsig->private3 = noqname->negsig;
7345 nsecsig->privateuint4 = 0;
7346 nsecsig->private5 = NULL;
7347 nsec->private6 = NULL;
7348 nsec->private7 = NULL;
7350 dns_name_clone(&noqname->name, name);
7352 return (ISC_R_SUCCESS);
7356 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7357 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7359 dns_db_t *db = rdataset->private1;
7360 dns_dbnode_t *node = rdataset->private2;
7361 dns_dbnode_t *cloned_node;
7362 struct noqname *closest = rdataset->private7;
7365 attachnode(db, node, &cloned_node);
7366 nsec->methods = &rdataset_methods;
7367 nsec->rdclass = db->rdclass;
7368 nsec->type = closest->type;
7370 nsec->ttl = rdataset->ttl;
7371 nsec->trust = rdataset->trust;
7372 nsec->private1 = rdataset->private1;
7373 nsec->private2 = rdataset->private2;
7374 nsec->private3 = closest->neg;
7375 nsec->privateuint4 = 0;
7376 nsec->private5 = NULL;
7377 nsec->private6 = NULL;
7378 nsec->private7 = NULL;
7381 attachnode(db, node, &cloned_node);
7382 nsecsig->methods = &rdataset_methods;
7383 nsecsig->rdclass = db->rdclass;
7384 nsecsig->type = dns_rdatatype_rrsig;
7385 nsecsig->covers = closest->type;
7386 nsecsig->ttl = rdataset->ttl;
7387 nsecsig->trust = rdataset->trust;
7388 nsecsig->private1 = rdataset->private1;
7389 nsecsig->private2 = rdataset->private2;
7390 nsecsig->private3 = closest->negsig;
7391 nsecsig->privateuint4 = 0;
7392 nsecsig->private5 = NULL;
7393 nsec->private6 = NULL;
7394 nsec->private7 = NULL;
7396 dns_name_clone(&closest->name, name);
7398 return (ISC_R_SUCCESS);
7402 * Rdataset Iterator Methods
7406 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7407 rbtdb_rdatasetiter_t *rbtiterator;
7409 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7411 if (rbtiterator->common.version != NULL)
7412 closeversion(rbtiterator->common.db,
7413 &rbtiterator->common.version, ISC_FALSE);
7414 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7415 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7416 sizeof(*rbtiterator));
7422 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7423 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7424 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7425 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7426 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7427 rdatasetheader_t *header, *top_next;
7428 rbtdb_serial_t serial;
7431 if (IS_CACHE(rbtdb)) {
7433 now = rbtiterator->common.now;
7435 serial = rbtversion->serial;
7439 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7440 isc_rwlocktype_read);
7442 for (header = rbtnode->data; header != NULL; header = top_next) {
7443 top_next = header->next;
7445 if (header->serial <= serial && !IGNORE(header)) {
7447 * Is this a "this rdataset doesn't exist"
7448 * record? Or is it too old in the cache?
7450 * Note: unlike everywhere else, we
7451 * check for now > header->rdh_ttl instead
7452 * of now >= header->rdh_ttl. This allows
7453 * ANY and RRSIG queries for 0 TTL
7454 * rdatasets to work.
7456 if (NONEXISTENT(header) ||
7457 (now != 0 && now > header->rdh_ttl))
7461 header = header->down;
7462 } while (header != NULL);
7467 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7468 isc_rwlocktype_read);
7470 rbtiterator->current = header;
7473 return (ISC_R_NOMORE);
7475 return (ISC_R_SUCCESS);
7479 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7480 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7481 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7482 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7483 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7484 rdatasetheader_t *header, *top_next;
7485 rbtdb_serial_t serial;
7487 rbtdb_rdatatype_t type, negtype;
7488 dns_rdatatype_t rdtype, covers;
7490 header = rbtiterator->current;
7492 return (ISC_R_NOMORE);
7494 if (IS_CACHE(rbtdb)) {
7496 now = rbtiterator->common.now;
7498 serial = rbtversion->serial;
7502 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7503 isc_rwlocktype_read);
7505 type = header->type;
7506 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7508 covers = RBTDB_RDATATYPE_EXT(header->type);
7509 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7511 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7512 for (header = header->next; header != NULL; header = top_next) {
7513 top_next = header->next;
7515 * If not walking back up the down list.
7517 if (header->type != type && header->type != negtype) {
7519 if (header->serial <= serial &&
7522 * Is this a "this rdataset doesn't
7525 * Note: unlike everywhere else, we
7526 * check for now > header->ttl instead
7527 * of now >= header->ttl. This allows
7528 * ANY and RRSIG queries for 0 TTL
7529 * rdatasets to work.
7531 if ((header->attributes &
7532 RDATASET_ATTR_NONEXISTENT) != 0 ||
7533 (now != 0 && now > header->rdh_ttl))
7537 header = header->down;
7538 } while (header != NULL);
7544 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7545 isc_rwlocktype_read);
7547 rbtiterator->current = header;
7550 return (ISC_R_NOMORE);
7552 return (ISC_R_SUCCESS);
7556 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7557 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7558 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7559 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7560 rdatasetheader_t *header;
7562 header = rbtiterator->current;
7563 REQUIRE(header != NULL);
7565 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7566 isc_rwlocktype_read);
7568 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7571 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7572 isc_rwlocktype_read);
7577 * Database Iterator Methods
7581 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7582 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7583 dns_rbtnode_t *node = rbtdbiter->node;
7588 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7589 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7593 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7594 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7595 dns_rbtnode_t *node = rbtdbiter->node;
7601 lock = &rbtdb->node_locks[node->locknum].lock;
7602 NODE_LOCK(lock, isc_rwlocktype_read);
7603 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7604 rbtdbiter->tree_locked, ISC_FALSE);
7605 NODE_UNLOCK(lock, isc_rwlocktype_read);
7607 rbtdbiter->node = NULL;
7611 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7612 dns_rbtnode_t *node;
7613 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7614 isc_boolean_t was_read_locked = ISC_FALSE;
7618 if (rbtdbiter->delete != 0) {
7620 * Note that "%d node of %d in tree" can report things like
7621 * "flush_deletions: 59 nodes of 41 in tree". This means
7622 * That some nodes appear on the deletions list more than
7623 * once. Only the last occurence will actually be deleted.
7625 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7626 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7627 "flush_deletions: %d nodes of %d in tree",
7629 dns_rbt_nodecount(rbtdb->tree));
7631 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7632 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7633 was_read_locked = ISC_TRUE;
7635 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7636 rbtdbiter->tree_locked = isc_rwlocktype_write;
7638 for (i = 0; i < rbtdbiter->delete; i++) {
7639 node = rbtdbiter->deletions[i];
7640 lock = &rbtdb->node_locks[node->locknum].lock;
7642 NODE_LOCK(lock, isc_rwlocktype_read);
7643 decrement_reference(rbtdb, node, 0,
7644 isc_rwlocktype_read,
7645 rbtdbiter->tree_locked, ISC_FALSE);
7646 NODE_UNLOCK(lock, isc_rwlocktype_read);
7649 rbtdbiter->delete = 0;
7651 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7652 if (was_read_locked) {
7653 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7654 rbtdbiter->tree_locked = isc_rwlocktype_read;
7657 rbtdbiter->tree_locked = isc_rwlocktype_none;
7663 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7664 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7666 REQUIRE(rbtdbiter->paused);
7667 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7669 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7670 rbtdbiter->tree_locked = isc_rwlocktype_read;
7672 rbtdbiter->paused = ISC_FALSE;
7676 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7677 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7678 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7679 dns_db_t *db = NULL;
7681 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7682 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7683 rbtdbiter->tree_locked = isc_rwlocktype_none;
7685 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7687 dereference_iter_node(rbtdbiter);
7689 flush_deletions(rbtdbiter);
7691 dns_db_attach(rbtdbiter->common.db, &db);
7692 dns_db_detach(&rbtdbiter->common.db);
7694 dns_rbtnodechain_reset(&rbtdbiter->chain);
7695 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7696 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7703 dbiterator_first(dns_dbiterator_t *iterator) {
7704 isc_result_t result;
7705 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7706 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7707 dns_name_t *name, *origin;
7709 if (rbtdbiter->result != ISC_R_SUCCESS &&
7710 rbtdbiter->result != ISC_R_NOMORE)
7711 return (rbtdbiter->result);
7713 if (rbtdbiter->paused)
7714 resume_iteration(rbtdbiter);
7716 dereference_iter_node(rbtdbiter);
7718 name = dns_fixedname_name(&rbtdbiter->name);
7719 origin = dns_fixedname_name(&rbtdbiter->origin);
7720 dns_rbtnodechain_reset(&rbtdbiter->chain);
7721 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7723 if (rbtdbiter->nsec3only) {
7724 rbtdbiter->current = &rbtdbiter->nsec3chain;
7725 result = dns_rbtnodechain_first(rbtdbiter->current,
7726 rbtdb->nsec3, name, origin);
7728 rbtdbiter->current = &rbtdbiter->chain;
7729 result = dns_rbtnodechain_first(rbtdbiter->current,
7730 rbtdb->tree, name, origin);
7731 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7732 rbtdbiter->current = &rbtdbiter->nsec3chain;
7733 result = dns_rbtnodechain_first(rbtdbiter->current,
7738 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7739 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7740 NULL, &rbtdbiter->node);
7741 if (result == ISC_R_SUCCESS) {
7742 rbtdbiter->new_origin = ISC_TRUE;
7743 reference_iter_node(rbtdbiter);
7746 INSIST(result == ISC_R_NOTFOUND);
7747 result = ISC_R_NOMORE; /* The tree is empty. */
7750 rbtdbiter->result = result;
/*
 * Position the iterator on the last node.
 *
 * A stored iterator result other than ISC_R_SUCCESS or ISC_R_NOMORE is
 * returned unchanged.  Otherwise a paused iteration is resumed,
 * dereference_iter_node() is called for the current node, both node chains
 * are reset and the last node is looked up with dns_rbtnodechain_last().
 * The outcome is stored in rbtdbiter->result.
 */
7756 dbiterator_last(dns_dbiterator_t *iterator) {
7757 isc_result_t result;
7758 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7759 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7760 dns_name_t *name, *origin;
/* An earlier error is reported again without touching the iterator. */
7762 if (rbtdbiter->result != ISC_R_SUCCESS &&
7763 rbtdbiter->result != ISC_R_NOMORE)
7764 return (rbtdbiter->result);
7766 if (rbtdbiter->paused)
7767 resume_iteration(rbtdbiter);
7769 dereference_iter_node(rbtdbiter);
7771 name = dns_fixedname_name(&rbtdbiter->name);
7772 origin = dns_fixedname_name(&rbtdbiter->origin);
7773 dns_rbtnodechain_reset(&rbtdbiter->chain);
7774 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/*
 * The NSEC3 tree is consulted only when nsec3only is set (and nonsec3 is
 * not); whenever nsec3only is clear the main tree is used instead, because
 * result still holds its initial ISC_R_NOTFOUND at the second test.
 */
7776 result = ISC_R_NOTFOUND;
7777 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7778 rbtdbiter->current = &rbtdbiter->nsec3chain;
7779 result = dns_rbtnodechain_last(rbtdbiter->current,
7780 rbtdb->nsec3, name, origin);
7782 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7783 rbtdbiter->current = &rbtdbiter->chain;
7784 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
/*
 * On ISC_R_SUCCESS or DNS_R_NEWORIGIN fetch the node the selected chain
 * points at; if that works, flag a new origin and call
 * reference_iter_node().
 */
7787 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7788 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7789 NULL, &rbtdbiter->node);
7790 if (result == ISC_R_SUCCESS) {
7791 rbtdbiter->new_origin = ISC_TRUE;
7792 reference_iter_node(rbtdbiter);
/*
 * NOTE(review): the two statements below presumably form the alternative
 * branch of the test above (lookup did not succeed) -- confirm.
 */
7795 INSIST(result == ISC_R_NOTFOUND);
7796 result = ISC_R_NOMORE; /* The tree is empty. */
7799 rbtdbiter->result = result;
/*
 * Position the iterator on the node named by the name argument.
 *
 * A stored iterator result other than ISC_R_SUCCESS, ISC_R_NOTFOUND or
 * ISC_R_NOMORE is returned unchanged.  Otherwise a paused iteration is
 * resumed, dereference_iter_node() is called, both chains are reset and
 * the name is looked up with dns_rbt_findnode() using
 * DNS_RBTFIND_EMPTYDATA.  The outcome is stored in rbtdbiter->result.
 */
7805 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7806 isc_result_t result;
7807 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7808 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7809 dns_name_t *iname, *origin;
/* Unlike first/last, a previous ISC_R_NOTFOUND does not block a new seek. */
7811 if (rbtdbiter->result != ISC_R_SUCCESS &&
7812 rbtdbiter->result != ISC_R_NOTFOUND &&
7813 rbtdbiter->result != ISC_R_NOMORE)
7814 return (rbtdbiter->result);
7816 if (rbtdbiter->paused)
7817 resume_iteration(rbtdbiter);
7819 dereference_iter_node(rbtdbiter);
7821 iname = dns_fixedname_name(&rbtdbiter->name);
7822 origin = dns_fixedname_name(&rbtdbiter->origin);
7823 dns_rbtnodechain_reset(&rbtdbiter->chain);
7824 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/*
 * Tree selection: nsec3only searches rbtdb->nsec3, nonsec3 searches
 * rbtdb->tree, and the remaining case searches rbtdb->tree first.
 */
7826 if (rbtdbiter->nsec3only) {
7827 rbtdbiter->current = &rbtdbiter->nsec3chain;
7828 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7831 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7832 } else if (rbtdbiter->nonsec3) {
7833 rbtdbiter->current = &rbtdbiter->chain;
7834 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7837 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7840 * Stay on main chain if not found on either chain.
7842 rbtdbiter->current = &rbtdbiter->chain;
7843 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7846 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
/*
 * Only a partial match in the main tree: retry in the NSEC3 tree and
 * switch the iterator to nsec3chain when that lookup succeeds.
 */
7847 if (result == DNS_R_PARTIALMATCH) {
7848 dns_rbtnode_t *node = NULL;
7849 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7850 &node, &rbtdbiter->nsec3chain,
7851 DNS_RBTFIND_EMPTYDATA,
7853 if (result == ISC_R_SUCCESS) {
7854 rbtdbiter->node = node;
7855 rbtdbiter->current = &rbtdbiter->nsec3chain;
/*
 * Result handling: after a successful lookup the chain is queried with
 * dns_rbtnodechain_current() and, if that succeeds, a new origin is
 * flagged and reference_iter_node() is called.  A partial match is turned
 * into ISC_R_NOTFOUND with no current node.
 */
7861 if (result == ISC_R_SUCCESS) {
7862 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7864 if (result == ISC_R_SUCCESS) {
7865 rbtdbiter->new_origin = ISC_TRUE;
7866 reference_iter_node(rbtdbiter);
7868 } else if (result == DNS_R_PARTIALMATCH) {
7869 result = ISC_R_NOTFOUND;
7870 rbtdbiter->node = NULL;
7873 rbtdbiter->result = result;
/*
 * NOTE(review): what follows is a second, different result-handling
 * sequence (it would record a partial match as ISC_R_SUCCESS).  It is
 * presumably an alternative selected by a preprocessor conditional that
 * is not visible here -- confirm which variant is compiled.
 */
7875 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7876 isc_result_t tresult;
7877 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7879 if (tresult == ISC_R_SUCCESS) {
7880 rbtdbiter->new_origin = ISC_TRUE;
7881 reference_iter_node(rbtdbiter);
7884 rbtdbiter->node = NULL;
7887 rbtdbiter->node = NULL;
7889 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7890 ISC_R_SUCCESS : result;
/*
 * Move the iterator to the previous node.
 *
 * Requires a current node.  A stored result other than ISC_R_SUCCESS is
 * returned unchanged.  The step is made with dns_rbtnodechain_prev() on
 * the current chain; the outcome is stored in rbtdbiter->result.
 */
7897 dbiterator_prev(dns_dbiterator_t *iterator) {
7898 isc_result_t result;
7899 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7900 dns_name_t *name, *origin;
7901 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7903 REQUIRE(rbtdbiter->node != NULL);
7905 if (rbtdbiter->result != ISC_R_SUCCESS)
7906 return (rbtdbiter->result);
7908 if (rbtdbiter->paused)
7909 resume_iteration(rbtdbiter);
7911 name = dns_fixedname_name(&rbtdbiter->name);
7912 origin = dns_fixedname_name(&rbtdbiter->origin);
7913 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
/*
 * When the NSEC3 chain is exhausted and the iterator is restricted to
 * neither tree, continue backwards from the last node of the main tree.
 * An empty main tree (ISC_R_NOTFOUND) ends the iteration.
 */
7914 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7915 !rbtdbiter->nonsec3 &&
7916 &rbtdbiter->nsec3chain == rbtdbiter->current) {
7917 rbtdbiter->current = &rbtdbiter->chain;
7918 dns_rbtnodechain_reset(rbtdbiter->current);
7919 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7921 if (result == ISC_R_NOTFOUND)
7922 result = ISC_R_NOMORE;
7925 dereference_iter_node(rbtdbiter);
/*
 * new_origin records whether the step reported DNS_R_NEWORIGIN; the node
 * itself is then fetched from the chain.
 */
7927 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7928 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7929 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7930 NULL, &rbtdbiter->node);
7933 if (result == ISC_R_SUCCESS)
7934 reference_iter_node(rbtdbiter);
7936 rbtdbiter->result = result;
/*
 * Move the iterator to the next node.
 *
 * Requires a current node.  A stored result other than ISC_R_SUCCESS is
 * returned unchanged.  The step is made with dns_rbtnodechain_next() on
 * the current chain; the outcome is stored in rbtdbiter->result.
 */
7942 dbiterator_next(dns_dbiterator_t *iterator) {
7943 isc_result_t result;
7944 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7945 dns_name_t *name, *origin;
7946 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7948 REQUIRE(rbtdbiter->node != NULL);
7950 if (rbtdbiter->result != ISC_R_SUCCESS)
7951 return (rbtdbiter->result);
7953 if (rbtdbiter->paused)
7954 resume_iteration(rbtdbiter);
7956 name = dns_fixedname_name(&rbtdbiter->name);
7957 origin = dns_fixedname_name(&rbtdbiter->origin);
7958 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
/*
 * When the main chain is exhausted and the iterator is restricted to
 * neither tree, continue with the first node of the NSEC3 tree.  An empty
 * NSEC3 tree (ISC_R_NOTFOUND) ends the iteration.
 */
7959 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7960 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
7961 rbtdbiter->current = &rbtdbiter->nsec3chain;
7962 dns_rbtnodechain_reset(rbtdbiter->current);
7963 result = dns_rbtnodechain_first(rbtdbiter->current,
7964 rbtdb->nsec3, name, origin);
7965 if (result == ISC_R_NOTFOUND)
7966 result = ISC_R_NOMORE;
7969 dereference_iter_node(rbtdbiter);
/*
 * new_origin records whether the step reported DNS_R_NEWORIGIN; the node
 * itself is then fetched from the chain.
 */
7971 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7972 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7973 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7974 NULL, &rbtdbiter->node);
7976 if (result == ISC_R_SUCCESS)
7977 reference_iter_node(rbtdbiter);
7979 rbtdbiter->result = result;
/*
 * Hand the node the iterator is positioned on to the caller.
 *
 * Requires a stored result of ISC_R_SUCCESS and a current node.  The node
 * is stored in *nodep after new_reference() has been called for it under
 * its node lock.  For a cleaning iterator whose result is ISC_R_SUCCESS,
 * expirenode() is also called on the node; if that succeeds and the node
 * has no down pointer, the node is appended to the deletions array and
 * dns_rbtnode_refincrement() is called for it under the node lock.
 */
7985 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
7988 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7989 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7990 dns_rbtnode_t *node = rbtdbiter->node;
7991 isc_result_t result;
7992 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
7993 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
7995 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
7996 REQUIRE(rbtdbiter->node != NULL);
7998 if (rbtdbiter->paused)
7999 resume_iteration(rbtdbiter);
/*
 * The name is assembled with dns_name_concatenate().
 * NOTE(review): the statement guarded by the relative_names test is not
 * visible here; presumably it changes which origin is appended -- confirm.
 */
8002 if (rbtdbiter->common.relative_names)
8004 result = dns_name_concatenate(nodename, origin, name, NULL);
8005 if (result != ISC_R_SUCCESS)
/*
 * DNS_R_NEWORIGIN is reported only by relative-name iterators whose
 * new_origin flag is set.
 */
8007 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8008 result = DNS_R_NEWORIGIN;
8010 result = ISC_R_SUCCESS;
8012 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8013 new_reference(rbtdb, node);
8014 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8016 *nodep = rbtdbiter->node;
/* Cleaning iterators additionally try to expire the node they return. */
8018 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8019 isc_result_t expire_result;
8022 * If the deletion array is full, flush it before trying
8023 * to expire the current node. The current node can't
8024 * fully deleted while the iteration cursor is still on it.
8026 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8027 flush_deletions(rbtdbiter);
8029 expire_result = expirenode(iterator->db, *nodep, 0);
8032 * expirenode() currently always returns success.
8034 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8037 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8038 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8039 dns_rbtnode_refincrement(node, &refs);
8041 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the iteration.
 *
 * A stored result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Pausing an already paused iterator is a no-op that returns
 * ISC_R_SUCCESS.  Otherwise the iterator is marked paused, the tree read
 * lock is released if the iterator holds it, and flush_deletions() is
 * called.
 */
8049 dbiterator_pause(dns_dbiterator_t *iterator) {
8050 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8051 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8053 if (rbtdbiter->result != ISC_R_SUCCESS &&
8054 rbtdbiter->result != ISC_R_NOMORE)
8055 return (rbtdbiter->result);
8057 if (rbtdbiter->paused)
8058 return (ISC_R_SUCCESS);
8060 rbtdbiter->paused = ISC_TRUE;
/* The only lock state other than none that is accepted here is read. */
8062 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8063 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8064 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8065 rbtdbiter->tree_locked = isc_rwlocktype_none;
8068 flush_deletions(rbtdbiter);
8070 return (ISC_R_SUCCESS);
/*
 * Copy the origin held by the iterator into name.
 *
 * Returns the stored iterator result when it is not ISC_R_SUCCESS,
 * otherwise the result of dns_name_copy().
 */
8074 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8075 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8076 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8078 if (rbtdbiter->result != ISC_R_SUCCESS)
8079 return (rbtdbiter->result);
8081 return (dns_name_copy(origin, name, NULL));
8085 * Additional cache routines.
/*
 * Fetch the additional-cache entry stored for one slot of this rdataset.
 *
 * The per-header array is chosen by type (additional_auth or
 * additional_glue) and read under the node read lock.  ISC_R_NOTFOUND is
 * returned when there is no array or the slot is empty; otherwise the
 * entry is attached, the lock is dropped, dns_acache_getentry() is called
 * to fill the output arguments and the entry is detached again.
 */
8088 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8089 dns_rdatatype_t qtype, dns_acache_t *acache,
8090 dns_zone_t **zonep, dns_db_t **dbp,
8091 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8092 dns_name_t *fname, dns_message_t *msg,
8095 dns_rbtdb_t *rbtdb = rdataset->private1;
8096 dns_rbtnode_t *rbtnode = rdataset->private2;
8097 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8098 unsigned int current_count = rdataset->privateuint4;
8100 rdatasetheader_t *header;
8101 nodelock_t *nodelock;
8102 unsigned int total_count;
8103 acachectl_t *acarray;
8104 dns_acacheentry_t *entry;
8105 isc_result_t result;
8107 UNUSED(qtype); /* we do not use this value at least for now */
/* The header is located immediately in front of the raw slab data. */
8110 header = (struct rdatasetheader *)(raw - sizeof(*header));
/*
 * The first two bytes of the slab hold a 16-bit count, high byte first.
 * The slot index is derived from it and from rdataset->privateuint4.
 */
8112 total_count = raw[0] * 256 + raw[1];
8113 INSIST(total_count > current_count);
8114 count = total_count - current_count - 1;
8118 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8119 NODE_LOCK(nodelock, isc_rwlocktype_read);
8122 case dns_rdatasetadditional_fromauth:
8123 acarray = header->additional_auth;
8125 case dns_rdatasetadditional_fromcache:
8128 case dns_rdatasetadditional_fromglue:
8129 acarray = header->additional_glue;
/* No array: a query miss is counted except for the fromcache type. */
8135 if (acarray == NULL) {
8136 if (type != dns_rdatasetadditional_fromcache)
8137 dns_acache_countquerymiss(acache);
8138 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8139 return (ISC_R_NOTFOUND);
/* Empty slot: always counted as a query miss. */
8142 if (acarray[count].entry == NULL) {
8143 dns_acache_countquerymiss(acache);
8144 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8145 return (ISC_R_NOTFOUND);
/*
 * Attach to the entry while the node lock is still held, then query it
 * after the lock has been released.
 */
8149 dns_acache_attachentry(acarray[count].entry, &entry);
8151 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8153 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8154 nodep, fname, msg, now);
8156 dns_acache_detachentry(&entry);
/*
 * Callback passed to dns_acache_createentry() by rdataset_setadditional().
 *
 * Under the node write lock it clears the array slot recorded in the
 * callback argument if that slot still refers to entry, frees the callback
 * argument and detaches entry.  After the lock is released, the node and
 * database pointers taken from the callback argument are detached.
 */
8162 acache_callback(dns_acacheentry_t *entry, void **arg) {
8164 dns_rbtnode_t *rbtnode;
8165 nodelock_t *nodelock;
8166 acachectl_t *acarray = NULL;
8167 acache_cbarg_t *cbarg;
8170 REQUIRE(arg != NULL);
8174 * The caller must hold the entry lock.
8177 rbtdb = (dns_rbtdb_t *)cbarg->db;
8178 rbtnode = (dns_rbtnode_t *)cbarg->node;
8180 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8181 NODE_LOCK(nodelock, isc_rwlocktype_write);
8183 switch (cbarg->type) {
8184 case dns_rdatasetadditional_fromauth:
8185 acarray = cbarg->header->additional_auth;
8187 case dns_rdatasetadditional_fromglue:
8188 acarray = cbarg->header->additional_glue;
/*
 * The slot is reset only when it still points at this entry.
 * NOTE(review): the second isc_mem_put() below is presumably the
 * alternative branch, so cbarg is freed exactly once -- confirm.
 */
8194 count = cbarg->count;
8195 if (acarray != NULL && acarray[count].entry == entry) {
8196 acarray[count].entry = NULL;
8197 INSIST(acarray[count].cbarg == cbarg);
8198 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8199 acarray[count].cbarg = NULL;
8201 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8203 dns_acache_detachentry(&entry);
8205 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* Uses the local copies made above, since cbarg has already been freed. */
8207 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8208 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an acache entry and dispose of its callback argument.
 *
 * Requires non-NULL mctx and entry, and a non-NULL *cbargp.  Calls
 * dns_acache_cancelentry() on entry, detaches the node and database held
 * in the callback argument and returns the argument to mctx.  The entry
 * itself is not detached here.
 */
8214 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8215 acache_cbarg_t **cbargp)
8217 acache_cbarg_t *cbarg;
8219 REQUIRE(mctx != NULL);
8220 REQUIRE(entry != NULL);
8221 REQUIRE(cbargp != NULL && *cbargp != NULL);
8225 dns_acache_cancelentry(entry);
8226 dns_db_detachnode(cbarg->db, &cbarg->node);
8227 dns_db_detach(&cbarg->db);
8229 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Store an additional-cache entry in one slot of this rdataset.
 *
 * Nothing is stored for the fromcache type (ISC_R_SUCCESS is returned).
 * Otherwise a callback argument and an acache entry are created and
 * filled in, then installed in the per-header array selected by type
 * under the node write lock.  The array is allocated on first use.  An
 * entry that was already in the slot is cancelled and detached after the
 * lock has been released.  Returns ISC_R_NOMEMORY when the callback
 * argument cannot be allocated.
 */
8235 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8236 dns_rdatatype_t qtype, dns_acache_t *acache,
8237 dns_zone_t *zone, dns_db_t *db,
8238 dns_dbversion_t *version, dns_dbnode_t *node,
8241 dns_rbtdb_t *rbtdb = rdataset->private1;
8242 dns_rbtnode_t *rbtnode = rdataset->private2;
8243 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8244 unsigned int current_count = rdataset->privateuint4;
8245 rdatasetheader_t *header;
8246 unsigned int total_count, count;
8247 nodelock_t *nodelock;
8248 isc_result_t result;
8249 acachectl_t *acarray;
8250 dns_acacheentry_t *newentry, *oldentry = NULL;
8251 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8255 if (type == dns_rdatasetadditional_fromcache)
8256 return (ISC_R_SUCCESS);
/* The header is located immediately in front of the raw slab data. */
8258 header = (struct rdatasetheader *)(raw - sizeof(*header));
8260 total_count = raw[0] * 256 + raw[1];
8261 INSIST(total_count > current_count);
8262 count = total_count - current_count - 1; /* should be private data */
/*
 * The callback argument records the type, slot and header, and holds its
 * own attached database and node pointers.
 */
8264 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8265 if (newcbarg == NULL)
8266 return (ISC_R_NOMEMORY);
8267 newcbarg->type = type;
8268 newcbarg->count = count;
8269 newcbarg->header = header;
8270 newcbarg->db = NULL;
8271 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8272 newcbarg->node = NULL;
8273 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8276 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8277 acache_callback, newcbarg, &newentry);
8278 if (result != ISC_R_SUCCESS)
8280 /* Set cache data in the new entry. */
8281 result = dns_acache_setentry(acache, newentry, zone, db,
8282 version, node, fname);
8283 if (result != ISC_R_SUCCESS)
8286 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8287 NODE_LOCK(nodelock, isc_rwlocktype_write);
8291 case dns_rdatasetadditional_fromauth:
8292 acarray = header->additional_auth;
8294 case dns_rdatasetadditional_fromglue:
8295 acarray = header->additional_glue;
/*
 * First use for this header and type: allocate total_count slots, clear
 * them and publish the array in the header.
 */
8301 if (acarray == NULL) {
8304 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8305 sizeof(acachectl_t));
8307 if (acarray == NULL) {
8308 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8312 for (i = 0; i < total_count; i++) {
8313 acarray[i].entry = NULL;
8314 acarray[i].cbarg = NULL;
8318 case dns_rdatasetadditional_fromauth:
8319 header->additional_auth = acarray;
8321 case dns_rdatasetadditional_fromglue:
8322 header->additional_glue = acarray;
8328 if (acarray[count].entry != NULL) {
8330 * Swap the entry. Delay cleaning-up the old entry since
8331 * it would require a node lock.
8333 oldentry = acarray[count].entry;
8334 INSIST(acarray[count].cbarg != NULL);
8335 oldcbarg = acarray[count].cbarg;
8337 acarray[count].entry = newentry;
8338 acarray[count].cbarg = newcbarg;
8340 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* The replaced entry is torn down only now, outside the node lock. */
8342 if (oldentry != NULL) {
8343 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8344 dns_acache_detachentry(&oldentry);
8347 return (ISC_R_SUCCESS);
/*
 * Failure cleanup.  If an entry exists it is cancelled through
 * acache_cancelentry() and detached.
 * NOTE(review): the direct detach/free calls that follow are presumably
 * the alternative branch used when no entry was created -- confirm.
 */
8350 if (newcbarg != NULL) {
8351 if (newentry != NULL) {
8352 acache_cancelentry(rbtdb->common.mctx, newentry,
8354 dns_acache_detachentry(&newentry);
8356 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8357 dns_db_detach(&newcbarg->db);
8358 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Remove the additional-cache entry stored in one slot of this rdataset.
 *
 * Nothing is done for the fromcache type (ISC_R_SUCCESS is returned).
 * Otherwise the slot in the per-header array selected by type is cleared
 * under the node write lock; ISC_R_NOTFOUND is returned when there is no
 * array or the slot is already empty.  The removed entry is cancelled and
 * detached after the lock has been released.
 */
8367 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8368 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8370 dns_rbtdb_t *rbtdb = rdataset->private1;
8371 dns_rbtnode_t *rbtnode = rdataset->private2;
8372 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8373 unsigned int current_count = rdataset->privateuint4;
8374 rdatasetheader_t *header;
8375 nodelock_t *nodelock;
8376 unsigned int total_count, count;
8377 acachectl_t *acarray;
8378 dns_acacheentry_t *entry;
8379 acache_cbarg_t *cbarg;
8381 UNUSED(qtype); /* we do not use this value at least for now */
8384 if (type == dns_rdatasetadditional_fromcache)
8385 return (ISC_R_SUCCESS);
/* The header is located immediately in front of the raw slab data. */
8387 header = (struct rdatasetheader *)(raw - sizeof(*header));
8389 total_count = raw[0] * 256 + raw[1];
8390 INSIST(total_count > current_count);
8391 count = total_count - current_count - 1;
8396 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8397 NODE_LOCK(nodelock, isc_rwlocktype_write);
8400 case dns_rdatasetadditional_fromauth:
8401 acarray = header->additional_auth;
8403 case dns_rdatasetadditional_fromglue:
8404 acarray = header->additional_glue;
8410 if (acarray == NULL) {
8411 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8412 return (ISC_R_NOTFOUND);
8415 entry = acarray[count].entry;
8416 if (entry == NULL) {
8417 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8418 return (ISC_R_NOTFOUND);
/* Take the entry and its callback argument out of the slot. */
8421 acarray[count].entry = NULL;
8422 cbarg = acarray[count].cbarg;
8423 acarray[count].cbarg = NULL;
8425 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* entry was already checked above, so this test always succeeds. */
8427 if (entry != NULL) {
8429 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8430 dns_acache_detachentry(&entry);
8433 return (ISC_R_SUCCESS);
8437 * Routines for LRU-based cache management.
8441 * See if a given cache entry that is being reused needs to be updated
8442 * in the LRU-list. From the LRU management point of view, this function is
8443 * expected to return true for almost all cases. When used with threads,
8444 * however, this may cause a non-negligible performance penalty because a
8445 * writer lock will have to be acquired before updating the list.
8446 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8447 * function returns true if the entry has not been updated for some period of
8448 * time. We differentiate the NS or glue address case and the others since
8449 * experiments have shown that the former tends to be accessed relatively
8450 * infrequently and the cost of a cache miss is higher (e.g., missing NS records
8451 * may cause external queries at a higher level zone, involving more
8454 * Caller must hold the node (read or write) lock.
8456 static inline isc_boolean_t
8457 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
/*
 * Headers flagged nonexistent or stale are handled first.
 * NOTE(review): the statement guarded by this test is not visible here;
 * presumably such headers never need an LRU update -- confirm.
 */
8458 if ((header->attributes &
8459 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8462 #if DNS_RBTDB_LIMITLRUUPDATE
/*
 * The 60-second interval applies to every NS rdataset as well as to A and
 * AAAA rdatasets whose trust level is dns_trust_glue.
 */
8463 if (header->type == dns_rdatatype_ns ||
8464 (header->trust == dns_trust_glue &&
8465 (header->type == dns_rdatatype_a ||
8466 header->type == dns_rdatatype_aaaa))) {
8468 * Glue records are updated if at least 60 seconds have passed
8469 * since the previous update time.
8471 return (header->last_used + 60 <= now);
8474 /* Other records are updated if 5 minutes have passed. */
8475 return (header->last_used + 300 <= now);
8484 * Update the timestamp of a given cache entry and move it to the head
8485 * of the corresponding LRU list.
8487 * Caller must hold the node (write) lock.
8489 * Note that we do NOT touch the heap here, as the TTL has not changed.
8492 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
/* Only valid for cache databases. */
8495 INSIST(IS_CACHE(rbtdb));
8497 /* To be checked: can we really assume this? XXXMLG */
8498 INSIST(ISC_LINK_LINKED(header, lru_link));
/*
 * Unlink the header from the list of its node lock bucket, stamp it with
 * the current time and put it back at the front of the same list.
 */
8500 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum],
8502 header->last_used = now;
8503 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum],
8508 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8509 * under an overmem condition. To recover from this condition quickly, up to
8510 * 2 entries will be purged. This process is triggered while adding a new
8511 * entry, and we specifically avoid purging entries in the same LRU bucket as
8512 * the one to which the new entry will belong. Otherwise, we might purge
8513 * entries of the same name of different RR types while adding RRsets from a
8514 * single response (consider the case where we're adding A and AAAA glue records
8515 * of the same NS name).
8518 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8519 isc_stdtime_t now, isc_boolean_t tree_locked)
8521 rdatasetheader_t *header, *header_prev;
8522 unsigned int locknum;
/*
 * Visit the lock buckets in order, starting with the one after
 * locknum_start and wrapping around.  The walk stops on returning to
 * locknum_start, which is never visited, or when purgecount is no longer
 * positive.  Each bucket is processed under its node write lock.
 */
8525 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8526 locknum != locknum_start && purgecount > 0;
8527 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8528 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8529 isc_rwlocktype_write);
/*
 * First look at element 1 of the heap for this bucket and expire it if
 * its TTL is not later than now - RBTDB_VIRTUAL.
 */
8531 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8532 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8533 expire_header(rbtdb, header, tree_locked);
/*
 * Then walk the list for this bucket from its tail towards its head while
 * purgecount is positive.
 */
8537 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8538 header != NULL && purgecount > 0;
8539 header = header_prev) {
/* Remember the predecessor first, since the entry is unlinked below. */
8540 header_prev = ISC_LIST_PREV(header, lru_link);
8542 * Unlink the entry at this point to avoid checking it
8543 * again even if it's currently used someone else and
8544 * cannot be purged at this moment. This entry won't be
8545 * referenced any more (so unlinking is safe) since the
8546 * TTL was reset to 0.
8548 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8550 expire_header(rbtdb, header, tree_locked);
8554 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8555 isc_rwlocktype_write);
/*
 * Expire a single rdataset header.
 *
 * The TTL is set to 0 through set_ttl(), the header is flagged
 * RDATASET_ATTR_STALE and its node is marked dirty.  If the current
 * reference count of the node is zero, the node is cleaned up right away
 * by taking a reference and releasing it with decrement_reference().
 * tree_locked selects whether isc_rwlocktype_write or isc_rwlocktype_none
 * is passed as the second lock-type argument of that call.
 */
8560 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8561 isc_boolean_t tree_locked)
8563 set_ttl(rbtdb, header, 0);
8564 header->attributes |= RDATASET_ATTR_STALE;
8565 header->node->dirty = 1;
8568 * Caller must hold the node (write) lock.
8571 if (dns_rbtnode_refcurrent(header->node) == 0) {
8573 * If no one else is using the node, we can clean it up now.
8574 * We first need to gain a new reference to the node to meet a
8575 * requirement of decrement_reference().
8577 new_reference(rbtdb, header->node);
8578 decrement_reference(rbtdb, header->node, 0,
8579 isc_rwlocktype_write,
8580 tree_locked ? isc_rwlocktype_write :
8581 isc_rwlocktype_none, ISC_FALSE);