2 * Copyright (C) 2004-2011 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.270.12.32 2011-06-09 00:16:35 each Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
65 #include <dns/zonekey.h>
67 #ifdef DNS_RBTDB_VERSION64
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
80 * Note that "impmagic" is not the first four bytes of the struct, so
81 * ISC_MAGIC_VALID cannot be used.
83 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
84 (rbtdb)->common.impmagic == RBTDB_MAGIC)
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t rbtdb_serial_t;
89 * Make casting easier in symbolic debuggers by using different names
90 * for the 64 bit version.
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
96 typedef isc_uint32_t rbtdb_serial_t;
/*
 * An rbtdb_rdatatype_t packs two rdata types into one 32-bit value: the
 * base type occupies the low 16 bits and the extension type the bits
 * above them.  RBTDB_RDATATYPE_VALUE(b, e) builds such a value, while
 * RBTDB_RDATATYPE_BASE() and RBTDB_RDATATYPE_EXT() extract the two parts.
 * The RBTDB_RDATATYPE_SIG* constants use dns_rdatatype_rrsig as the base
 * and the named type as the extension; RBTDB_RDATATYPE_NCACHEANY uses a
 * base of 0 with dns_rdatatype_any as the extension.
 */
99 typedef isc_uint32_t rbtdb_rdatatype_t;
101 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
102 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
103 #define RBTDB_RDATATYPE_VALUE(b, e) ((rbtdb_rdatatype_t)((e) << 16) | (b))
105 #define RBTDB_RDATATYPE_SIGNSEC \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
119 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120 * Using rwlock is effective with regard to lookup performance only when
121 * it is implemented in an efficient way.
122 * Otherwise, it is generally wise to stick to the simple locking since rwlock
123 * would require more memory or can even make lookups slower due to its own
124 * overhead (when it internally calls mutex locks).
/*
 * DNS_RBTDB_USERWLOCK selects the primitive behind the database lock
 * wrappers.  The first RBTDB_* group maps onto isc_rwlock and forwards the
 * lock type argument t; the second group maps onto a plain mutex and
 * ignores t.
 */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
129 #define DNS_RBTDB_USERWLOCK 0
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
/* Mutex-based variant: the lock type argument is unused. */
138 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t) LOCK(l)
141 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
145 * Since node locking is sensitive to both performance and memory footprint,
146 * we need some trick here. If we have both high-performance rwlock and
147 * high performance and small-memory reference counters, we use rwlock for
148 * node lock and isc_refcount for node references. In this case, we don't have
149 * to protect the access to the counters by locks.
150 * Otherwise, we simply use ordinary mutex lock for node locking, and use
151 * simple integers as reference counters which is protected by the lock.
152 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153 * NODE_UNLOCK. In some other cases, however, we need to protect reference
154 * counters first and then protect other parts of a node as read-only data.
155 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156 * provided for these special cases. When we can use the efficient backend
157 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159 * section including the access to the reference counter.
160 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161 * section is also protected by NODE_STRONGLOCK().
/*
 * Two alternative definitions of the node lock wrappers follow.
 *
 * rwlock variant (both ISC_RWLOCK_USEATOMIC and DNS_RBT_USEISCREFCOUNT
 * defined): NODE_LOCK/NODE_UNLOCK and the NODE_WEAK* macros operate on an
 * isc_rwlock_t, and NODE_STRONGLOCK/NODE_STRONGUNLOCK expand to nothing.
 *
 * mutex variant: NODE_LOCK and the NODE_STRONG* macros take or release the
 * mutex, the NODE_WEAK* macros expand to nothing, and NODE_TRYUPGRADE
 * always evaluates to ISC_R_SUCCESS.
 */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
166 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t) RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
172 #define NODE_STRONGLOCK(l) ((void)0)
173 #define NODE_STRONGUNLOCK(l) ((void)0)
174 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
/* Mutex-based variant. */
178 typedef isc_mutex_t nodelock_t;
180 #define NODE_INITLOCK(l) isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
182 #define NODE_LOCK(l, t) LOCK(l)
183 #define NODE_UNLOCK(l, t) UNLOCK(l)
184 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
186 #define NODE_STRONGLOCK(l) LOCK(l)
187 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t) ((void)0)
189 #define NODE_WEAKUNLOCK(l, t) ((void)0)
190 #define NODE_WEAKDOWNGRADE(l) ((void)0)
194 * Whether to rate-limit updating the LRU to avoid possible thread contention.
195 * Our performance measurement has shown the cost is marginal, so it's defined
196 * to be 0 by default either with or without threads.
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 * Allow clients with a virtual time of up to 5 minutes in the past to see
204 * records that would otherwise have expired.
206 #define RBTDB_VIRTUAL 300
212 dns_rdatatype_t type;
215 typedef struct acachectl acachectl_t;
217 typedef struct rdatasetheader {
219 * Locked by the owning node's lock.
221 rbtdb_serial_t serial;
223 rbtdb_rdatatype_t type;
224 isc_uint16_t attributes;
226 struct noqname *noqname;
227 struct noqname *closest;
229 * We don't use the LIST macros, because the LIST structure has
230 * both head and tail pointers, and is doubly linked.
233 struct rdatasetheader *next;
235 * If this is the top header for an rdataset, 'next' points
236 * to the top header for the next rdataset (i.e., the next type).
237 * Otherwise, it points up to the header whose down pointer points
241 struct rdatasetheader *down;
243 * Points to the header for the next older version of
249 * Monotonically increased every time this rdataset is bound so that
250 * it is used as the base of the starting point in DNS responses
251 * when the "cyclic" rrset-order is required. Since the ordering
252 * should not be so crucial, no lock is set for the counter for
253 * performance reasons.
256 acachectl_t *additional_auth;
257 acachectl_t *additional_glue;
260 isc_stdtime_t last_used;
261 ISC_LINK(struct rdatasetheader) link;
263 unsigned int heap_index;
265 * Used for TTL-based cache cleaning.
267 isc_stdtime_t resign;
270 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
/*
 * Bit flags stored in the attributes member of rdatasetheader_t.  Each
 * flag occupies a distinct bit so that flags can be combined and tested
 * with bitwise operators.
 */
273 #define RDATASET_ATTR_NONEXISTENT 0x0001
274 #define RDATASET_ATTR_STALE 0x0002
275 #define RDATASET_ATTR_IGNORE 0x0004
276 #define RDATASET_ATTR_RETAIN 0x0008
277 #define RDATASET_ATTR_NXDOMAIN 0x0010
278 #define RDATASET_ATTR_RESIGN 0x0020
279 #define RDATASET_ATTR_STATCOUNT 0x0040
280 #define RDATASET_ATTR_OPTOUT 0x0080
281 #define RDATASET_ATTR_NEGATIVE 0x0100
283 typedef struct acache_cbarg {
284 dns_rdatasetadditional_t type;
288 rdatasetheader_t *header;
292 dns_acacheentry_t *entry;
293 acache_cbarg_t *cbarg;
298 * When the cache will pre-expire data (due to memory low or other
299 * situations) before the rdataset's TTL has expired, it MUST
300 * respect the RETAIN bit and not expire the data until its TTL is
/*
 * Predicates over the attributes bits of a rdataset header.  Each macro is
 * true when the matching RDATASET_ATTR_* flag is set, except EXISTS(),
 * which is true when RDATASET_ATTR_NONEXISTENT is clear.
 */
304 #undef IGNORE /* WIN32 winbase.h defines this. */
306 #define EXISTS(header) \
307 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
308 #define NONEXISTENT(header) \
309 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
310 #define IGNORE(header) \
311 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
312 #define RETAIN(header) \
313 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
314 #define NXDOMAIN(header) \
315 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
316 #define RESIGN(header) \
317 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
318 #define OPTOUT(header) \
319 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
320 #define NEGATIVE(header) \
321 (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
/*
 * Default bucket counts.  DEFAULT_NODE_LOCK_COUNT is fixed at 7.
 * DEFAULT_CACHE_NODE_LOCK_COUNT takes its value from
 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT when that macro is defined (a value of 1
 * or less is rejected at compile time with #error) and is otherwise 16.
 */
323 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
326 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
327 * There is a tradeoff issue about configuring this value: if this is too
328 * small, it may cause heavier contention between threads; if this is too large,
329 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
330 * The default value should work well for most environments, but this can
331 * also be configurable at compilation time via the
332 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
333 * 1 due to the assumption of overmem_purge().
335 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
336 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
337 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
342 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
343 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
347 /* Protected in the refcount routines. */
348 isc_refcount_t references;
349 /* Locked by lock. */
350 isc_boolean_t exiting;
353 typedef struct rbtdb_changed {
354 dns_rbtnode_t * node;
356 ISC_LINK(struct rbtdb_changed) link;
359 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
/*
 * State kept for one version of the database.  The references counter is
 * handled through the refcount routines; the members from writer onwards
 * are documented below as locked by the database lock.  The trailing
 * members (iterations, salt_length, salt, plus members not visible in this
 * excerpt) carry the NSEC3 parameters copied between versions by
 * newversion().
 */
367 typedef struct rbtdb_version {
369 rbtdb_serial_t serial;
371 * Protected in the refcount routines.
372 * XXXJT: should we change the lock policy based on the refcount
375 isc_refcount_t references;
376 /* Locked by database lock. */
377 isc_boolean_t writer;
378 isc_boolean_t commit_ok;
379 rbtdb_changedlist_t changed_list;
380 rdatasetheaderlist_t resigned_list;
381 ISC_LINK(struct rbtdb_version) link;
382 dns_db_secure_t secure;
383 isc_boolean_t havensec3;
384 /* NSEC3 parameters */
387 isc_uint16_t iterations;
388 isc_uint8_t salt_length;
389 unsigned char salt[DNS_NSEC3_SALTSIZE];
392 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
397 /* Locks the data in this struct */
398 #if DNS_RBTDB_USERWLOCK
403 /* Locks the tree structure (prevents nodes appearing/disappearing) */
404 isc_rwlock_t tree_lock;
405 /* Locks for individual tree nodes */
406 unsigned int node_lock_count;
407 rbtdb_nodelock_t * node_locks;
408 dns_rbtnode_t * origin_node;
409 dns_stats_t * rrsetstats; /* cache DB only */
410 /* Locked by lock. */
412 isc_refcount_t references;
413 unsigned int attributes;
414 rbtdb_serial_t current_serial;
415 rbtdb_serial_t least_serial;
416 rbtdb_serial_t next_serial;
417 rbtdb_version_t * current_version;
418 rbtdb_version_t * future_version;
419 rbtdb_versionlist_t open_versions;
421 dns_dbnode_t *soanode;
422 dns_dbnode_t *nsnode;
425 * This is a linked list used to implement the LRU cache. There will
426 * be node_lock_count linked lists here. Nodes in bucket 1 will be
427 * placed on the linked list rdatasets[1].
429 rdatasetheaderlist_t *rdatasets;
432 * Temporary storage for stale cache nodes and dynamically deleted
433 * nodes that await being cleaned up.
435 rbtnodelist_t *deadnodes;
438 * Heaps. These are used for TTL based expiry in a cache,
439 * or for zone resigning in a zone DB. hmctx is the memory
440 * context to use for the heap (which differs from the main
441 * database memory context in the case of a cache).
446 /* Locked by tree_lock. */
451 unsigned int quantum;
454 #define RBTDB_ATTR_LOADED 0x01
455 #define RBTDB_ATTR_LOADING 0x02
462 rbtdb_version_t * rbtversion;
463 rbtdb_serial_t serial;
464 unsigned int options;
465 dns_rbtnodechain_t chain;
466 isc_boolean_t copy_name;
467 isc_boolean_t need_cleanup;
469 dns_rbtnode_t * zonecut;
470 rdatasetheader_t * zonecut_rdataset;
471 rdatasetheader_t * zonecut_sigrdataset;
472 dns_fixedname_t zonecut_name;
/*
 * Forward declarations for the file-local rdataset method implementations
 * and for internal helpers (header update and expiry, overmem purging,
 * resign heap insertion, tree pruning), followed by the rdataset_methods
 * table that collects the method implementations.
 */
484 static void rdataset_disassociate(dns_rdataset_t *rdataset);
485 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
486 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
487 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
488 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
489 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
490 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
493 dns_rdataset_t *negsig);
494 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
497 dns_rdataset_t *negsig);
498 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
499 dns_rdatasetadditional_t type,
500 dns_rdatatype_t qtype,
501 dns_acache_t *acache,
504 dns_dbversion_t **versionp,
505 dns_dbnode_t **nodep,
509 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
510 dns_rdatasetadditional_t type,
511 dns_rdatatype_t qtype,
512 dns_acache_t *acache,
515 dns_dbversion_t *version,
518 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
519 dns_rdataset_t *rdataset,
520 dns_rdatasetadditional_t type,
521 dns_rdatatype_t qtype);
522 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
524 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
526 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
527 isc_boolean_t tree_locked);
528 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
529 isc_stdtime_t now, isc_boolean_t tree_locked);
530 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
531 rdatasetheader_t *newheader);
532 static void prune_tree(isc_task_t *task, isc_event_t *event);
533 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
534 static void rdataset_expire(dns_rdataset_t *rdataset);
536 static dns_rdatasetmethods_t rdataset_methods = {
537 rdataset_disassociate,
547 rdataset_getadditional,
548 rdataset_setadditional,
549 rdataset_putadditional,
/*
 * Forward declarations for the rdataset iterator methods, the
 * rdatasetiter_methods table, and the iterator object itself, which holds
 * a dns_rdatasetiter_t as its first member together with a pointer to the
 * current rdataset header.
 */
554 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
555 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
556 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
557 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
558 dns_rdataset_t *rdataset);
560 static dns_rdatasetitermethods_t rdatasetiter_methods = {
561 rdatasetiter_destroy,
567 typedef struct rbtdb_rdatasetiter {
568 dns_rdatasetiter_t common;
569 rdatasetheader_t * current;
570 } rbtdb_rdatasetiter_t;
/*
 * Forward declarations for the database iterator methods (destroy, first,
 * last, seek, prev, next, current, pause, origin) and the start of the
 * dbiterator_methods table.
 */
572 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
573 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
574 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
575 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
577 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
578 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
579 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
580 dns_dbnode_t **nodep,
582 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
583 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
586 static dns_dbiteratormethods_t dbiterator_methods = {
/*
 * Database iterator state.  It keeps two node chains (chain and
 * nsec3chain) with current pointing at a chain, fixed-name storage for the
 * name and origin, and an array of DELETION_BATCH_MAX node pointers.
 * NOTE(review): presumably deletions[] batches nodes awaiting deletion and
 * nsec3only/nonsec3 restrict which tree is walked; confirm against the
 * iterator code, which is not part of this excerpt.
 */
598 #define DELETION_BATCH_MAX 64
601 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
603 typedef struct rbtdb_dbiterator {
604 dns_dbiterator_t common;
605 isc_boolean_t paused;
606 isc_boolean_t new_origin;
607 isc_rwlocktype_t tree_locked;
609 dns_fixedname_t name;
610 dns_fixedname_t origin;
611 dns_rbtnodechain_t chain;
612 dns_rbtnodechain_t nsec3chain;
613 dns_rbtnodechain_t *current;
615 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
617 isc_boolean_t nsec3only;
618 isc_boolean_t nonsec3;
619 } rbtdb_dbiterator_t;
/*
 * IS_STUB() and IS_CACHE() test the DNS_DBATTR_STUB and DNS_DBATTR_CACHE
 * bits of rbtdb->common.attributes.  The prototypes that follow declare
 * file-local helpers used before their definitions.
 */
622 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
623 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
625 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
627 static void overmem(dns_db_t *db, isc_boolean_t overmem);
628 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
629 isc_boolean_t *nsec3createflag);
632 * 'init_count' is used to initialize 'newheader->count' which in turn
633 * is used to determine where in the cycle rrset-order cyclic starts.
634 * We don't lock this as we don't care about simultaneous updates.
637 * Both init_count and header->count can be ISC_UINT32_MAX.
638 * The count on the returned rdataset however can't be as
639 * that indicates that the database does not implement cyclic
642 static unsigned int init_count;
647 * If a routine is going to lock more than one lock in this module, then
648 * the locking must be done in the following order:
652 * Node Lock (Only one from the set may be locked at one time by
657 * Failure to follow this hierarchy can result in deadlock.
663 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * Take a new reference on the database source by incrementing
 * rbtdb->references.  source must be a valid rbtdb (enforced by REQUIRE).
 * NOTE(review): the store through targetp is not visible in this excerpt.
 */
672 attach(dns_db_t *source, dns_db_t **targetp) {
673 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
675 REQUIRE(VALID_RBTDB(rbtdb));
677 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * Task event handler: hands the database carried in event->ev_arg to
 * free_rbtdb() with log set to ISC_TRUE, passing the event along so that
 * free_rbtdb() can reuse or free it.
 */
683 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
684 dns_rbtdb_t *rbtdb = event->ev_arg;
688 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * Adjust the rrset statistics counter that corresponds to header.
 *
 * The database must be a cache (INSIST).  For a negative header the
 * counted type is the extension part of header->type, flagged as NXDOMAIN
 * or NXRRSET; otherwise it is the base part with no attribute flags.
 * The counter in rbtdb->rrsetstats is then incremented or decremented.
 * NOTE(review): the test on the increment argument that selects between
 * the two calls is not visible in this excerpt.
 */
692 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
693 isc_boolean_t increment)
695 dns_rdatastatstype_t statattributes = 0;
696 dns_rdatastatstype_t base = 0;
697 dns_rdatastatstype_t type;
699 /* At the moment we count statistics only for cache DB */
700 INSIST(IS_CACHE(rbtdb));
702 if (NEGATIVE(header)) {
703 if (NXDOMAIN(header))
704 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
706 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
707 base = RBTDB_RDATATYPE_EXT(header->type);
710 base = RBTDB_RDATATYPE_BASE(header->type);
712 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
714 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
716 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * Store newttl in header->rdh_ttl and, for a cache database, repair the
 * position of header in the TTL heap of its node lock bucket
 * (header->node->locknum).  The visible guards skip the heap adjustment
 * when the database is not a cache, when the header is not in a heap
 * (heap_index == 0), when the TTL did not change, or when no heap exists
 * for the bucket.
 * NOTE(review): the condition choosing between isc_heap_increased() and
 * isc_heap_decreased() is not visible in this excerpt.
 */
720 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
725 oldttl = header->rdh_ttl;
726 header->rdh_ttl = newttl;
728 if (!IS_CACHE(rbtdb))
732 * It's possible the rbtdb is not a cache. If this is the case,
733 * we will not have a heap, and we move on. If we do, though,
734 * we might need to adjust things.
736 if (header->heap_index == 0 || newttl == oldttl)
738 idx = header->node->locknum;
739 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
741 heap = rbtdb->heaps[idx];
744 isc_heap_increased(heap, header->heap_index);
746 isc_heap_decreased(heap, header->heap_index);
750 * These functions allow the heap code to rank the priority of each
751 * element. They return ISC_TRUE if v1 happens "sooner" than v2.
/*
 * Heap ordering callback for TTL heaps: v1 and v2 are rdataset headers and
 * are compared by their rdh_ttl members (smaller TTL ranks first).
 */
754 ttl_sooner(void *v1, void *v2) {
755 rdatasetheader_t *h1 = v1;
756 rdatasetheader_t *h2 = v2;
758 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * Heap ordering callback for resign heaps: v1 and v2 are rdataset headers
 * and are compared by their resign members (smaller value ranks first).
 */
764 resign_sooner(void *v1, void *v2) {
765 rdatasetheader_t *h1 = v1;
766 rdatasetheader_t *h2 = v2;
768 if (h1->resign < h2->resign)
774 * This function sets the heap index into the header.
/*
 * Heap index callback: what is a rdataset header; its heap_index member is
 * set to index so the header can later be located in the heap.
 */
777 set_index(void *what, unsigned int index) {
778 rdatasetheader_t *h = what;
780 h->heap_index = index;
784 * Work out how many nodes can be deleted in the time between two
785 * requests to the nameserver. Smooth the resulting number and use it
786 * as an estimate for the number of nodes to be deleted in the next
/*
 * Compute a new deletion quantum from the previous one (old) and the time
 * elapsed since start.
 *
 * interval is the gap between two packets in microseconds, derived from
 * dns_pps.  The raw estimate old * interval / usecs is smoothed as
 * (new + 3 * old) / 4 and the result is logged at debug level 1.
 * NOTE(review): any clamping of pps before the division and the handling
 * of a zero usecs are on lines not visible in this excerpt.
 * NOTE(review): new is logged with %d; if new is declared unsigned (its
 * declaration is not visible here) the conversion should be %u.
 */
790 adjust_quantum(unsigned int old, isc_time_t *start) {
791 unsigned int pps = dns_pps; /* packets per second */
792 unsigned int interval;
801 interval = 1000000 / pps; /* interval in usec */
804 usecs = isc_time_microdiff(&end, start);
807 * We were unable to measure the amount of time taken.
808 * Double the nodes deleted next time.
815 new = old * interval;
816 new /= (unsigned int)usecs;
823 new = (new + old * 3) / 4;
825 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
826 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * Release all storage owned by rbtdb.
 *
 * Steps, in order:
 *  - for an IN-class cache, call overmem() with an all-ones flag value;
 *  - drop and free the current version (future_version must be NULL);
 *  - empty every dead-node list;
 *  - destroy the main tree and the NSEC3 tree with dns_rbt_destroy2(),
 *    using rbtdb->quantum (100 when a task is attached, 0 otherwise);
 *    when a destroy call reports ISC_R_QUOTA the quantum is re-estimated
 *    with adjust_quantum() and a DNS_EVENT_FREESTORAGE event is sent to
 *    rbtdb->task;
 *  - free the origin name, node locks, LRU lists, dead-node lists, heaps,
 *    statistics, tree lock, task reference and database lock;
 *  - clear the magic numbers, release the structure to its memory context
 *    and deliver the ondestroy notification.
 *
 * NOTE(review): presumably the function returns right after sending the
 * event and is re-entered from the task; the return statements are not
 * visible in this excerpt.
 * NOTE(review): the log argument presumably gates the done-message below;
 * the test itself is not visible here.
 */
832 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
834 isc_ondestroy_t ondest;
836 char buf[DNS_NAME_FORMATSIZE];
839 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
840 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
842 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
843 REQUIRE(rbtdb->future_version == NULL);
845 if (rbtdb->current_version != NULL) {
848 isc_refcount_decrement(&rbtdb->current_version->references,
851 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
852 isc_refcount_destroy(&rbtdb->current_version->references);
853 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
854 sizeof(rbtdb_version_t));
858 * We assume the number of remaining dead nodes is reasonably small;
859 * the overhead of unlinking all nodes here should be negligible.
861 for (i = 0; i < rbtdb->node_lock_count; i++) {
864 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
865 while (node != NULL) {
866 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
867 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/*
 * A nonzero quantum is only used when a task exists to receive the
 * DNS_EVENT_FREESTORAGE event after dns_rbt_destroy2() reports
 * ISC_R_QUOTA.
 */
872 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
874 if (rbtdb->tree != NULL) {
875 isc_time_now(&start);
876 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
877 if (result == ISC_R_QUOTA) {
878 INSIST(rbtdb->task != NULL);
879 if (rbtdb->quantum != 0)
880 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
883 event = isc_event_allocate(rbtdb->common.mctx,
885 DNS_EVENT_FREESTORAGE,
888 sizeof(isc_event_t));
891 isc_task_send(rbtdb->task, &event);
894 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
897 if (rbtdb->nsec3 != NULL) {
898 isc_time_now(&start);
899 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
900 if (result == ISC_R_QUOTA) {
901 INSIST(rbtdb->task != NULL);
902 if (rbtdb->quantum != 0)
903 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
906 event = isc_event_allocate(rbtdb->common.mctx,
908 DNS_EVENT_FREESTORAGE,
911 sizeof(isc_event_t));
914 isc_task_send(rbtdb->task, &event);
917 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
921 isc_event_free(&event);
923 if (dns_name_dynamic(&rbtdb->common.origin))
924 dns_name_format(&rbtdb->common.origin, buf,
927 strcpy(buf, "<UNKNOWN>");
928 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
929 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
930 "done free_rbtdb(%s)", buf);
932 if (dns_name_dynamic(&rbtdb->common.origin))
933 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
934 for (i = 0; i < rbtdb->node_lock_count; i++) {
935 isc_refcount_destroy(&rbtdb->node_locks[i].references);
936 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
940 * Clean up LRU / re-signing order lists.
942 if (rbtdb->rdatasets != NULL) {
943 for (i = 0; i < rbtdb->node_lock_count; i++)
944 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
945 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
946 rbtdb->node_lock_count *
947 sizeof(rdatasetheaderlist_t));
950 * Clean up dead node buckets.
952 if (rbtdb->deadnodes != NULL) {
953 for (i = 0; i < rbtdb->node_lock_count; i++)
954 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
955 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
956 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
959 * Clean up heap objects.
961 if (rbtdb->heaps != NULL) {
962 for (i = 0; i < rbtdb->node_lock_count; i++)
963 isc_heap_destroy(&rbtdb->heaps[i]);
964 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
965 rbtdb->node_lock_count * sizeof(isc_heap_t *));
968 if (rbtdb->rrsetstats != NULL)
969 dns_stats_detach(&rbtdb->rrsetstats);
971 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
972 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
973 isc_rwlock_destroy(&rbtdb->tree_lock);
974 isc_refcount_destroy(&rbtdb->references);
975 if (rbtdb->task != NULL)
976 isc_task_detach(&rbtdb->task);
978 RBTDB_DESTROYLOCK(&rbtdb->lock);
/* Invalidate the object before its memory is released. */
979 rbtdb->common.magic = 0;
980 rbtdb->common.impmagic = 0;
/* Copy ondest out first: rbtdb itself is freed by the putanddetach below. */
981 ondest = rbtdb->common.ondest;
982 isc_mem_detach(&rbtdb->hmctx);
983 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
984 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * Decide whether the database can be destroyed now.
 *
 * The cached soanode and nsnode references are released, every node lock
 * bucket is flagged as exiting under its write lock, and buckets are
 * tallied in inactive based on their reference counters.  The tally is
 * subtracted from rbtdb->active under the database write lock; when active
 * reaches zero want_free is set and free_rbtdb() is called after a debug
 * log message.
 * NOTE(review): the exact reference-count test and the want_free check
 * guarding the final call are on lines not visible in this excerpt.
 */
988 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
989 isc_boolean_t want_free = ISC_FALSE;
991 unsigned int inactive = 0;
993 /* XXX check for open versions here */
995 if (rbtdb->soanode != NULL)
996 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
997 if (rbtdb->nsnode != NULL)
998 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1001 * Even though there are no external direct references, there still
1002 * may be nodes in use.
1004 for (i = 0; i < rbtdb->node_lock_count; i++) {
1005 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1006 rbtdb->node_locks[i].exiting = ISC_TRUE;
1007 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1008 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1014 if (inactive != 0) {
1015 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1016 rbtdb->active -= inactive;
1017 if (rbtdb->active == 0)
1018 want_free = ISC_TRUE;
1019 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1021 char buf[DNS_NAME_FORMATSIZE];
1022 if (dns_name_dynamic(&rbtdb->common.origin))
1023 dns_name_format(&rbtdb->common.origin, buf,
1026 strcpy(buf, "<UNKNOWN>");
1027 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1028 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1029 "calling free_rbtdb(%s)", buf);
1030 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Drop one reference on the database *dbp (which must be a valid rbtdb)
 * and hand it to maybe_free_rbtdb().
 * NOTE(review): presumably maybe_free_rbtdb() is only called when the
 * count reached zero and *dbp is cleared afterwards; neither line is
 * visible in this excerpt.
 */
1036 detach(dns_db_t **dbp) {
1037 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1040 REQUIRE(VALID_RBTDB(rbtdb));
1042 isc_refcount_decrement(&rbtdb->references, &refs);
1045 maybe_free_rbtdb(rbtdb);
/*
 * Return the current version of db through versionp.  A new reference is
 * taken on the version while the database lock is held for reading, so the
 * version cannot be released between the lookup and the increment.
 */
1051 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1052 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1053 rbtdb_version_t *version;
1056 REQUIRE(VALID_RBTDB(rbtdb));
1058 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1059 version = rbtdb->current_version;
1060 isc_refcount_increment(&version->references, &refs);
1061 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1063 *versionp = (dns_dbversion_t *)version;
/*
 * Allocate a version object from mctx and initialise it with the given
 * serial, initial reference count and writer flag.  commit_ok starts as
 * ISC_FALSE, both lists start empty and the link is unlinked.  If the
 * reference counter cannot be initialised the memory is returned to mctx.
 * NOTE(review): callers test the result against NULL, so failure
 * presumably yields NULL; the return statements are not visible here.
 */
1066 static inline rbtdb_version_t *
1067 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1068 unsigned int references, isc_boolean_t writer)
1070 isc_result_t result;
1071 rbtdb_version_t *version;
1073 version = isc_mem_get(mctx, sizeof(*version));
1074 if (version == NULL)
1076 version->serial = serial;
1077 result = isc_refcount_init(&version->references, references);
1078 if (result != ISC_R_SUCCESS) {
1079 isc_mem_put(mctx, version, sizeof(*version));
1082 version->writer = writer;
1083 version->commit_ok = ISC_FALSE;
1084 ISC_LIST_INIT(version->changed_list);
1085 ISC_LIST_INIT(version->resigned_list);
1086 ISC_LINK_INIT(version, link);
/*
 * Create the future version of db and return it through versionp.
 *
 * Requires that *versionp is NULL and that no future version is already
 * open.  Under the database write lock a version with serial next_serial
 * and one reference is allocated; on success it is marked commit_ok, the
 * secure and havensec3 state are copied from the current version, and the
 * NSEC3 parameters are either copied (havensec3 set) or zeroed.
 * next_serial is then advanced and the version is stored as
 * rbtdb->future_version.
 *
 * Returns ISC_R_NOMEMORY when the allocation failed, ISC_R_SUCCESS
 * otherwise.
 */
1092 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1093 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1094 rbtdb_version_t *version;
1096 REQUIRE(VALID_RBTDB(rbtdb));
1097 REQUIRE(versionp != NULL && *versionp == NULL);
1098 REQUIRE(rbtdb->future_version == NULL);
1100 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1101 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1102 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1104 if (version != NULL) {
1105 version->commit_ok = ISC_TRUE;
1106 version->secure = rbtdb->current_version->secure;
1107 version->havensec3 = rbtdb->current_version->havensec3;
1108 if (version->havensec3) {
1109 version->flags = rbtdb->current_version->flags;
1110 version->iterations =
1111 rbtdb->current_version->iterations;
1112 version->hash = rbtdb->current_version->hash;
1113 version->salt_length =
1114 rbtdb->current_version->salt_length;
1115 memcpy(version->salt, rbtdb->current_version->salt,
1116 version->salt_length);
1119 version->iterations = 0;
1121 version->salt_length = 0;
1122 memset(version->salt, 0, sizeof(version->salt));
1124 rbtdb->next_serial++;
1125 rbtdb->future_version = version;
1127 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1129 if (version == NULL)
1130 return (ISC_R_NOMEMORY);
1132 *versionp = version;
1134 return (ISC_R_SUCCESS);
/*
 * Take an additional reference on the version source and return the same
 * version through targetp.  db must be a valid rbtdb.
 */
1138 attachversion(dns_db_t *db, dns_dbversion_t *source,
1139 dns_dbversion_t **targetp)
1141 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1142 rbtdb_version_t *rbtversion = source;
1145 REQUIRE(VALID_RBTDB(rbtdb));
1147 isc_refcount_increment(&rbtversion->references, &refs);
1150 *targetp = rbtversion;
/*
 * Record node as changed in version, which must be a writer version.
 *
 * A rbtdb_changed_t is allocated before the database write lock is taken.
 * With the lock held and the allocation successful, a reference is taken
 * on node and the record is appended to version->changed_list with dirty
 * cleared.
 * NOTE(review): version->commit_ok is set to ISC_FALSE on a line whose
 * enclosing branch is not visible here; presumably that happens only when
 * the allocation failed.  The return of the changed pointer is likewise
 * not visible.
 */
1153 static rbtdb_changed_t *
1154 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1155 dns_rbtnode_t *node)
1157 rbtdb_changed_t *changed;
1161 * Caller must be holding the node lock if its reference must be
1162 * protected by the lock.
1165 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1167 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1169 REQUIRE(version->writer);
1171 if (changed != NULL) {
1172 dns_rbtnode_refincrement(node, &refs);
1174 changed->node = node;
1175 changed->dirty = ISC_FALSE;
1176 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1178 version->commit_ok = ISC_FALSE;
1180 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Free an array of acachectl_t entries that belongs to header.
 *
 * The number of entries is read from the first two bytes (high byte
 * first) of the rdataslab stored directly after the header.  Every entry
 * must already have NULL entry and cbarg pointers (INSIST) before the
 * array is returned to mctx.
 * NOTE(review): the array parameter declaration and any early exit for a
 * NULL array are on lines not visible in this excerpt.
 */
1186 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1191 unsigned char *raw; /* RDATASLAB */
1194 * The caller must be holding the corresponding node lock.
1200 raw = (unsigned char *)header + sizeof(*header);
1201 count = raw[0] * 256 + raw[1];
1204 * Sanity check: since an additional cache entry has a reference to
1205 * the original DB node (in the callback arg), there should be no
1206 * acache entries when the node can be freed.
1208 for (i = 0; i < count; i++)
1209 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1211 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * Free the noqname structure referenced by *noqname: its name when it is
 * dynamically allocated, the neg and negsig rdata slabs when present
 * (sized with dns_rdataslab_size()), and finally the structure itself.
 * NOTE(review): whether *noqname is reset to NULL afterwards is not
 * visible in this excerpt.
 */
1215 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1217 if (dns_name_dynamic(&(*noqname)->name))
1218 dns_name_free(&(*noqname)->name, mctx);
1219 if ((*noqname)->neg != NULL)
1220 isc_mem_put(mctx, (*noqname)->neg,
1221 dns_rdataslab_size((*noqname)->neg, 0));
1222 if ((*noqname)->negsig != NULL)
1223 isc_mem_put(mctx, (*noqname)->negsig,
1224 dns_rdataslab_size((*noqname)->negsig, 0));
1225 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * Initialise the list link of the rdataset header h.
 * NOTE(review): the fprintf trace for IN-class caches is presumably
 * compiled in only for debugging; the surrounding preprocessor guard and
 * the other member initialisations are not visible in this excerpt.
 */
1230 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1232 ISC_LINK_INIT(h, link);
1236 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1237 fprintf(stderr, "initialized header: %p\n", h);
/*
 * Allocate a rdataset header from mctx and initialise it with
 * init_rdataset().
 * NOTE(review): the NULL check on the allocation, the preprocessor guard
 * around the fprintf trace and the return statement are on lines not
 * visible in this excerpt.
 */
1243 static inline rdatasetheader_t *
1244 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1246 rdatasetheader_t *h;
1248 h = isc_mem_get(mctx, sizeof(*h));
1253 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1254 fprintf(stderr, "allocated header: %p\n", h);
1256 init_rdataset(rbtdb, h);
/*
 * free_rdataset(): release the header "rdataset" and everything attached
 * to it, in this order:
 *   - rrset statistics are updated for an existing header that carries
 *     RDATASET_ATTR_STATCOUNT;
 *   - the header is unlinked from rbtdb->rdatasets[idx] if it is linked
 *     (asserted to happen only for a cache database);
 *   - it is removed from rbtdb->heaps[idx] when heap_index is non-zero;
 *   - the noqname and closest structures are freed when present;
 *   - both additional-cache arrays are passed to free_acachearray();
 *   - the memory is returned to "mctx".
 * NOTE(review): local declarations and the second argument of the final
 * dns_rdataslab_size() call are on lines not visible in this excerpt.
 */
1261 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1266 if (EXISTS(rdataset) &&
1267 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1268 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
/* The per-bucket list and heap are indexed by the lock number of the node. */
1271 idx = rdataset->node->locknum;
1272 if (ISC_LINK_LINKED(rdataset, link)) {
1273 INSIST(IS_CACHE(rbtdb));
1274 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1276 if (rdataset->heap_index != 0)
1277 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1278 rdataset->heap_index = 0;
1280 if (rdataset->noqname != NULL)
1281 free_noqname(mctx, &rdataset->noqname);
1282 if (rdataset->closest != NULL)
1283 free_noqname(mctx, &rdataset->closest);
1285 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1286 free_acachearray(mctx, rdataset, rdataset->additional_glue);
/*
 * For a NONEXISTENT header only sizeof(*rdataset) is returned; otherwise
 * the size is computed by dns_rdataslab_size().
 */
1288 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1289 size = sizeof(*rdataset);
1291 size = dns_rdataslab_size((unsigned char *)rdataset,
1293 isc_mem_put(mctx, rdataset, size);
/*
 * rollback_node(): mark every header at "node" whose serial equals "serial"
 * with RDATASET_ATTR_IGNORE.  Both the top-level headers (linked through
 * next) and the headers below each of them (linked through down) are
 * examined.  make_dirty records whether at least one header was marked.
 * NOTE(review): the statements that consume make_dirty are on lines not
 * visible in this excerpt.
 */
1297 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1298 rdatasetheader_t *header, *dcurrent;
1299 isc_boolean_t make_dirty = ISC_FALSE;
1302 * Caller must hold the node lock.
1306 * We set the IGNORE attribute on rdatasets with serial number
1307 * 'serial'. When the reference count goes to zero, these rdatasets
1308 * will be cleaned up; until that time, they will be ignored.
1310 for (header = node->data; header != NULL; header = header->next) {
1311 if (header->serial == serial) {
1312 header->attributes |= RDATASET_ATTR_IGNORE;
1313 make_dirty = ISC_TRUE;
/* Apply the same marking to every header on the down chain. */
1315 for (dcurrent = header->down;
1317 dcurrent = dcurrent->down) {
1318 if (dcurrent->serial == serial) {
1319 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1320 make_dirty = ISC_TRUE;
/*
 * clean_stale_headers(): free every header on the down chain of "top" with
 * free_rdataset().  "top" itself is not freed by the loop shown here.
 * NOTE(review): statements after the loop are not visible in this excerpt.
 */
1329 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1331 rdatasetheader_t *d, *down_next;
1333 for (d = top->down; d != NULL; d = down_next) {
/* Save the successor first: d is released by the call below. */
1334 down_next = d->down;
1335 free_rdataset(rbtdb, mctx, d);
/*
 * clean_cache_node(): clean up the headers of a cache node.  For every
 * top-level header the down chain is released with clean_stale_headers();
 * a top-level header that is NONEXISTENT or STALE is then unlinked from the
 * list at the node (through top_prev->next or node->data) and freed.
 * NOTE(review): the initialization and update of top_prev and the
 * statements after the loop are on lines not visible in this excerpt.
 */
1341 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1342 rdatasetheader_t *current, *top_prev, *top_next;
1343 isc_mem_t *mctx = rbtdb->common.mctx;
1346 * Caller must be holding the node lock.
1350 for (current = node->data; current != NULL; current = top_next) {
1351 top_next = current->next;
/* Everything below the top-level header is released unconditionally. */
1352 clean_stale_headers(rbtdb, mctx, current);
1354 * If current is nonexistent or stale, we can clean it up.
1356 if ((current->attributes &
1357 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1358 if (top_prev != NULL)
1359 top_prev->next = current->next;
1361 node->data = current->next;
1362 free_rdataset(rbtdb, mctx, current);
/*
 * clean_zone_node(): prune the header chains of a zone node.
 *
 * rbtdb        - database owning the node; headers are freed to its mctx.
 * node         - node whose header lists are cleaned.
 * least_serial - least serial still in use; must be non-zero (REQUIRE).
 *
 * For each top-level header the visible code performs these steps:
 *   1. walks the down chain and frees entries whose serial equals that of
 *      their parent (a second condition of that test is on a line not
 *      visible here), relinking the chain around them;
 *   2. if the top-level header itself is IGNOREd, either removes it from
 *      the node (no down chain) or promotes its first down entry to the
 *      top-level list in its place;
 *   3. searches the down chain for the first entry with a serial below
 *      least_serial and frees that entry and everything after it;
 *   4. sets still_dirty when a down chain remains;
 *   5. unlinks and frees a NONEXISTENT top-level header.
 * NOTE(review): several else branches, loop conditions, the maintenance of
 * top_prev and dparent, and the use made of still_dirty are on lines not
 * visible in this excerpt, so the exact nesting of steps 4 and 5 must be
 * confirmed against the full file.
 */
1370 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1371 rbtdb_serial_t least_serial)
1373 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1374 rdatasetheader_t *top_prev, *top_next;
1375 isc_mem_t *mctx = rbtdb->common.mctx;
1376 isc_boolean_t still_dirty = ISC_FALSE;
1379 * Caller must be holding the node lock.
1381 REQUIRE(least_serial != 0);
1384 for (current = node->data; current != NULL; current = top_next) {
1385 top_next = current->next;
1388 * First, we clean up any instances of multiple rdatasets
1389 * with the same serial number, or that have the IGNORE
1393 for (dcurrent = current->down;
1395 dcurrent = down_next) {
1396 down_next = dcurrent->down;
1397 INSIST(dcurrent->serial <= dparent->serial);
1398 if (dcurrent->serial == dparent->serial ||
1400 if (down_next != NULL)
1401 down_next->next = dparent;
1402 dparent->down = down_next;
1403 free_rdataset(rbtdb, mctx, dcurrent);
1409 * We've now eliminated all IGNORE datasets with the possible
1410 * exception of current, which we now check.
1412 if (IGNORE(current)) {
1413 down_next = current->down;
1414 if (down_next == NULL) {
1415 if (top_prev != NULL)
1416 top_prev->next = current->next;
1418 node->data = current->next;
1419 free_rdataset(rbtdb, mctx, current);
1421 * current no longer exists, so we can
1422 * just continue with the loop.
1427 * Pull up current->down, making it the new
1430 if (top_prev != NULL)
1431 top_prev->next = down_next;
1433 node->data = down_next;
1434 down_next->next = top_next;
1435 free_rdataset(rbtdb, mctx, current);
1436 current = down_next;
1441 * We now try to find the first down node less than the
1445 for (dcurrent = current->down;
1447 dcurrent = down_next) {
1448 down_next = dcurrent->down;
1449 if (dcurrent->serial < least_serial)
1455 * If there is a such an rdataset, delete it and any older
1458 if (dcurrent != NULL) {
1460 down_next = dcurrent->down;
1461 INSIST(dcurrent->serial <= least_serial);
1462 free_rdataset(rbtdb, mctx, dcurrent);
1463 dcurrent = down_next;
1464 } while (dcurrent != NULL);
1465 dparent->down = NULL;
1469 * Note. The serial number of 'current' might be less than
1470 * least_serial too, but we cannot delete it because it is
1471 * the most recent version, unless it is a NONEXISTENT
1474 if (current->down != NULL) {
1475 still_dirty = ISC_TRUE;
1479 * If this is a NONEXISTENT rdataset, we can delete it.
1481 if (NONEXISTENT(current)) {
1482 if (top_prev != NULL)
1483 top_prev->next = current->next;
1485 node->data = current->next;
1486 free_rdataset(rbtdb, mctx, current);
1496 * Clean up dead nodes. These are nodes which have no references, and
1497 * have no data. They are dead but we could not or chose not to delete
1498 * them when we deleted all the data at that node because we did not want
1499 * to wait for the tree write lock.
1501 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * cleanup_dead_nodes(): remove nodes queued on rbtdb->deadnodes[bucketnum]
 * from the tree.  Each node is unlinked from the list, asserted to have no
 * references and no data, and deleted with dns_rbt_deletenode() from either
 * rbtdb->nsec3 or rbtdb->tree; a failed deletion is logged at warning
 * level.  The loop stops when the list is empty or "count" (initially 10)
 * is no longer positive.
 * NOTE(review): the condition selecting the nsec3 tree versus the main
 * tree, the remaining dns_rbt_deletenode() arguments and the statement
 * that decrements count are on lines not visible in this excerpt.
 */
1504 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1505 dns_rbtnode_t *node;
1506 isc_result_t result;
1507 int count = 10; /* XXXJT: should be adjustable */
1509 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1510 while (node != NULL && count > 0) {
1511 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1514 * Since we're holding a tree write lock, it should be
1515 * impossible for this node to be referenced by others.
1517 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1518 node->data == NULL);
1520 INSIST(!ISC_LINK_LINKED(node, deadlink));
1522 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1525 result = dns_rbt_deletenode(rbtdb->tree, node,
1527 if (result != ISC_R_SUCCESS)
1528 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1529 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1530 "cleanup_dead_nodes: "
1531 "dns_rbt_deletenode: %s",
1532 isc_result_totext(result));
1533 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1539 * Caller must be holding the node lock if its reference must be protected
/*
 * new_reference(): add one reference to "node".  When this is the first
 * reference to the node (the incremented count is 1), the reference count
 * of the lock bucket rbtdb->node_locks[node->locknum] is incremented as
 * well.  Both resulting counts are asserted to be non-zero.
 */
1543 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1544 unsigned int lockrefs, noderefs;
1545 isc_refcount_t *lockref;
1547 dns_rbtnode_refincrement0(node, &noderefs);
1548 if (noderefs == 1) { /* this is the first reference to the node */
1549 lockref = &rbtdb->node_locks[node->locknum].references;
1550 isc_refcount_increment0(lockref, &lockrefs);
1551 INSIST(lockrefs != 0);
1553 INSIST(noderefs != 0);
1557 * This function is assumed to be called when a node is newly referenced
1558 * and can be in the deadnode list. In that case the node must be retrieved
1559 * from the list because it is going to be used. In addition, if the caller
1560 * happens to hold a write lock on the tree, it's a good chance to purge dead
1562 * Note: while a new reference is gained in multiple places, there are only very
1563 * few cases where the node can be in the deadnode list (only empty nodes can
1564 * have been added to the list).
/*
 * reactivate_node(): take a new reference to "node" and make sure it is no
 * longer queued on the dead-node list of its lock bucket.
 *
 * rbtdb        - database owning the node.
 * node         - node being referenced.
 * treelocktype - type of tree lock held by the caller; when it is a write
 *                lock, cleanup_dead_nodes() is also run for the bucket.
 *
 * The reference is taken under the strong node lock.  need_relock is then
 * computed under the weak read lock, and the write-locked section unlinks
 * the node from deadnodes[] if it is linked there.
 * NOTE(review): the test of need_relock that guards the write-locked
 * section is on a line not visible in this excerpt.
 */
1567 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1568 isc_rwlocktype_t treelocktype)
1570 isc_boolean_t need_relock = ISC_FALSE;
1572 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1573 new_reference(rbtdb, node);
/* Decide under the read lock whether the write-locked section is needed. */
1575 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1576 isc_rwlocktype_read);
1577 if (ISC_LINK_LINKED(node, deadlink))
1578 need_relock = ISC_TRUE;
1579 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1580 treelocktype == isc_rwlocktype_write)
1581 need_relock = ISC_TRUE;
1582 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1583 isc_rwlocktype_read);
1585 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1586 isc_rwlocktype_write);
1587 if (ISC_LINK_LINKED(node, deadlink))
1588 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1590 if (treelocktype == isc_rwlocktype_write)
1591 cleanup_dead_nodes(rbtdb, node->locknum);
1592 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1593 isc_rwlocktype_write);
1596 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1600 * Caller must be holding the node lock; either the "strong", read or write
1601 * lock. Note that the lock must be held even when node references are
1602 * atomically modified; in that case the decrement operation itself does not
1603 * have to be protected, but we must avoid a race condition where multiple
1604 * threads are decreasing the reference to zero simultaneously and at least
1605 * one of them is going to free the node.
1606 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * decrement_reference(): drop one reference to "node" and, when the node
 * becomes unused, clean it and try to remove it from the tree.
 *
 * rbtdb        - database owning the node.
 * node         - node whose reference count is decremented.
 * least_serial - serial handed to clean_zone_node(); when 0, the value is
 *                read from rbtdb->least_serial under the database lock.
 * nlock        - type of node lock held by the caller.  A read lock is
 *                exchanged for a write lock before the slow path and
 *                downgraded again on the way out.
 * tlock        - type of tree lock held by the caller.  Unless it already
 *                is a write lock, the function only tries (tryupgrade or
 *                trylock) to obtain one and never blocks for it.
 * pruning      - when true, no pruning event is dispatched for the node.
 *
 * Fast path: a node that is not dirty and still has data or a down pointer
 * just has its own count and the count of its lock bucket decremented; the
 * result is ISC_TRUE exactly when the node count reached zero.
 *
 * Slow path: a dirty node with no remaining references is cleaned with
 * clean_cache_node() or clean_zone_node().  If it then has neither data nor
 * a down pointer and the tree write lock is available, it is either kept
 * alive for a pruning event sent to rbtdb->task, appended to
 * deadnodes[bucket] when that event cannot be allocated, or deleted from
 * the tree.  Without the tree write lock an unreferenced node is appended
 * to deadnodes[bucket].  The slow path returns no_reference.
 * NOTE(review): a number of else keywords, early returns, event-allocation
 * arguments and lock-release conditions are on lines not visible in this
 * excerpt; the summary above follows the visible statements and the
 * existing comments and must be confirmed against the full file.
 */
1609 static isc_boolean_t
1610 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1611 rbtdb_serial_t least_serial,
1612 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1613 isc_boolean_t pruning)
1615 isc_result_t result;
1616 isc_boolean_t write_locked;
1617 rbtdb_nodelock_t *nodelock;
1618 unsigned int refs, nrefs;
1619 int bucket = node->locknum;
1620 isc_boolean_t no_reference;
1622 nodelock = &rbtdb->node_locks[bucket];
1624 /* Handle easy and typical case first. */
1625 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1626 dns_rbtnode_refdecrement(node, &nrefs);
1627 INSIST((int)nrefs >= 0);
1629 isc_refcount_decrement(&nodelock->references, &refs);
1630 INSIST((int)refs >= 0);
1632 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1635 /* Upgrade the lock? */
1636 if (nlock == isc_rwlocktype_read) {
1637 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1638 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1640 dns_rbtnode_refdecrement(node, &nrefs);
1641 INSIST((int)nrefs >= 0);
1643 /* Restore the lock? */
1644 if (nlock == isc_rwlocktype_read)
1645 NODE_WEAKDOWNGRADE(&nodelock->lock);
1649 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1650 if (IS_CACHE(rbtdb))
1651 clean_cache_node(rbtdb, node);
1653 if (least_serial == 0) {
1655 * Caller doesn't know the least serial.
1658 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1659 least_serial = rbtdb->least_serial;
1660 RBTDB_UNLOCK(&rbtdb->lock,
1661 isc_rwlocktype_read);
1663 clean_zone_node(rbtdb, node, least_serial);
1667 isc_refcount_decrement(&nodelock->references, &refs);
1668 INSIST((int)refs >= 0);
1671 * XXXDCL should this only be done for cache zones?
1673 if (node->data != NULL || node->down != NULL) {
1674 /* Restore the lock? */
1675 if (nlock == isc_rwlocktype_read)
1676 NODE_WEAKDOWNGRADE(&nodelock->lock);
1681 * Attempt to switch to a write lock on the tree. If this fails,
1682 * we will add this node to a linked list of nodes in this locking
1683 * bucket which we will free later.
1685 if (tlock != isc_rwlocktype_write) {
1687 * Locking hierarchy notwithstanding, we don't need to free
1688 * the node lock before acquiring the tree write lock because
1689 * we only do a trylock.
1691 if (tlock == isc_rwlocktype_read)
1692 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1694 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1695 isc_rwlocktype_write);
1696 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1697 result == ISC_R_LOCKBUSY);
1699 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1701 write_locked = ISC_TRUE;
1703 no_reference = ISC_TRUE;
1704 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1706 * We can now delete the node if the reference counter is
1707 * zero. This should be typically the case, but a different
1708 * thread may still gain a (new) reference just before the
1709 * current thread locks the tree (e.g., in findnode()).
1713 * If this node is the only one in the level it's in, deleting
1714 * this node may recursively make its parent the only node in
1715 * the parent level; if so, and if no one is currently using
1716 * the parent node, this is almost the only opportunity to
1717 * clean it up. But the recursive cleanup is not that trivial
1718 * since the child and parent may be in different lock buckets,
1719 * which would cause a lock order reversal problem. To avoid
1720 * the trouble, we'll dispatch a separate event for batch
1721 * cleaning. We need to check whether we're deleting the node
1722 * as a result of pruning to avoid infinite dispatching.
1723 * Note: pruning happens only when a task has been set for the
1724 * rbtdb. If the user of the rbtdb chooses not to set a task,
1725 * it's their responsibility to purge stale leaves (e.g. by
1726 * periodic walk-through).
1728 if (!pruning && node->parent != NULL &&
1729 node->parent->down == node && node->left == NULL &&
1730 node->right == NULL && rbtdb->task != NULL) {
1734 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1737 sizeof(isc_event_t));
1739 new_reference(rbtdb, node);
1741 attach((dns_db_t *)rbtdb, &db);
1743 isc_task_send(rbtdb->task, &ev);
1744 no_reference = ISC_FALSE;
1747 * XXX: this is a weird situation. We could
1748 * ignore this error case, but then the stale
1749 * node will unlikely be purged except via a
1750 * rare condition such as manual cleanup. So
1751 * we queue it in the deadnodes list, hoping
1752 * the memory shortage is temporary and the node
1753 * will be deleted later.
1755 isc_log_write(dns_lctx,
1756 DNS_LOGCATEGORY_DATABASE,
1757 DNS_LOGMODULE_CACHE,
1759 "decrement_reference: failed to "
1760 "allocate pruning event");
1761 INSIST(!ISC_LINK_LINKED(node, deadlink));
1762 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1766 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1767 char printname[DNS_NAME_FORMATSIZE];
1769 isc_log_write(dns_lctx,
1770 DNS_LOGCATEGORY_DATABASE,
1771 DNS_LOGMODULE_CACHE,
1773 "decrement_reference: "
1774 "delete from rbt: %p %s",
1776 dns_rbt_formatnodename(node,
1778 sizeof(printname)));
1781 INSIST(!ISC_LINK_LINKED(node, deadlink));
1783 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1786 result = dns_rbt_deletenode(rbtdb->tree, node,
1788 if (result != ISC_R_SUCCESS) {
1789 isc_log_write(dns_lctx,
1790 DNS_LOGCATEGORY_DATABASE,
1791 DNS_LOGMODULE_CACHE,
1793 "decrement_reference: "
1794 "dns_rbt_deletenode: %s",
1795 isc_result_totext(result));
1798 } else if (dns_rbtnode_refcurrent(node) == 0) {
1799 INSIST(!ISC_LINK_LINKED(node, deadlink));
1800 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1802 no_reference = ISC_FALSE;
1804 /* Restore the lock? */
1805 if (nlock == isc_rwlocktype_read)
1806 NODE_WEAKDOWNGRADE(&nodelock->lock);
1809 * Relock a read lock, or unlock the write lock if no lock was held.
1811 if (tlock == isc_rwlocktype_none)
1813 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1815 if (tlock == isc_rwlocktype_read)
1817 isc_rwlock_downgrade(&rbtdb->tree_lock);
1819 return (no_reference);
1823 * Prune the tree by recursively cleaning-up single leaves. In the worst
1824 * case, the number of iterations is the number of tree levels, which is at
1825 * most the maximum number of domain name labels, i.e., 127. In practice, this
1826 * should be much smaller (only a few times), and even the worst case would be
1827 * acceptable for a single event.
/*
 * prune_tree(): task event handler that removes a node and then walks
 * towards the root removing parents that have become childless.
 *
 * task  - task delivering the event (not referenced in the visible lines).
 * event - event whose ev_sender is the rbtdb and whose ev_arg is the node
 *         to start from; it is freed before any lock is taken.
 *
 * The work is done under the tree write lock and the write lock of the
 * current node bucket.  In each iteration the reference held on the node is
 * released through decrement_reference() with pruning set to ISC_TRUE.  If
 * the parent then has no down pointer, the handler switches to the lock
 * bucket of the parent when it differs, takes a reference on the parent and
 * unlinks it from deadnodes[] if it is queued there.  Afterwards both locks
 * are released and the database is detached.
 * NOTE(review): the do keyword and the statements that advance node (or set
 * it to NULL to end the loop) are on lines not visible in this excerpt.
 */
1830 prune_tree(isc_task_t *task, isc_event_t *event) {
1831 dns_rbtdb_t *rbtdb = event->ev_sender;
1832 dns_rbtnode_t *node = event->ev_arg;
1833 dns_rbtnode_t *parent;
1834 unsigned int locknum;
1838 isc_event_free(&event);
1840 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1841 locknum = node->locknum;
1842 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1844 parent = node->parent;
1845 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1846 isc_rwlocktype_write, ISC_TRUE);
1848 if (parent != NULL && parent->down == NULL) {
1850 * node was the only down child of the parent and has
1851 * just been removed. We'll then need to examine the
1852 * parent. Keep the lock if possible; otherwise,
1853 * release the old lock and acquire one for the parent.
1855 if (parent->locknum != locknum) {
1856 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1857 isc_rwlocktype_write);
1858 locknum = parent->locknum;
1859 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1860 isc_rwlocktype_write);
1864 * We need to gain a reference to the node before
1865 * decrementing it in the next iteration. In addition,
1866 * if the node is in the dead-nodes list, extract it
1867 * from the list beforehand as we do in
1868 * reactivate_node().
1870 new_reference(rbtdb, parent);
1871 if (ISC_LINK_LINKED(parent, deadlink)) {
1872 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1879 } while (node != NULL);
1880 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1881 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1883 detach((dns_db_t **)&rbtdb);
/*
 * make_least_version(): record "version" as the least open version.
 * rbtdb->least_serial takes the serial of the version, the changed list of
 * the version is handed to the caller through *cleanup_list, and the list
 * inside the version is re-initialized as empty.
 */
1887 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1888 rbtdb_changedlist_t *cleanup_list)
1891 * Caller must be holding the database lock.
1894 rbtdb->least_serial = version->serial;
1895 *cleanup_list = version->changed_list;
1896 ISC_LIST_INIT(version->changed_list);
/*
 * cleanup_nondirty(): move every changed record of "version" that is not
 * marked dirty from version->changed_list to *cleanup_list.  Dirty records
 * stay on the list of the version.  The successor of each record is saved
 * before the record is unlinked so the walk can continue.
 * NOTE(review): the loop condition and the remaining arguments of UNLINK()
 * and APPEND() are on lines not visible in this excerpt.
 */
1900 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1901 rbtdb_changed_t *changed, *next_changed;
1904 * If the changed record is dirty, then
1905 * an update created multiple versions of
1906 * a given rdataset. We keep this list
1907 * until we're the least open version, at
1908 * which point it's safe to get rid of any
1911 * If the changed record isn't dirty, then
1912 * we don't need it anymore since we're
1913 * committing and not rolling back.
1915 * The caller must be holding the database lock.
1917 for (changed = HEAD(version->changed_list);
1919 changed = next_changed) {
1920 next_changed = NEXT(changed, link);
1921 if (!changed->dirty) {
1922 UNLINK(version->changed_list,
1924 APPEND(*cleanup_list,
/*
 * iszonesecure(): work out the DNSSEC status of "version" and store it in
 * version->secure.
 *
 * db      - database to query.
 * version - version examined; its secure and havensec3 members are written.
 * origin  - node at which the DNSKEY and NSEC rdatasets are looked up.
 *
 * Visible steps:
 *   1. The DNSKEY rdataset at origin is scanned; haszonekey is set when
 *      dns_zonekey_iszonekey() accepts one of the records.
 *   2. On a path whose condition is not visible here, the version is marked
 *      dns_db_insecure with havensec3 cleared (presumably when no zone key
 *      was found -- confirm).
 *   3. The NSEC rdataset and its signatures at origin are looked up, and
 *      hasoptbit is taken from dns_nsec_typepresent() on the first NSEC
 *      record.
 *   4. setnsec3parameters() fills in the NSEC3 state of the version.
 *   5. The result is dns_db_secure when havensec3 is set or an NSEC chain
 *      exists without the opt bit, dns_db_partial when the opt bit or the
 *      NSEC3 create flag is set, and dns_db_insecure otherwise.
 * NOTE(review): the assignment to hasnsec, the type argument passed to
 * dns_nsec_typepresent() and several braces and returns are on lines not
 * visible in this excerpt.
 */
1931 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1932 dns_rdataset_t keyset;
1933 dns_rdataset_t nsecset, signsecset;
1934 dns_rdata_t rdata = DNS_RDATA_INIT;
1935 isc_boolean_t haszonekey = ISC_FALSE;
1936 isc_boolean_t hasnsec = ISC_FALSE;
1937 isc_boolean_t hasoptbit = ISC_FALSE;
1938 isc_boolean_t nsec3createflag = ISC_FALSE;
1939 isc_result_t result;
1941 dns_rdataset_init(&keyset);
1942 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1943 0, 0, &keyset, NULL);
1944 if (result == ISC_R_SUCCESS) {
1945 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1946 result = dns_rdataset_first(&keyset);
1947 while (result == ISC_R_SUCCESS) {
1948 dns_rdataset_current(&keyset, &keyrdata);
1949 if (dns_zonekey_iszonekey(&keyrdata)) {
1950 haszonekey = ISC_TRUE;
1953 result = dns_rdataset_next(&keyset);
1955 dns_rdataset_disassociate(&keyset);
1958 version->secure = dns_db_insecure;
1959 version->havensec3 = ISC_FALSE;
1963 dns_rdataset_init(&nsecset);
1964 dns_rdataset_init(&signsecset);
1965 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1966 0, 0, &nsecset, &signsecset);
1967 if (result == ISC_R_SUCCESS) {
1968 if (dns_rdataset_isassociated(&signsecset)) {
1970 result = dns_rdataset_first(&nsecset);
1971 if (result == ISC_R_SUCCESS) {
1972 dns_rdataset_current(&nsecset, &rdata);
1973 hasoptbit = dns_nsec_typepresent(&rdata,
1976 dns_rdataset_disassociate(&signsecset);
1978 dns_rdataset_disassociate(&nsecset);
1981 setnsec3parameters(db, version, &nsec3createflag);
1984 * Do we have a valid NSEC/NSEC3 chain?
1986 if (version->havensec3 || (hasnsec && !hasoptbit))
1987 version->secure = dns_db_secure;
1989 * Do we have a NSEC/NSEC3 chain under creation?
1991 else if (hasoptbit || nsec3createflag)
1992 version->secure = dns_db_partial;
1994 version->secure = dns_db_insecure;
1998 * Walk the origin node looking for NSEC3PARAM records.
1999 * Cache the nsec3 parameters.
/*
 * setnsec3parameters(): scan the headers at the origin node of "db" for an
 * NSEC3PARAM rdataset visible in "version" and cache its parameters in the
 * version.
 *
 * db              - database, used as a dns_rbtdb_t.
 * version         - version whose havensec3, salt, salt_length, hash,
 *                   iterations and flags members are written.
 * nsec3createflag - set to ISC_TRUE when a record tests positive for
 *                   DNS_NSEC3FLAG_CREATE; never cleared here.
 *
 * The scan runs under the tree read lock and the read lock of the origin
 * node bucket.  For each top-level header the down chain is followed until
 * a header with serial <= version->serial is found.  When that header is
 * of type NSEC3PARAM, its records are decoded one by one from the raw slab
 * (16-bit record count, then a 16-bit length before each record) and
 * converted with dns_rdata_tostruct().  An accepted record has its salt,
 * hash, salt length, iterations and flags copied into the version and
 * havensec3 set.
 * NOTE(review): the second half of several conditions, the statements that
 * skip rejected records (unsupported hash, unexpected flags), the loop exit
 * after a known hash is found and the preprocessor else/endif lines are not
 * visible in this excerpt -- confirm the exact acceptance rules against the
 * full file.
 */
2002 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
2003 isc_boolean_t *nsec3createflag)
2005 dns_rbtnode_t *node;
2006 dns_rdata_nsec3param_t nsec3param;
2007 dns_rdata_t rdata = DNS_RDATA_INIT;
2008 isc_region_t region;
2009 isc_result_t result;
2010 rdatasetheader_t *header, *header_next;
2011 unsigned char *raw; /* RDATASLAB */
2012 unsigned int count, length;
2013 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2015 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2016 version->havensec3 = ISC_FALSE;
2017 node = rbtdb->origin_node;
2018 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2019 isc_rwlocktype_read);
2020 for (header = node->data;
2022 header = header_next) {
2023 header_next = header->next;
2025 if (header->serial <= version->serial &&
2027 if (NONEXISTENT(header))
2031 header = header->down;
2032 } while (header != NULL);
2034 if (header != NULL &&
2035 header->type == dns_rdatatype_nsec3param) {
2037 * Find A NSEC3PARAM with a supported algorithm.
2039 raw = (unsigned char *)header + sizeof(*header);
2040 count = raw[0] * 256 + raw[1]; /* count */
2041 #if DNS_RDATASET_FIXED
2042 raw += count * 4 + 2;
2046 while (count-- > 0U) {
2047 length = raw[0] * 256 + raw[1];
2048 #if DNS_RDATASET_FIXED
2054 region.length = length;
2056 dns_rdata_fromregion(&rdata,
2057 rbtdb->common.rdclass,
2058 dns_rdatatype_nsec3param,
2060 result = dns_rdata_tostruct(&rdata,
2063 INSIST(result == ISC_R_SUCCESS);
2064 dns_rdata_reset(&rdata);
2066 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2067 !dns_nsec3_supportedhash(nsec3param.hash))
2070 #ifdef RFC5155_STRICT
2071 if (nsec3param.flags != 0)
2074 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2076 *nsec3createflag = ISC_TRUE;
2077 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2082 memcpy(version->salt, nsec3param.salt,
2083 nsec3param.salt_length);
2084 version->hash = nsec3param.hash;
2085 version->salt_length = nsec3param.salt_length;
2086 version->iterations = nsec3param.iterations;
2087 version->flags = nsec3param.flags;
2088 version->havensec3 = ISC_TRUE;
2090 * Look for a better algorithm than the
2091 * unknown test algorithm.
2093 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2099 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2100 isc_rwlocktype_read);
2101 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * cleanup_dead_nodes_callback(): task event handler that runs
 * cleanup_dead_nodes() for every node lock bucket of the database found in
 * event->ev_arg.  The whole pass is made under the tree write lock; each
 * bucket is processed under its own write lock, and the head of its
 * dead-node list is checked afterwards to see whether work remains.
 * After the pass the event is either sent to "task" again, or freed,
 * followed by a decrement of rbtdb->references and a call to
 * maybe_free_rbtdb().
 * NOTE(review): the statement executed when a bucket still has dead nodes
 * (presumably setting again), the conditions choosing between re-sending
 * and freeing the event, and the declaration of refs are on lines not
 * visible in this excerpt.
 */
2105 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2106 dns_rbtdb_t *rbtdb = event->ev_arg;
2107 isc_boolean_t again = ISC_FALSE;
2108 unsigned int locknum;
2111 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2112 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2113 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2114 isc_rwlocktype_write);
2115 cleanup_dead_nodes(rbtdb, locknum);
2116 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2118 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2119 isc_rwlocktype_write);
2121 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2123 isc_task_send(task, &event);
2125 isc_event_free(&event);
2126 isc_refcount_decrement(&rbtdb->references, &refs);
2128 maybe_free_rbtdb(rbtdb);
/*
 * closeversion(): release the reference held through *versionp and, for a
 * writer version, commit or roll back its changes.
 *
 * db       - database, validated with VALID_RBTDB().
 * versionp - address of the version handle being closed.
 * commit   - for a writer version, selects commit versus rollback.
 *
 * Under the database write lock (taken once the decremented reference
 * count is no longer greater than zero):
 *   - Writer, commit: the version must have commit_ok set and be
 *     rbtdb->future_version.  The previous current version gives up the
 *     reference held by the database; changed records are either taken
 *     over through make_least_version() or filtered with
 *     cleanup_nondirty().  The version then becomes current_version,
 *     gains the reference of the database and is prepended to
 *     open_versions.
 *   - Writer, rollback: the changed and resigned lists are taken for
 *     cleanup, rollback is set and future_version is cleared.
 *   - Reader: a version other than the current one is scheduled for
 *     freeing; its changed list becomes the cleanup list when it was the
 *     least open version, otherwise it is appended to the list of the
 *     next greater open version.  The version is unlinked from
 *     open_versions.
 * After the database lock is released:
 *   - iszonesecure() is run for a committed writer of a non-cache database;
 *   - cleanup_version, if any, is freed (its changed list must be empty);
 *   - each header on the resigned list is processed under its node lock;
 *   - each record on the cleanup list is processed under its node lock
 *     (cleanup_dead_nodes(), rollback_node(), decrement_reference()) and
 *     freed.  If a DNS_EVENT_RBTDEADNODES event was allocated it is sent
 *     to rbtdb->task after taking a database reference; if not, the tree
 *     write lock is held around the loop instead.
 * NOTE(review): many conditions (for example the tests on commit and
 * rollback), else keywords, trailing macro arguments and the final
 * statements of the function are on lines not visible in this excerpt;
 * the summary follows the visible statements and existing comments only.
 */
2133 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2134 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2135 rbtdb_version_t *version, *cleanup_version, *least_greater;
2136 isc_boolean_t rollback = ISC_FALSE;
2137 rbtdb_changedlist_t cleanup_list;
2138 rdatasetheaderlist_t resigned_list;
2139 rbtdb_changed_t *changed, *next_changed;
2140 rbtdb_serial_t serial, least_serial;
2141 dns_rbtnode_t *rbtnode;
2143 rdatasetheader_t *header;
2144 isc_boolean_t writer;
2146 REQUIRE(VALID_RBTDB(rbtdb));
2147 version = (rbtdb_version_t *)*versionp;
2149 cleanup_version = NULL;
2150 ISC_LIST_INIT(cleanup_list);
2151 ISC_LIST_INIT(resigned_list);
2153 isc_refcount_decrement(&version->references, &refs);
2154 if (refs > 0) { /* typical and easy case first */
2156 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2157 INSIST(!version->writer);
2158 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2163 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2164 serial = version->serial;
2165 writer = version->writer;
2166 if (version->writer) {
2169 rbtdb_version_t *cur_version;
2171 INSIST(version->commit_ok);
2172 INSIST(version == rbtdb->future_version);
2174 * The current version is going to be replaced.
2175 * Release the (likely last) reference to it from the
2176 * DB itself and unlink it from the open list.
2178 cur_version = rbtdb->current_version;
2179 isc_refcount_decrement(&cur_version->references,
2182 if (cur_version->serial == rbtdb->least_serial)
2183 INSIST(EMPTY(cur_version->changed_list));
2184 UNLINK(rbtdb->open_versions,
2187 if (EMPTY(rbtdb->open_versions)) {
2189 * We're going to become the least open
2192 make_least_version(rbtdb, version,
2196 * Some other open version is the
2197 * least version. We can't cleanup
2198 * records that were changed in this
2199 * version because the older versions
2200 * may still be in use by an open
2203 * We can, however, discard the
2204 * changed records for things that
2205 * we've added that didn't exist in
2208 cleanup_nondirty(version, &cleanup_list);
2211 * If the (soon to be former) current version
2212 * isn't being used by anyone, we can clean
2216 cleanup_version = cur_version;
2217 APPENDLIST(version->changed_list,
2218 cleanup_version->changed_list,
2222 * Become the current version.
2224 version->writer = ISC_FALSE;
2225 rbtdb->current_version = version;
2226 rbtdb->current_serial = version->serial;
2227 rbtdb->future_version = NULL;
2230 * Keep the current version in the open list, and
2231 * gain a reference for the DB itself (see the DB
2232 * creation function below). This must be the only
2233 * case where we need to increment the counter from
2234 * zero and need to use isc_refcount_increment0().
2236 isc_refcount_increment0(&version->references,
2238 INSIST(cur_ref == 1);
2239 PREPEND(rbtdb->open_versions,
2240 rbtdb->current_version, link);
2241 resigned_list = version->resigned_list;
2242 ISC_LIST_INIT(version->resigned_list);
2245 * We're rolling back this transaction.
2247 cleanup_list = version->changed_list;
2248 ISC_LIST_INIT(version->changed_list);
2249 resigned_list = version->resigned_list;
2250 ISC_LIST_INIT(version->resigned_list);
2251 rollback = ISC_TRUE;
2252 cleanup_version = version;
2253 rbtdb->future_version = NULL;
2256 if (version != rbtdb->current_version) {
2258 * There are no external or internal references
2259 * to this version and it can be cleaned up.
2261 cleanup_version = version;
2264 * Find the version with the least serial
2265 * number greater than ours.
2267 least_greater = PREV(version, link);
2268 if (least_greater == NULL)
2269 least_greater = rbtdb->current_version;
2271 INSIST(version->serial < least_greater->serial);
2273 * Is this the least open version?
2275 if (version->serial == rbtdb->least_serial) {
2277 * Yes. Install the new least open
2280 make_least_version(rbtdb,
2285 * Add any unexecuted cleanups to
2286 * those of the least greater version.
2288 APPENDLIST(least_greater->changed_list,
2289 version->changed_list,
2292 } else if (version->serial == rbtdb->least_serial)
2293 INSIST(EMPTY(version->changed_list));
2294 UNLINK(rbtdb->open_versions, version, link);
2296 least_serial = rbtdb->least_serial;
2297 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2300 * Update the zone's secure status.
2302 if (writer && commit && !IS_CACHE(rbtdb))
2303 iszonesecure(db, version, rbtdb->origin_node);
2305 if (cleanup_version != NULL) {
2306 INSIST(EMPTY(cleanup_version->changed_list));
2307 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2308 sizeof(*cleanup_version));
2312 * Commit/rollback re-signed headers.
2314 for (header = HEAD(resigned_list);
2316 header = HEAD(resigned_list)) {
2319 ISC_LIST_UNLINK(resigned_list, header, link);
2321 lock = &rbtdb->node_locks[header->node->locknum].lock;
2322 NODE_LOCK(lock, isc_rwlocktype_write);
2324 resign_insert(rbtdb, header->node->locknum, header);
2325 decrement_reference(rbtdb, header->node, least_serial,
2326 isc_rwlocktype_write, isc_rwlocktype_none,
2328 NODE_UNLOCK(lock, isc_rwlocktype_write);
2331 if (!EMPTY(cleanup_list)) {
2332 isc_event_t *event = NULL;
2333 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2335 if (rbtdb->task != NULL)
2336 event = isc_event_allocate(rbtdb->common.mctx, NULL,
2337 DNS_EVENT_RBTDEADNODES,
2338 cleanup_dead_nodes_callback,
2339 rbtdb, sizeof(isc_event_t));
2340 if (event == NULL) {
2342 * We acquire a tree write lock here in order to make
2343 * sure that stale nodes will be removed in
2344 * decrement_reference(). If we didn't have the lock,
2345 * those nodes could miss the chance to be removed
2346 * until the server stops. The write lock is
2347 * expensive, but this event should be rare enough
2348 * to justify the cost.
2350 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2351 tlock = isc_rwlocktype_write;
2354 for (changed = HEAD(cleanup_list);
2356 changed = next_changed) {
2359 next_changed = NEXT(changed, link);
2360 rbtnode = changed->node;
2361 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2363 NODE_LOCK(lock, isc_rwlocktype_write);
2365 * This is a good opportunity to purge any dead nodes,
2369 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2372 rollback_node(rbtnode, serial);
2373 decrement_reference(rbtdb, rbtnode, least_serial,
2374 isc_rwlocktype_write, tlock,
2377 NODE_UNLOCK(lock, isc_rwlocktype_write);
2379 isc_mem_put(rbtdb->common.mctx, changed,
2382 if (event != NULL) {
2383 isc_refcount_increment(&rbtdb->references, NULL);
2384 isc_task_send(rbtdb->task, &event);
2386 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2394 * Add the necessary magic for the wildcard name 'name'
2395 * to be found in 'rbtdb'.
2397 * In order for wildcard matching to work correctly in
2398 * zone_find(), we must ensure that a node for the wildcarding
2399 * level exists in the database, and has its 'find_callback'
2400 * and 'wild' bits set.
2402 * E.g. if the wildcard name is "*.sub.example." then we
2403 * must ensure that "sub.example." exists and is marked as
/*
 * add_wildcard_magic(): make sure the node for the labels of "name" that
 * follow its first label exists in rbtdb->tree, and set find_callback on
 * that node.  dns_rbt_addnode() returning ISC_R_EXISTS is accepted in
 * addition to ISC_R_SUCCESS.  Returns ISC_R_SUCCESS on the path visible
 * here.
 * NOTE(review): the declaration and any adjustment of n, the error return
 * and further flags set on the node are on lines not visible in this
 * excerpt.
 */
2407 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2408 isc_result_t result;
2409 dns_name_t foundname;
2410 dns_offsets_t offsets;
2412 dns_rbtnode_t *node = NULL;
2414 dns_name_init(&foundname, offsets);
2415 n = dns_name_countlabels(name);
2418 dns_name_getlabelsequence(name, 1, n, &foundname);
2419 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2420 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2423 node->find_callback = 1;
2425 return (ISC_R_SUCCESS);
/*
 * add_empty_wildcards(): examine trailing label sequences of "name" (the
 * last i labels, for varying i) and, for each one that is a wildcard name,
 * call add_wildcard_magic() and add a node for it to rbtdb->tree.
 * n is the label count of name and l the label count of the database
 * origin.  Returns ISC_R_SUCCESS on the path visible here.
 * NOTE(review): the loop header that steps i (and how it uses l), the
 * error returns and the last dns_rbt_addnode() argument are on lines not
 * visible in this excerpt.
 */
2429 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2430 isc_result_t result;
2431 dns_name_t foundname;
2432 dns_offsets_t offsets;
2433 unsigned int n, l, i;
2435 dns_name_init(&foundname, offsets);
2436 n = dns_name_countlabels(name);
2437 l = dns_name_countlabels(&rbtdb->common.origin);
2440 dns_rbtnode_t *node = NULL; /* dummy */
2441 dns_name_getlabelsequence(name, n - i, i, &foundname);
2442 if (dns_name_iswildcard(&foundname)) {
2443 result = add_wildcard_magic(rbtdb, &foundname);
2444 if (result != ISC_R_SUCCESS)
2446 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2448 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2454 return (ISC_R_SUCCESS);
/*
 * findnode(): find the node for "name" in rbtdb->tree and return it through
 * *nodep with a new reference.
 *
 * db     - database, validated with VALID_RBTDB().
 * name   - name to look up.
 * create - creation flag (its test is on a line not visible here).
 * nodep  - receives the node on success.
 *
 * The lookup runs under the tree read lock.  When it fails, the read lock
 * is dropped; a DNS_R_PARTIALMATCH result is mapped to ISC_R_NOTFOUND.  On
 * the creating path the tree write lock is taken and the node is added
 * with dns_rbt_addnode().  A newly added node gets its locknum from a hash
 * reduced modulo rbtdb->node_lock_count, and wildcard bookkeeping is done
 * through add_empty_wildcards() and add_wildcard_magic().  Finally
 * reactivate_node() takes the reference, the tree lock is released and
 * ISC_R_SUCCESS is returned.
 * NOTE(review): the test of create, the error returns and the preprocessor
 * else/endif lines are not visible in this excerpt.
 */
2458 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2459 dns_dbnode_t **nodep)
2461 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2462 dns_rbtnode_t *node = NULL;
2463 dns_name_t nodename;
2464 isc_result_t result;
2465 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2467 REQUIRE(VALID_RBTDB(rbtdb));
2469 dns_name_init(&nodename, NULL);
2470 RWLOCK(&rbtdb->tree_lock, locktype);
2471 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2472 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2473 if (result != ISC_R_SUCCESS) {
2474 RWUNLOCK(&rbtdb->tree_lock, locktype);
2476 if (result == DNS_R_PARTIALMATCH)
2477 result = ISC_R_NOTFOUND;
2481 * It would be nice to try to upgrade the lock instead of
2482 * unlocking then relocking.
2484 locktype = isc_rwlocktype_write;
2485 RWLOCK(&rbtdb->tree_lock, locktype);
2487 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2488 if (result == ISC_R_SUCCESS) {
2489 dns_rbt_namefromnode(node, &nodename);
2490 #ifdef DNS_RBT_USEHASH
2491 node->locknum = node->hashval % rbtdb->node_lock_count;
2493 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2494 rbtdb->node_lock_count;
/* NOTE(review): the result of add_empty_wildcards() is not checked here. */
2497 add_empty_wildcards(rbtdb, name);
2499 if (dns_name_iswildcard(name)) {
2500 result = add_wildcard_magic(rbtdb, name);
2501 if (result != ISC_R_SUCCESS) {
2502 RWUNLOCK(&rbtdb->tree_lock, locktype);
2506 } else if (result != ISC_R_EXISTS) {
2507 RWUNLOCK(&rbtdb->tree_lock, locktype);
2511 reactivate_node(rbtdb, node, locktype);
2512 RWUNLOCK(&rbtdb->tree_lock, locktype);
2514 *nodep = (dns_dbnode_t *)node;
2516 return (ISC_R_SUCCESS);
/*
 * findnsec3node(): counterpart of findnode() that operates on the NSEC3
 * tree (rbtdb->nsec3) instead of rbtdb->tree.
 *
 * db     - must be a valid rbtdb (REQUIRE(VALID_RBTDB())).
 * name   - name to find or add in the NSEC3 tree.
 * create - NOTE(review): not referenced on any visible line; presumably the
 *          missing original line 2537 tests it -- confirm.
 * nodep  - receives the node pointer on success.
 *
 * Flow shown here: look up under the tree read lock; on failure release it
 * (DNS_R_PARTIALMATCH becomes ISC_R_NOTFOUND), re-lock for writing and call
 * dns_rbt_addnode(); a newly added node gets its locknum computed.  The
 * node must have its nsec3 bit set (INSIST).  A reference is taken with
 * new_reference() while holding the node's lock via NODE_STRONGLOCK, then
 * the tree lock is released and ISC_R_SUCCESS returned.
 *
 * Unlike findnode(), no wildcard bookkeeping and no reactivate_node() call
 * appear on the visible lines.
 *
 * NOTE(review): this listing is a lossy extract; error-path returns, the
 * preprocessor else/endif lines and possibly other statements (original
 * lines 2557-2558) are not visible.
 */
2520 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2521 dns_dbnode_t **nodep)
2523 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2524 dns_rbtnode_t *node = NULL;
2525 dns_name_t nodename;
2526 isc_result_t result;
2527 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2529 REQUIRE(VALID_RBTDB(rbtdb));
2531 dns_name_init(&nodename, NULL);
/* Optimistic path: look the name up under the read lock. */
2532 RWLOCK(&rbtdb->tree_lock, locktype);
2533 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2534 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2535 if (result != ISC_R_SUCCESS) {
2536 RWUNLOCK(&rbtdb->tree_lock, locktype);
2538 if (result == DNS_R_PARTIALMATCH)
2539 result = ISC_R_NOTFOUND;
2543 * It would be nice to try to upgrade the lock instead of
2544 * unlocking then relocking.
2546 locktype = isc_rwlocktype_write;
2547 RWLOCK(&rbtdb->tree_lock, locktype);
2549 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2550 if (result == ISC_R_SUCCESS) {
/* Assign the new node to one of the node_lock_count lock buckets. */
2551 dns_rbt_namefromnode(node, &nodename);
2552 #ifdef DNS_RBT_USEHASH
2553 node->locknum = node->hashval % rbtdb->node_lock_count;
2555 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2556 rbtdb->node_lock_count;
2559 } else if (result != ISC_R_EXISTS) {
2560 RWUNLOCK(&rbtdb->tree_lock, locktype);
/* Nodes living in the NSEC3 tree are expected to carry the nsec3 flag. */
2564 INSIST(node->nsec3);
/* Take the caller's reference while holding the node's own lock. */
2565 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2566 new_reference(rbtdb, node);
2567 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2568 RWUNLOCK(&rbtdb->tree_lock, locktype);
2570 *nodep = (dns_dbnode_t *)node;
2572 return (ISC_R_SUCCESS);
/*
 * zone_zonecut_callback(): tree-search callback (it is passed to
 * dns_rbt_findnode() by zone_find()) that checks whether 'node' is a zone
 * cut for the search described by 'arg'.
 *
 * node - the tree node being visited.
 * name - the name of that node; copied into search->zonecut_name when the
 *        search continues below the cut.
 * arg  - the rbtdb_search_t for the lookup in progress.
 *
 * Behaviour visible in this listing:
 *  - If search->zonecut is already set, return DNS_R_CONTINUE at once, so
 *    only the topmost cut is remembered.
 *  - Under the node read lock, walk node->data looking at NS, DNAME and
 *    RRSIG(DNAME) headers, following header->down to pick the version with
 *    serial <= search->serial, and skipping NONEXISTENT headers.  DNAME and
 *    its signature are recorded in dname_header / sigdname_header; an NS
 *    header is only considered when the node is not the origin node or the
 *    database is a stub.
 *  - Choose 'found': for a database that is neither cache nor stub, NS wins
 *    when present; otherwise DNAME (with its signature) wins over NS.
 *  - When something was found: take a node reference, record
 *    search->zonecut / zonecut_rdataset, set need_cleanup, clear
 *    search->wild.  Without DNS_DBFIND_GLUEOK the result becomes
 *    DNS_R_PARTIALMATCH; otherwise 'name' is copied to search->zonecut_name
 *    and copy_name is set.
 *  - When nothing was found: if node->wild is set and DNS_DBFIND_NOWILD is
 *    not, set search->wild.
 *  - Release the node lock.
 *
 * NOTE(review): this listing is a lossy extract.  The initialisation of
 * 'found' and 'ns_header', the declaration of 'zcname', the assignments
 * 'found = ns_header', loop-exit statements and the final return
 * (presumably 'result') are not visible -- confirm against the complete
 * file.
 */
2576 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2577 rbtdb_search_t *search = arg;
2578 rdatasetheader_t *header, *header_next;
2579 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2580 rdatasetheader_t *found;
2581 isc_result_t result;
2582 dns_rbtnode_t *onode;
2585 * We only want to remember the topmost zone cut, since it's the one
2586 * that counts, so we'll just continue if we've already found a
2589 if (search->zonecut != NULL)
2590 return (DNS_R_CONTINUE);
2593 result = DNS_R_CONTINUE;
2594 onode = search->rbtdb->origin_node;
2596 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2597 isc_rwlocktype_read);
2600 * Look for an NS or DNAME rdataset active in our version.
2603 dname_header = NULL;
2604 sigdname_header = NULL;
2605 for (header = node->data; header != NULL; header = header_next) {
2606 header_next = header->next;
2607 if (header->type == dns_rdatatype_ns ||
2608 header->type == dns_rdatatype_dname ||
2609 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2611 if (header->serial <= search->serial &&
2614 * Is this a "this rdataset doesn't
2617 if (NONEXISTENT(header))
2621 header = header->down;
2622 } while (header != NULL);
2623 if (header != NULL) {
2624 if (header->type == dns_rdatatype_dname)
2625 dname_header = header;
2626 else if (header->type ==
2627 RBTDB_RDATATYPE_SIGDNAME)
2628 sigdname_header = header;
2629 else if (node != onode ||
2630 IS_STUB(search->rbtdb)) {
2632 * We've found an NS rdataset that
2633 * isn't at the origin node. We check
2634 * that they're not at the origin node,
2635 * because otherwise we'd erroneously
2636 * treat the zone top as if it were
2646 * Did we find anything?
2648 if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2649 ns_header != NULL) {
2651 * Note that NS has precedence over DNAME if both exist
2652 * in a zone. Otherwise DNAME take precedence over NS.
2655 search->zonecut_sigrdataset = NULL;
2656 } else if (dname_header != NULL) {
2657 found = dname_header;
2658 search->zonecut_sigrdataset = sigdname_header;
2659 } else if (ns_header != NULL) {
2661 search->zonecut_sigrdataset = NULL;
2664 if (found != NULL) {
2666 * We increment the reference count on node to ensure that
2667 * search->zonecut_rdataset will still be valid later.
2669 new_reference(search->rbtdb, node);
2670 search->zonecut = node;
2671 search->zonecut_rdataset = found;
2672 search->need_cleanup = ISC_TRUE;
2674 * Since we've found a zonecut, anything beneath it is
2675 * glue and is not subject to wildcard matching, so we
2676 * may clear search->wild.
2678 search->wild = ISC_FALSE;
2679 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2681 * If the caller does not want to find glue, then
2682 * this is the best answer and the search should
2685 result = DNS_R_PARTIALMATCH;
2690 * The search will continue beneath the zone cut.
2691 * This may or may not be the best match. In case it
2692 * is, we need to remember the node name.
2694 zcname = dns_fixedname_name(&search->zonecut_name);
2695 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2697 search->copy_name = ISC_TRUE;
2701 * There is no zonecut at this node which is active in this
2704 * If this is a "wild" node and the caller hasn't disabled
2705 * wildcard matching, remember that we've seen a wild node
2706 * in case we need to go searching for wildcard matches
2709 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2710 search->wild = ISC_TRUE;
2713 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2714 isc_rwlocktype_read);
/*
 * bind_rdataset(): associate the caller's dns_rdataset_t with the slab data
 * that follows 'header' in 'node'.
 *
 * rbtdb    - owning database; supplies the rdata class and is stored in
 *            rdataset->private1.
 * node     - node that owns 'header'; referenced via new_reference() and
 *            stored in rdataset->private2.
 * header   - rdataset header to bind; the raw slab begins at
 *            (unsigned char *)header + sizeof(*header).
 * now      - subtracted from header->rdh_ttl to produce rdataset->ttl.
 * rdataset - destination; must be disassociated (methods == NULL).  A NULL
 *            pointer is tested for at the top.
 *
 * Fields populated on the visible lines: methods, rdclass, type/covers
 * (split out of header->type), ttl, trust, the NEGATIVE / NXDOMAIN / OPTOUT
 * / NOQNAME / CLOSEST / RESIGN attribute bits, private1-3, count, the
 * iterator state (privateuint4, private5), private6/private7 (noqname and
 * closest proofs) and resign.
 *
 * header->count is post-incremented on every bind and the value handed to
 * the rdataset is reset to 0 when it equals ISC_UINT32_MAX.  As the original
 * comment in the body says, this increment happens while holding only the
 * node reader lock.
 *
 * NOTE(review): this listing is a lossy extract.  The statement executed
 * when rdataset is NULL (presumably a plain return), the condition guarding
 * the OPTOUT attribute, and the 'else' before 'resign = 0' are not visible
 * -- confirm against the complete file.
 */
2720 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2721 rdatasetheader_t *header, isc_stdtime_t now,
2722 dns_rdataset_t *rdataset)
2724 unsigned char *raw; /* RDATASLAB */
2727 * Caller must be holding the node reader lock.
2728 * XXXJT: technically, we need a writer lock, since we'll increment
2729 * the header count below. However, since the actual counter value
2730 * doesn't matter, we prioritize performance here. (We may want to
2731 * use atomic increment when available).
2734 if (rdataset == NULL)
2737 new_reference(rbtdb, node);
2739 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2741 rdataset->methods = &rdataset_methods;
2742 rdataset->rdclass = rbtdb->common.rdclass;
2743 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2744 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2745 rdataset->ttl = header->rdh_ttl - now;
2746 rdataset->trust = header->trust;
2747 if (NEGATIVE(header))
2748 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2749 if (NXDOMAIN(header))
2750 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2752 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2753 rdataset->private1 = rbtdb;
2754 rdataset->private2 = node;
2755 raw = (unsigned char *)header + sizeof(*header);
2756 rdataset->private3 = raw;
2757 rdataset->count = header->count++;
2758 if (rdataset->count == ISC_UINT32_MAX)
2759 rdataset->count = 0;
2762 * Reset iterator state.
2764 rdataset->privateuint4 = 0;
2765 rdataset->private5 = NULL;
2768 * Add noqname proof.
2770 rdataset->private6 = header->noqname;
2771 if (rdataset->private6 != NULL)
2772 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2773 rdataset->private7 = header->closest;
2774 if (rdataset->private7 != NULL)
2775 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2778 * Copy out re-signing information.
2780 if (RESIGN(header)) {
2781 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2782 rdataset->resign = header->resign;
2784 rdataset->resign = 0;
/*
 * setup_delegation(): turn the zone cut remembered in 'search' into the
 * caller-visible answer for a delegation or DNAME.
 *
 * search      - search block; search->zonecut and search->zonecut_rdataset
 *               must already be set (both are dereferenced unconditionally).
 * nodep       - optional; when non-NULL the reference already held by the
 *               search block is handed over (need_cleanup is cleared).
 * foundname   - optional; receives search->zonecut_name when
 *               search->copy_name is set.
 * rdataset    - optional; bound to search->zonecut_rdataset under the node
 *               read lock.
 * sigrdataset - optional; bound to search->zonecut_sigrdataset when both it
 *               and that header are non-NULL (only inside the
 *               rdataset != NULL branch).
 *
 * Returns DNS_R_DNAME when the zone cut rdataset type is DNAME, otherwise
 * DNS_R_DELEGATION.  The result of dns_name_copy() is checked before
 * anything else is modified.
 *
 * The caller must not hold any node locks (this function takes one).
 *
 * NOTE(review): this listing is a lossy extract.  The declaration of
 * 'zcname', the statement taken when dns_name_copy() fails (presumably
 * returning 'result') and the assignment to '*nodep' are not visible --
 * confirm against the complete file.
 */
2787 static inline isc_result_t
2788 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2789 dns_name_t *foundname, dns_rdataset_t *rdataset,
2790 dns_rdataset_t *sigrdataset)
2792 isc_result_t result;
2794 rbtdb_rdatatype_t type;
2795 dns_rbtnode_t *node;
2798 * The caller MUST NOT be holding any node locks.
2801 node = search->zonecut;
2802 type = search->zonecut_rdataset->type;
2805 * If we have to set foundname, we do it before anything else.
2806 * If we were to set foundname after we had set nodep or bound the
2807 * rdataset, then we'd have to undo that work if dns_name_copy()
2808 * failed. By setting foundname first, there's nothing to undo if
2811 if (foundname != NULL && search->copy_name) {
2812 zcname = dns_fixedname_name(&search->zonecut_name);
2813 result = dns_name_copy(zcname, foundname, NULL);
2814 if (result != ISC_R_SUCCESS)
2817 if (nodep != NULL) {
2819 * Note that we don't have to increment the node's reference
2820 * count here because we're going to use the reference we
2821 * already have in the search block.
2824 search->need_cleanup = ISC_FALSE;
2826 if (rdataset != NULL) {
2827 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2828 isc_rwlocktype_read);
2829 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2830 search->now, rdataset);
2831 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2832 bind_rdataset(search->rbtdb, node,
2833 search->zonecut_sigrdataset,
2834 search->now, sigrdataset);
2835 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2836 isc_rwlocktype_read);
2839 if (type == dns_rdatatype_dname)
2840 return (DNS_R_DNAME);
2841 return (DNS_R_DELEGATION);
/*
 * valid_glue(): decide whether 'name'/'type' found at 'node' is acceptable
 * glue for the zone cut recorded in 'search'.
 *
 * search - search block; search->zonecut and search->zonecut_rdataset
 *          identify the delegation being checked against.
 * name   - owner name of the candidate glue.
 * type   - rdata type of the candidate glue.
 * node   - node at which the candidate was found.
 *
 * Type filter: NS is only considered when 'node' is the zone cut itself;
 * apart from NS, only A, AAAA and A6 are considered.  The raw rdata slab of
 * the zone cut rdataset is then walked record by record (a 16-bit
 * big-endian count followed by length-prefixed records) and each record is
 * interpreted as a name and compared against 'name'.
 *
 * 'valid' starts out ISC_FALSE.
 *
 * NOTE(review): this listing is a lossy extract.  The declaration of
 * 'ns_name', the early returns in the type filter, the record loop header,
 * the assignment of region.base, the advancement of 'raw', and the
 * statements run on a match plus the final return (presumably 'valid') are
 * not visible -- confirm against the complete file.
 */
2844 static inline isc_boolean_t
2845 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2846 dns_rbtnode_t *node)
2848 unsigned char *raw; /* RDATASLAB */
2849 unsigned int count, size;
2851 isc_boolean_t valid = ISC_FALSE;
2852 dns_offsets_t offsets;
2853 isc_region_t region;
2854 rdatasetheader_t *header;
2857 * No additional locking is required.
2861 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2862 * if it occurs at a zone cut, but is not valid below it.
2864 if (type == dns_rdatatype_ns) {
2865 if (node != search->zonecut) {
2868 } else if (type != dns_rdatatype_a &&
2869 type != dns_rdatatype_aaaa &&
2870 type != dns_rdatatype_a6) {
2874 header = search->zonecut_rdataset;
2875 raw = (unsigned char *)header + sizeof(*header);
2876 count = raw[0] * 256 + raw[1];
2877 #if DNS_RDATASET_FIXED
2878 raw += 2 + (4 * count);
2885 size = raw[0] * 256 + raw[1];
2886 #if DNS_RDATASET_FIXED
2892 region.length = size;
2895 * XXX Until we have rdata structures, we have no choice but
2896 * to directly access the rdata format.
2898 dns_name_init(&ns_name, offsets);
/* Pass the address of the local region descriptor. */
2899 dns_name_fromregion(&ns_name, &region);
2900 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * activeempty(): starting from the position held in 'chain', look for the
 * next node that has data active in the search's version, and test whether
 * that node's name is a subdomain of 'name'.
 *
 * search - search block supplying the database and the version serial.
 * chain  - node chain to advance; it is modified by this function.
 *
 * A node counts as active when some header in node->data has
 * serial <= search->serial, is not IGNOREd and EXISTS; each node is
 * inspected under its read lock.  After the scan, prefix + origin are
 * concatenated into 'next', which is compared against 'name' with
 * dns_name_issubdomain().
 *
 * 'answer' starts out ISC_FALSE.
 *
 * NOTE(review): this listing is a lossy extract.  The rest of the parameter
 * list (a 'name' parameter is used in the body), the declarations of
 * 'rbtdb', 'prefix', 'next' and 'origin', the loop-exit statements, and the
 * statements that set and return 'answer' are not visible -- confirm
 * against the complete file.
 */
2909 static inline isc_boolean_t
2910 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2913 dns_fixedname_t fnext;
2914 dns_fixedname_t forigin;
2919 dns_rbtnode_t *node;
2920 isc_result_t result;
2921 isc_boolean_t answer = ISC_FALSE;
2922 rdatasetheader_t *header;
2924 rbtdb = search->rbtdb;
2926 dns_name_init(&prefix, NULL);
2927 dns_fixedname_init(&fnext);
2928 next = dns_fixedname_name(&fnext);
2929 dns_fixedname_init(&forigin);
2930 origin = dns_fixedname_name(&forigin);
/* Step past the current position, then scan forward. */
2932 result = dns_rbtnodechain_next(chain, NULL, NULL);
2933 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2935 result = dns_rbtnodechain_current(chain, &prefix,
2937 if (result != ISC_R_SUCCESS)
/* Check under the node read lock whether any header is active in this version. */
2939 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2940 isc_rwlocktype_read);
2941 for (header = node->data;
2943 header = header->next) {
2944 if (header->serial <= search->serial &&
2945 !IGNORE(header) && EXISTS(header))
2948 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2949 isc_rwlocktype_read);
2952 result = dns_rbtnodechain_next(chain, NULL, NULL);
/* Build the absolute name of the node the scan stopped at. */
2954 if (result == ISC_R_SUCCESS)
2955 result = dns_name_concatenate(&prefix, origin, next, NULL);
2956 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode(): determine whether 'qname' lies at or below an empty
 * node, using the active neighbours on either side of the search position.
 * (The identifier is spelled this way in the file; callers use that
 * spelling.)
 *
 * search - search block; its chain is copied, so the caller's chain is
 *          left untouched.
 * qname  - the query name.
 * wname  - the wildcard name; its leading label is stripped to obtain the
 *          terminal name 'tname'.
 *
 * Steps shown here:
 *  1. Walk the copied chain backwards with dns_rbtnodechain_prev() until a
 *     node with a header active in this version is seen, and build its
 *     absolute name in 'prev'.  On failure check_prev is cleared.
 *  2. Walk forwards with dns_rbtnodechain_next() in the same way to obtain
 *     'next'.  On failure check_next is cleared.
 *  3. Starting with rname = qname, repeatedly test whether 'prev' or 'next'
 *     is a subdomain of rname, stripping rname's leftmost label each round
 *     until rname equals tname.
 *
 * 'answer' starts out ISC_FALSE.
 *
 * NOTE(review): this listing is a lossy extract.  Several declarations
 * ('rbtdb', 'name', 'tname', 'rname', 'next', 'prev', 'origin', 'n'), the
 * loop headers and exits, the statements executed when a neighbour is a
 * subdomain (presumably setting 'answer'), and the final return are not
 * visible -- confirm against the complete file.
 */
2961 static inline isc_boolean_t
2962 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2963 dns_fixedname_t fnext;
2964 dns_fixedname_t forigin;
2965 dns_fixedname_t fprev;
2973 dns_rbtnode_t *node;
2974 dns_rbtnodechain_t chain;
2975 isc_boolean_t check_next = ISC_TRUE;
2976 isc_boolean_t check_prev = ISC_TRUE;
2977 isc_boolean_t answer = ISC_FALSE;
2978 isc_result_t result;
2979 rdatasetheader_t *header;
2982 rbtdb = search->rbtdb;
2984 dns_name_init(&name, NULL);
2985 dns_name_init(&tname, NULL);
2986 dns_name_init(&rname, NULL);
2987 dns_fixedname_init(&fnext);
2988 next = dns_fixedname_name(&fnext);
2989 dns_fixedname_init(&fprev);
2990 prev = dns_fixedname_name(&fprev);
2991 dns_fixedname_init(&forigin);
2992 origin = dns_fixedname_name(&forigin);
2995 * Find if qname is at or below a empty node.
2996 * Use our own copy of the chain.
2999 chain = search->chain;
/* Backward scan: find the nearest preceding node with active data. */
3002 result = dns_rbtnodechain_current(&chain, &name,
3004 if (result != ISC_R_SUCCESS)
3006 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3007 isc_rwlocktype_read);
3008 for (header = node->data;
3010 header = header->next) {
3011 if (header->serial <= search->serial &&
3012 !IGNORE(header) && EXISTS(header))
3015 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3016 isc_rwlocktype_read);
3019 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3020 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3021 if (result == ISC_R_SUCCESS)
3022 result = dns_name_concatenate(&name, origin, prev, NULL);
3023 if (result != ISC_R_SUCCESS)
3024 check_prev = ISC_FALSE;
/* Forward scan: find the nearest following node with active data. */
3026 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3027 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3029 result = dns_rbtnodechain_current(&chain, &name,
3031 if (result != ISC_R_SUCCESS)
3033 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3034 isc_rwlocktype_read);
3035 for (header = node->data;
3037 header = header->next) {
3038 if (header->serial <= search->serial &&
3039 !IGNORE(header) && EXISTS(header))
3042 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3043 isc_rwlocktype_read);
3046 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3048 if (result == ISC_R_SUCCESS)
3049 result = dns_name_concatenate(&name, origin, next, NULL);
3050 if (result != ISC_R_SUCCESS)
3051 check_next = ISC_FALSE;
/* 'rname' is a shrinking view of qname; dns_name_clone() does not copy the name data. */
3053 dns_name_clone(qname, &rname);
3056 * Remove the wildcard label to find the terminal name.
3058 n = dns_name_countlabels(wname);
3059 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3062 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3063 (check_next && dns_name_issubdomain(next, &rname))) {
3068 * Remove the left hand label.
3070 n = dns_name_countlabels(&rname);
3071 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3072 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard(): walk the ancestor levels recorded in search->chain and
 * look for a wildcard node that matches the query and is active in the
 * search's version.
 *
 * search - search block; search->chain.level_matches gives the starting
 *          level and search->chain.levels[] the ancestor nodes.
 * nodep  - node pointer in/out.
 *
 * Per level, as far as the visible lines show:
 *  - Under the node read lock, scan node->data for a header with
 *    serial <= search->serial that is not IGNOREd and EXISTS, to learn
 *    whether the level itself is active.
 *  - Build the wildcard name for the level: dns_wildcardname joined with
 *    the node's name, then extended with the names of the enclosing levels
 *    from search->chain.levels[].
 *  - Look that name up in rbtdb->tree with DNS_RBTFIND_EMPTYDATA, recording
 *    the position in a private chain 'wchain'.
 *  - If the wildcard node exists and is active (or activeempty() says so
 *    for 'wchain'), call activeemtpynode(); when that returns true the
 *    result is ISC_R_NOTFOUND, otherwise the wildcard match stands with
 *    result still ISC_R_SUCCESS.
 *  - Lookup results other than ISC_R_NOTFOUND / DNS_R_PARTIALMATCH are
 *    treated as errors.
 *  - If the level node itself is active, wildcards at higher levels cannot
 *    apply and the result is ISC_R_NOTFOUND.
 *
 * 'result' starts out ISC_R_NOTFOUND.  The caller must hold the tree lock
 * and must not hold node locks.
 *
 * NOTE(review): this listing is a lossy extract.  The remaining parameters
 * (a 'qname' is used in the body), the declarations of 'i', 'j', 'name',
 * 'wname', 'rbtdb' and 'lock', how '*nodep' is read and written, the
 * handling of the 'done', 'wild' and 'active' flags, the loop structure
 * and the final return are not visible -- confirm against the complete
 * file.
 */
3076 static inline isc_result_t
3077 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3081 dns_rbtnode_t *node, *level_node, *wnode;
3082 rdatasetheader_t *header;
3083 isc_result_t result = ISC_R_NOTFOUND;
3086 dns_fixedname_t fwname;
3088 isc_boolean_t done, wild, active;
3089 dns_rbtnodechain_t wchain;
3092 * Caller must be holding the tree lock and MUST NOT be holding
3097 * Examine each ancestor level. If the level's wild bit
3098 * is set, then construct the corresponding wildcard name and
3099 * search for it. If the wildcard node exists, and is active in
3100 * this version, we're done. If not, then we next check to see
3101 * if the ancestor is active in this version. If so, then there
3102 * can be no possible wildcard match and again we're done. If not,
3103 * continue the search.
3106 rbtdb = search->rbtdb;
3107 i = search->chain.level_matches;
3111 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3112 isc_rwlocktype_read);
3115 * First we try to figure out if this node is active in
3116 * the search's version. We do this now, even though we
3117 * may not need the information, because it simplifies the
3118 * locking and code flow.
3120 for (header = node->data;
3122 header = header->next) {
3123 if (header->serial <= search->serial &&
3124 !IGNORE(header) && EXISTS(header))
3137 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3138 isc_rwlocktype_read);
3142 * Construct the wildcard name for this level.
3144 dns_name_init(&name, NULL);
3145 dns_rbt_namefromnode(node, &name);
3146 dns_fixedname_init(&fwname);
3147 wname = dns_fixedname_name(&fwname);
3148 result = dns_name_concatenate(dns_wildcardname, &name,
3151 while (result == ISC_R_SUCCESS && j != 0) {
3153 level_node = search->chain.levels[j];
3154 dns_name_init(&name, NULL);
3155 dns_rbt_namefromnode(level_node, &name);
3156 result = dns_name_concatenate(wname,
3161 if (result != ISC_R_SUCCESS)
3165 dns_rbtnodechain_init(&wchain, NULL);
3166 result = dns_rbt_findnode(rbtdb->tree, wname,
3167 NULL, &wnode, &wchain,
3168 DNS_RBTFIND_EMPTYDATA,
3170 if (result == ISC_R_SUCCESS) {
3174 * We have found the wildcard node. If it
3175 * is active in the search's version, we're
3178 lock = &rbtdb->node_locks[wnode->locknum].lock;
3179 NODE_LOCK(lock, isc_rwlocktype_read);
3180 for (header = wnode->data;
3182 header = header->next) {
3183 if (header->serial <= search->serial &&
3184 !IGNORE(header) && EXISTS(header))
3187 NODE_UNLOCK(lock, isc_rwlocktype_read);
3188 if (header != NULL ||
3189 activeempty(search, &wchain, wname)) {
3190 if (activeemtpynode(search, qname,
3192 return (ISC_R_NOTFOUND);
3195 * The wildcard node is active!
3197 * Note: result is still ISC_R_SUCCESS
3198 * so we don't have to set it.
3203 } else if (result != ISC_R_NOTFOUND &&
3204 result != DNS_R_PARTIALMATCH) {
3206 * An error has occurred. Bail out.
3214 * The level node is active. Any wildcarding
3215 * present at higher levels has no
3216 * effect and we're done.
3218 result = ISC_R_NOTFOUND;
3224 node = search->chain.levels[i];
/*
 * matchparams(): check whether any NSEC3 record in the rdataset slab that
 * follows 'header' uses the same NSEC3 parameters as the version being
 * searched.
 *
 * header - rdataset header; its type must be dns_rdatatype_nsec3 (REQUIRE).
 * search - search block; search->rbtversion supplies the hash, iterations,
 *          salt_length and salt to compare against, and search->rbtdb the
 *          rdata class.
 *
 * The slab is read as a 16-bit big-endian record count followed by
 * length-prefixed records.  Each record is converted to a dns_rdata_nsec3_t
 * with dns_rdata_tostruct() (which must succeed, per the INSIST) and its
 * hash, iterations, salt length and salt bytes are compared with the
 * version's.  'rdata' is reset before the next record is examined.
 *
 * NOTE(review): this listing is a lossy extract.  The assignment of
 * region.base, the advancement of 'raw', the statement executed on a match
 * (presumably returning ISC_TRUE) and the final return are not visible --
 * confirm against the complete file.
 */
3232 static isc_boolean_t
3233 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3235 dns_rdata_t rdata = DNS_RDATA_INIT;
3236 dns_rdata_nsec3_t nsec3;
3237 unsigned char *raw; /* RDATASLAB */
3238 unsigned int rdlen, count;
3239 isc_region_t region;
3240 isc_result_t result;
3242 REQUIRE(header->type == dns_rdatatype_nsec3);
3244 raw = (unsigned char *)header + sizeof(*header);
3245 count = raw[0] * 256 + raw[1]; /* count */
3246 #if DNS_RDATASET_FIXED
3247 raw += count * 4 + 2;
3251 while (count-- > 0) {
3252 rdlen = raw[0] * 256 + raw[1];
3253 #if DNS_RDATASET_FIXED
3259 region.length = rdlen;
/* Pass the address of the local region descriptor. */
3260 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3261 dns_rdatatype_nsec3, &region);
3263 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3264 INSIST(result == ISC_R_SUCCESS);
3265 if (nsec3.hash == search->rbtversion->hash &&
3266 nsec3.iterations == search->rbtversion->iterations &&
3267 nsec3.salt_length == search->rbtversion->salt_length &&
3268 memcmp(nsec3.salt, search->rbtversion->salt,
3269 nsec3.salt_length) == 0)
3271 dns_rdata_reset(&rdata);
3277 * Find node of the NSEC/NSEC3 record that is 'name'.
/*
 * find_closest_nsec(): starting at the current position of search->chain,
 * step backwards through 'tree' until a node carrying a usable NSEC (or
 * NSEC3) record is found, and return it to the caller.
 *
 * search      - search block (database, version serial, node chain).
 * nodep       - optional; receives a referenced node on success.
 * foundname   - receives the owner name of the record found
 *               (name + origin concatenated).
 * rdataset    - bound to the NSEC/NSEC3 rdataset found.
 * sigrdataset - bound to the covering RRSIG when one was found.
 * tree        - tree to search; when it is search->rbtdb->nsec3 the types
 *               sought are NSEC3 / RRSIG(NSEC3), otherwise NSEC /
 *               RRSIG(NSEC).
 * secure      - need_sig is true only when this equals dns_db_secure.
 *
 * Per node (under the node read lock) the header list is scanned for the
 * version-appropriate record of the wanted type and its signature:
 *  - an NSEC3 record whose parameters do not match (matchparams()) is
 *    treated as an empty node and the walk continues;
 *  - a record with its signature (or without, when no signature is needed)
 *    ends the search successfully;
 *  - an active node with neither record is treated as empty and skipped;
 *  - an active node with only one of the pair yields DNS_R_BADDB;
 *  - an inactive node is skipped.
 * The loop repeats while the node was empty and the step back succeeded.
 * When the start of the tree is reached (ISC_R_NOMORE) and 'wraps' is set,
 * the chain is repositioned with dns_rbtnodechain_last(); an ISC_R_NOMORE
 * that remains is reported as DNS_R_BADDB.
 *
 * NOTE(review): this listing is a lossy extract.  The initialisation of
 * 'wraps', 'found' and 'foundsig', loop labels or headers, several
 * arguments and assignments inside the scan, and the final return
 * (presumably 'result') are not visible -- confirm against the complete
 * file.
 */
3279 static inline isc_result_t
3280 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3281 dns_name_t *foundname, dns_rdataset_t *rdataset,
3282 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3283 dns_db_secure_t secure)
3285 dns_rbtnode_t *node;
3286 rdatasetheader_t *header, *header_next, *found, *foundsig;
3287 isc_boolean_t empty_node;
3288 isc_result_t result;
3289 dns_fixedname_t fname, forigin;
3290 dns_name_t *name, *origin;
3291 dns_rdatatype_t type;
3292 rbtdb_rdatatype_t sigtype;
3293 isc_boolean_t wraps;
3294 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3296 if (tree == search->rbtdb->nsec3) {
3297 type = dns_rdatatype_nsec3;
3298 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3301 type = dns_rdatatype_nsec;
3302 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3309 dns_fixedname_init(&fname);
3310 name = dns_fixedname_name(&fname);
3311 dns_fixedname_init(&forigin);
3312 origin = dns_fixedname_name(&forigin);
3313 result = dns_rbtnodechain_current(&search->chain, name,
3315 if (result != ISC_R_SUCCESS)
3317 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3318 isc_rwlocktype_read);
3321 empty_node = ISC_TRUE;
3322 for (header = node->data;
3324 header = header_next) {
3325 header_next = header->next;
3327 * Look for an active, extant NSEC or RRSIG NSEC.
3330 if (header->serial <= search->serial &&
3333 * Is this a "this rdataset doesn't
3336 if (NONEXISTENT(header))
3340 header = header->down;
3341 } while (header != NULL);
3342 if (header != NULL) {
3344 * We now know that there is at least one
3345 * active rdataset at this node.
3347 empty_node = ISC_FALSE;
3348 if (header->type == type) {
3350 if (foundsig != NULL)
3352 } else if (header->type == sigtype) {
3360 if (found != NULL && search->rbtversion->havensec3 &&
3361 found->type == dns_rdatatype_nsec3 &&
3362 !matchparams(found, search)) {
3363 empty_node = ISC_TRUE;
3366 result = dns_rbtnodechain_prev(&search->chain,
3368 } else if (found != NULL &&
3369 (foundsig != NULL || !need_sig))
3372 * We've found the right NSEC/NSEC3 record.
3374 * Note: for this to really be the right
3375 * NSEC record, it's essential that the NSEC
3376 * records of any nodes obscured by a zone
3377 * cut have been removed; we assume this is
3380 result = dns_name_concatenate(name, origin,
3382 if (result == ISC_R_SUCCESS) {
3383 if (nodep != NULL) {
3384 new_reference(search->rbtdb,
3388 bind_rdataset(search->rbtdb, node,
3391 if (foundsig != NULL)
3392 bind_rdataset(search->rbtdb,
3398 } else if (found == NULL && foundsig == NULL) {
3400 * This node is active, but has no NSEC or
3401 * RRSIG NSEC. That means it's glue or
3402 * other obscured zone data that isn't
3403 * relevant for our search. Treat the
3404 * node as if it were empty and keep looking.
3406 empty_node = ISC_TRUE;
3407 result = dns_rbtnodechain_prev(&search->chain,
3411 * We found an active node, but either the
3412 * NSEC or the RRSIG NSEC is missing. This
3415 result = DNS_R_BADDB;
3419 * This node isn't active. We've got to keep
3422 result = dns_rbtnodechain_prev(&search->chain, NULL,
3425 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3426 isc_rwlocktype_read);
3427 } while (empty_node && result == ISC_R_SUCCESS);
3429 if (result == ISC_R_NOMORE && wraps) {
3430 result = dns_rbtnodechain_last(&search->chain, tree,
3432 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3439 * If the result is ISC_R_NOMORE, then we got to the beginning of
3440 * the database and didn't find a NSEC record. This shouldn't
3443 if (result == ISC_R_NOMORE)
3444 result = DNS_R_BADDB;
3450 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3451 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3452 dns_dbnode_t **nodep, dns_name_t *foundname,
3453 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3455 dns_rbtnode_t *node = NULL;
3456 isc_result_t result;
3457 rbtdb_search_t search;
3458 isc_boolean_t cname_ok = ISC_TRUE;
3459 isc_boolean_t close_version = ISC_FALSE;
3460 isc_boolean_t maybe_zonecut = ISC_FALSE;
3461 isc_boolean_t at_zonecut = ISC_FALSE;
3463 isc_boolean_t empty_node;
3464 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3465 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3466 rbtdb_rdatatype_t sigtype;
3467 isc_boolean_t active;
3468 dns_rbtnodechain_t chain;
3472 search.rbtdb = (dns_rbtdb_t *)db;
3474 REQUIRE(VALID_RBTDB(search.rbtdb));
3477 * We don't care about 'now'.
3482 * If the caller didn't supply a version, attach to the current
3485 if (version == NULL) {
3486 currentversion(db, &version);
3487 close_version = ISC_TRUE;
3490 search.rbtversion = version;
3491 search.serial = search.rbtversion->serial;
3492 search.options = options;
3493 search.copy_name = ISC_FALSE;
3494 search.need_cleanup = ISC_FALSE;
3495 search.wild = ISC_FALSE;
3496 search.zonecut = NULL;
3497 dns_fixedname_init(&search.zonecut_name);
3498 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3502 * 'wild' will be true iff. we've matched a wildcard.
3506 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3509 * Search down from the root of the tree. If, while going down, we
3510 * encounter a callback node, zone_zonecut_callback() will search the
3511 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3513 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3515 result = dns_rbt_findnode(tree, name, foundname, &node,
3516 &search.chain, DNS_RBTFIND_EMPTYDATA,
3517 zone_zonecut_callback, &search);
3519 if (result == DNS_R_PARTIALMATCH) {
3521 if (search.zonecut != NULL) {
3522 result = setup_delegation(&search, nodep, foundname,
3523 rdataset, sigrdataset);
3529 * At least one of the levels in the search chain
3530 * potentially has a wildcard. For each such level,
3531 * we must see if there's a matching wildcard active
3532 * in the current version.
3534 result = find_wildcard(&search, &node, name);
3535 if (result == ISC_R_SUCCESS) {
3536 result = dns_name_copy(name, foundname, NULL);
3537 if (result != ISC_R_SUCCESS)
3542 else if (result != ISC_R_NOTFOUND)
3546 chain = search.chain;
3547 active = activeempty(&search, &chain, name);
3550 * If we're here, then the name does not exist, is not
3551 * beneath a zonecut, and there's no matching wildcard.
3553 if ((search.rbtversion->secure == dns_db_secure &&
3554 !search.rbtversion->havensec3) ||
3555 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3556 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3558 result = find_closest_nsec(&search, nodep, foundname,
3559 rdataset, sigrdataset, tree,
3560 search.rbtversion->secure);
3561 if (result == ISC_R_SUCCESS)
3562 result = active ? DNS_R_EMPTYNAME :
3565 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3567 } else if (result != ISC_R_SUCCESS)
3572 * We have found a node whose name is the desired name, or we
3573 * have matched a wildcard.
3576 if (search.zonecut != NULL) {
3578 * If we're beneath a zone cut, we don't want to look for
3579 * CNAMEs because they're not legitimate zone glue.
3581 cname_ok = ISC_FALSE;
3584 * The node may be a zone cut itself. If it might be one,
3585 * make sure we check for it later.
2587 * DS records live above the zone cut in ordinary zones so
2588 * we want to ignore any referral.
2590 * Stub zones don't have anything "above" the delegation so
2591 * we always return a referral.
3593 if (node->find_callback &&
3594 ((node != search.rbtdb->origin_node &&
3595 !dns_rdatatype_atparent(type)) ||
3596 IS_STUB(search.rbtdb)))
3597 maybe_zonecut = ISC_TRUE;
3601 * Certain DNSSEC types are not subject to CNAME matching
3602 * (RFC4035, section 2.5 and RFC3007).
3604 * We don't check for RRSIG, because we don't store RRSIG records
3607 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3608 cname_ok = ISC_FALSE;
3611 * We now go looking for rdata...
3614 lock = &search.rbtdb->node_locks[node->locknum].lock;
3615 NODE_LOCK(lock, isc_rwlocktype_read);
3619 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3623 empty_node = ISC_TRUE;
3624 for (header = node->data; header != NULL; header = header_next) {
3625 header_next = header->next;
3627 * Look for an active, extant rdataset.
3630 if (header->serial <= search.serial &&
3633 * Is this a "this rdataset doesn't
3636 if (NONEXISTENT(header))
3640 header = header->down;
3641 } while (header != NULL);
3642 if (header != NULL) {
3644 * We now know that there is at least one active
3645 * rdataset at this node.
3647 empty_node = ISC_FALSE;
3650 * Do special zone cut handling, if requested.
3652 if (maybe_zonecut &&
3653 header->type == dns_rdatatype_ns) {
3655 * We increment the reference count on node to
3656 * ensure that search->zonecut_rdataset will
3657 * still be valid later.
3659 new_reference(search.rbtdb, node);
3660 search.zonecut = node;
3661 search.zonecut_rdataset = header;
3662 search.zonecut_sigrdataset = NULL;
3663 search.need_cleanup = ISC_TRUE;
3664 maybe_zonecut = ISC_FALSE;
3665 at_zonecut = ISC_TRUE;
3667 * It is not clear if KEY should still be
3668 * allowed at the parent side of the zone
3669 * cut or not. It is needed for RFC3007
3670 * validated updates.
3672 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3673 && type != dns_rdatatype_nsec
3674 && type != dns_rdatatype_key) {
3676 * Glue is not OK, but any answer we
3677 * could return would be glue. Return
3683 if (found != NULL && foundsig != NULL)
3689 * If the NSEC3 record doesn't match the chain
3690 * we are using behave as if it isn't here.
3692 if (header->type == dns_rdatatype_nsec3 &&
3693 !matchparams(header, &search)) {
3694 NODE_UNLOCK(lock, isc_rwlocktype_read);
3698 * If we found a type we were looking for,
3701 if (header->type == type ||
3702 type == dns_rdatatype_any ||
3703 (header->type == dns_rdatatype_cname &&
3706 * We've found the answer!
3709 if (header->type == dns_rdatatype_cname &&
3712 * We may be finding a CNAME instead
3713 * of the desired type.
3715 * If we've already got the CNAME RRSIG,
3716 * use it, otherwise change sigtype
3717 * so that we find it.
3719 if (cnamesig != NULL)
3720 foundsig = cnamesig;
3723 RBTDB_RDATATYPE_SIGCNAME;
3726 * If we've got all we need, end the search.
3728 if (!maybe_zonecut && foundsig != NULL)
3730 } else if (header->type == sigtype) {
3732 * We've found the RRSIG rdataset for our
3733 * target type. Remember it.
3737 * If we've got all we need, end the search.
3739 if (!maybe_zonecut && found != NULL)
3741 } else if (header->type == dns_rdatatype_nsec &&
3742 !search.rbtversion->havensec3) {
3744 * Remember a NSEC rdataset even if we're
3745 * not specifically looking for it, because
3746 * we might need it later.
3748 nsecheader = header;
3749 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3750 !search.rbtversion->havensec3) {
3752 * If we need the NSEC rdataset, we'll also
3753 * need its signature.
3756 } else if (cname_ok &&
3757 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3759 * If we get a CNAME match, we'll also need
3769 * We have an exact match for the name, but there are no
3770 * active rdatasets in the desired version. That means that
3771 * this node doesn't exist in the desired version, and that
3772 * we really have a partial match.
3775 NODE_UNLOCK(lock, isc_rwlocktype_read);
3781 * If we didn't find what we were looking for...
3783 if (found == NULL) {
3784 if (search.zonecut != NULL) {
3786 * We were trying to find glue at a node beneath a
3787 * zone cut, but didn't.
3789 * Return the delegation.
3791 NODE_UNLOCK(lock, isc_rwlocktype_read);
3792 result = setup_delegation(&search, nodep, foundname,
3793 rdataset, sigrdataset);
3797 * The desired type doesn't exist.
3799 result = DNS_R_NXRRSET;
3800 if (search.rbtversion->secure == dns_db_secure &&
3801 !search.rbtversion->havensec3 &&
3802 (nsecheader == NULL || nsecsig == NULL)) {
3804 * The zone is secure but there's no NSEC,
3805 * or the NSEC has no signature!
3808 result = DNS_R_BADDB;
3812 NODE_UNLOCK(lock, isc_rwlocktype_read);
3813 result = find_closest_nsec(&search, nodep, foundname,
3814 rdataset, sigrdataset,
3816 search.rbtversion->secure);
3817 if (result == ISC_R_SUCCESS)
3818 result = DNS_R_EMPTYWILD;
3821 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3825 * There's no NSEC record, and we were told
3828 result = DNS_R_BADDB;
3831 if (nodep != NULL) {
3832 new_reference(search.rbtdb, node);
3835 if ((search.rbtversion->secure == dns_db_secure &&
3836 !search.rbtversion->havensec3) ||
3837 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3839 bind_rdataset(search.rbtdb, node, nsecheader,
3841 if (nsecsig != NULL)
3842 bind_rdataset(search.rbtdb, node,
3843 nsecsig, 0, sigrdataset);
3846 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3851 * We found what we were looking for, or we found a CNAME.
3854 if (type != found->type &&
3855 type != dns_rdatatype_any &&
3856 found->type == dns_rdatatype_cname) {
3858 * We weren't doing an ANY query and we found a CNAME instead
3859 * of the type we were looking for, so we need to indicate
3860 * that result to the caller.
3862 result = DNS_R_CNAME;
3863 } else if (search.zonecut != NULL) {
3865 * If we're beneath a zone cut, we must indicate that the
3866 * result is glue, unless we're actually at the zone cut
3867 * and the type is NSEC or KEY.
3869 if (search.zonecut == node) {
3871 * It is not clear if KEY should still be
3872 * allowed at the parent side of the zone
3873 * cut or not. It is needed for RFC3007
3874 * validated updates.
3876 if (type == dns_rdatatype_nsec ||
3877 type == dns_rdatatype_nsec3 ||
3878 type == dns_rdatatype_key)
3879 result = ISC_R_SUCCESS;
3880 else if (type == dns_rdatatype_any)
3881 result = DNS_R_ZONECUT;
3883 result = DNS_R_GLUE;
3885 result = DNS_R_GLUE;
3887 * We might have found data that isn't glue, but was occluded
3888 * by a dynamic update. If the caller cares about this, they
3889 * will have told us to validate glue.
3891 * XXX We should cache the glue validity state!
3893 if (result == DNS_R_GLUE &&
3894 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3895 !valid_glue(&search, foundname, type, node)) {
3896 NODE_UNLOCK(lock, isc_rwlocktype_read);
3897 result = setup_delegation(&search, nodep, foundname,
3898 rdataset, sigrdataset);
3903 * An ordinary successful query!
3905 result = ISC_R_SUCCESS;
3908 if (nodep != NULL) {
3910 new_reference(search.rbtdb, node);
3912 search.need_cleanup = ISC_FALSE;
3916 if (type != dns_rdatatype_any) {
3917 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3918 if (foundsig != NULL)
3919 bind_rdataset(search.rbtdb, node, foundsig, 0,
3924 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3927 NODE_UNLOCK(lock, isc_rwlocktype_read);
3930 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3933 * If we found a zonecut but aren't going to use it, we have to
3936 if (search.need_cleanup) {
3937 node = search.zonecut;
3938 INSIST(node != NULL);
3939 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3941 NODE_LOCK(lock, isc_rwlocktype_read);
3942 decrement_reference(search.rbtdb, node, 0,
3943 isc_rwlocktype_read, isc_rwlocktype_none,
3945 NODE_UNLOCK(lock, isc_rwlocktype_read);
3949 closeversion(db, &version, ISC_FALSE);
3951 dns_rbtnodechain_reset(&search.chain);
/*
 * findzonecut entry point that is not supported here: any call reports a
 * fatal error through FATAL_ERROR() with the message below.
 *
 * Returns ISC_R_NOTIMPLEMENTED; presumably that return is reached only if
 * FATAL_ERROR() itself returns -- TODO confirm.
 */
3957 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3958 isc_stdtime_t now, dns_dbnode_t **nodep,
3959 dns_name_t *foundname,
3960 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
/* sigrdataset is not used by this stub. */
3969 UNUSED(sigrdataset);
3971 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3973 return (ISC_R_NOTIMPLEMENTED);
/*
 * Node callback used during a tree search (cache_find() passes it to
 * dns_rbt_findnode()).  'arg' is the rbtdb_search_t for the search in
 * progress; no zone cut may have been recorded in it yet.
 *
 * With the node lock held, every rdataset header on 'node' is visited:
 *   - headers whose rdh_ttl is <= search->now are stale; they are either
 *     unlinked and freed or flagged RDATASET_ATTR_STALE (see below);
 *   - a DNAME header and an RRSIG(DNAME) header are remembered (each test
 *     has a second condition that is not visible in this listing).
 *
 * If a DNAME header was remembered and its trust is not pending (or
 * DNS_DBFIND_PENDINGOK is set), the node is referenced and stored as
 * search->zonecut and the result is DNS_R_PARTIALMATCH.  Otherwise the
 * result is DNS_R_CONTINUE.
 */
3977 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3978 rbtdb_search_t *search = arg;
3979 rdatasetheader_t *header, *header_prev, *header_next;
3980 rdatasetheader_t *dname_header, *sigdname_header;
3981 isc_result_t result;
3983 isc_rwlocktype_t locktype;
3987 REQUIRE(search->zonecut == NULL);
3990 * Keep compiler silent.
/* Start with a read lock; it may be upgraded while purging stale headers. */
3994 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3995 locktype = isc_rwlocktype_read;
3996 NODE_LOCK(lock, locktype);
3999 * Look for a DNAME or RRSIG DNAME rdataset.
4001 dname_header = NULL;
4002 sigdname_header = NULL;
/* header_next is saved first because the current header may be freed. */
4004 for (header = node->data; header != NULL; header = header_next) {
4005 header_next = header->next;
4006 if (header->rdh_ttl <= search->now) {
4008 * This rdataset is stale. If no one else is
4009 * using the node, we can clean it up right
4010 * now, otherwise we mark it as stale, and
4011 * the node as dirty, so it will get cleaned
/*
 * Cleanup is attempted only for headers older than the RBTDB_VIRTUAL
 * margin, and only if the write lock is already held or the upgrade
 * attempt succeeds.
 */
4014 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4015 (locktype == isc_rwlocktype_write ||
4016 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4018 * We update the node's status only when we
4019 * can get write access; otherwise, we leave
4020 * others to this work. Periodical cleaning
4021 * will eventually take the job as the last
4023 * We won't downgrade the lock, since other
4024 * rdatasets are probably stale, too.
4026 locktype = isc_rwlocktype_write;
/* Freeing is done only when the node currently has no references. */
4028 if (dns_rbtnode_refcurrent(node) == 0) {
4032 * header->down can be non-NULL if the
4033 * refcount has just decremented to 0
4034 * but decrement_reference() has not
4035 * performed clean_cache_node(), in
4036 * which case we need to purge the
4037 * stale headers first.
4039 mctx = search->rbtdb->common.mctx;
4040 clean_stale_headers(search->rbtdb,
4043 if (header_prev != NULL)
4047 node->data = header->next;
4048 free_rdataset(search->rbtdb, mctx,
4051 header->attributes |=
4052 RDATASET_ATTR_STALE;
4054 header_prev = header;
4057 header_prev = header;
4058 } else if (header->type == dns_rdatatype_dname &&
4060 dname_header = header;
4061 header_prev = header;
4062 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4064 sigdname_header = header;
4065 header_prev = header;
4067 header_prev = header;
/* A DNAME with pending trust is used only when the caller allows it. */
4070 if (dname_header != NULL &&
4071 (!DNS_TRUST_PENDING(dname_header->trust) ||
4072 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4074 * We increment the reference count on node to ensure that
4075 * search->zonecut_rdataset will still be valid later.
4077 new_reference(search->rbtdb, node);
4078 INSIST(!ISC_LINK_LINKED(node, deadlink));
4079 search->zonecut = node;
4080 search->zonecut_rdataset = dname_header;
4081 search->zonecut_sigrdataset = sigdname_header;
/* need_cleanup tells the caller it must drop this reference if unused. */
4082 search->need_cleanup = ISC_TRUE;
4083 result = DNS_R_PARTIALMATCH;
4085 result = DNS_R_CONTINUE;
/* Unlock with whichever lock type is held at this point. */
4087 NODE_UNLOCK(lock, locktype);
/*
 * Look for NS and RRSIG(NS) rdatasets at 'node'; when none is found and
 * levels remain in search->chain, 'node' is replaced by a node taken from
 * search->chain.levels[] (the enclosing loop is not visible in this
 * listing -- presumably the scan repeats there; TODO confirm).
 *
 * Stale headers met during the scan are freed or flagged
 * RDATASET_ATTR_STALE, as in the other cache lookups in this file.
 *
 * When an NS rdataset is found:
 *   - 'foundname', if non-NULL, is built first from the node name and the
 *     names of the chain levels, so nothing has to be undone on failure;
 *   - 'nodep', if non-NULL, receives a new reference to the node;
 *   - the NS header and any signature header are bound with
 *     bind_rdataset(), and update_header() is called for those that
 *     need_headerupdate() reports, under a write lock;
 *   - the result is DNS_R_DELEGATION.
 * The initial result is ISC_R_NOTFOUND.
 *
 * Caller must be holding the tree lock.
 */
4092 static inline isc_result_t
4093 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4094 dns_dbnode_t **nodep, dns_name_t *foundname,
4095 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4098 dns_rbtnode_t *level_node;
4099 rdatasetheader_t *header, *header_prev, *header_next;
4100 rdatasetheader_t *found, *foundsig;
4101 isc_result_t result = ISC_R_NOTFOUND;
4106 isc_rwlocktype_t locktype;
4109 * Caller must be holding the tree lock.
4112 rbtdb = search->rbtdb;
/* i counts the chain levels still available above the current node. */
4113 i = search->chain.level_matches;
4116 locktype = isc_rwlocktype_read;
4117 lock = &rbtdb->node_locks[node->locknum].lock;
4118 NODE_LOCK(lock, locktype);
4121 * Look for NS and RRSIG NS rdatasets.
4126 for (header = node->data;
4128 header = header_next) {
4129 header_next = header->next;
4130 if (header->rdh_ttl <= search->now) {
4132 * This rdataset is stale. If no one else is
4133 * using the node, we can clean it up right
4134 * now, otherwise we mark it as stale, and
4135 * the node as dirty, so it will get cleaned
4138 if ((header->rdh_ttl <= search->now -
4140 (locktype == isc_rwlocktype_write ||
4141 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4143 * We update the node's status only
4144 * when we can get write access.
4146 locktype = isc_rwlocktype_write;
4148 if (dns_rbtnode_refcurrent(node)
4152 m = search->rbtdb->common.mctx;
4153 clean_stale_headers(
4156 if (header_prev != NULL)
4162 free_rdataset(rbtdb, m,
4165 header->attributes |=
4166 RDATASET_ATTR_STALE;
4168 header_prev = header;
4171 header_prev = header;
4172 } else if (EXISTS(header)) {
4174 * We've found an extant rdataset. See if
4175 * we're interested in it.
4177 if (header->type == dns_rdatatype_ns) {
4179 if (foundsig != NULL)
4181 } else if (header->type ==
4182 RBTDB_RDATATYPE_SIGNS) {
4187 header_prev = header;
4189 header_prev = header;
4192 if (found != NULL) {
4194 * If we have to set foundname, we do it before
4195 * anything else. If we were to set foundname after
4196 * we had set nodep or bound the rdataset, then we'd
4197 * have to undo that work if dns_name_concatenate()
4198 * failed. By setting foundname first, there's
4199 * nothing to undo if we have trouble.
4201 if (foundname != NULL) {
4202 dns_name_init(&name, NULL);
4203 dns_rbt_namefromnode(node, &name);
4204 result = dns_name_copy(&name, foundname, NULL);
/* Append the name of each remaining chain level while no error occurs. */
4205 while (result == ISC_R_SUCCESS && i > 0) {
4207 level_node = search->chain.levels[i];
4208 dns_name_init(&name, NULL);
4209 dns_rbt_namefromnode(level_node,
4212 dns_name_concatenate(foundname,
4217 if (result != ISC_R_SUCCESS) {
4222 result = DNS_R_DELEGATION;
4223 if (nodep != NULL) {
4224 new_reference(search->rbtdb, node);
4227 bind_rdataset(search->rbtdb, node, found, search->now,
4229 if (foundsig != NULL)
4230 bind_rdataset(search->rbtdb, node, foundsig,
4231 search->now, sigrdataset);
4232 if (need_headerupdate(found, search->now) ||
4233 (foundsig != NULL &&
4234 need_headerupdate(foundsig, search->now))) {
/*
 * The lock is released and re-taken as a writer here, so each
 * need_headerupdate() test is repeated below before update_header().
 */
4235 if (locktype != isc_rwlocktype_write) {
4236 NODE_UNLOCK(lock, locktype);
4237 NODE_LOCK(lock, isc_rwlocktype_write);
4238 locktype = isc_rwlocktype_write;
4241 if (need_headerupdate(found, search->now))
4242 update_header(search->rbtdb, found,
4244 if (foundsig != NULL &&
4245 need_headerupdate(foundsig, search->now)) {
4246 update_header(search->rbtdb, foundsig,
4253 NODE_UNLOCK(lock, locktype);
/* Nothing found at this node and levels remain: switch to a chain level. */
4255 if (found == NULL && i > 0) {
4257 node = search->chain.levels[i];
/*
 * Starting from the current position of search->chain, look for a node
 * that holds an NSEC rdataset (and, if present, its RRSIG).
 *
 * For each node visited, under the node lock:
 *   - headers with rdh_ttl <= now are stale and are freed or flagged
 *     RDATASET_ATTR_STALE;
 *   - headers that are NONEXISTENT or whose base type is 0 are skipped;
 *   - any other header marks the node as non-empty, and NSEC / RRSIG(NSEC)
 *     headers are matched against matchtype / sigmatchtype.
 *
 * Outcome per node:
 *   - NSEC found: name and origin are concatenated, the rdatasets are
 *     bound, the node gets a new reference, result is DNS_R_COVERINGNSEC;
 *   - node non-empty but no NSEC: result is ISC_R_NOTFOUND;
 *   - node empty: step back with dns_rbtnodechain_prev() and, while that
 *     succeeds, repeat.
 */
4267 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4268 isc_stdtime_t now, dns_name_t *foundname,
4269 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4271 dns_rbtnode_t *node;
4272 rdatasetheader_t *header, *header_next, *header_prev;
4273 rdatasetheader_t *found, *foundsig;
4274 isc_boolean_t empty_node;
4275 isc_result_t result;
4276 dns_fixedname_t fname, forigin;
4277 dns_name_t *name, *origin;
4278 rbtdb_rdatatype_t matchtype, sigmatchtype;
4280 isc_rwlocktype_t locktype;
/* Type codes for NSEC and for the RRSIG that covers NSEC. */
4282 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4283 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4284 dns_rdatatype_nsec);
4288 dns_fixedname_init(&fname);
4289 name = dns_fixedname_name(&fname);
4290 dns_fixedname_init(&forigin);
4291 origin = dns_fixedname_name(&forigin);
/* Fetch the chain position; its name and origin are reused further down. */
4292 result = dns_rbtnodechain_current(&search->chain, name,
4294 if (result != ISC_R_SUCCESS)
4296 locktype = isc_rwlocktype_read;
4297 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4298 NODE_LOCK(lock, locktype);
4301 empty_node = ISC_TRUE;
4303 for (header = node->data;
4305 header = header_next) {
4306 header_next = header->next;
4307 if (header->rdh_ttl <= now) {
4309 * This rdataset is stale. If no one else is
4310 * using the node, we can clean it up right
4311 * now, otherwise we mark it as stale, and the
4312 * node as dirty, so it will get cleaned up
4315 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4316 (locktype == isc_rwlocktype_write ||
4317 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4319 * We update the node's status only
4320 * when we can get write access.
4322 locktype = isc_rwlocktype_write;
4324 if (dns_rbtnode_refcurrent(node)
4328 m = search->rbtdb->common.mctx;
4329 clean_stale_headers(
4332 if (header_prev != NULL)
4336 node->data = header->next;
4337 free_rdataset(search->rbtdb, m,
4340 header->attributes |=
4341 RDATASET_ATTR_STALE;
4343 header_prev = header;
4346 header_prev = header;
/* Headers that do not exist or have base type 0 do not count as data. */
4349 if (NONEXISTENT(header) ||
4350 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4351 header_prev = header;
4354 empty_node = ISC_FALSE;
4355 if (header->type == matchtype)
4357 else if (header->type == sigmatchtype)
4359 header_prev = header;
4361 if (found != NULL) {
4362 result = dns_name_concatenate(name, origin,
4364 if (result != ISC_R_SUCCESS)
4366 bind_rdataset(search->rbtdb, node, found,
4368 if (foundsig != NULL)
4369 bind_rdataset(search->rbtdb, node, foundsig,
4371 new_reference(search->rbtdb, node);
4373 result = DNS_R_COVERINGNSEC;
4374 } else if (!empty_node) {
4375 result = ISC_R_NOTFOUND;
4377 result = dns_rbtnodechain_prev(&search->chain, NULL,
4380 NODE_UNLOCK(lock, locktype);
/* Keep going only past empty nodes and only while stepping back works. */
4381 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * Look up 'name' / 'type' in the tree of this database.
 *
 * 'version' must be NULL.  'now' is refreshed with isc_stdtime_get() (the
 * guarding condition is not visible in this listing -- presumably when the
 * caller passed 0; TODO confirm).
 *
 * Outline, all under the tree read lock:
 *   1. dns_rbt_findnode() walks the tree with cache_zonecut_callback(),
 *      which may record a DNAME zone cut in 'search'.
 *   2. On DNS_R_PARTIALMATCH: optionally try find_coveringnsec() (when
 *      DNS_DBFIND_COVERINGNSEC is set), then use the recorded zone cut via
 *      setup_delegation(), or find_deepest_zonecut().
 *   3. On an exact match: scan the node headers under the node lock,
 *      purging or flagging stale ones, and remember the wanted type, its
 *      RRSIG, negative cache entries, NS / RRSIG(NS) and, when CNAME
 *      matching applies, CNAME / RRSIG(CNAME).
 *   4. If nothing usable was found (also when the trust level of the
 *      match is not allowed by 'options'), answer with the NS rdataset at
 *      this node as DNS_R_DELEGATION, or go find the deepest zone cut.
 *   5. Otherwise the result is DNS_R_NCACHENXDOMAIN, DNS_R_NCACHENXRRSET,
 *      DNS_R_CNAME or ISC_R_SUCCESS, and the rdatasets are bound.
 *
 * Headers for which need_headerupdate() is true are passed to
 * update_header() under a write lock before the node is unlocked.  A zone
 * cut reference taken by the callback but not used is released at the end.
 */
4386 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4387 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4388 dns_dbnode_t **nodep, dns_name_t *foundname,
4389 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4391 dns_rbtnode_t *node = NULL;
4392 isc_result_t result;
4393 rbtdb_search_t search;
4394 isc_boolean_t cname_ok = ISC_TRUE;
4395 isc_boolean_t empty_node;
4397 isc_rwlocktype_t locktype;
4398 rdatasetheader_t *header, *header_prev, *header_next;
4399 rdatasetheader_t *found, *nsheader;
4400 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4401 rdatasetheader_t *update, *updatesig;
4402 rbtdb_rdatatype_t sigtype, negtype;
4406 search.rbtdb = (dns_rbtdb_t *)db;
4408 REQUIRE(VALID_RBTDB(search.rbtdb));
4409 REQUIRE(version == NULL);
4412 isc_stdtime_get(&now);
/* Initialise the search context shared with the callback and helpers. */
4414 search.rbtversion = NULL;
4416 search.options = options;
4417 search.copy_name = ISC_FALSE;
4418 search.need_cleanup = ISC_FALSE;
4419 search.wild = ISC_FALSE;
4420 search.zonecut = NULL;
4421 dns_fixedname_init(&search.zonecut_name);
4422 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4427 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4430 * Search down from the root of the tree. If, while going down, we
4431 * encounter a callback node, cache_zonecut_callback() will search the
4432 * rdatasets at the zone cut for a DNAME rdataset.
4434 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4435 &search.chain, DNS_RBTFIND_EMPTYDATA,
4436 cache_zonecut_callback, &search);
4438 if (result == DNS_R_PARTIALMATCH) {
4439 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4440 result = find_coveringnsec(&search, nodep, now,
4441 foundname, rdataset,
4443 if (result == DNS_R_COVERINGNSEC)
4446 if (search.zonecut != NULL) {
4447 result = setup_delegation(&search, nodep, foundname,
4448 rdataset, sigrdataset);
4452 result = find_deepest_zonecut(&search, node, nodep,
4453 foundname, rdataset,
4457 } else if (result != ISC_R_SUCCESS)
4461 * Certain DNSSEC types are not subject to CNAME matching
4462 * (RFC4035, section 2.5 and RFC3007).
4464 * We don't check for RRSIG, because we don't store RRSIG records
4467 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4468 cname_ok = ISC_FALSE;
4471 * We now go looking for rdata...
4474 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4475 locktype = isc_rwlocktype_read;
4476 NODE_LOCK(lock, locktype);
/* sigtype: RRSIG covering 'type'; negtype: negative entry for 'type'. */
4480 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4481 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4485 empty_node = ISC_TRUE;
4487 for (header = node->data; header != NULL; header = header_next) {
4488 header_next = header->next;
4489 if (header->rdh_ttl <= now) {
4491 * This rdataset is stale. If no one else is using the
4492 * node, we can clean it up right now, otherwise we
4493 * mark it as stale, and the node as dirty, so it will
4494 * get cleaned up later.
4496 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4497 (locktype == isc_rwlocktype_write ||
4498 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4500 * We update the node's status only when we
4501 * can get write access.
4503 locktype = isc_rwlocktype_write;
4505 if (dns_rbtnode_refcurrent(node) == 0) {
4508 mctx = search.rbtdb->common.mctx;
4509 clean_stale_headers(search.rbtdb, mctx,
4511 if (header_prev != NULL)
4515 node->data = header->next;
4516 free_rdataset(search.rbtdb, mctx,
4519 header->attributes |=
4520 RDATASET_ATTR_STALE;
4522 header_prev = header;
4525 header_prev = header;
4526 } else if (EXISTS(header)) {
4528 * We now know that there is at least one active
4529 * non-stale rdataset at this node.
4531 empty_node = ISC_FALSE;
4534 * If we found a type we were looking for, remember
4537 if (header->type == type ||
4538 (type == dns_rdatatype_any &&
4539 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4540 (cname_ok && header->type ==
4541 dns_rdatatype_cname)) {
4543 * We've found the answer.
4546 if (header->type == dns_rdatatype_cname &&
4550 * If we've already got the
4551 * CNAME RRSIG, use it.
4553 foundsig = cnamesig;
4555 } else if (header->type == sigtype) {
4557 * We've found the RRSIG rdataset for our
4558 * target type. Remember it.
4561 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4562 header->type == negtype) {
4564 * We've found a negative cache entry.
4567 } else if (header->type == dns_rdatatype_ns) {
4569 * Remember a NS rdataset even if we're
4570 * not specifically looking for it, because
4571 * we might need it later.
4574 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4576 * If we need the NS rdataset, we'll also
4577 * need its signature.
4580 } else if (cname_ok &&
4581 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4583 * If we get a CNAME match, we'll also need
4588 header_prev = header;
4590 header_prev = header;
4595 * We have an exact match for the name, but there are no
4596 * extant rdatasets. That means that this node doesn't
4597 * meaningfully exist, and that we really have a partial match.
4599 NODE_UNLOCK(lock, locktype);
4604 * If we didn't find what we were looking for...
/*
 * A match is also rejected when its trust level (additional, glue or
 * pending) is not enabled by the matching DNS_DBFIND_*OK option.
 */
4606 if (found == NULL ||
4607 (DNS_TRUST_ADDITIONAL(found->trust) &&
4608 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4609 (found->trust == dns_trust_glue &&
4610 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4611 (DNS_TRUST_PENDING(found->trust) &&
4612 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4614 * If there is an NS rdataset at this node, then this is the
4617 if (nsheader != NULL) {
4618 if (nodep != NULL) {
4619 new_reference(search.rbtdb, node);
4620 INSIST(!ISC_LINK_LINKED(node, deadlink));
4623 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4625 if (need_headerupdate(nsheader, search.now))
4627 if (nssig != NULL) {
4628 bind_rdataset(search.rbtdb, node, nssig,
4629 search.now, sigrdataset);
4630 if (need_headerupdate(nssig, search.now))
4633 result = DNS_R_DELEGATION;
4638 * Go find the deepest zone cut.
4640 NODE_UNLOCK(lock, locktype);
4645 * We found what we were looking for, or we found a CNAME.
4648 if (nodep != NULL) {
4649 new_reference(search.rbtdb, node);
4650 INSIST(!ISC_LINK_LINKED(node, deadlink));
4654 if (NEGATIVE(found)) {
4656 * We found a negative cache entry.
4658 if (NXDOMAIN(found))
4659 result = DNS_R_NCACHENXDOMAIN;
4661 result = DNS_R_NCACHENXRRSET;
4662 } else if (type != found->type &&
4663 type != dns_rdatatype_any &&
4664 found->type == dns_rdatatype_cname) {
4666 * We weren't doing an ANY query and we found a CNAME instead
4667 * of the type we were looking for, so we need to indicate
4668 * that result to the caller.
4670 result = DNS_R_CNAME;
4673 * An ordinary successful query!
4675 result = ISC_R_SUCCESS;
/* For ANY queries rdatasets are bound only for negative cache results. */
4678 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4679 result == DNS_R_NCACHENXRRSET) {
4680 bind_rdataset(search.rbtdb, node, found, search.now,
4682 if (need_headerupdate(found, search.now))
4684 if (foundsig != NULL) {
4685 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4687 if (need_headerupdate(foundsig, search.now))
4688 updatesig = foundsig;
/*
 * Header updates need the write lock.  The lock is dropped and re-taken
 * as a writer, so need_headerupdate() is checked again before each update.
 */
4693 if ((update != NULL || updatesig != NULL) &&
4694 locktype != isc_rwlocktype_write) {
4695 NODE_UNLOCK(lock, locktype);
4696 NODE_LOCK(lock, isc_rwlocktype_write);
4697 locktype = isc_rwlocktype_write;
4700 if (update != NULL && need_headerupdate(update, search.now))
4701 update_header(search.rbtdb, update, search.now);
4702 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4703 update_header(search.rbtdb, updatesig, search.now);
4705 NODE_UNLOCK(lock, locktype);
4708 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4711 * If we found a zonecut but aren't going to use it, we have to
4714 if (search.need_cleanup) {
4715 node = search.zonecut;
4716 INSIST(node != NULL);
4717 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4719 NODE_LOCK(lock, isc_rwlocktype_read);
4720 decrement_reference(search.rbtdb, node, 0,
4721 isc_rwlocktype_read, isc_rwlocktype_none,
4723 NODE_UNLOCK(lock, isc_rwlocktype_read);
4726 dns_rbtnodechain_reset(&search.chain);
/*
 * Find the NS rdataset (and its RRSIG, if any) for the zone cut that
 * applies to 'name'.
 *
 * DNS_DBFIND_NOEXACT in 'options' is translated to DNS_RBTFIND_NOEXACT for
 * the tree lookup.  No node callback is passed to dns_rbt_findnode().
 *
 *   - Partial match: find_deepest_zonecut() supplies the answer.
 *   - Exact match: the node headers are scanned under the node lock
 *     (stale ones are freed or flagged RDATASET_ATTR_STALE) for NS and
 *     RRSIG(NS).  When NS is present the rdatasets are bound, 'nodep'
 *     (if non-NULL) gets a new reference, and headers reported by
 *     need_headerupdate() are passed to update_header() under a write
 *     lock.  When no NS is present the node is unlocked; the follow-up
 *     step is not visible in this listing.
 *
 * A DNS_R_DELEGATION result is converted to ISC_R_SUCCESS before the
 * function finishes.
 */
4732 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4733 isc_stdtime_t now, dns_dbnode_t **nodep,
4734 dns_name_t *foundname,
4735 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4737 dns_rbtnode_t *node = NULL;
4739 isc_result_t result;
4740 rbtdb_search_t search;
4741 rdatasetheader_t *header, *header_prev, *header_next;
4742 rdatasetheader_t *found, *foundsig;
4743 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4744 isc_rwlocktype_t locktype;
4746 search.rbtdb = (dns_rbtdb_t *)db;
4748 REQUIRE(VALID_RBTDB(search.rbtdb));
4751 isc_stdtime_get(&now);
4753 search.rbtversion = NULL;
4755 search.options = options;
4756 search.copy_name = ISC_FALSE;
4757 search.need_cleanup = ISC_FALSE;
4758 search.wild = ISC_FALSE;
4759 search.zonecut = NULL;
4760 dns_fixedname_init(&search.zonecut_name);
4761 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4764 if ((options & DNS_DBFIND_NOEXACT) != 0)
4765 rbtoptions |= DNS_RBTFIND_NOEXACT;
4767 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4770 * Search down from the root of the tree.
4772 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4773 &search.chain, rbtoptions, NULL, &search);
4775 if (result == DNS_R_PARTIALMATCH) {
4777 result = find_deepest_zonecut(&search, node, nodep, foundname,
4778 rdataset, sigrdataset);
4780 } else if (result != ISC_R_SUCCESS)
4784 * We now go looking for an NS rdataset at the node.
4787 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4788 locktype = isc_rwlocktype_read;
4789 NODE_LOCK(lock, locktype);
4794 for (header = node->data; header != NULL; header = header_next) {
4795 header_next = header->next;
4796 if (header->rdh_ttl <= now) {
4798 * This rdataset is stale. If no one else is using the
4799 * node, we can clean it up right now, otherwise we
4800 * mark it as stale, and the node as dirty, so it will
4801 * get cleaned up later.
4803 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4804 (locktype == isc_rwlocktype_write ||
4805 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4807 * We update the node's status only when we
4808 * can get write access.
4810 locktype = isc_rwlocktype_write;
4812 if (dns_rbtnode_refcurrent(node) == 0) {
4815 mctx = search.rbtdb->common.mctx;
4816 clean_stale_headers(search.rbtdb, mctx,
4818 if (header_prev != NULL)
4822 node->data = header->next;
4823 free_rdataset(search.rbtdb, mctx,
4826 header->attributes |=
4827 RDATASET_ATTR_STALE;
4829 header_prev = header;
4832 header_prev = header;
4833 } else if (EXISTS(header)) {
4835 * If we found a type we were looking for, remember
4838 if (header->type == dns_rdatatype_ns) {
4840 * Remember a NS rdataset even if we're
4841 * not specifically looking for it, because
4842 * we might need it later.
4845 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4847 * If we need the NS rdataset, we'll also
4848 * need its signature.
4852 header_prev = header;
4854 header_prev = header;
4857 if (found == NULL) {
4859 * No NS records here.
4861 NODE_UNLOCK(lock, locktype);
4865 if (nodep != NULL) {
4866 new_reference(search.rbtdb, node);
4867 INSIST(!ISC_LINK_LINKED(node, deadlink));
4871 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4872 if (foundsig != NULL)
4873 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4876 if (need_headerupdate(found, search.now) ||
4877 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
/*
 * The lock is released and re-taken as a writer here, so each
 * need_headerupdate() test is repeated before calling update_header().
 */
4878 if (locktype != isc_rwlocktype_write) {
4879 NODE_UNLOCK(lock, locktype);
4880 NODE_LOCK(lock, isc_rwlocktype_write);
4881 locktype = isc_rwlocktype_write;
4884 if (need_headerupdate(found, search.now))
4885 update_header(search.rbtdb, found, search.now);
4886 if (foundsig != NULL &&
4887 need_headerupdate(foundsig, search.now)) {
4888 update_header(search.rbtdb, foundsig, search.now);
4892 NODE_UNLOCK(lock, locktype);
4895 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
/* No callback ran during the lookup, so no zone cut reference is pending. */
4897 INSIST(!search.need_cleanup);
4899 dns_rbtnodechain_reset(&search.chain);
/* Callers of this function see a found delegation as plain success. */
4901 if (result == DNS_R_DELEGATION)
4902 result = ISC_R_SUCCESS;
/*
 * Take an additional reference on node 'source'.
 *
 * 'targetp' must be non-NULL and must point to a NULL pointer.  The
 * reference count is incremented while the node lock is held through
 * NODE_STRONGLOCK().  The store into *targetp is not visible in this
 * listing -- presumably it receives 'source'; TODO confirm.
 */
4908 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4909 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4910 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4913 REQUIRE(VALID_RBTDB(rbtdb));
4914 REQUIRE(targetp != NULL && *targetp == NULL);
4916 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4917 dns_rbtnode_refincrement(node, &refs);
4919 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Release the reference held through '*targetp'.
 *
 * 'targetp' must be non-NULL and must point to a non-NULL node.  The
 * reference is dropped with decrement_reference() under the node read
 * lock.  If that call reports true and the lock bucket has no references
 * left while it is marked 'exiting', the bucket is noted as inactive.
 *
 * The database lock is then taken as a writer to test rbtdb->active; when
 * it is 0 the database is to be freed: a debug message naming the origin
 * (or "<UNKNOWN>") is logged and free_rbtdb() is called.  The conditions
 * guarding these last steps are not visible in this listing.
 */
4925 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4926 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4927 dns_rbtnode_t *node;
4928 isc_boolean_t want_free = ISC_FALSE;
4929 isc_boolean_t inactive = ISC_FALSE;
4930 rbtdb_nodelock_t *nodelock;
4932 REQUIRE(VALID_RBTDB(rbtdb));
4933 REQUIRE(targetp != NULL && *targetp != NULL);
4935 node = (dns_rbtnode_t *)(*targetp);
4936 nodelock = &rbtdb->node_locks[node->locknum];
4938 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4940 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4941 isc_rwlocktype_none, ISC_FALSE)) {
4942 if (isc_refcount_current(&nodelock->references) == 0 &&
4943 nodelock->exiting) {
4944 inactive = ISC_TRUE;
4948 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4953 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4955 if (rbtdb->active == 0)
4956 want_free = ISC_TRUE;
4957 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/* buf is sized for a formatted name; the fallback literal is shorter. */
4959 char buf[DNS_NAME_FORMATSIZE];
4960 if (dns_name_dynamic(&rbtdb->common.origin))
4961 dns_name_format(&rbtdb->common.origin, buf,
4964 strcpy(buf, "<UNKNOWN>");
4965 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4966 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4967 "calling free_rbtdb(%s)", buf);
4968 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Mark expired rdatasets on 'node' as stale and, under memory pressure,
 * possibly force-expire the remaining ones.
 *
 * When isc_mem_isovermem() reports pressure, a random value decides
 * whether to force expiry: only if the node has no 'down' pointer and
 * val % 4 == 0.  Logging at ISC_LOG_DEBUG(2) is enabled only in that
 * overmem case and only if the log context would log at that level.
 *
 * With the node write lock held, for every header on the node:
 *   - rdh_ttl <= now - RBTDB_VIRTUAL: flag RDATASET_ATTR_STALE;
 *   - else, when forcing expiry: unless RETAIN(header), set the TTL to 0
 *     and flag the header stale; retained headers are only logged;
 *   - else, when overmem and logging: log that the entry was saved.
 *
 * Always returns ISC_R_SUCCESS.  Caller must hold a tree lock.
 */
4974 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4975 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4976 dns_rbtnode_t *rbtnode = node;
4977 rdatasetheader_t *header;
4978 isc_boolean_t force_expire = ISC_FALSE;
4980 * These are the category and module used by the cache cleaner.
4982 isc_boolean_t log = ISC_FALSE;
4983 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4984 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4985 int level = ISC_LOG_DEBUG(2);
4986 char printname[DNS_NAME_FORMATSIZE];
4988 REQUIRE(VALID_RBTDB(rbtdb));
4991 * Caller must hold a tree lock.
4995 isc_stdtime_get(&now);
4997 if (isc_mem_isovermem(rbtdb->common.mctx)) {
5000 isc_random_get(&val);
5002 * XXXDCL Could stand to have a better policy, like LRU.
/* Only nodes without a 'down' pointer are eligible, about one in four. */
5004 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5007 * Note that 'log' can be true IFF overmem is also true.
5008 * overmem can currently only be true for cache
5009 * databases -- hence all of the "overmem cache" log strings.
5011 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5013 isc_log_write(dns_lctx, category, module, level,
5014 "overmem cache: %s %s",
5015 force_expire ? "FORCE" : "check",
5016 dns_rbt_formatnodename(rbtnode,
5018 sizeof(printname)));
5022 * We may not need write access, but this code path is not performance
5023 * sensitive, so it should be okay to always lock as a writer.
5025 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5026 isc_rwlocktype_write);
5028 for (header = rbtnode->data; header != NULL; header = header->next)
5029 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5031 * We don't check if refcurrent(rbtnode) == 0 and try
5032 * to free like we do in cache_find(), because
5033 * refcurrent(rbtnode) must be non-zero. This is so
5034 * because 'node' is an argument to the function.
5036 header->attributes |= RDATASET_ATTR_STALE;
5039 isc_log_write(dns_lctx, category, module,
5040 level, "overmem cache: stale %s",
5042 } else if (force_expire) {
/* Headers for which RETAIN() is true are exempt from forced expiry. */
5043 if (! RETAIN(header)) {
5044 set_ttl(rbtdb, header, 0);
5045 header->attributes |= RDATASET_ATTR_STALE;
5048 isc_log_write(dns_lctx, category, module,
5049 level, "overmem cache: "
5050 "reprieve by RETAIN() %s",
5053 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5054 isc_log_write(dns_lctx, category, module, level,
5055 "overmem cache: saved %s", printname);
5057 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5058 isc_rwlocktype_write);
5060 return (ISC_R_SUCCESS);
/*
 * Overmem notification hook taking the database and an overmem flag.
 * As the note in the body states, it is intentionally an empty callback.
 */
5064 overmem(dns_db_t *db, isc_boolean_t overmem) {
5065 /* This is an empty callback. See adb.c:water() */
/*
 * Write a human-readable description of 'node' to 'out'.
 *
 * With the node read lock held, prints the node pointer, its current
 * reference count and lock number.  Then, for every header reachable
 * through 'next', prints the type followed by one line for the header and
 * each header below it (reached through 'down'), showing serial, ttl,
 * trust and attributes among the fields.  A node without data is
 * reported as "(empty)".
 */
5074 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5075 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5076 dns_rbtnode_t *rbtnode = node;
5077 isc_boolean_t first;
5079 REQUIRE(VALID_RBTDB(rbtdb));
5081 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5082 isc_rwlocktype_read);
5084 fprintf(out, "node %p, %u references, locknum = %u\n",
5085 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5087 if (rbtnode->data != NULL) {
5088 rdatasetheader_t *current, *top_next;
/*
 * top_next is saved before the inner loop because 'current' is advanced
 * down the chain until it becomes NULL.
 */
5090 for (current = rbtnode->data; current != NULL;
5091 current = top_next) {
5092 top_next = current->next;
5094 fprintf(out, "\ttype %u", current->type);
5100 "\tserial = %lu, ttl = %u, "
5101 "trust = %u, attributes = %u, "
/* serial is cast to unsigned long to match the %lu conversion. */
5103 (unsigned long)current->serial,
5106 current->attributes,
5108 current = current->down;
5109 } while (current != NULL);
5112 fprintf(out, "(empty)\n");
5114 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5115 isc_rwlocktype_read);
/*
 * Allocate and initialise a database iterator for 'db' and store it in
 * '*iteratorp'.
 *
 * 'options' bits used here:
 *   DNS_DB_RELATIVENAMES -> common.relative_names
 *   DNS_DB_NSEC3ONLY     -> nsec3only (the iterator then starts on the
 *                           NSEC3 chain instead of the main chain)
 *   DNS_DB_NONSEC3       -> nonsec3
 *
 * The iterator holds its own attachment to 'db', starts paused with no
 * tree lock held, and has an empty deletion list.
 *
 * Returns ISC_R_NOMEMORY if the allocation fails, else ISC_R_SUCCESS.
 */
5119 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5121 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5122 rbtdb_dbiterator_t *rbtdbiter;
5124 REQUIRE(VALID_RBTDB(rbtdb));
5126 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5127 if (rbtdbiter == NULL)
5128 return (ISC_R_NOMEMORY);
5130 rbtdbiter->common.methods = &dbiterator_methods;
/* The target pointer is cleared before attaching to it. */
5131 rbtdbiter->common.db = NULL;
5132 dns_db_attach(db, &rbtdbiter->common.db);
5133 rbtdbiter->common.relative_names =
5134 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5135 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5136 rbtdbiter->common.cleaning = ISC_FALSE;
5137 rbtdbiter->paused = ISC_TRUE;
5138 rbtdbiter->tree_locked = isc_rwlocktype_none;
5139 rbtdbiter->result = ISC_R_SUCCESS;
5140 dns_fixedname_init(&rbtdbiter->name);
5141 dns_fixedname_init(&rbtdbiter->origin);
5142 rbtdbiter->node = NULL;
5143 rbtdbiter->delete = 0;
5144 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5145 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5146 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
/* Both node chains are initialised; 'current' selects the starting one. */
5147 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5148 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5149 if (rbtdbiter->nsec3only)
5150 rbtdbiter->current = &rbtdbiter->nsec3chain;
5152 rbtdbiter->current = &rbtdbiter->chain;
5154 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5156 return (ISC_R_SUCCESS);
/*
 * Find the rdataset of 'type' (with 'covers') at 'node' as seen by
 * 'version', plus the RRSIG rdataset matching sigmatchtype if present.
 *
 * 'type' must not be dns_rdatatype_any.  When 'version' is NULL the
 * current version is opened for the duration of the call and closed again
 * before returning.
 *
 * With the node read lock held, each header on the node is examined by
 * following its 'down' chain using the version serial (part of that test
 * is not visible in this listing); a header marked NONEXISTENT is treated
 * specially.  Headers matching matchtype / sigmatchtype are remembered
 * and, if the main rdataset was found, bound with bind_rdataset().
 *
 * Returns ISC_R_NOTFOUND or ISC_R_SUCCESS.
 */
5160 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5161 dns_rdatatype_t type, dns_rdatatype_t covers,
5162 isc_stdtime_t now, dns_rdataset_t *rdataset,
5163 dns_rdataset_t *sigrdataset)
5165 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5166 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5167 rdatasetheader_t *header, *header_next, *found, *foundsig;
5168 rbtdb_serial_t serial;
5169 rbtdb_version_t *rbtversion = version;
5170 isc_boolean_t close_version = ISC_FALSE;
5171 rbtdb_rdatatype_t matchtype, sigmatchtype;
5173 REQUIRE(VALID_RBTDB(rbtdb));
5174 REQUIRE(type != dns_rdatatype_any);
/* No version supplied: borrow the current one and remember to close it. */
5176 if (rbtversion == NULL) {
5177 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5178 close_version = ISC_TRUE;
5180 serial = rbtversion->serial;
5183 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5184 isc_rwlocktype_read);
5188 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5190 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
/* header_next is saved because 'header' is moved down the chain below. */
5194 for (header = rbtnode->data; header != NULL; header = header_next) {
5195 header_next = header->next;
5197 if (header->serial <= serial &&
5200 * Is this a "this rdataset doesn't
5203 if (NONEXISTENT(header))
5207 header = header->down;
5208 } while (header != NULL);
5209 if (header != NULL) {
5211 * We have an active, extant rdataset. If it's a
5212 * type we're looking for, remember it.
5214 if (header->type == matchtype) {
5216 if (foundsig != NULL)
5218 } else if (header->type == sigmatchtype) {
5225 if (found != NULL) {
5226 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5227 if (foundsig != NULL)
5228 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5232 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5233 isc_rwlocktype_read);
5236 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5240 return (ISC_R_NOTFOUND);
5242 return (ISC_R_SUCCESS);
/*
 * cache_findrdataset --
 *   Look up, at node, the rdataset identified by type and covers, taking
 *   TTL expiry and negative cache entries into account.
 *
 *   The header list of the node is walked under the node lock, which is
 *   first taken in read mode.  A header whose rdh_ttl is <= now is not
 *   matched; if it is also <= now - RBTDB_VIRTUAL and write access is
 *   already held or NODE_TRYUPGRADE() succeeds, the header is flagged
 *   RDATASET_ATTR_STALE.  Other headers for which EXISTS() holds are
 *   compared against:
 *     - RBTDB_RDATATYPE_VALUE(type, covers)            (wanted type)
 *     - RBTDB_RDATATYPE_NCACHEANY or
 *       RBTDB_RDATATYPE_VALUE(0, type)                 (negative entries)
 *     - RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type) (signature)
 *   A header that was found is bound to rdataset with bind_rdataset(),
 *   and a signature header that was found is bound with a second call.
 *   The lock is released with whatever mode locktype holds at that point.
 *
 * Requires:
 *   db is a valid rbtdb (VALID_RBTDB); type is not dns_rdatatype_any.
 *
 * Result codes visible in this function:
 *   ISC_R_SUCCESS (initial value of result), ISC_R_NOTFOUND, and for a
 *   header where NEGATIVE() holds either DNS_R_NCACHENXDOMAIN (when
 *   NXDOMAIN() holds) or DNS_R_NCACHENXRRSET.
 *
 * NOTE(review): the condition guarding isc_stdtime_get(), the assignments
 * to found/foundsig and the final return are not present in this listing.
 */
5246 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5247 dns_rdatatype_t type, dns_rdatatype_t covers,
5248 isc_stdtime_t now, dns_rdataset_t *rdataset,
5249 dns_rdataset_t *sigrdataset)
5251 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5252 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5253 rdatasetheader_t *header, *header_next, *found, *foundsig;
5254 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5255 isc_result_t result;
5257 isc_rwlocktype_t locktype;
5259 REQUIRE(VALID_RBTDB(rbtdb));
5260 REQUIRE(type != dns_rdatatype_any);
5264 result = ISC_R_SUCCESS;
5267 isc_stdtime_get(&now);
/* Start with a read lock; locktype tracks a later upgrade to write mode. */
5269 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5270 locktype = isc_rwlocktype_read;
5271 NODE_LOCK(lock, locktype);
5275 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5276 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5278 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5282 for (header = rbtnode->data; header != NULL; header = header_next) {
5283 header_next = header->next;
5284 if (header->rdh_ttl <= now) {
5285 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5286 (locktype == isc_rwlocktype_write ||
5287 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5289 * We update the node's status only when we
5290 * can get write access.
5292 locktype = isc_rwlocktype_write;
5295 * We don't check if refcurrent(rbtnode) == 0
5296 * and try to free like we do in cache_find(),
5297 * because refcurrent(rbtnode) must be
5298 * non-zero. This is so because 'node' is an
5299 * argument to the function.
5301 header->attributes |= RDATASET_ATTR_STALE;
5304 } else if (EXISTS(header)) {
5305 if (header->type == matchtype)
5307 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5308 header->type == negtype)
5310 else if (header->type == sigmatchtype)
5314 if (found != NULL) {
5315 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5316 if (foundsig != NULL)
5317 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5321 NODE_UNLOCK(lock, locktype);
5324 return (ISC_R_NOTFOUND);
5326 if (NEGATIVE(found)) {
5328 * We found a negative cache entry.
5330 if (NXDOMAIN(found))
5331 result = DNS_R_NCACHENXDOMAIN;
5333 result = DNS_R_NCACHENXRRSET;
/*
 * allrdatasets --
 *   Create an rdataset iterator for node and store it in *iteratorp.
 *
 *   An rbtdb_rdatasetiter_t is allocated from rbtdb->common.mctx.  For a
 *   database without DNS_DBATTR_CACHE the version is looked up when it
 *   is NULL and a reference is added to rbtversion->references.  The
 *   common iterator fields (magic, methods, db, node, version, now) are
 *   filled in, the node reference count is incremented while the node
 *   strong lock is held, and iterator->current starts as NULL.
 *
 * Requires:
 *   db is a valid rbtdb (VALID_RBTDB).
 *
 * Returns:
 *   ISC_R_SUCCESS, or ISC_R_NOMEMORY when the allocation fails.
 *
 * NOTE(review): the branch structure around isc_stdtime_get() is not
 * present in this listing.
 */
5340 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5341 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5343 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5344 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5345 rbtdb_version_t *rbtversion = version;
5346 rbtdb_rdatasetiter_t *iterator;
5349 REQUIRE(VALID_RBTDB(rbtdb));
5351 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5352 if (iterator == NULL)
5353 return (ISC_R_NOMEMORY);
/* Zone (non-cache) databases pin the version the iterator will use. */
5355 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5357 if (rbtversion == NULL)
5359 (dns_dbversion_t **) (void *)(&rbtversion));
5363 isc_refcount_increment(&rbtversion->references,
5369 isc_stdtime_get(&now);
5373 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5374 iterator->common.methods = &rdatasetiter_methods;
5375 iterator->common.db = db;
5376 iterator->common.node = node;
5377 iterator->common.version = (dns_dbversion_t *)rbtversion;
5378 iterator->common.now = now;
/* The node reference is taken while the node strong lock is held. */
5380 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5382 dns_rbtnode_refincrement(rbtnode, &refs);
5385 iterator->current = NULL;
5387 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5389 *iteratorp = (dns_rdatasetiter_t *)iterator;
5391 return (ISC_R_SUCCESS);
/*
 * cname_and_other_data --
 *   Scan the header list of node for a CNAME rdataset and for other data
 *   that are active in the version identified by serial.
 *
 *   Headers of type dns_rdatatype_cname are checked on one path; all
 *   other headers are checked on the other path unless their base type
 *   (RBTDB_RDATATYPE_BASE) is KEY, SIG, NSEC or RRSIG.  On both paths the
 *   down chain is followed, comparing header->serial with serial and
 *   testing NONEXISTENT().  The final test combines the two flags
 *   (cname && other_data).
 *
 *   The caller must hold the node lock.
 *
 * NOTE(review): the return statements are not present in this listing.
 */
5394 static isc_boolean_t
5395 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5396 rdatasetheader_t *header, *header_next;
5397 isc_boolean_t cname, other_data;
5398 dns_rdatatype_t rdtype;
5401 * The caller must hold the node lock.
5405 * Look for CNAME and "other data" rdatasets active in our version.
5408 other_data = ISC_FALSE;
5409 for (header = node->data; header != NULL; header = header_next) {
5410 header_next = header->next;
5411 if (header->type == dns_rdatatype_cname) {
5413 * Look for an active extant CNAME.
5416 if (header->serial <= serial &&
5419 * Is this a "this rdataset doesn't
5422 if (NONEXISTENT(header))
5426 header = header->down;
5427 } while (header != NULL);
5432 * Look for active extant "other data".
5434 * "Other data" is any rdataset whose type is not
5435 * KEY, NSEC, SIG or RRSIG.
5437 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5438 if (rdtype != dns_rdatatype_key &&
5439 rdtype != dns_rdatatype_sig &&
5440 rdtype != dns_rdatatype_nsec &&
5441 rdtype != dns_rdatatype_rrsig) {
5443 * Is it active and extant?
5446 if (header->serial <= serial &&
5449 * Is this a "this rdataset
5450 * doesn't exist" record?
5452 if (NONEXISTENT(header))
5456 header = header->down;
5457 } while (header != NULL);
5459 other_data = ISC_TRUE;
5464 if (cname && other_data)
/*
 * resign_insert --
 *   Insert newheader into the heap rbtdb->heaps[idx] with
 *   isc_heap_insert().
 *
 *   The INSIST()s document the preconditions: the database is not a
 *   cache, the header is not yet in a heap (heap_index == 0) and it is
 *   not linked on a list.
 *
 * NOTE(review): the return statement is not present in this listing;
 * presumably the isc_heap_insert() result is returned.
 */
5471 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5472 isc_result_t result;
5474 INSIST(!IS_CACHE(rbtdb));
5475 INSIST(newheader->heap_index == 0);
5476 INSIST(!ISC_LINK_LINKED(newheader, link));
5478 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * add --
 *   Add the rdataset header newheader to rbtnode.  The caller must hold
 *   the node lock.
 *
 * Parameters (as used by the visible code):
 *   rbtversion     NULL selects the cache handling below (trust
 *                  comparison, expiry of older data); non-NULL selects
 *                  versioned handling (changed records, serial checks,
 *                  CNAME-and-other-data check).
 *   newheader      Header to add.  It is released with free_rdataset()
 *                  on the early-return paths shown below, and replaced
 *                  by the merged slab after a successful merge.
 *   options        DNS_DBADD_MERGE    requires rbtversion != NULL.
 *                  DNS_DBADD_FORCE    uses dns_trust_ultimate as the
 *                                     trust of the new data.
 *                  DNS_DBADD_EXACT    passes DNS_RDATASLAB_EXACT to
 *                                     dns_rdataslab_merge().
 *                  DNS_DBADD_EXACTTTL yields DNS_R_NOTEXACT when the
 *                                     TTLs differ.
 *   loading        When true no changed record is created, and a
 *                  replaced header is freed at once.
 *   addedrdataset  If not NULL, bound with bind_rdataset() to the header
 *                  that ends up representing the data.
 *   now            Time used for the TTL comparisons.
 *
 * Result codes visible in this function:
 *   ISC_R_SUCCESS, ISC_R_NOMEMORY (add_changed() failed),
 *   DNS_R_UNCHANGED, DNS_R_CNAMEANDOTHER, and DNS_R_NOTEXACT as an
 *   intermediate merge result.
 *
 * NOTE(review): many original lines (braces, else branches, goto/label
 * lines, loop conditions, some call arguments) are not present in this
 * listing, so the exact control flow cannot be confirmed from here.
 */
5483 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5484 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5485 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5487 rbtdb_changed_t *changed = NULL;
5488 rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5489 unsigned char *merged;
5490 isc_result_t result;
5491 isc_boolean_t header_nx;
5492 isc_boolean_t newheader_nx;
5493 isc_boolean_t merge;
5494 dns_rdatatype_t rdtype, covers;
5495 rbtdb_rdatatype_t negtype, sigtype;
5500 * Add an rdatasetheader_t to a node.
5504 * Caller must be holding the node lock.
5507 if ((options & DNS_DBADD_MERGE) != 0) {
5508 REQUIRE(rbtversion != NULL);
/* trust is the value compared against the trust of existing headers below. */
5513 if ((options & DNS_DBADD_FORCE) != 0)
5514 trust = dns_trust_ultimate;
5516 trust = newheader->trust;
5518 if (rbtversion != NULL && !loading) {
5520 * We always add a changed record, even if no changes end up
5521 * being made to this node, because it's harmless and
5522 * simplifies the code.
5524 changed = add_changed(rbtdb, rbtversion, rbtnode);
5525 if (changed == NULL) {
5526 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5527 return (ISC_R_NOMEMORY);
5531 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5532 topheader_prev = NULL;
5535 if (rbtversion == NULL && !newheader_nx) {
5536 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5537 if (NEGATIVE(newheader)) {
5539 * We're adding a negative cache entry.
5541 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5542 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5544 for (topheader = rbtnode->data;
5546 topheader = topheader->next) {
5548 * If we're adding an negative cache entry
5549 * which covers all types (NXDOMAIN,
5550 * NODATA(QTYPE=ANY)).
5552 * We make all other data stale so that the
5553 * only rdataset that can be found at this
5554 * node is the negative cache entry.
5556 * Otherwise look for any RRSIGs of the
5557 * given type so they can be marked stale
5560 if (covers == dns_rdatatype_any) {
5561 set_ttl(rbtdb, topheader, 0);
5562 topheader->attributes |=
5563 RDATASET_ATTR_STALE;
5565 } else if (topheader->type == sigtype)
5566 sigheader = topheader;
5568 if (covers == dns_rdatatype_any)
5570 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5573 * We're adding something that isn't a
5574 * negative cache entry. Look for an extant
5575 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5578 for (topheader = rbtnode->data;
5580 topheader = topheader->next) {
5581 if (topheader->type ==
5582 RBTDB_RDATATYPE_NCACHEANY)
5585 if (topheader != NULL && EXISTS(topheader) &&
5586 topheader->rdh_ttl > now) {
5590 if (trust < topheader->trust) {
5592 * The NXDOMAIN/NODATA(QTYPE=ANY)
5595 free_rdataset(rbtdb,
5598 if (addedrdataset != NULL)
5599 bind_rdataset(rbtdb, rbtnode,
5602 return (DNS_R_UNCHANGED);
5605 * The new rdataset is better. Expire the
5606 * NXDOMAIN/NODATA(QTYPE=ANY).
5608 set_ttl(rbtdb, topheader, 0);
5609 topheader->attributes |= RDATASET_ATTR_STALE;
5614 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5618 for (topheader = rbtnode->data;
5620 topheader = topheader->next) {
5621 if (topheader->type == newheader->type ||
5622 topheader->type == negtype)
5624 topheader_prev = topheader;
5629 * If header isn't NULL, we've found the right type. There may be
5630 * IGNORE rdatasets between the top of the chain and the first real
5631 * data. We skip over them.
5634 while (header != NULL && IGNORE(header))
5635 header = header->down;
5636 if (header != NULL) {
5637 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5640 * Deleting an already non-existent rdataset has no effect.
5642 if (header_nx && newheader_nx) {
5643 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5644 return (DNS_R_UNCHANGED);
5648 * Trying to add an rdataset with lower trust to a cache DB
5649 * has no effect, provided that the cache data isn't stale.
5651 if (rbtversion == NULL && trust < header->trust &&
5652 (header->rdh_ttl > now || header_nx)) {
5653 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5654 if (addedrdataset != NULL)
5655 bind_rdataset(rbtdb, rbtnode, header, now,
5657 return (DNS_R_UNCHANGED);
5661 * Don't merge if a nonexistent rdataset is involved.
5663 if (merge && (header_nx || newheader_nx))
5667 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5668 * that is the union of 'newheader' and 'header'.
5671 unsigned int flags = 0;
5672 INSIST(rbtversion->serial >= header->serial);
5674 result = ISC_R_SUCCESS;
5676 if ((options & DNS_DBADD_EXACT) != 0)
5677 flags |= DNS_RDATASLAB_EXACT;
5678 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5679 newheader->rdh_ttl != header->rdh_ttl)
5680 result = DNS_R_NOTEXACT;
5681 else if (newheader->rdh_ttl != header->rdh_ttl)
5682 flags |= DNS_RDATASLAB_FORCE;
5683 if (result == ISC_R_SUCCESS)
5684 result = dns_rdataslab_merge(
5685 (unsigned char *)header,
5686 (unsigned char *)newheader,
5687 (unsigned int)(sizeof(*newheader)),
5689 rbtdb->common.rdclass,
5690 (dns_rdatatype_t)header->type,
5692 if (result == ISC_R_SUCCESS) {
5694 * If 'header' has the same serial number as
5695 * we do, we could clean it up now if we knew
5696 * that our caller had no references to it.
5697 * We don't know this, however, so we leave it
5698 * alone. It will get cleaned up when
5699 * clean_zone_node() runs.
5701 free_rdataset(rbtdb, rbtdb->common.mctx,
5703 newheader = (rdatasetheader_t *)merged;
5704 if (loading && RESIGN(newheader) &&
5706 header->resign < newheader->resign)
5707 newheader->resign = header->resign;
5709 free_rdataset(rbtdb, rbtdb->common.mctx,
5715 * Don't replace existing NS, A and AAAA RRsets
5716 * in the cache if they are already exist. This
5717 * prevents named being locked to old servers.
5718 * Don't lower trust of existing record if the
5721 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5722 header->type == dns_rdatatype_ns &&
5723 !header_nx && !newheader_nx &&
5724 header->trust >= newheader->trust &&
5725 dns_rdataslab_equalx((unsigned char *)header,
5726 (unsigned char *)newheader,
5727 (unsigned int)(sizeof(*newheader)),
5728 rbtdb->common.rdclass,
5729 (dns_rdatatype_t)header->type)) {
5731 * Honour the new ttl if it is less than the
5734 if (header->rdh_ttl > newheader->rdh_ttl)
5735 set_ttl(rbtdb, header, newheader->rdh_ttl);
5736 if (header->noqname == NULL &&
5737 newheader->noqname != NULL) {
5738 header->noqname = newheader->noqname;
5739 newheader->noqname = NULL;
5741 if (header->closest == NULL &&
5742 newheader->closest != NULL) {
5743 header->closest = newheader->closest;
5744 newheader->closest = NULL;
5746 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5747 if (addedrdataset != NULL)
5748 bind_rdataset(rbtdb, rbtnode, header, now,
5750 return (ISC_R_SUCCESS);
5752 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5753 (header->type == dns_rdatatype_a ||
5754 header->type == dns_rdatatype_aaaa) &&
5755 !header_nx && !newheader_nx &&
5756 header->trust >= newheader->trust &&
5757 dns_rdataslab_equal((unsigned char *)header,
5758 (unsigned char *)newheader,
5759 (unsigned int)(sizeof(*newheader)))) {
5761 * Honour the new ttl if it is less than the
5764 if (header->rdh_ttl > newheader->rdh_ttl)
5765 set_ttl(rbtdb, header, newheader->rdh_ttl);
5766 if (header->noqname == NULL &&
5767 newheader->noqname != NULL) {
5768 header->noqname = newheader->noqname;
5769 newheader->noqname = NULL;
5771 if (header->closest == NULL &&
5772 newheader->closest != NULL) {
5773 header->closest = newheader->closest;
5774 newheader->closest = NULL;
5776 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5777 if (addedrdataset != NULL)
5778 bind_rdataset(rbtdb, rbtnode, header, now,
5780 return (ISC_R_SUCCESS);
5782 INSIST(rbtversion == NULL ||
5783 rbtversion->serial >= topheader->serial);
5784 if (topheader_prev != NULL)
5785 topheader_prev->next = newheader;
5787 rbtnode->data = newheader;
5788 newheader->next = topheader->next;
5791 * There are no other references to 'header' when
5792 * loading, so we MAY clean up 'header' now.
5793 * Since we don't generate changed records when
5794 * loading, we MUST clean up 'header' now.
5796 newheader->down = NULL;
5797 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5799 newheader->down = topheader;
5800 topheader->next = newheader;
5802 if (changed != NULL)
5803 changed->dirty = ISC_TRUE;
5804 if (rbtversion == NULL) {
5805 set_ttl(rbtdb, header, 0);
5806 header->attributes |= RDATASET_ATTR_STALE;
5807 if (sigheader != NULL) {
5808 set_ttl(rbtdb, sigheader, 0);
5809 sigheader->attributes |=
5810 RDATASET_ATTR_STALE;
5813 idx = newheader->node->locknum;
5814 if (IS_CACHE(rbtdb)) {
5815 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5818 * XXXMLG We don't check the return value
5819 * here. If it fails, we will not do TTL
5820 * based expiry on this node. However, we
5821 * will do it on the LRU side, so memory
5822 * will not leak... for long.
5824 isc_heap_insert(rbtdb->heaps[idx], newheader);
5825 } else if (RESIGN(newheader))
5826 resign_insert(rbtdb, idx, newheader);
5830 * No non-IGNORED rdatasets of the given type exist at
5835 * If we're trying to delete the type, don't bother.
5838 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5839 return (DNS_R_UNCHANGED);
5842 if (topheader != NULL) {
5844 * We have an list of rdatasets of the given type,
5845 * but they're all marked IGNORE. We simply insert
5846 * the new rdataset at the head of the list.
5848 * Ignored rdatasets cannot occur during loading, so
5852 INSIST(rbtversion == NULL ||
5853 rbtversion->serial >= topheader->serial);
5854 if (topheader_prev != NULL)
5855 topheader_prev->next = newheader;
5857 rbtnode->data = newheader;
5858 newheader->next = topheader->next;
5859 newheader->down = topheader;
5860 topheader->next = newheader;
5862 if (changed != NULL)
5863 changed->dirty = ISC_TRUE;
5866 * No rdatasets of the given type exist at the node.
5868 newheader->next = rbtnode->data;
5869 newheader->down = NULL;
5870 rbtnode->data = newheader;
5872 idx = newheader->node->locknum;
5873 if (IS_CACHE(rbtdb)) {
5874 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5876 isc_heap_insert(rbtdb->heaps[idx], newheader);
5877 } else if (RESIGN(newheader)) {
5878 resign_insert(rbtdb, idx, newheader);
5883 * Check if the node now contains CNAME and other data.
5885 if (rbtversion != NULL &&
5886 cname_and_other_data(rbtnode, rbtversion->serial))
5887 return (DNS_R_CNAMEANDOTHER);
5889 if (addedrdataset != NULL)
5890 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5892 return (ISC_R_SUCCESS);
/*
 * delegating_type --
 *   Classify type as a delegation type for this database.
 *
 *   For a cache database only dns_rdatatype_dname is tested.  Otherwise
 *   the test is DNAME, or NS at a node other than rbtdb->origin_node or
 *   in a database for which IS_STUB() holds.
 *
 * NOTE(review): the return statements are not present in this listing;
 * presumably a matching test yields ISC_TRUE.
 */
5895 static inline isc_boolean_t
5896 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5897 rbtdb_rdatatype_t type)
5899 if (IS_CACHE(rbtdb)) {
5900 if (type == dns_rdatatype_dname)
5904 } else if (type == dns_rdatatype_dname ||
5905 (type == dns_rdatatype_ns &&
5906 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * addnoqname --
 *   Attach the proof returned by dns_rdataset_getnoqname() for rdataset
 *   to newheader->noqname.
 *
 *   The proof is fetched into name, neg and negsig (failure of that call
 *   is fatal via RUNTIME_CHECK).  A struct noqname is allocated from the
 *   database memory context, the name is duplicated with dns_name_dup()
 *   and neg/negsig are converted with dns_rdataslab_fromrdataset(); the
 *   resulting slab bases are stored in noqname->neg and noqname->negsig.
 *   neg and negsig are disassociated on both the success path and the
 *   trailing cleanup statements.
 *
 * Returns:
 *   ISC_R_SUCCESS, with newheader->noqname set.  On failure result holds
 *   ISC_R_NOMEMORY or the failing call result, and free_noqname() is
 *   called on the partially built structure.
 *
 * NOTE(review): the goto/label lines and the final return are not present
 * in this listing.  The allocation-failure path appears to reach
 * free_noqname() with noqname == NULL; confirm that free_noqname()
 * tolerates that.
 */
5911 static inline isc_result_t
5912 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5913 dns_rdataset_t *rdataset)
5915 struct noqname *noqname;
5916 isc_mem_t *mctx = rbtdb->common.mctx;
5918 dns_rdataset_t neg, negsig;
5919 isc_result_t result;
5922 dns_name_init(&name, NULL);
5923 dns_rdataset_init(&neg);
5924 dns_rdataset_init(&negsig);
5926 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5927 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5929 noqname = isc_mem_get(mctx, sizeof(*noqname));
5930 if (noqname == NULL) {
5931 result = ISC_R_NOMEMORY;
/* Start from an empty structure so a partial build can be torn down. */
5934 dns_name_init(&noqname->name, NULL);
5935 noqname->neg = NULL;
5936 noqname->negsig = NULL;
5937 noqname->type = neg.type;
5938 result = dns_name_dup(&name, mctx, &noqname->name);
5939 if (result != ISC_R_SUCCESS)
5941 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5942 if (result != ISC_R_SUCCESS)
5944 noqname->neg = r.base;
5945 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5946 if (result != ISC_R_SUCCESS)
5948 noqname->negsig = r.base;
5949 dns_rdataset_disassociate(&neg);
5950 dns_rdataset_disassociate(&negsig);
5951 newheader->noqname = noqname;
5952 return (ISC_R_SUCCESS);
/* Presumably the failure path (its label is not shown in this listing). */
5955 dns_rdataset_disassociate(&neg);
5956 dns_rdataset_disassociate(&negsig);
5957 free_noqname(mctx, &noqname);
/*
 * addclosest --
 *   Attach the proof returned by dns_rdataset_getclosest() for rdataset
 *   to newheader->closest.
 *
 *   Mirrors addnoqname(): the proof is fetched into name, neg and negsig
 *   (failure of that call is fatal via RUNTIME_CHECK), a struct noqname
 *   is allocated, the name is duplicated with dns_name_dup() and
 *   neg/negsig are converted with dns_rdataslab_fromrdataset(); the slab
 *   bases are stored in closest->neg and closest->negsig.  neg and negsig
 *   are disassociated on both the success path and the trailing cleanup
 *   statements.
 *
 * Returns:
 *   ISC_R_SUCCESS, with newheader->closest set.  On failure result holds
 *   ISC_R_NOMEMORY or the failing call result, and free_noqname() is
 *   called on the partially built structure.
 *
 * NOTE(review): the goto/label lines and the final return are not present
 * in this listing.  The allocation-failure path appears to reach
 * free_noqname() with closest == NULL; confirm that free_noqname()
 * tolerates that.
 */
5961 static inline isc_result_t
5962 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5963 dns_rdataset_t *rdataset)
5965 struct noqname *closest;
5966 isc_mem_t *mctx = rbtdb->common.mctx;
5968 dns_rdataset_t neg, negsig;
5969 isc_result_t result;
5972 dns_name_init(&name, NULL);
5973 dns_rdataset_init(&neg);
5974 dns_rdataset_init(&negsig);
5976 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5977 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5979 closest = isc_mem_get(mctx, sizeof(*closest));
5980 if (closest == NULL) {
5981 result = ISC_R_NOMEMORY;
/* Start from an empty structure so a partial build can be torn down. */
5984 dns_name_init(&closest->name, NULL);
5985 closest->neg = NULL;
5986 closest->negsig = NULL;
5987 closest->type = neg.type;
5988 result = dns_name_dup(&name, mctx, &closest->name);
5989 if (result != ISC_R_SUCCESS)
5991 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5992 if (result != ISC_R_SUCCESS)
5994 closest->neg = r.base;
5995 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5996 if (result != ISC_R_SUCCESS)
5998 closest->negsig = r.base;
5999 dns_rdataset_disassociate(&neg);
6000 dns_rdataset_disassociate(&negsig);
6001 newheader->closest = closest;
6002 return (ISC_R_SUCCESS);
/* Presumably the failure path (its label is not shown in this listing). */
6005 dns_rdataset_disassociate(&neg);
6006 dns_rdataset_disassociate(&negsig);
6007 free_noqname(mctx, &closest);
/*
 * Forward declaration: addrdataset() and subtractrdataset() compare
 * rbtdb->common.methods against the address of this table.
 */
6011 static dns_dbmethods_t zone_methods;
/*
 * addrdataset --
 *   Convert rdataset into a slab-backed rdataset header and add it to
 *   node through add().
 *
 *   Visible steps:
 *     - For a database using zone_methods, REQUIRE that NSEC3 data
 *       (type or covers) and the nsec3 flag of the node agree.
 *     - Build the slab with dns_rdataslab_fromrdataset(), reserving
 *       sizeof(rdatasetheader_t), and initialise the header: TTL is
 *       rdataset->ttl + now, type is the combined type/covers value,
 *       trust comes from rdataset, last_used is now.
 *     - With a version: serial is the version serial and the RESIGN
 *       attribute/time are copied.  The other visible assignments set
 *       serial to 1, resign to 0, copy the NEGATIVE, NXDOMAIN and OPTOUT
 *       attributes and attach NOQNAME/CLOSEST proofs via addnoqname()
 *       and addclosest(), freeing the header if either fails.
 *     - Take the tree write lock when the type is a delegating type or
 *       the cache memory context is over its limit; in the latter case
 *       call overmem_purge().
 *     - Under the node write lock: update rrset statistics when enabled;
 *       for a cache run cleanup_dead_nodes() and expire the first heap
 *       element when its TTL is <= now - RBTDB_VIRTUAL; drop the tree
 *       lock early when it was held only for cleaning; call add(); set
 *       find_callback on the node when add() succeeded for a delegating
 *       type.
 *     - After unlocking, call iszonesecure() when the add succeeded,
 *       version is NULL and the database is not a cache.
 *
 * Requires:
 *   db is a valid rbtdb (VALID_RBTDB).
 *
 * NOTE(review): braces, else lines, guard conditions and the return
 * statements are not present in this listing.  rdataset->ttl + now is
 * computed without a visible overflow check.
 */
6014 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6015 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6016 dns_rdataset_t *addedrdataset)
6018 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6019 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6020 rbtdb_version_t *rbtversion = version;
6021 isc_region_t region;
6022 rdatasetheader_t *newheader;
6023 rdatasetheader_t *header;
6024 isc_result_t result;
6025 isc_boolean_t delegating;
6026 isc_boolean_t tree_locked = ISC_FALSE;
6027 isc_boolean_t cache_is_overmem = ISC_FALSE;
6029 REQUIRE(VALID_RBTDB(rbtdb));
6031 if (rbtdb->common.methods == &zone_methods)
6032 REQUIRE(((rbtnode->nsec3 &&
6033 (rdataset->type == dns_rdatatype_nsec3 ||
6034 rdataset->covers == dns_rdatatype_nsec3)) ||
6036 rdataset->type != dns_rdatatype_nsec3 &&
6037 rdataset->covers != dns_rdatatype_nsec3)));
6039 if (rbtversion == NULL) {
6041 isc_stdtime_get(&now);
/* The slab is built with room reserved in front for the header itself. */
6045 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6047 sizeof(rdatasetheader_t));
6048 if (result != ISC_R_SUCCESS)
6051 newheader = (rdatasetheader_t *)region.base;
6052 init_rdataset(rbtdb, newheader);
6053 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6054 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6056 newheader->attributes = 0;
6057 newheader->noqname = NULL;
6058 newheader->closest = NULL;
6059 newheader->count = init_count++;
6060 newheader->trust = rdataset->trust;
6061 newheader->additional_auth = NULL;
6062 newheader->additional_glue = NULL;
6063 newheader->last_used = now;
6064 newheader->node = rbtnode;
6065 if (rbtversion != NULL) {
6066 newheader->serial = rbtversion->serial;
6069 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6070 newheader->attributes |= RDATASET_ATTR_RESIGN;
6071 newheader->resign = rdataset->resign;
6073 newheader->resign = 0;
6075 newheader->serial = 1;
6076 newheader->resign = 0;
6077 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6078 newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6079 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6080 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6081 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6082 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6083 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6084 result = addnoqname(rbtdb, newheader, rdataset);
6085 if (result != ISC_R_SUCCESS) {
6086 free_rdataset(rbtdb, rbtdb->common.mctx,
6091 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6092 result = addclosest(rbtdb, newheader, rdataset);
6093 if (result != ISC_R_SUCCESS) {
6094 free_rdataset(rbtdb, rbtdb->common.mctx,
6102 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6103 * just DNAME for the cache), then we need to set the callback bit
6106 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6107 delegating = ISC_TRUE;
6109 delegating = ISC_FALSE;
6112 * If we're adding a delegation type or the DB is a cache in an overmem
6113 * state, hold an exclusive lock on the tree. In the latter case
6114 * the lock does not necessarily have to be acquired but it will help
6115 * purge stale entries more effectively.
6117 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6118 cache_is_overmem = ISC_TRUE;
6119 if (delegating || cache_is_overmem) {
6120 tree_locked = ISC_TRUE;
6121 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6124 if (cache_is_overmem)
6125 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6127 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6128 isc_rwlocktype_write);
6130 if (rbtdb->rrsetstats != NULL) {
6131 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6132 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6135 if (IS_CACHE(rbtdb)) {
6137 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6139 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6140 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6141 expire_header(rbtdb, header, tree_locked);
6144 * If we've been holding a write lock on the tree just for
6145 * cleaning, we can release it now. However, we still need the
6148 if (tree_locked && !delegating) {
6149 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6150 tree_locked = ISC_FALSE;
6154 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6155 addedrdataset, now);
6156 if (result == ISC_R_SUCCESS && delegating)
6157 rbtnode->find_callback = 1;
6159 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6160 isc_rwlocktype_write);
6163 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6166 * Update the zone's secure status. If version is non-NULL
6167 * this is deferred until closeversion() is called.
6169 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6170 iszonesecure(db, version, rbtdb->origin_node);
/*
 * subtractrdataset --
 *   Remove the rdata in rdataset from the matching rdataset at node in
 *   version.
 *
 *   Visible steps:
 *     - For a database using zone_methods, REQUIRE that NSEC3 data
 *       (type or covers) and the nsec3 flag of the node agree.
 *     - Build a slab-backed header from rdataset (TTL is rdataset->ttl,
 *       serial is the version serial, trust 0, RESIGN copied).
 *     - Under the node write lock, record the change with add_changed();
 *       on failure free the header, unlock and return ISC_R_NOMEMORY.
 *     - Locate the header chain whose type equals the new header type
 *       and skip IGNORE headers on its down chain.
 *     - If an existing header is found: with DNS_DBSUB_EXACT a TTL
 *       mismatch gives DNS_R_NOTEXACT; otherwise call
 *       dns_rdataslab_subtract().  On success the result slab becomes
 *       the new header (serial reset to the version serial, additional
 *       info pointers cleared).  On DNS_R_NXRRSET a fresh header marked
 *       RDATASET_ATTR_NONEXISTENT is created instead (ISC_R_NOMEMORY if
 *       that allocation fails).  The new header is then linked in front
 *       of the old chain and the changed record is marked dirty.
 *     - If no existing header is found: free the new header and set the
 *       result to DNS_R_NOTEXACT or DNS_R_UNCHANGED (both assignments
 *       are visible; their branch structure is not).
 *     - On success bind the new header to newrdataset when not NULL.
 *     - After unlocking, call iszonesecure() when the result is success,
 *       version is NULL and the database is not a cache.
 *
 * Requires:
 *   db is a valid rbtdb (VALID_RBTDB).  rbtversion is dereferenced
 *   unconditionally, so version is presumably expected to be non-NULL.
 *
 * NOTE(review): braces, else lines, goto/label lines, some call arguments
 * and the final return are not present in this listing.
 */
6176 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6177 dns_rdataset_t *rdataset, unsigned int options,
6178 dns_rdataset_t *newrdataset)
6180 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6181 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6182 rbtdb_version_t *rbtversion = version;
6183 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6184 unsigned char *subresult;
6185 isc_region_t region;
6186 isc_result_t result;
6187 rbtdb_changed_t *changed;
6189 REQUIRE(VALID_RBTDB(rbtdb));
6191 if (rbtdb->common.methods == &zone_methods)
6192 REQUIRE(((rbtnode->nsec3 &&
6193 (rdataset->type == dns_rdatatype_nsec3 ||
6194 rdataset->covers == dns_rdatatype_nsec3)) ||
6196 rdataset->type != dns_rdatatype_nsec3 &&
6197 rdataset->covers != dns_rdatatype_nsec3)));
/* The slab is built with room reserved in front for the header itself. */
6199 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6201 sizeof(rdatasetheader_t));
6202 if (result != ISC_R_SUCCESS)
6204 newheader = (rdatasetheader_t *)region.base;
6205 init_rdataset(rbtdb, newheader);
6206 set_ttl(rbtdb, newheader, rdataset->ttl);
6207 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6209 newheader->attributes = 0;
6210 newheader->serial = rbtversion->serial;
6211 newheader->trust = 0;
6212 newheader->noqname = NULL;
6213 newheader->closest = NULL;
6214 newheader->count = init_count++;
6215 newheader->additional_auth = NULL;
6216 newheader->additional_glue = NULL;
6217 newheader->last_used = 0;
6218 newheader->node = rbtnode;
6219 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6220 newheader->attributes |= RDATASET_ATTR_RESIGN;
6221 newheader->resign = rdataset->resign;
6223 newheader->resign = 0;
6225 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6226 isc_rwlocktype_write);
6228 changed = add_changed(rbtdb, rbtversion, rbtnode);
6229 if (changed == NULL) {
6230 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6231 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6232 isc_rwlocktype_write);
6233 return (ISC_R_NOMEMORY);
6236 topheader_prev = NULL;
6237 for (topheader = rbtnode->data;
6239 topheader = topheader->next) {
6240 if (topheader->type == newheader->type)
6242 topheader_prev = topheader;
6245 * If header isn't NULL, we've found the right type. There may be
6246 * IGNORE rdatasets between the top of the chain and the first real
6247 * data. We skip over them.
6250 while (header != NULL && IGNORE(header))
6251 header = header->down;
6252 if (header != NULL && EXISTS(header)) {
6253 unsigned int flags = 0;
6255 result = ISC_R_SUCCESS;
6256 if ((options & DNS_DBSUB_EXACT) != 0) {
6257 flags |= DNS_RDATASLAB_EXACT;
6258 if (newheader->rdh_ttl != header->rdh_ttl)
6259 result = DNS_R_NOTEXACT;
6261 if (result == ISC_R_SUCCESS)
6262 result = dns_rdataslab_subtract(
6263 (unsigned char *)header,
6264 (unsigned char *)newheader,
6265 (unsigned int)(sizeof(*newheader)),
6267 rbtdb->common.rdclass,
6268 (dns_rdatatype_t)header->type,
6270 if (result == ISC_R_SUCCESS) {
6271 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6272 newheader = (rdatasetheader_t *)subresult;
6273 init_rdataset(rbtdb, newheader);
6275 * We have to set the serial since the rdataslab
6276 * subtraction routine copies the reserved portion of
6277 * header, not newheader.
6279 newheader->serial = rbtversion->serial;
6281 * XXXJT: dns_rdataslab_subtract() copied the pointers
6282 * to additional info. We need to clear these fields
6283 * to avoid having duplicated references.
6285 newheader->additional_auth = NULL;
6286 newheader->additional_glue = NULL;
6287 } else if (result == DNS_R_NXRRSET) {
6289 * This subtraction would remove all of the rdata;
6290 * add a nonexistent header instead.
6292 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6293 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6294 if (newheader == NULL) {
6295 result = ISC_R_NOMEMORY;
6298 set_ttl(rbtdb, newheader, 0);
6299 newheader->type = topheader->type;
6300 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6301 newheader->trust = 0;
6302 newheader->serial = rbtversion->serial;
6303 newheader->noqname = NULL;
6304 newheader->closest = NULL;
6305 newheader->count = 0;
6306 newheader->additional_auth = NULL;
6307 newheader->additional_glue = NULL;
6308 newheader->node = rbtnode;
6309 newheader->resign = 0;
6310 newheader->last_used = 0;
6312 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6317 * If we're here, we want to link newheader in front of
6320 INSIST(rbtversion->serial >= topheader->serial);
6321 if (topheader_prev != NULL)
6322 topheader_prev->next = newheader;
6324 rbtnode->data = newheader;
6325 newheader->next = topheader->next;
6326 newheader->down = topheader;
6327 topheader->next = newheader;
6329 changed->dirty = ISC_TRUE;
6332 * The rdataset doesn't exist, so we don't need to do anything
6333 * to satisfy the deletion request.
6335 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6336 if ((options & DNS_DBSUB_EXACT) != 0)
6337 result = DNS_R_NOTEXACT;
6339 result = DNS_R_UNCHANGED;
6342 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6343 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6346 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6347 isc_rwlocktype_write);
6350 * Update the zone's secure status. If version is non-NULL
6351 * this is deferred until closeversion() is called.
6353 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6354 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * deleterdataset --
 *   Delete the rdataset identified by type and covers at node by adding
 *   a header marked RDATASET_ATTR_NONEXISTENT.
 *
 *   The placeholder header has TTL 0, trust 0 and the version serial
 *   (0 is the other visible serial assignment, used without a version).
 *   It is passed to add() with DNS_DBADD_FORCE while the node write lock
 *   is held.  After unlocking, iszonesecure() is called when the add
 *   succeeded, version is NULL and the database is not a cache.
 *
 * Requires:
 *   db is a valid rbtdb (VALID_RBTDB).
 *
 * Returns:
 *   ISC_R_NOTIMPLEMENTED when type is dns_rdatatype_any, or when type is
 *   dns_rdatatype_rrsig and covers is 0; ISC_R_NOMEMORY when the header
 *   cannot be allocated.
 *
 * NOTE(review): the final return is not present in this listing;
 * presumably the add() result is returned.
 */
6360 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6361 dns_rdatatype_t type, dns_rdatatype_t covers)
6363 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6364 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6365 rbtdb_version_t *rbtversion = version;
6366 isc_result_t result;
6367 rdatasetheader_t *newheader;
6369 REQUIRE(VALID_RBTDB(rbtdb));
6371 if (type == dns_rdatatype_any)
6372 return (ISC_R_NOTIMPLEMENTED);
6373 if (type == dns_rdatatype_rrsig && covers == 0)
6374 return (ISC_R_NOTIMPLEMENTED);
/* Build a data-less header that marks the type as nonexistent. */
6376 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6377 if (newheader == NULL)
6378 return (ISC_R_NOMEMORY);
6379 set_ttl(rbtdb, newheader, 0);
6380 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6381 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6382 newheader->trust = 0;
6383 newheader->noqname = NULL;
6384 newheader->closest = NULL;
6385 newheader->additional_auth = NULL;
6386 newheader->additional_glue = NULL;
6387 if (rbtversion != NULL)
6388 newheader->serial = rbtversion->serial;
6390 newheader->serial = 0;
6391 newheader->count = 0;
6392 newheader->last_used = 0;
6393 newheader->node = rbtnode;
6395 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6396 isc_rwlocktype_write);
6398 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6399 ISC_FALSE, NULL, 0);
6401 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6402 isc_rwlocktype_write);
6405 * Update the zone's secure status. If version is non-NULL
6406 * this is deferred until closeversion() is called.
6408 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6409 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * loading_addrdataset --
 *   Add rdataset at name while a database is being loaded.  arg is the
 *   rbtdb_load_t load context.  No node locking is done here.
 *
 *   Visible steps:
 *     - Reject SOA at a name other than the origin of a non-cache
 *       database (DNS_R_NOTZONETOP).
 *     - For data that is not NSEC3 (type or covers), call
 *       add_empty_wildcards().
 *     - For a wildcard name, reject NS (DNS_R_INVALIDNS) and NSEC3
 *       (DNS_R_INVALIDNSEC3) and call add_wildcard_magic().
 *     - Add the node to rbtdb->nsec3 for NSEC3 data, otherwise to
 *       rbtdb->tree; ISC_R_EXISTS is tolerated.  When the result is not
 *       ISC_R_EXISTS the lock number of the node is computed (from the
 *       stored hash value with DNS_RBT_USEHASH, otherwise from
 *       dns_name_hash() of the node name).
 *     - Build a slab-backed header (TTL is rdataset->ttl + loadctx->now,
 *       serial 1, trust from rdataset, RESIGN copied) and pass it to
 *       add() with the current version, DNS_DBADD_MERGE and loading set
 *       to ISC_TRUE.
 *     - Set find_callback on the node when add() succeeded for a
 *       delegating type; map DNS_R_UNCHANGED to ISC_R_SUCCESS.
 *
 * NOTE(review): braces, else lines, some call arguments and the return
 * statements after failed calls are not present in this listing.
 */
6415 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6416 rbtdb_load_t *loadctx = arg;
6417 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6418 dns_rbtnode_t *node;
6419 isc_result_t result;
6420 isc_region_t region;
6421 rdatasetheader_t *newheader;
6424 * This routine does no node locking. See comments in
6425 * 'load' below for more information on loading and
6431 * SOA records are only allowed at top of zone.
6433 if (rdataset->type == dns_rdatatype_soa &&
6434 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6435 return (DNS_R_NOTZONETOP);
6437 if (rdataset->type != dns_rdatatype_nsec3 &&
6438 rdataset->covers != dns_rdatatype_nsec3)
6439 add_empty_wildcards(rbtdb, name);
6441 if (dns_name_iswildcard(name)) {
6443 * NS record owners cannot legally be wild cards.
6445 if (rdataset->type == dns_rdatatype_ns)
6446 return (DNS_R_INVALIDNS);
6448 * NSEC3 record owners cannot legally be wild cards.
6450 if (rdataset->type == dns_rdatatype_nsec3)
6451 return (DNS_R_INVALIDNSEC3);
6452 result = add_wildcard_magic(rbtdb, name);
6453 if (result != ISC_R_SUCCESS)
6458 if (rdataset->type == dns_rdatatype_nsec3 ||
6459 rdataset->covers == dns_rdatatype_nsec3) {
6460 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6461 if (result == ISC_R_SUCCESS)
6464 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6465 if (result == ISC_R_SUCCESS)
6468 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6470 if (result != ISC_R_EXISTS) {
6471 dns_name_t foundname;
6472 dns_name_init(&foundname, NULL);
6473 dns_rbt_namefromnode(node, &foundname);
6474 #ifdef DNS_RBT_USEHASH
6475 node->locknum = node->hashval % rbtdb->node_lock_count;
6477 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6478 rbtdb->node_lock_count;
6482 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6484 sizeof(rdatasetheader_t));
6485 if (result != ISC_R_SUCCESS)
6487 newheader = (rdatasetheader_t *)region.base;
6488 init_rdataset(rbtdb, newheader);
6489 set_ttl(rbtdb, newheader,
6490 rdataset->ttl + loadctx->now); /* XXX overflow check */
6491 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6493 newheader->attributes = 0;
6494 newheader->trust = rdataset->trust;
6495 newheader->serial = 1;
6496 newheader->noqname = NULL;
6497 newheader->closest = NULL;
6498 newheader->count = init_count++;
6499 newheader->additional_auth = NULL;
6500 newheader->additional_glue = NULL;
6501 newheader->last_used = 0;
6502 newheader->node = node;
6503 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6504 newheader->attributes |= RDATASET_ATTR_RESIGN;
6505 newheader->resign = rdataset->resign;
6507 newheader->resign = 0;
6509 result = add(rbtdb, node, rbtdb->current_version, newheader,
6510 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6511 if (result == ISC_R_SUCCESS &&
6512 delegating_type(rbtdb, node, rdataset->type))
6513 node->find_callback = 1;
6514 else if (result == DNS_R_UNCHANGED)
6515 result = ISC_R_SUCCESS;
/*
 * Begin loading the database.
 *
 * Allocates an rbtdb_load_t, records the database in it (and, for a cache,
 * the current time), marks the database RBTDB_ATTR_LOADING under the
 * database write lock, and returns loading_addrdataset through *addp.
 * 'dbloadp' presumably receives the load context -- the assignment is not
 * part of the visible code; confirm.
 *
 * Returns ISC_R_NOMEMORY if the load context cannot be allocated,
 * ISC_R_SUCCESS otherwise.
 */
6521 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6522 rbtdb_load_t *loadctx;
6525 rbtdb = (dns_rbtdb_t *)db;
6527 REQUIRE(VALID_RBTDB(rbtdb));
6529 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6530 if (loadctx == NULL)
6531 return (ISC_R_NOMEMORY);
6533 loadctx->rbtdb = rbtdb;
6534 if (IS_CACHE(rbtdb))
6535 isc_stdtime_get(&loadctx->now);
/* The LOADED/LOADING attribute bits are checked and updated under the lock. */
6539 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6541 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6543 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6545 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6547 *addp = loading_addrdataset;
6550 return (ISC_R_SUCCESS);
/*
 * Finish loading the database.
 *
 * Requires that the load context belongs to this database and that a load
 * is in progress and not yet complete.  Under the database write lock it
 * clears RBTDB_ATTR_LOADING and sets RBTDB_ATTR_LOADED.  For a non-cache
 * database it then re-evaluates the security status of the current version
 * with iszonesecure().  Finally the load context is returned to the memory
 * context.  Returns ISC_R_SUCCESS.
 */
6554 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6555 rbtdb_load_t *loadctx;
6556 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6558 REQUIRE(VALID_RBTDB(rbtdb));
6559 REQUIRE(dbloadp != NULL);
6561 REQUIRE(loadctx->rbtdb == rbtdb);
6563 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6565 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6566 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6568 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6569 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6571 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6574 * If there's a KEY rdataset at the zone origin containing a
6575 * zone key, we consider the zone secure.
6577 if (! IS_CACHE(rbtdb))
6578 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6582 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6584 return (ISC_R_SUCCESS);
/*
 * Dump 'version' of the database to 'filename' in 'masterformat'.
 *
 * Thin wrapper: validates the database and delegates to dns_master_dump2()
 * using the database's memory context and dns_master_style_default; the
 * callee's result is returned unchanged.
 */
6588 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6589 dns_masterformat_t masterformat) {
6592 rbtdb = (dns_rbtdb_t *)db;
6594 REQUIRE(VALID_RBTDB(rbtdb));
6596 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6597 &dns_master_style_default,
6598 filename, masterformat));
/*
 * Node-data deleter passed to dns_rbt_create() for both trees, with the
 * database as 'arg'.
 *
 * Walks a chain of rdataset headers through ->next and frees each with
 * free_rdataset(), holding the write lock of the lock bucket of the first
 * header's node for the whole walk.  'data' is presumably the head of that
 * chain (the node's data pointer) -- confirm against the tree code.
 */
6602 delete_callback(void *data, void *arg) {
6603 dns_rbtdb_t *rbtdb = arg;
6604 rdatasetheader_t *current, *next;
6605 unsigned int locknum;
6608 locknum = current->node->locknum;
6609 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6610 while (current != NULL) {
/* Save the successor before the current header is released. */
6611 next = current->next;
6612 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6615 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/*
 * Report whether the current version is marked dns_db_secure.
 * The 'secure' field is read while holding the tree lock for reading.
 */
6618 static isc_boolean_t
6619 issecure(dns_db_t *db) {
6621 isc_boolean_t secure;
6623 rbtdb = (dns_rbtdb_t *)db;
6625 REQUIRE(VALID_RBTDB(rbtdb));
6627 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6628 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6629 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Report whether the current version has any DNSSEC status at all, i.e.
 * its 'secure' field is anything other than dns_db_insecure.
 * The field is read while holding the tree lock for reading.
 */
6634 static isc_boolean_t
6635 isdnssec(dns_db_t *db) {
6637 isc_boolean_t dnssec;
6639 rbtdb = (dns_rbtdb_t *)db;
6641 REQUIRE(VALID_RBTDB(rbtdb));
6643 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6644 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6645 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Count the nodes of the main tree with dns_rbt_nodecount() while holding
 * the tree lock for reading.  Only rbtdb->tree is consulted here; the nsec3
 * tree is not included in the count.
 */
6651 nodecount(dns_db_t *db) {
6655 rbtdb = (dns_rbtdb_t *)db;
6657 REQUIRE(VALID_RBTDB(rbtdb));
6659 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6660 count = dns_rbt_nodecount(rbtdb->tree);
6661 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Change the task associated with the database.
 *
 * Under the database write lock, any task currently held in rbtdb->task is
 * detached and 'task' is attached in its place.
 */
6667 settask(dns_db_t *db, isc_task_t *task) {
6670 rbtdb = (dns_rbtdb_t *)db;
6672 REQUIRE(VALID_RBTDB(rbtdb));
6674 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6675 if (rbtdb->task != NULL)
6676 isc_task_detach(&rbtdb->task);
6678 isc_task_attach(task, &rbtdb->task);
6679 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Database method answering whether this database is persistent.
 * NOTE(review): presumably a constant answer that ignores 'db' -- confirm
 * against the function body.
 */
6682 static isc_boolean_t
6683 ispersistent(dns_db_t *db) {
/*
 * Return a referenced handle on the database's origin node.
 *
 * '*nodep' must be NULL on entry.  When the database has an origin node, a
 * new reference is taken on it under the node's strong lock and the node is
 * stored in '*nodep'.  When it has none, the database is asserted to be a
 * cache and the result is ISC_R_NOTFOUND.
 */
6689 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6690 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6691 dns_rbtnode_t *onode;
6692 isc_result_t result = ISC_R_SUCCESS;
6694 REQUIRE(VALID_RBTDB(rbtdb));
6695 REQUIRE(nodep != NULL && *nodep == NULL);
6697 /* Note that the access to origin_node doesn't require a DB lock */
6698 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6699 if (onode != NULL) {
6700 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6701 new_reference(rbtdb, onode);
6702 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6704 *nodep = rbtdb->origin_node;
6706 INSIST(IS_CACHE(rbtdb));
6707 result = ISC_R_NOTFOUND;
/*
 * Fetch the NSEC3 parameters recorded for a version.
 *
 * 'version' selects the version; NULL means the current version.  The
 * version is examined while holding the tree lock for reading.  If it has
 * NSEC3 parameters (havensec3), hash, flags, iterations, salt and salt
 * length are copied out and ISC_R_SUCCESS is returned; otherwise the result
 * stays ISC_R_NOTFOUND.
 *
 * 'salt', 'salt_length' and 'iterations' are written only when non-NULL.
 * When both 'salt' and 'salt_length' are given, *salt_length must be at
 * least the stored salt length on entry (REQUIRE).
 */
6714 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6715 isc_uint8_t *flags, isc_uint16_t *iterations,
6716 unsigned char *salt, size_t *salt_length)
6719 isc_result_t result = ISC_R_NOTFOUND;
6720 rbtdb_version_t *rbtversion = version;
6722 rbtdb = (dns_rbtdb_t *)db;
6724 REQUIRE(VALID_RBTDB(rbtdb));
6726 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6728 if (rbtversion == NULL)
6729 rbtversion = rbtdb->current_version;
6731 if (rbtversion->havensec3) {
6733 *hash = rbtversion->hash;
6734 if (salt != NULL && salt_length != NULL) {
6735 REQUIRE(*salt_length >= rbtversion->salt_length);
6736 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6738 if (salt_length != NULL)
6739 *salt_length = rbtversion->salt_length;
6740 if (iterations != NULL)
6741 *iterations = rbtversion->iterations;
6743 *flags = rbtversion->flags;
6744 result = ISC_R_SUCCESS;
6746 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Set the re-sign time of the stored header behind 'rdataset' and keep the
 * per-bucket re-sign heap in step with it.
 *
 * Only valid for non-cache databases.  The work is done under the write
 * lock of the header's node bucket.  A header that is already in the heap
 * (heap_index != 0) is removed or repositioned; a header that is not in the
 * heap and gets a non-zero time is flagged RDATASET_ATTR_RESIGN and inserted
 * with resign_insert(), whose result becomes the return value.
 */
6752 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6753 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6754 isc_stdtime_t oldresign;
6755 isc_result_t result = ISC_R_SUCCESS;
6756 rdatasetheader_t *header;
6758 REQUIRE(VALID_RBTDB(rbtdb));
6759 REQUIRE(!IS_CACHE(rbtdb));
6760 REQUIRE(rdataset != NULL);
6762 header = rdataset->private3;
6765 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6766 isc_rwlocktype_write);
/* Remember the previous time so the heap can be adjusted in the right direction. */
6768 oldresign = header->resign;
6769 header->resign = resign;
6770 if (header->heap_index != 0) {
6771 INSIST(RESIGN(header));
6773 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6774 header->heap_index);
6775 header->heap_index = 0;
6776 } else if (resign < oldresign)
6777 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6778 header->heap_index);
6780 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6781 header->heap_index);
6782 } else if (resign && header->heap_index == 0) {
6783 header->attributes |= RDATASET_ATTR_RESIGN;
6784 result = resign_insert(rbtdb, header->node->locknum, header);
6786 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6787 isc_rwlocktype_write);
/*
 * Find the rdataset that is due for re-signing first.
 *
 * With the tree lock held for reading, element 1 of every per-bucket heap
 * is examined and the header with the smallest 'resign' value (compared
 * with isc_serial_lt()) is selected.  The read lock of the bucket holding
 * the current best candidate is kept; it is released when a better
 * candidate is found, and finally after the winner has been bound to
 * 'rdataset'.  If 'foundname' is non-NULL it receives the winner's full
 * owner name.
 *
 * The result starts as ISC_R_NOTFOUND and becomes ISC_R_SUCCESS once a
 * header has been bound.
 */
6792 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6793 dns_name_t *foundname)
6795 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6796 rdatasetheader_t *header = NULL, *this;
6798 isc_result_t result = ISC_R_NOTFOUND;
6799 unsigned int locknum;
6801 REQUIRE(VALID_RBTDB(rbtdb));
6803 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6805 for (i = 0; i < rbtdb->node_lock_count; i++) {
6806 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6807 this = isc_heap_element(rbtdb->heaps[i], 1);
6809 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6810 isc_rwlocktype_read);
6815 else if (isc_serial_lt(this->resign, header->resign)) {
6816 locknum = header->node->locknum;
6817 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6818 isc_rwlocktype_read);
6821 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6822 isc_rwlocktype_read);
/* The winner's bucket is still read-locked here; bind, then release it. */
6828 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6830 if (foundname != NULL)
6831 dns_rbt_fullnamefromnode(header->node, foundname);
6833 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6834 isc_rwlocktype_read);
6836 result = ISC_R_SUCCESS;
6839 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Note that 'rdataset' has been re-signed within the open writer version.
 *
 * 'version' must be the database's future (writer) version.  Under the tree
 * write lock and the node's write lock, a reference is taken on the node,
 * the header is removed from its bucket's re-sign heap, and it is appended
 * to the version's resigned_list.
 */
6845 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6847 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6848 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6849 dns_rbtnode_t *node;
6850 rdatasetheader_t *header;
6852 REQUIRE(VALID_RBTDB(rbtdb));
6853 REQUIRE(rdataset != NULL);
6854 REQUIRE(rbtdb->future_version == rbtversion);
6855 REQUIRE(rbtversion->writer);
6857 node = rdataset->private2;
6858 header = rdataset->private3;
6861 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6862 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6863 isc_rwlocktype_write);
6865 * Delete from heap and save to re-signed list so that it can
6866 * be restored if we backout of this change.
6868 new_reference(rbtdb, node);
6869 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6870 header->heap_index = 0;
6871 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6873 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6874 isc_rwlocktype_write);
6875 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
/*
 * Return the database's rrset statistics object (rbtdb->rrsetstats).
 * Currently only permitted on cache databases.
 */
6878 static dns_stats_t *
6879 getrrsetstats(dns_db_t *db) {
6880 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6882 REQUIRE(VALID_RBTDB(rbtdb));
6883 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6885 return (rbtdb->rrsetstats);
/*
 * Method table installed as common.methods by the create routine for every
 * database type other than cache (stub and ordinary zone databases).
 */
6888 static dns_dbmethods_t zone_methods = {
/*
 * Method table installed as common.methods by the create routine for cache
 * databases (type == dns_dbtype_cache).
 */
6927 static dns_dbmethods_t cache_methods = {
/*
 * Create a red-black-tree database for 'origin' in class 'rdclass' and
 * return it through '*dbp'.  'type' selects cache, stub or zone behaviour.
 *
 * Memory comes from 'mctx'.  'hmctx' starts out equal to 'mctx', can be
 * replaced by the memory context passed in argv[0], and is the context the
 * heaps are allocated from.  On success the database holds one reference
 * and ISC_R_SUCCESS is returned.
 *
 * Failure handling has two phases.  Until the memory contexts are attached,
 * errors unwind through the cleanup code at the bottom of the function;
 * after that point errors are handled by calling free_rbtdb().
 */
6967 #ifdef DNS_RBTDB_VERSION64
6972 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6973 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6974 void *driverarg, dns_db_t **dbp)
6977 isc_result_t result;
6980 isc_boolean_t (*sooner)(void *, void *);
6981 isc_mem_t *hmctx = mctx;
6983 /* Keep the compiler happy. */
6986 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6988 return (ISC_R_NOMEMORY);
6991 * If argv[0] exists, it points to a memory context to use for heap
6994 hmctx = (isc_mem_t *) argv[0];
/* Start from an all-zero structure, then pick the method table by type. */
6996 memset(rbtdb, '\0', sizeof(*rbtdb));
6997 dns_name_init(&rbtdb->common.origin, NULL);
6998 rbtdb->common.attributes = 0;
6999 if (type == dns_dbtype_cache) {
7000 rbtdb->common.methods = &cache_methods;
7001 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7002 } else if (type == dns_dbtype_stub) {
7003 rbtdb->common.methods = &zone_methods;
7004 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7006 rbtdb->common.methods = &zone_methods;
7007 rbtdb->common.rdclass = rdclass;
7008 rbtdb->common.mctx = NULL;
7010 result = RBTDB_INITLOCK(&rbtdb->lock);
7011 if (result != ISC_R_SUCCESS)
7014 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7015 if (result != ISC_R_SUCCESS)
7019 * Initialize node_lock_count in a generic way to support future
7020 * extension which allows the user to specify this value on creation.
7021 * Note that when specified for a cache DB it must be larger than 1
7022 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7024 if (rbtdb->node_lock_count == 0) {
7025 if (IS_CACHE(rbtdb))
7026 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7028 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7029 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7030 result = ISC_R_RANGE;
7031 goto cleanup_tree_lock;
7033 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7034 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7035 sizeof(rbtdb_nodelock_t));
7036 if (rbtdb->node_locks == NULL) {
7037 result = ISC_R_NOMEMORY;
7038 goto cleanup_tree_lock;
/*
 * Caches additionally get an rrset statistics object and one list of
 * rdataset headers per lock bucket.
 */
7041 rbtdb->rrsetstats = NULL;
7042 if (IS_CACHE(rbtdb)) {
7043 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7044 if (result != ISC_R_SUCCESS)
7045 goto cleanup_node_locks;
7046 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7047 sizeof(rdatasetheaderlist_t));
7048 if (rbtdb->rdatasets == NULL) {
7049 result = ISC_R_NOMEMORY;
7050 goto cleanup_rrsetstats;
7052 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7053 ISC_LIST_INIT(rbtdb->rdatasets[i]);
7055 rbtdb->rdatasets = NULL;
/*
 * One heap per lock bucket, allocated from hmctx.  Caches order their heaps
 * with ttl_sooner, other databases with resign_sooner.  The array is
 * NULL-filled first so a partial failure can be cleaned up safely.
 */
7060 rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7061 sizeof(isc_heap_t *));
7062 if (rbtdb->heaps == NULL) {
7063 result = ISC_R_NOMEMORY;
7064 goto cleanup_rdatasets;
7066 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7067 rbtdb->heaps[i] = NULL;
7068 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7069 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7070 result = isc_heap_create(hmctx, sooner, set_index, 0,
7072 if (result != ISC_R_SUCCESS)
7077 * Create deadnode lists.
7079 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7080 sizeof(rbtnodelist_t));
7081 if (rbtdb->deadnodes == NULL) {
7082 result = ISC_R_NOMEMORY;
7085 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7086 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7088 rbtdb->active = rbtdb->node_lock_count;
/*
 * Initialize every bucket's lock and reference counter.  If a bucket fails,
 * its own lock is destroyed first and then (presumably for each bucket
 * already set up) lock and counter are torn down before bailing out.
 */
7090 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7091 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7092 if (result == ISC_R_SUCCESS) {
7093 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7094 if (result != ISC_R_SUCCESS)
7095 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7097 if (result != ISC_R_SUCCESS) {
7099 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7100 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7101 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7103 goto cleanup_deadnodes;
7105 rbtdb->node_locks[i].exiting = ISC_FALSE;
7109 * Attach to the mctx. The database will persist so long as there
7110 * are references to it, and attaching to the mctx ensures that our
7111 * mctx won't disappear out from under us.
7113 isc_mem_attach(mctx, &rbtdb->common.mctx);
7114 isc_mem_attach(hmctx, &rbtdb->hmctx);
7117 * Must be initialized before free_rbtdb() is called.
7119 isc_ondestroy_init(&rbtdb->common.ondest);
7122 * Make a copy of the origin name.
7124 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7125 if (result != ISC_R_SUCCESS) {
7126 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7131 * Make the Red-Black Trees.
7133 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7134 if (result != ISC_R_SUCCESS) {
7135 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7139 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7140 if (result != ISC_R_SUCCESS) {
7141 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7146 * In order to set the node callback bit correctly in zone databases,
7147 * we need to know if the node has the origin name of the zone.
7148 * In loading_addrdataset() we could simply compare the new name
7149 * to the origin name, but this is expensive. Also, we don't know the
7150 * node name in addrdataset(), so we need another way of knowing the
7153 * We now explicitly create a node for the zone's origin, and then
7154 * we simply remember the node's address. This is safe, because
7155 * the top-of-zone node can never be deleted, nor can its address
7158 if (!IS_CACHE(rbtdb)) {
7159 dns_rbtnode_t *nsec3node;
7161 rbtdb->origin_node = NULL;
7162 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7163 &rbtdb->origin_node);
7164 if (result != ISC_R_SUCCESS) {
7165 INSIST(result != ISC_R_EXISTS);
7166 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7169 rbtdb->origin_node->nsec3 = 0;
7171 * We need to give the origin node the right locknum.
7173 dns_name_init(&name, NULL);
7174 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7175 #ifdef DNS_RBT_USEHASH
7176 rbtdb->origin_node->locknum =
7177 rbtdb->origin_node->hashval %
7178 rbtdb->node_lock_count;
7180 rbtdb->origin_node->locknum =
7181 dns_name_hash(&name, ISC_TRUE) %
7182 rbtdb->node_lock_count;
7185 * Add an apex node to the NSEC3 tree so that NSEC3 searches
7186 * return partial matches when there is only a single NSEC3
7187 * record in the tree.
7190 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7192 if (result != ISC_R_SUCCESS) {
7193 INSIST(result != ISC_R_EXISTS);
7194 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7197 nsec3node->nsec3 = 1;
7199 * We need to give the nsec3 origin node the right locknum.
7201 dns_name_init(&name, NULL);
7202 dns_rbt_namefromnode(nsec3node, &name);
7203 #ifdef DNS_RBT_USEHASH
7204 nsec3node->locknum = nsec3node->hashval %
7205 rbtdb->node_lock_count;
7207 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7208 rbtdb->node_lock_count;
7213 * Misc. Initialization.
7215 result = isc_refcount_init(&rbtdb->references, 1);
7216 if (result != ISC_R_SUCCESS) {
7217 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7220 rbtdb->attributes = 0;
7224 * Version Initialization.
7226 rbtdb->current_serial = 1;
7227 rbtdb->least_serial = 1;
7228 rbtdb->next_serial = 2;
7229 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7230 if (rbtdb->current_version == NULL) {
/* Drop the single reference taken above before the database is freed. */
7231 isc_refcount_decrement(&rbtdb->references, NULL);
7232 isc_refcount_destroy(&rbtdb->references);
7233 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7234 return (ISC_R_NOMEMORY);
/* The initial version starts out insecure and without NSEC3 parameters. */
7236 rbtdb->current_version->secure = dns_db_insecure;
7237 rbtdb->current_version->havensec3 = ISC_FALSE;
7238 rbtdb->current_version->flags = 0;
7239 rbtdb->current_version->iterations = 0;
7240 rbtdb->current_version->hash = 0;
7241 rbtdb->current_version->salt_length = 0;
7242 memset(rbtdb->current_version->salt, 0,
7243 sizeof(rbtdb->current_version->salt));
7244 rbtdb->future_version = NULL;
7245 ISC_LIST_INIT(rbtdb->open_versions);
7247 * Keep the current version in the open list so that list operation
7248 * won't happen in normal lookup operations.
7250 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
/* The magic numbers are set last, once the structure is fully built. */
7252 rbtdb->common.magic = DNS_DB_MAGIC;
7253 rbtdb->common.impmagic = RBTDB_MAGIC;
7255 *dbp = (dns_db_t *)rbtdb;
7257 return (ISC_R_SUCCESS);
/*
 * Error unwinding for failures that occur before the memory contexts are
 * attached: resources are released in reverse order of construction.
 */
7260 isc_mem_put(mctx, rbtdb->deadnodes,
7261 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7264 if (rbtdb->heaps != NULL) {
7265 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7266 if (rbtdb->heaps[i] != NULL)
7267 isc_heap_destroy(&rbtdb->heaps[i]);
/*
 * NOTE(review): the heaps array was obtained from hmctx above, but it is
 * returned to mctx here.  When argv[0] supplies a distinct heap context the
 * two differ, so this looks like a context mismatch -- confirm and, if so,
 * release to hmctx instead.
 */
7268 isc_mem_put(mctx, rbtdb->heaps,
7269 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7273 if (rbtdb->rdatasets != NULL)
7274 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7275 sizeof(rdatasetheaderlist_t));
7277 if (rbtdb->rrsetstats != NULL)
7278 dns_stats_detach(&rbtdb->rrsetstats);
7281 isc_mem_put(mctx, rbtdb->node_locks,
7282 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7285 isc_rwlock_destroy(&rbtdb->tree_lock);
7288 RBTDB_DESTROYLOCK(&rbtdb->lock);
7291 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7297 * Slabbed Rdataset Methods
/*
 * Release the node reference held by 'rdataset'.  The database is taken
 * from private1 and the node from private2, and the node is handed to
 * detachnode().
 */
7301 rdataset_disassociate(dns_rdataset_t *rdataset) {
7302 dns_db_t *db = rdataset->private1;
7303 dns_dbnode_t *node = rdataset->private2;
7305 detachnode(db, &node);
/*
 * Position the rdataset cursor on the first rdata of the slab.
 *
 * The slab (private3) starts with a 16-bit big-endian record count.  The
 * cursor pointer is kept in private5 and the remaining-record counter in
 * privateuint4.  ISC_R_NOMORE is returned with private5 cleared
 * (presumably when the count is zero -- confirm); otherwise the cursor is
 * set and ISC_R_SUCCESS is returned.
 */
7309 rdataset_first(dns_rdataset_t *rdataset) {
7310 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7313 count = raw[0] * 256 + raw[1];
7315 rdataset->private5 = NULL;
7316 return (ISC_R_NOMORE);
7319 #if DNS_RDATASET_FIXED
/* Without load-order iteration, skip the count and the 4-byte-per-record offset table. */
7320 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7321 raw += 2 + (4 * count);
7327 * The privateuint4 field is the number of rdata beyond the
7328 * cursor position, so we decrement the total count by one
7329 * before storing it.
7331 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7332 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7333 * to the first entry in the offset table.
7336 rdataset->privateuint4 = count;
7337 rdataset->private5 = raw;
7339 return (ISC_R_SUCCESS);
/*
 * Advance the rdataset cursor by one rdata.
 *
 * privateuint4 holds the number of records remaining beyond the cursor and
 * private5 the cursor itself.  ISC_R_NOMORE is returned when nothing remains
 * (presumably when that counter is zero -- confirm); otherwise the counter
 * and cursor are updated and ISC_R_SUCCESS is returned.
 */
7343 rdataset_next(dns_rdataset_t *rdataset) {
7345 unsigned int length;
7346 unsigned char *raw; /* RDATASLAB */
7348 count = rdataset->privateuint4;
7350 return (ISC_R_NOMORE);
7352 rdataset->privateuint4 = count;
7355 * Skip forward one record (length + 4) or one offset (4).
7357 raw = rdataset->private5;
7358 #if DNS_RDATASET_FIXED
7359 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
/* Each record begins with its 16-bit big-endian length. */
7361 length = raw[0] * 256 + raw[1];
7363 #if DNS_RDATASET_FIXED
7365 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7367 rdataset->private5 = raw + 2; /* length(2) */
7370 return (ISC_R_SUCCESS);
/*
 * Load the rdata at the cursor position (private5) into 'rdata'.
 *
 * The cursor must be set (REQUIRE).  With DNS_RDATASET_FIXED and load-order
 * iteration, the cursor points at a 4-byte big-endian offset which is
 * resolved against the slab base in private3.  The record's 16-bit length
 * is then read.  For RRSIG rdatasets the DNS_RDATASLAB_OFFLINE bit of the
 * byte at the current position is translated into DNS_RDATA_OFFLINE and
 * OR-ed into rdata->flags.
 */
7374 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7375 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7376 #if DNS_RDATASET_FIXED
7377 unsigned int offset;
7379 unsigned int length;
7381 unsigned int flags = 0;
7383 REQUIRE(raw != NULL);
7386 * Find the start of the record if not already in private5
7387 * then skip the length and order fields.
7389 #if DNS_RDATASET_FIXED
7390 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7391 offset = (raw[0] << 24) + (raw[1] << 16) +
7392 (raw[2] << 8) + raw[3];
7393 raw = rdataset->private3;
7397 length = raw[0] * 256 + raw[1];
7398 #if DNS_RDATASET_FIXED
7403 if (rdataset->type == dns_rdatatype_rrsig) {
7404 if (*raw & DNS_RDATASLAB_OFFLINE)
7405 flags |= DNS_RDATA_OFFLINE;
7411 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7412 rdata->flags |= flags;
/*
 * Make 'target' a clone of 'source'.
 *
 * A further reference is taken on the source's node (private2) through
 * attachnode(), so the clone holds its own reference, and the clone's
 * iterator state (privateuint4, private5) is reset.
 */
7416 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7417 dns_db_t *db = source->private1;
7418 dns_dbnode_t *node = source->private2;
7419 dns_dbnode_t *cloned_node = NULL;
7421 attachnode(db, node, &cloned_node);
7425 * Reset iterator state.
7427 target->privateuint4 = 0;
7428 target->private5 = NULL;
/*
 * Number of rdata in the rdataset, decoded from the 16-bit big-endian
 * count stored in the first two bytes of the slab (private3).
 */
7432 rdataset_count(dns_rdataset_t *rdataset) {
7433 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7436 count = raw[0] * 256 + raw[1];
7442 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7443 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7445 dns_db_t *db = rdataset->private1;
7446 dns_dbnode_t *node = rdataset->private2;
7447 dns_dbnode_t *cloned_node;
7448 struct noqname *noqname = rdataset->private6;
7451 attachnode(db, node, &cloned_node);
7452 nsec->methods = &rdataset_methods;
7453 nsec->rdclass = db->rdclass;
7454 nsec->type = noqname->type;
7456 nsec->ttl = rdataset->ttl;
7457 nsec->trust = rdataset->trust;
7458 nsec->private1 = rdataset->private1;
7459 nsec->private2 = rdataset->private2;
7460 nsec->private3 = noqname->neg;
7461 nsec->privateuint4 = 0;
7462 nsec->private5 = NULL;
7463 nsec->private6 = NULL;
7464 nsec->private7 = NULL;
7467 attachnode(db, node, &cloned_node);
7468 nsecsig->methods = &rdataset_methods;
7469 nsecsig->rdclass = db->rdclass;
7470 nsecsig->type = dns_rdatatype_rrsig;
7471 nsecsig->covers = noqname->type;
7472 nsecsig->ttl = rdataset->ttl;
7473 nsecsig->trust = rdataset->trust;
7474 nsecsig->private1 = rdataset->private1;
7475 nsecsig->private2 = rdataset->private2;
7476 nsecsig->private3 = noqname->negsig;
7477 nsecsig->privateuint4 = 0;
7478 nsecsig->private5 = NULL;
7479 nsec->private6 = NULL;
7480 nsec->private7 = NULL;
7482 dns_name_clone(&noqname->name, name);
7484 return (ISC_R_SUCCESS);
7488 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7489 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7491 dns_db_t *db = rdataset->private1;
7492 dns_dbnode_t *node = rdataset->private2;
7493 dns_dbnode_t *cloned_node;
7494 struct noqname *closest = rdataset->private7;
7497 attachnode(db, node, &cloned_node);
7498 nsec->methods = &rdataset_methods;
7499 nsec->rdclass = db->rdclass;
7500 nsec->type = closest->type;
7502 nsec->ttl = rdataset->ttl;
7503 nsec->trust = rdataset->trust;
7504 nsec->private1 = rdataset->private1;
7505 nsec->private2 = rdataset->private2;
7506 nsec->private3 = closest->neg;
7507 nsec->privateuint4 = 0;
7508 nsec->private5 = NULL;
7509 nsec->private6 = NULL;
7510 nsec->private7 = NULL;
7513 attachnode(db, node, &cloned_node);
7514 nsecsig->methods = &rdataset_methods;
7515 nsecsig->rdclass = db->rdclass;
7516 nsecsig->type = dns_rdatatype_rrsig;
7517 nsecsig->covers = closest->type;
7518 nsecsig->ttl = rdataset->ttl;
7519 nsecsig->trust = rdataset->trust;
7520 nsecsig->private1 = rdataset->private1;
7521 nsecsig->private2 = rdataset->private2;
7522 nsecsig->private3 = closest->negsig;
7523 nsecsig->privateuint4 = 0;
7524 nsecsig->private5 = NULL;
7525 nsec->private6 = NULL;
7526 nsec->private7 = NULL;
7528 dns_name_clone(&closest->name, name);
7530 return (ISC_R_SUCCESS);
/*
 * Set the trust level of an rdataset.  The new value is stored both in the
 * caller's 'rdataset' and in the database's header for it, while holding
 * the write lock of the node's lock bucket.
 */
7534 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7535 dns_rbtdb_t *rbtdb = rdataset->private1;
7536 dns_rbtnode_t *rbtnode = rdataset->private2;
7537 rdatasetheader_t *header = rdataset->private3;
7540 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7541 isc_rwlocktype_write);
7542 header->trust = rdataset->trust = trust;
7543 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7544 isc_rwlocktype_write);
/*
 * Expire the stored header behind 'rdataset' by calling expire_header()
 * (with its third argument ISC_FALSE) while holding the write lock of the
 * node's lock bucket.
 */
7548 rdataset_expire(dns_rdataset_t *rdataset) {
7549 dns_rbtdb_t *rbtdb = rdataset->private1;
7550 dns_rbtnode_t *rbtnode = rdataset->private2;
7551 rdatasetheader_t *header = rdataset->private3;
7554 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7555 isc_rwlocktype_write);
7556 expire_header(rbtdb, header, ISC_FALSE);
7557 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7558 isc_rwlocktype_write);
7562 * Rdataset Iterator Methods
/*
 * Destroy an rdataset iterator: close the version it holds (if any) without
 * committing, detach from its node, and return the iterator's memory to the
 * database's memory context.
 */
7566 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7567 rbtdb_rdatasetiter_t *rbtiterator;
7569 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7571 if (rbtiterator->common.version != NULL)
7572 closeversion(rbtiterator->common.db,
7573 &rbtiterator->common.version, ISC_FALSE);
7574 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7575 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7576 sizeof(*rbtiterator));
/*
 * Position the rdataset iterator on the first usable rdataset of its node.
 *
 * Under the node's read lock, the top-level header list is walked through
 * ->next and, for each type, the version chain through ->down, looking for
 * a header whose serial is not newer than the iterator's and which is not
 * marked IGNORE.  Such a header is rejected when it is a NONEXISTENT marker
 * or, with a non-zero 'now', when its TTL has passed.
 *
 * The header found (or NULL) is stored in the iterator's 'current' field.
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS (presumably depending on whether a
 * header was found -- confirm).
 */
7582 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7583 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7584 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7585 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7586 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7587 rdatasetheader_t *header, *top_next;
7588 rbtdb_serial_t serial;
/* Caches filter by time ('now'); versioned databases filter by serial. */
7591 if (IS_CACHE(rbtdb)) {
7593 now = rbtiterator->common.now;
7595 serial = rbtversion->serial;
7599 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7600 isc_rwlocktype_read);
7602 for (header = rbtnode->data; header != NULL; header = top_next) {
/* Remember the next type's chain before descending this one. */
7603 top_next = header->next;
7605 if (header->serial <= serial && !IGNORE(header)) {
7607 * Is this a "this rdataset doesn't exist"
7608 * record? Or is it too old in the cache?
7610 * Note: unlike everywhere else, we
7611 * check for now > header->rdh_ttl instead
7612 * of now >= header->rdh_ttl. This allows
7613 * ANY and RRSIG queries for 0 TTL
7614 * rdatasets to work.
7616 if (NONEXISTENT(header) ||
7617 (now != 0 && now > header->rdh_ttl))
7621 header = header->down;
7622 } while (header != NULL);
7627 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7628 isc_rwlocktype_read);
7630 rbtiterator->current = header;
7633 return (ISC_R_NOMORE);
7635 return (ISC_R_SUCCESS);
/*
 * Advance the rdataset iterator to the next usable rdataset of its node.
 *
 * Starting after the current header, the top-level list is walked under the
 * node's read lock.  Headers whose type equals the current header's type or
 * its computed counterpart 'negtype' are skipped; for other types the
 * version chain is searched with the same serial / IGNORE / NONEXISTENT /
 * TTL rules as in rdatasetiter_first().
 *
 * The header found (or NULL) is stored in the iterator's 'current' field.
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS (presumably depending on whether a
 * header was found -- confirm).
 */
7639 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7640 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7641 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7642 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7643 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7644 rdatasetheader_t *header, *top_next;
7645 rbtdb_serial_t serial;
7647 rbtdb_rdatatype_t type, negtype;
7648 dns_rdatatype_t rdtype, covers;
7650 header = rbtiterator->current;
7652 return (ISC_R_NOMORE);
7654 if (IS_CACHE(rbtdb)) {
7656 now = rbtiterator->common.now;
7658 serial = rbtversion->serial;
7662 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7663 isc_rwlocktype_read);
/*
 * Work out the type value that pairs with the current one: for a NEGATIVE
 * header the value built from its covered type, otherwise the value with
 * the current base type in the "covers" position.
 */
7665 type = header->type;
7666 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7667 if (NEGATIVE(header)) {
7668 covers = RBTDB_RDATATYPE_EXT(header->type);
7669 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7671 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7672 for (header = header->next; header != NULL; header = top_next) {
7673 top_next = header->next;
7675 * If not walking back up the down list.
7677 if (header->type != type && header->type != negtype) {
7679 if (header->serial <= serial &&
7682 * Is this a "this rdataset doesn't
7685 * Note: unlike everywhere else, we
7686 * check for now > header->ttl instead
7687 * of now >= header->ttl. This allows
7688 * ANY and RRSIG queries for 0 TTL
7689 * rdatasets to work.
7691 if ((header->attributes &
7692 RDATASET_ATTR_NONEXISTENT) != 0 ||
7693 (now != 0 && now > header->rdh_ttl))
7697 header = header->down;
7698 } while (header != NULL);
7704 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7705 isc_rwlocktype_read);
7707 rbtiterator->current = header;
7710 return (ISC_R_NOMORE);
7712 return (ISC_R_SUCCESS);
/*
 * Bind the iterator's current header to 'rdataset'.  The iterator must be
 * positioned on a header (REQUIRE).  bind_rdataset() is called with the
 * iterator's 'now' value while holding the node's read lock.
 */
7716 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7717 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7718 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7719 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7720 rdatasetheader_t *header;
7722 header = rbtiterator->current;
7723 REQUIRE(header != NULL);
7725 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7726 isc_rwlocktype_read);
7728 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7731 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7732 isc_rwlocktype_read);
7737 * Database Iterator Methods
/*
 * Take a reference on the database iterator's current node by calling
 * reactivate_node().  The iterator must hold the tree lock in some mode
 * (INSIST); that mode is passed on to reactivate_node().
 */
7741 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7742 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7743 dns_rbtnode_t *node = rbtdbiter->node;
7748 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7749 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
/*
 * Drop the database iterator's reference on its current node.
 * decrement_reference() is called under the node's read lock, passing the
 * iterator's current tree-lock mode, and the iterator's node pointer is
 * then cleared.
 */
7753 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7754 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7755 dns_rbtnode_t *node = rbtdbiter->node;
7761 lock = &rbtdb->node_locks[node->locknum].lock;
7762 NODE_LOCK(lock, isc_rwlocktype_read);
7763 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7764 rbtdbiter->tree_locked, ISC_FALSE);
7765 NODE_UNLOCK(lock, isc_rwlocktype_read);
7767 rbtdbiter->node = NULL;
/*
 * Release the nodes queued on the iterator's deletion list.
 *
 * Nothing happens when the list is empty.  Otherwise the count is logged,
 * the tree lock is taken for writing (a held read lock is given up first),
 * and decrement_reference() is called for every queued node under that
 * node's read lock.  Afterwards the list is emptied and the write lock is
 * released.  If the iterator held a read lock on entry it is re-acquired;
 * otherwise the iterator is left with no tree lock.
 */
7771 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7772 dns_rbtnode_t *node;
7773 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7774 isc_boolean_t was_read_locked = ISC_FALSE;
7778 if (rbtdbiter->delete != 0) {
7780 * Note that "%d node of %d in tree" can report things like
7781 * "flush_deletions: 59 nodes of 41 in tree". This means
7782 * That some nodes appear on the deletions list more than
7783 * once. Only the last occurence will actually be deleted.
7785 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7786 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7787 "flush_deletions: %d nodes of %d in tree",
7789 dns_rbt_nodecount(rbtdb->tree));
/* Swap a read lock for the write lock; remember to restore it afterwards. */
7791 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7792 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7793 was_read_locked = ISC_TRUE;
7795 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7796 rbtdbiter->tree_locked = isc_rwlocktype_write;
7798 for (i = 0; i < rbtdbiter->delete; i++) {
7799 node = rbtdbiter->deletions[i];
7800 lock = &rbtdb->node_locks[node->locknum].lock;
7802 NODE_LOCK(lock, isc_rwlocktype_read);
7803 decrement_reference(rbtdb, node, 0,
7804 isc_rwlocktype_read,
7805 rbtdbiter->tree_locked, ISC_FALSE);
7806 NODE_UNLOCK(lock, isc_rwlocktype_read);
7809 rbtdbiter->delete = 0;
7811 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7812 if (was_read_locked) {
7813 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7814 rbtdbiter->tree_locked = isc_rwlocktype_read;
7817 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * Leave the paused state: take the tree read lock, record that in
 * rbtdbiter->tree_locked and clear the paused flag.
 *
 * Requires that the iterator is paused and does not currently hold the
 * tree lock.
 */
7823 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7824 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7826 REQUIRE(rbtdbiter->paused);
7827 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7829 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7830 rbtdbiter->tree_locked = isc_rwlocktype_read;
7832 rbtdbiter->paused = ISC_FALSE;
/*
 * Destroy the database iterator pointed to by '*iteratorp'.
 *
 * Releases the tree read lock if the iterator holds it, drops the current
 * node, flushes queued deletions, detaches the iterator's database
 * reference, resets both node chains and returns the iterator's memory to
 * the database's memory context.
 * NOTE(review): the release of the temporary 'db' reference and any
 * clearing of '*iteratorp' are not visible in this listing.
 */
7836 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7837 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7838 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7839 dns_db_t *db = NULL;
7841 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7842 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7843 rbtdbiter->tree_locked = isc_rwlocktype_none;
7845 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7847 dereference_iter_node(rbtdbiter);
7849 flush_deletions(rbtdbiter);
/*
 * Take a temporary reference in 'db' before detaching the iterator's own
 * reference; db->mctx is still needed below to free the iterator.
 */
7851 dns_db_attach(rbtdbiter->common.db, &db);
7852 dns_db_detach(&rbtdbiter->common.db);
7854 dns_rbtnodechain_reset(&rbtdbiter->chain);
7855 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7856 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * Position the iterator on the first node.
 *
 * A stored result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Otherwise the iteration is resumed if paused, the current
 * node is released and both node chains are reset.  With nsec3only set
 * the cursor starts in the NSEC3 tree; the other visible path starts in
 * the main tree and, unless nonsec3 is set, tries the NSEC3 chain when
 * the main tree yields ISC_R_NOTFOUND.  When positioning succeeds the
 * node under the cursor is fetched, new_origin is set and the node is
 * referenced.  The final result is saved in rbtdbiter->result.
 */
7863 dbiterator_first(dns_dbiterator_t *iterator) {
7864 isc_result_t result;
7865 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7866 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7867 dns_name_t *name, *origin;
7869 if (rbtdbiter->result != ISC_R_SUCCESS &&
7870 rbtdbiter->result != ISC_R_NOMORE)
7871 return (rbtdbiter->result);
7873 if (rbtdbiter->paused)
7874 resume_iteration(rbtdbiter);
7876 dereference_iter_node(rbtdbiter);
7878 name = dns_fixedname_name(&rbtdbiter->name);
7879 origin = dns_fixedname_name(&rbtdbiter->origin);
7880 dns_rbtnodechain_reset(&rbtdbiter->chain);
7881 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7883 if (rbtdbiter->nsec3only) {
7884 rbtdbiter->current = &rbtdbiter->nsec3chain;
7885 result = dns_rbtnodechain_first(rbtdbiter->current,
7886 rbtdb->nsec3, name, origin);
7888 rbtdbiter->current = &rbtdbiter->chain;
7889 result = dns_rbtnodechain_first(rbtdbiter->current,
7890 rbtdb->tree, name, origin);
7891 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7892 rbtdbiter->current = &rbtdbiter->nsec3chain;
7893 result = dns_rbtnodechain_first(rbtdbiter->current,
7898 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7899 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7900 NULL, &rbtdbiter->node);
7901 if (result == ISC_R_SUCCESS) {
7902 rbtdbiter->new_origin = ISC_TRUE;
7903 reference_iter_node(rbtdbiter);
/*
 * NOTE(review): presumably the else-branch of the positioning test above
 * (the "else" line is not shown): any failure is expected to be NOTFOUND.
 */
7906 INSIST(result == ISC_R_NOTFOUND);
7907 result = ISC_R_NOMORE; /* The tree is empty. */
7910 rbtdbiter->result = result;
/*
 * Position the iterator on the last node.
 *
 * A stored result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Otherwise the iteration is resumed if paused, the current
 * node is released and both node chains are reset.  The NSEC3 tree is
 * tried only when nsec3only is set and nonsec3 is not; the main tree is
 * tried when nsec3only is not set and no position has been found yet.
 * When positioning succeeds the node under the cursor is fetched,
 * new_origin is set and the node is referenced.  The final result is
 * saved in rbtdbiter->result.
 *
 * NOTE(review): dbiterator_next() continues from the main chain into the
 * NSEC3 chain when neither flag is set, but with neither flag set this
 * function ends up on the last node of the main tree -- confirm that this
 * is the intended "last" position.
 */
7916 dbiterator_last(dns_dbiterator_t *iterator) {
7917 isc_result_t result;
7918 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7919 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7920 dns_name_t *name, *origin;
7922 if (rbtdbiter->result != ISC_R_SUCCESS &&
7923 rbtdbiter->result != ISC_R_NOMORE)
7924 return (rbtdbiter->result);
7926 if (rbtdbiter->paused)
7927 resume_iteration(rbtdbiter);
7929 dereference_iter_node(rbtdbiter);
7931 name = dns_fixedname_name(&rbtdbiter->name);
7932 origin = dns_fixedname_name(&rbtdbiter->origin);
7933 dns_rbtnodechain_reset(&rbtdbiter->chain);
7934 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/* Start from "not found" so the main-tree attempt below can key off it. */
7936 result = ISC_R_NOTFOUND;
7937 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7938 rbtdbiter->current = &rbtdbiter->nsec3chain;
7939 result = dns_rbtnodechain_last(rbtdbiter->current,
7940 rbtdb->nsec3, name, origin);
7942 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7943 rbtdbiter->current = &rbtdbiter->chain;
7944 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7947 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7948 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7949 NULL, &rbtdbiter->node);
7950 if (result == ISC_R_SUCCESS) {
7951 rbtdbiter->new_origin = ISC_TRUE;
7952 reference_iter_node(rbtdbiter);
/*
 * NOTE(review): presumably the else-branch of the positioning test above
 * (the "else" line is not shown): any failure is expected to be NOTFOUND.
 */
7955 INSIST(result == ISC_R_NOTFOUND);
7956 result = ISC_R_NOMORE; /* The tree is empty. */
7959 rbtdbiter->result = result;
/*
 * Position the iterator at 'name'.
 *
 * A stored result other than ISC_R_SUCCESS, ISC_R_NOTFOUND or
 * ISC_R_NOMORE is returned unchanged.  Otherwise the iteration is resumed
 * if paused, the current node is released and both node chains are reset.
 *
 * The lookup uses dns_rbt_findnode() with DNS_RBTFIND_EMPTYDATA: in the
 * NSEC3 tree when nsec3only is set, in the main tree when nonsec3 is set,
 * and on the remaining visible path in the main tree followed by a second
 * lookup in the NSEC3 tree when the main tree only gives
 * DNS_R_PARTIALMATCH.
 *
 * NOTE(review): two result-handling sequences follow the lookup and both
 * store into rbtdbiter->result; presumably they are alternatives selected
 * by preprocessor lines not shown in this listing -- confirm which one is
 * compiled.  The first maps DNS_R_PARTIALMATCH to ISC_R_NOTFOUND, the
 * second stores ISC_R_SUCCESS for DNS_R_PARTIALMATCH.
 */
7965 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7966 isc_result_t result;
7967 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7968 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7969 dns_name_t *iname, *origin;
7971 if (rbtdbiter->result != ISC_R_SUCCESS &&
7972 rbtdbiter->result != ISC_R_NOTFOUND &&
7973 rbtdbiter->result != ISC_R_NOMORE)
7974 return (rbtdbiter->result);
7976 if (rbtdbiter->paused)
7977 resume_iteration(rbtdbiter);
7979 dereference_iter_node(rbtdbiter);
7981 iname = dns_fixedname_name(&rbtdbiter->name);
7982 origin = dns_fixedname_name(&rbtdbiter->origin);
7983 dns_rbtnodechain_reset(&rbtdbiter->chain);
7984 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7986 if (rbtdbiter->nsec3only) {
7987 rbtdbiter->current = &rbtdbiter->nsec3chain;
7988 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7991 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7992 } else if (rbtdbiter->nonsec3) {
7993 rbtdbiter->current = &rbtdbiter->chain;
7994 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7997 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8000 * Stay on main chain if not found on either chain.
8002 rbtdbiter->current = &rbtdbiter->chain;
8003 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8006 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8007 if (result == DNS_R_PARTIALMATCH) {
8008 dns_rbtnode_t *node = NULL;
8009 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8010 &node, &rbtdbiter->nsec3chain,
8011 DNS_RBTFIND_EMPTYDATA,
/* Only an exact NSEC3 hit moves the cursor onto the NSEC3 chain. */
8013 if (result == ISC_R_SUCCESS) {
8014 rbtdbiter->node = node;
8015 rbtdbiter->current = &rbtdbiter->nsec3chain;
8021 if (result == ISC_R_SUCCESS) {
8022 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8024 if (result == ISC_R_SUCCESS) {
8025 rbtdbiter->new_origin = ISC_TRUE;
8026 reference_iter_node(rbtdbiter);
8028 } else if (result == DNS_R_PARTIALMATCH) {
8029 result = ISC_R_NOTFOUND;
8030 rbtdbiter->node = NULL;
8033 rbtdbiter->result = result;
8035 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8036 isc_result_t tresult;
8037 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8039 if (tresult == ISC_R_SUCCESS) {
8040 rbtdbiter->new_origin = ISC_TRUE;
8041 reference_iter_node(rbtdbiter);
8044 rbtdbiter->node = NULL;
8047 rbtdbiter->node = NULL;
8049 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8050 ISC_R_SUCCESS : result;
/*
 * Step the iterator back to the previous node.
 *
 * Requires a current node.  A stored result other than ISC_R_SUCCESS is
 * returned unchanged.  Otherwise the iteration is resumed if paused and
 * the active chain is moved with dns_rbtnodechain_prev().  When that
 * reports ISC_R_NOMORE while the cursor is on the NSEC3 chain and neither
 * nsec3only nor nonsec3 is set, the cursor switches to the main chain and
 * is positioned on the last node of the main tree (ISC_R_NOTFOUND there
 * becomes ISC_R_NOMORE).  The old node is released; on success the new
 * node is fetched and referenced, and new_origin records whether the move
 * returned DNS_R_NEWORIGIN.  The final result is saved in
 * rbtdbiter->result.
 */
8057 dbiterator_prev(dns_dbiterator_t *iterator) {
8058 isc_result_t result;
8059 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8060 dns_name_t *name, *origin;
8061 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8063 REQUIRE(rbtdbiter->node != NULL);
8065 if (rbtdbiter->result != ISC_R_SUCCESS)
8066 return (rbtdbiter->result);
8068 if (rbtdbiter->paused)
8069 resume_iteration(rbtdbiter);
8071 name = dns_fixedname_name(&rbtdbiter->name);
8072 origin = dns_fixedname_name(&rbtdbiter->origin);
8073 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8074 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8075 !rbtdbiter->nonsec3 &&
8076 &rbtdbiter->nsec3chain == rbtdbiter->current) {
8077 rbtdbiter->current = &rbtdbiter->chain;
8078 dns_rbtnodechain_reset(rbtdbiter->current);
8079 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8081 if (result == ISC_R_NOTFOUND)
8082 result = ISC_R_NOMORE;
/* The previous node is released whether or not the move succeeded. */
8085 dereference_iter_node(rbtdbiter);
8087 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8088 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8089 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8090 NULL, &rbtdbiter->node);
8093 if (result == ISC_R_SUCCESS)
8094 reference_iter_node(rbtdbiter);
8096 rbtdbiter->result = result;
/*
 * Advance the iterator to the next node.
 *
 * Requires a current node.  A stored result other than ISC_R_SUCCESS is
 * returned unchanged.  Otherwise the iteration is resumed if paused and
 * the active chain is moved with dns_rbtnodechain_next().  When that
 * reports ISC_R_NOMORE while the cursor is on the main chain and neither
 * nsec3only nor nonsec3 is set, the cursor switches to the NSEC3 chain and
 * is positioned on the first node of the NSEC3 tree (ISC_R_NOTFOUND there
 * becomes ISC_R_NOMORE).  The old node is released; on success the new
 * node is fetched and referenced, and new_origin records whether the move
 * returned DNS_R_NEWORIGIN.  The final result is saved in
 * rbtdbiter->result.
 */
8102 dbiterator_next(dns_dbiterator_t *iterator) {
8103 isc_result_t result;
8104 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8105 dns_name_t *name, *origin;
8106 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8108 REQUIRE(rbtdbiter->node != NULL);
8110 if (rbtdbiter->result != ISC_R_SUCCESS)
8111 return (rbtdbiter->result);
8113 if (rbtdbiter->paused)
8114 resume_iteration(rbtdbiter);
8116 name = dns_fixedname_name(&rbtdbiter->name);
8117 origin = dns_fixedname_name(&rbtdbiter->origin);
8118 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8119 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8120 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8121 rbtdbiter->current = &rbtdbiter->nsec3chain;
8122 dns_rbtnodechain_reset(rbtdbiter->current);
8123 result = dns_rbtnodechain_first(rbtdbiter->current,
8124 rbtdb->nsec3, name, origin);
8125 if (result == ISC_R_NOTFOUND)
8126 result = ISC_R_NOMORE;
/* The previous node is released whether or not the move succeeded. */
8129 dereference_iter_node(rbtdbiter);
8131 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8132 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8133 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8134 NULL, &rbtdbiter->node);
8136 if (result == ISC_R_SUCCESS)
8137 reference_iter_node(rbtdbiter);
8139 rbtdbiter->result = result;
/*
 * Hand the node under the cursor to the caller through '*nodep', taking a
 * new reference on it under the node's lock.
 *
 * Requires a stored result of ISC_R_SUCCESS and a current node; a paused
 * iteration is resumed first.  The visible name handling combines the
 * iterator's stored node name and origin with dns_name_concatenate().
 * DNS_R_NEWORIGIN is selected when relative names are in use and
 * new_origin is set; ISC_R_SUCCESS is the other visible outcome.
 *
 * When iterator->cleaning is set and the result is ISC_R_SUCCESS, the
 * node is passed to expirenode(); if it then has no 'down' pointer it is
 * appended to the deletions queue with an extra reference, after flushing
 * the queue first if it already holds DELETION_BATCH_MAX entries.
 *
 * NOTE(review): the branch bodies around the name handling and the
 * return statement are not shown in this listing.
 */
8145 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8148 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8149 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8150 dns_rbtnode_t *node = rbtdbiter->node;
8151 isc_result_t result;
8152 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8153 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8155 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8156 REQUIRE(rbtdbiter->node != NULL);
8158 if (rbtdbiter->paused)
8159 resume_iteration(rbtdbiter);
8162 if (rbtdbiter->common.relative_names)
8164 result = dns_name_concatenate(nodename, origin, name, NULL);
8165 if (result != ISC_R_SUCCESS)
8167 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8168 result = DNS_R_NEWORIGIN;
8170 result = ISC_R_SUCCESS;
/* Reference handed to the caller via *nodep. */
8172 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8173 new_reference(rbtdb, node);
8174 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8176 *nodep = rbtdbiter->node;
8178 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8179 isc_result_t expire_result;
8182 * If the deletion array is full, flush it before trying
8183 * to expire the current node. The current node can't
8184 * fully deleted while the iteration cursor is still on it.
8186 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8187 flush_deletions(rbtdbiter);
8189 expire_result = expirenode(iterator->db, *nodep, 0);
8192 * expirenode() currently always returns success.
8194 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
/* Queue the node; the extra reference is released by flush_deletions(). */
8197 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8198 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8199 dns_rbtnode_refincrement(node, &refs);
8201 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the iteration so the tree lock is not held while the caller does
 * other work.
 *
 * A stored result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged, and an already paused iterator returns ISC_R_SUCCESS at
 * once.  Otherwise the paused flag is set, the tree read lock is released
 * if held, queued deletions are flushed and ISC_R_SUCCESS is returned.
 */
8209 dbiterator_pause(dns_dbiterator_t *iterator) {
8210 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8211 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8213 if (rbtdbiter->result != ISC_R_SUCCESS &&
8214 rbtdbiter->result != ISC_R_NOMORE)
8215 return (rbtdbiter->result);
8217 if (rbtdbiter->paused)
8218 return (ISC_R_SUCCESS);
8220 rbtdbiter->paused = ISC_TRUE;
/* The only lock mode expected here is a read lock. */
8222 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8223 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8224 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8225 rbtdbiter->tree_locked = isc_rwlocktype_none;
8228 flush_deletions(rbtdbiter);
8230 return (ISC_R_SUCCESS);
/*
 * Copy the iterator's stored origin name into 'name'.
 *
 * Returns the stored iterator result unchanged when it is not
 * ISC_R_SUCCESS; otherwise returns the result of dns_name_copy().
 */
8234 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8235 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8236 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8238 if (rbtdbiter->result != ISC_R_SUCCESS)
8239 return (rbtdbiter->result);
8241 return (dns_name_copy(origin, name, NULL));
8245 * Additional cache routines.
/*
 * Look up the additional-cache entry attached to the current rdata of
 * 'rdataset' and fetch its contents with dns_acache_getentry().
 *
 * The slab header is located immediately before the raw slab
 * (raw - sizeof(*header)); the slot index is derived from the total rdata
 * count stored in the first two slab bytes and the rdataset's remaining
 * count (privateuint4).  The per-type array (additional_auth or
 * additional_glue) is read under the node read lock.  ISC_R_NOTFOUND is
 * returned when there is no array or the slot is empty; an acache query
 * miss is counted in those cases, except that a missing array is not
 * counted for the fromcache type.  Otherwise the entry is attached before
 * the node lock is dropped, queried, and detached again.
 *
 * NOTE(review): the switch header, the fromcache case body and the final
 * return are not shown in this listing.
 */
8248 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8249 dns_rdatatype_t qtype, dns_acache_t *acache,
8250 dns_zone_t **zonep, dns_db_t **dbp,
8251 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8252 dns_name_t *fname, dns_message_t *msg,
8255 dns_rbtdb_t *rbtdb = rdataset->private1;
8256 dns_rbtnode_t *rbtnode = rdataset->private2;
8257 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8258 unsigned int current_count = rdataset->privateuint4;
8260 rdatasetheader_t *header;
8261 nodelock_t *nodelock;
8262 unsigned int total_count;
8263 acachectl_t *acarray;
8264 dns_acacheentry_t *entry;
8265 isc_result_t result;
8267 UNUSED(qtype); /* we do not use this value at least for now */
8270 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* Big-endian 16-bit rdata count at the start of the slab. */
8272 total_count = raw[0] * 256 + raw[1];
8273 INSIST(total_count > current_count);
8274 count = total_count - current_count - 1;
8278 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8279 NODE_LOCK(nodelock, isc_rwlocktype_read);
8282 case dns_rdatasetadditional_fromauth:
8283 acarray = header->additional_auth;
8285 case dns_rdatasetadditional_fromcache:
8288 case dns_rdatasetadditional_fromglue:
8289 acarray = header->additional_glue;
8295 if (acarray == NULL) {
8296 if (type != dns_rdatasetadditional_fromcache)
8297 dns_acache_countquerymiss(acache);
8298 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8299 return (ISC_R_NOTFOUND);
8302 if (acarray[count].entry == NULL) {
8303 dns_acache_countquerymiss(acache);
8304 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8305 return (ISC_R_NOTFOUND);
/* Attach while still holding the node lock, then query without it. */
8309 dns_acache_attachentry(acarray[count].entry, &entry);
8311 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8313 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8314 nodep, fname, msg, now);
8316 dns_acache_detachentry(&entry);
/*
 * Callback registered with dns_acache_createentry() (see
 * rdataset_setadditional()): unhooks 'entry' from the rdataset header it
 * was attached to and releases the callback argument.
 *
 * Under the node write lock the per-type array named by cbarg->type is
 * consulted; if the slot at cbarg->count still holds 'entry', the slot's
 * entry and cbarg pointers are cleared.  The callback argument is freed,
 * the entry is detached, and after the lock is released the node and
 * database references are detached.
 *
 * NOTE(review): the two isc_mem_put() calls on 'cbarg' are presumably the
 * two arms of an if/else whose "else" line is not shown -- confirm.  The
 * assignment of 'cbarg' from '*arg' is also not visible here.
 */
8322 acache_callback(dns_acacheentry_t *entry, void **arg) {
8324 dns_rbtnode_t *rbtnode;
8325 nodelock_t *nodelock;
8326 acachectl_t *acarray = NULL;
8327 acache_cbarg_t *cbarg;
8330 REQUIRE(arg != NULL);
8334 * The caller must hold the entry lock.
8337 rbtdb = (dns_rbtdb_t *)cbarg->db;
8338 rbtnode = (dns_rbtnode_t *)cbarg->node;
8340 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8341 NODE_LOCK(nodelock, isc_rwlocktype_write);
8343 switch (cbarg->type) {
8344 case dns_rdatasetadditional_fromauth:
8345 acarray = cbarg->header->additional_auth;
8347 case dns_rdatasetadditional_fromglue:
8348 acarray = cbarg->header->additional_glue;
8354 count = cbarg->count;
8355 if (acarray != NULL && acarray[count].entry == entry) {
8356 acarray[count].entry = NULL;
8357 INSIST(acarray[count].cbarg == cbarg);
8358 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8359 acarray[count].cbarg = NULL;
8361 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8363 dns_acache_detachentry(&entry);
8365 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* rbtdb and rbtnode were copied out of cbarg above, before it was freed. */
8367 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8368 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an additional-cache entry and release the callback argument
 * associated with it.
 *
 * Requires non-NULL 'mctx', 'entry', 'cbargp' and '*cbargp'.  The entry is
 * cancelled with dns_acache_cancelentry(), the node and database
 * references held in the callback argument are detached, and the argument
 * is returned to 'mctx'.  The entry itself is not detached here.
 *
 * NOTE(review): the assignment of 'cbarg' from '*cbargp' (and any reset
 * of '*cbargp') is not shown in this listing.
 */
8374 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8375 acache_cbarg_t **cbargp)
8377 acache_cbarg_t *cbarg;
8379 REQUIRE(mctx != NULL);
8380 REQUIRE(entry != NULL);
8381 REQUIRE(cbargp != NULL && *cbargp != NULL);
8385 dns_acache_cancelentry(entry);
8386 dns_db_detachnode(cbarg->db, &cbarg->node);
8387 dns_db_detach(&cbarg->db);
8389 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Attach a new additional-cache entry to the current rdata of 'rdataset'.
 *
 * Nothing is done (ISC_R_SUCCESS) for the fromcache type.  Otherwise a
 * callback argument holding the type, slot index, header and fresh
 * references to the database and node is allocated (ISC_R_NOMEMORY on
 * failure), an acache entry is created with acache_callback() as its
 * callback, and the entry is filled in with dns_acache_setentry().
 *
 * Under the node write lock the per-type array (additional_auth or
 * additional_glue) is created on first use with one slot per rdata, and
 * the new entry and callback argument are stored in the slot.  An entry
 * previously in the slot is cancelled and detached only after the node
 * lock has been released.
 *
 * NOTE(review): the switch headers, the jumps to the failure path, its
 * label and final return, and the "else" that presumably separates the
 * two clean-up sequences at the end are not shown in this listing.
 */
8395 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8396 dns_rdatatype_t qtype, dns_acache_t *acache,
8397 dns_zone_t *zone, dns_db_t *db,
8398 dns_dbversion_t *version, dns_dbnode_t *node,
8401 dns_rbtdb_t *rbtdb = rdataset->private1;
8402 dns_rbtnode_t *rbtnode = rdataset->private2;
8403 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8404 unsigned int current_count = rdataset->privateuint4;
8405 rdatasetheader_t *header;
8406 unsigned int total_count, count;
8407 nodelock_t *nodelock;
8408 isc_result_t result;
8409 acachectl_t *acarray;
8410 dns_acacheentry_t *newentry, *oldentry = NULL;
8411 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8415 if (type == dns_rdatasetadditional_fromcache)
8416 return (ISC_R_SUCCESS);
8418 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* Big-endian 16-bit rdata count at the start of the slab. */
8420 total_count = raw[0] * 256 + raw[1];
8421 INSIST(total_count > current_count);
8422 count = total_count - current_count - 1; /* should be private data */
8424 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8425 if (newcbarg == NULL)
8426 return (ISC_R_NOMEMORY);
8427 newcbarg->type = type;
8428 newcbarg->count = count;
8429 newcbarg->header = header;
8430 newcbarg->db = NULL;
8431 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8432 newcbarg->node = NULL;
8433 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8436 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8437 acache_callback, newcbarg, &newentry);
8438 if (result != ISC_R_SUCCESS)
8440 /* Set cache data in the new entry. */
8441 result = dns_acache_setentry(acache, newentry, zone, db,
8442 version, node, fname);
8443 if (result != ISC_R_SUCCESS)
8446 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8447 NODE_LOCK(nodelock, isc_rwlocktype_write);
8451 case dns_rdatasetadditional_fromauth:
8452 acarray = header->additional_auth;
8454 case dns_rdatasetadditional_fromglue:
8455 acarray = header->additional_glue;
/* First use for this header and type: allocate one slot per rdata. */
8461 if (acarray == NULL) {
8464 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8465 sizeof(acachectl_t));
8467 if (acarray == NULL) {
8468 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8472 for (i = 0; i < total_count; i++) {
8473 acarray[i].entry = NULL;
8474 acarray[i].cbarg = NULL;
8478 case dns_rdatasetadditional_fromauth:
8479 header->additional_auth = acarray;
8481 case dns_rdatasetadditional_fromglue:
8482 header->additional_glue = acarray;
8488 if (acarray[count].entry != NULL) {
8490 * Swap the entry. Delay cleaning-up the old entry since
8491 * it would require a node lock.
8493 oldentry = acarray[count].entry;
8494 INSIST(acarray[count].cbarg != NULL);
8495 oldcbarg = acarray[count].cbarg;
8497 acarray[count].entry = newentry;
8498 acarray[count].cbarg = newcbarg;
8500 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8502 if (oldentry != NULL) {
8503 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8504 dns_acache_detachentry(&oldentry);
8507 return (ISC_R_SUCCESS);
/* Failure clean-up: undo whatever was set up for the new entry. */
8510 if (newcbarg != NULL) {
8511 if (newentry != NULL) {
8512 acache_cancelentry(rbtdb->common.mctx, newentry,
8514 dns_acache_detachentry(&newentry);
8516 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8517 dns_db_detach(&newcbarg->db);
8518 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Remove the additional-cache entry attached to the current rdata of
 * 'rdataset'.
 *
 * Nothing is done (ISC_R_SUCCESS) for the fromcache type.  Otherwise the
 * slot is located as in rdataset_getadditional(); under the node write
 * lock the slot's entry and callback argument are taken out and the slot
 * is cleared.  ISC_R_NOTFOUND is returned when there is no array or the
 * slot is empty.  The entry is cancelled and detached after the node lock
 * has been released, and ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the switch header and default handling are not shown in
 * this listing.
 */
8527 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8528 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8530 dns_rbtdb_t *rbtdb = rdataset->private1;
8531 dns_rbtnode_t *rbtnode = rdataset->private2;
8532 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8533 unsigned int current_count = rdataset->privateuint4;
8534 rdatasetheader_t *header;
8535 nodelock_t *nodelock;
8536 unsigned int total_count, count;
8537 acachectl_t *acarray;
8538 dns_acacheentry_t *entry;
8539 acache_cbarg_t *cbarg;
8541 UNUSED(qtype); /* we do not use this value at least for now */
8544 if (type == dns_rdatasetadditional_fromcache)
8545 return (ISC_R_SUCCESS);
8547 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* Big-endian 16-bit rdata count at the start of the slab. */
8549 total_count = raw[0] * 256 + raw[1];
8550 INSIST(total_count > current_count);
8551 count = total_count - current_count - 1;
8556 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8557 NODE_LOCK(nodelock, isc_rwlocktype_write);
8560 case dns_rdatasetadditional_fromauth:
8561 acarray = header->additional_auth;
8563 case dns_rdatasetadditional_fromglue:
8564 acarray = header->additional_glue;
8570 if (acarray == NULL) {
8571 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8572 return (ISC_R_NOTFOUND);
8575 entry = acarray[count].entry;
8576 if (entry == NULL) {
8577 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8578 return (ISC_R_NOTFOUND);
/* Detach the slot contents under the lock; clean them up after unlock. */
8581 acarray[count].entry = NULL;
8582 cbarg = acarray[count].cbarg;
8583 acarray[count].cbarg = NULL;
8585 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8587 if (entry != NULL) {
8589 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8590 dns_acache_detachentry(&entry);
8593 return (ISC_R_SUCCESS);
8597 * Routines for LRU-based cache management.
8601 * See if a given cache entry that is being reused needs to be updated
8602 * in the LRU-list. From the LRU management point of view, this function is
8603 * expected to return true for almost all cases. When used with threads,
8604 * however, this may cause a non-negligible performance penalty because a
8605 * writer lock will have to be acquired before updating the list.
8606 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8607 * function returns true if the entry has not been updated for some period of
8608 * time. We differentiate the NS or glue address case and the others since
8609 * experiments have shown that the former tends to be accessed relatively
8610 * infrequently and the cost of a cache miss is higher (e.g., a missing NS record
8611 * may cause external queries at a higher level zone, involving more
8614 * Caller must hold the node (read or write) lock.
/*
 * Decide whether 'header' should have its LRU position refreshed at time
 * 'now'.
 *
 * Headers flagged RDATASET_ATTR_NONEXISTENT or RDATASET_ATTR_STALE are
 * tested first.  When DNS_RBTDB_LIMITLRUUPDATE is non-zero, NS headers
 * and glue-trust A/AAAA headers qualify once 60 seconds have passed since
 * last_used; all other headers qualify after 300 seconds.
 *
 * NOTE(review): the value returned for nonexistent/stale headers and the
 * branch compiled when DNS_RBTDB_LIMITLRUUPDATE is 0 are not shown in
 * this listing.
 */
8616 static inline isc_boolean_t
8617 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8618 if ((header->attributes &
8619 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8622 #if DNS_RBTDB_LIMITLRUUPDATE
8623 if (header->type == dns_rdatatype_ns ||
8624 (header->trust == dns_trust_glue &&
8625 (header->type == dns_rdatatype_a ||
8626 header->type == dns_rdatatype_aaaa))) {
8628 * Glue records are updated if at least 60 seconds have passed
8629 * since the previous update time.
8631 return (header->last_used + 60 <= now);
8634 /* Other records are updated if 5 minutes have passed. */
8635 return (header->last_used + 300 <= now);
8644 * Update the timestamp of a given cache entry and move it to the head
8645 * of the corresponding LRU list.
8647 * Caller must hold the node (write) lock.
8649 * Note that we do NOT touch the heap here, as the TTL has not changed.
/*
 * Refresh a cache header's LRU position: unlink it from the LRU list of
 * its node's lock bucket, stamp last_used with 'now', and put it back at
 * the head of the same list.
 *
 * Insists that the database is a cache and that the header is currently
 * linked on a list.
 */
8652 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8655 INSIST(IS_CACHE(rbtdb));
8657 /* To be checked: can we really assume this? XXXMLG */
8658 INSIST(ISC_LINK_LINKED(header, link));
8660 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8661 header->last_used = now;
8662 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8666 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8667 * under an overmem condition. To recover from this condition quickly, up to
8668 * 2 entries will be purged. This process is triggered while adding a new
8669 * entry, and we specifically avoid purging entries in the same LRU bucket as
8670 * the one to which the new entry will belong. Otherwise, we might purge
8671 * entries of the same name of different RR types while adding RRsets from a
8672 * single response (consider the case where we're adding A and AAAA glue records
8673 * of the same NS name).
/*
 * Expire cache headers from lock buckets other than 'locknum_start'.
 *
 * Buckets are visited in order starting with the one after
 * 'locknum_start', wrapping around, until the walk returns to
 * 'locknum_start' or purgecount runs out.  Each bucket is handled under
 * its node write lock: the header at the top of the bucket's heap is
 * expired if its TTL is no later than now - RBTDB_VIRTUAL, then headers
 * are taken from the tail of the bucket's LRU list, unlinked and expired.
 *
 * NOTE(review): the declaration and the updates of purgecount are not
 * shown in this listing.
 */
8676 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8677 isc_stdtime_t now, isc_boolean_t tree_locked)
8679 rdatasetheader_t *header, *header_prev;
8680 unsigned int locknum;
8683 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8684 locknum != locknum_start && purgecount > 0;
8685 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8686 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8687 isc_rwlocktype_write);
/* Heap element 1 is the top of this bucket's heap. */
8689 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8690 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8691 expire_header(rbtdb, header, tree_locked);
/* Save the predecessor first: the current header is unlinked below. */
8695 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8696 header != NULL && purgecount > 0;
8697 header = header_prev) {
8698 header_prev = ISC_LIST_PREV(header, link);
8700 * Unlink the entry at this point to avoid checking it
8701 * again even if it's currently used someone else and
8702 * cannot be purged at this moment. This entry won't be
8703 * referenced any more (so unlinking is safe) since the
8704 * TTL was reset to 0.
8706 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8708 expire_header(rbtdb, header, tree_locked);
8712 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8713 isc_rwlocktype_write);
/*
 * Force a cache header to expire: set its TTL to 0, flag it
 * RDATASET_ATTR_STALE and mark its node dirty.  If the node's reference
 * count is currently 0, a reference is taken and immediately released
 * through decrement_reference() so the node can be cleaned up now;
 * 'tree_locked' tells that call whether the tree write lock is held.
 *
 * NOTE(review): this listing ends with the decrement_reference() call;
 * anything following it is not described here.
 */
8718 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8719 isc_boolean_t tree_locked)
8721 set_ttl(rbtdb, header, 0);
8722 header->attributes |= RDATASET_ATTR_STALE;
8723 header->node->dirty = 1;
8726 * Caller must hold the node (write) lock.
8729 if (dns_rbtnode_refcurrent(header->node) == 0) {
8731 * If no one else is using the node, we can clean it up now.
8732 * We first need to gain a new reference to the node to meet a
8733 * requirement of decrement_reference().
8735 new_reference(rbtdb, header->node);
8736 decrement_reference(rbtdb, header->node, 0,
8737 isc_rwlocktype_write,
8738 tree_locked ? isc_rwlocktype_write :
8739 isc_rwlocktype_none, ISC_FALSE);