2 * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.270.12.16.8.3 2010/02/26 00:24:39 marka Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
65 #include <dns/zonekey.h>
67 #ifdef DNS_RBTDB_VERSION64
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
80 * Note that "impmagic" is not the first four bytes of the struct, so
81 * ISC_MAGIC_VALID cannot be used.
83 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
84 (rbtdb)->common.impmagic == RBTDB_MAGIC)
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t rbtdb_serial_t;
89 * Make casting easier in symbolic debuggers by using different names
90 * for the 64 bit version.
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
96 typedef isc_uint32_t rbtdb_serial_t;
99 typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * An rbtdb_rdatatype_t packs two rdata types into one value:
 * RBTDB_RDATATYPE_BASE() extracts the low 16 bits and
 * RBTDB_RDATATYPE_EXT() the bits above them, each as a dns_rdatatype_t.
 */
101 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
102 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
/*
 * Build an rbtdb_rdatatype_t from a base type 'b' (low 16 bits) and an
 * extended type 'e' (high 16 bits).
 *
 * Both operands are converted to rbtdb_rdatatype_t *before* the shift and
 * the OR: a 16-bit 'e' would otherwise be promoted to (signed) int, and
 * shifting it left by 16 is undefined whenever bit 15 of 'e' is set.
 * 'b' is masked so that it can never spill into the half owned by 'e'.
 */
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((rbtdb_rdatatype_t)((((rbtdb_rdatatype_t)(e)) << 16) | \
			     (((rbtdb_rdatatype_t)(b)) & 0xFFFF)))
/*
 * Pre-built packed type values.  The SIG* constants use RRSIG as the base
 * type and put a second type (NSEC, NSEC3, NS, CNAME, DNAME) in the
 * extended half; NCACHEANY has base type 0 and extended type ANY.
 */
105 #define RBTDB_RDATATYPE_SIGNSEC \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
119 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is defined.
120 * Using rwlock is effective with regard to lookup performance only when
121 * it is implemented in an efficient way.
122 * Otherwise, it is generally wise to stick to the simple locking since rwlock
123 * would require more memory or can even make lookups slower due to its own
124 * overhead (when it internally calls mutex locks).
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
129 #define DNS_RBTDB_USERWLOCK 0
/*
 * Database-lock wrappers.  When DNS_RBTDB_USERWLOCK is non-zero they map
 * onto the isc_rwlock API and honour the lock type 't'.  The second group
 * maps the same names onto a plain mutex and ignores 't'.
 * NOTE(review): the #else/#endif separating the two groups are not
 * visible in this excerpt.
 */
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
138 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t) LOCK(l)
141 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
145 * Since node locking is sensitive to both performance and memory footprint,
146 * we need some trick here. If we have both high-performance rwlock and
147 * high performance and small-memory reference counters, we use rwlock for
148 * node lock and isc_refcount for node references. In this case, we don't have
149 * to protect the access to the counters by locks.
150 * Otherwise, we simply use ordinary mutex lock for node locking, and use
151 * simple integers as reference counters which is protected by the lock.
152 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153 * NODE_UNLOCK. In some other cases, however, we need to protect reference
154 * counters first and then protect other parts of a node as read-only data.
155 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156 * provided for these special cases. When we can use the efficient backend
157 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159 * section including the access to the reference counter.
160 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161 * section is also protected by NODE_STRONGLOCK().
/*
 * Node-lock wrappers, in two flavours.
 *
 * First group (ISC_RWLOCK_USEATOMIC and DNS_RBT_USEISCREFCOUNT both
 * defined): nodelock_t is an isc_rwlock_t, the STRONG variants expand to
 * nothing and the WEAK variants are the ordinary rwlock operations.
 *
 * Second group: nodelock_t is an isc_mutex_t, the lock type 't' is
 * ignored, NODE_TRYUPGRADE() always yields ISC_R_SUCCESS, the STRONG
 * variants take/release the mutex and the WEAK variants expand to nothing.
 *
 * NOTE(review): the #else/#endif separating the two groups are not
 * visible in this excerpt.
 */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
166 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t) RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
172 #define NODE_STRONGLOCK(l) ((void)0)
173 #define NODE_STRONGUNLOCK(l) ((void)0)
174 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
178 typedef isc_mutex_t nodelock_t;
180 #define NODE_INITLOCK(l) isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
182 #define NODE_LOCK(l, t) LOCK(l)
183 #define NODE_UNLOCK(l, t) UNLOCK(l)
184 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
186 #define NODE_STRONGLOCK(l) LOCK(l)
187 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t) ((void)0)
189 #define NODE_WEAKUNLOCK(l, t) ((void)0)
190 #define NODE_WEAKDOWNGRADE(l) ((void)0)
194 * Whether to rate-limit updating the LRU to avoid possible thread contention.
195 * Our performance measurement has shown the cost is marginal, so it's defined
196 * to be 0 by default either with or without threads.
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 * Allow clients with a virtual time of up to 5 minutes in the past to see
204 * records that would otherwise have expired.
206 #define RBTDB_VIRTUAL 300
212 dns_rdatatype_t type;
215 typedef struct acachectl acachectl_t;
217 typedef struct rdatasetheader {
219 * Locked by the owning node's lock.
221 rbtdb_serial_t serial;
223 rbtdb_rdatatype_t type;
224 isc_uint16_t attributes;
226 struct noqname *noqname;
227 struct noqname *closest;
229 * We don't use the LIST macros, because the LIST structure has
230 * both head and tail pointers, and is doubly linked.
233 struct rdatasetheader *next;
235 * If this is the top header for an rdataset, 'next' points
236 * to the top header for the next rdataset (i.e., the next type).
237 * Otherwise, it points up to the header whose down pointer points
241 struct rdatasetheader *down;
243 * Points to the header for the next older version of
249 * Monotonically increased every time this rdataset is bound so that
250 * it is used as the base of the starting point in DNS responses
251 * when the "cyclic" rrset-order is required. Since the ordering
252 * should not be so crucial, no lock is set for the counter for
253 * performance reasons.
256 acachectl_t *additional_auth;
257 acachectl_t *additional_glue;
260 isc_stdtime_t last_used;
261 ISC_LINK(struct rdatasetheader) link;
263 unsigned int heap_index;
265 * Used for TTL-based cache cleaning.
267 isc_stdtime_t resign;
270 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
/*
 * Bit flags for the 16-bit 'attributes' member of rdatasetheader_t.
 * Each value is a distinct single bit so the flags can be OR-ed together.
 */
273 #define RDATASET_ATTR_NONEXISTENT 0x0001
274 #define RDATASET_ATTR_STALE 0x0002
275 #define RDATASET_ATTR_IGNORE 0x0004
276 #define RDATASET_ATTR_RETAIN 0x0008
277 #define RDATASET_ATTR_NXDOMAIN 0x0010
278 #define RDATASET_ATTR_RESIGN 0x0020
279 #define RDATASET_ATTR_STATCOUNT 0x0040
280 #define RDATASET_ATTR_OPTOUT 0x0080
282 typedef struct acache_cbarg {
283 dns_rdatasetadditional_t type;
287 rdatasetheader_t *header;
291 dns_acacheentry_t *entry;
292 acache_cbarg_t *cbarg;
297 * When the cache will pre-expire data (due to memory low or other
298 * situations) before the rdataset's TTL has expired, it MUST
299 * respect the RETAIN bit and not expire the data until its TTL is
/*
 * Predicates on a header's 'attributes' bits.  Each takes a pointer to a
 * header and evaluates to non-zero when the named flag is set; EXISTS()
 * is the inverse of NONEXISTENT().  IGNORE is #undef'd first because a
 * platform header may already define a macro of that name.
 */
303 #undef IGNORE /* WIN32 winbase.h defines this. */
305 #define EXISTS(header) \
306 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
307 #define NONEXISTENT(header) \
308 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
309 #define IGNORE(header) \
310 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
311 #define RETAIN(header) \
312 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
313 #define NXDOMAIN(header) \
314 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
315 #define RESIGN(header) \
316 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
317 #define OPTOUT(header) \
318 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
320 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
323 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
324 * There is a tradeoff issue about configuring this value: if this is too
325 * small, it may cause heavier contention between threads; if this is too large,
326 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
327 * The default value should work well for most environments, but this can
328 * also be configurable at compilation time via the
329 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
330 * 1 due to the assumption of overmem_purge().
332 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
333 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
334 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
340 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
344 /* Protected in the refcount routines. */
345 isc_refcount_t references;
346 /* Locked by lock. */
347 isc_boolean_t exiting;
350 typedef struct rbtdb_changed {
351 dns_rbtnode_t * node;
353 ISC_LINK(struct rbtdb_changed) link;
356 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
364 typedef struct rbtdb_version {
366 rbtdb_serial_t serial;
368 * Protected in the refcount routines.
369 * XXXJT: should we change the lock policy based on the refcount
372 isc_refcount_t references;
373 /* Locked by database lock. */
374 isc_boolean_t writer;
375 isc_boolean_t commit_ok;
376 rbtdb_changedlist_t changed_list;
377 rdatasetheaderlist_t resigned_list;
378 ISC_LINK(struct rbtdb_version) link;
379 dns_db_secure_t secure;
380 isc_boolean_t havensec3;
381 /* NSEC3 parameters */
384 isc_uint16_t iterations;
385 isc_uint8_t salt_length;
386 unsigned char salt[DNS_NSEC3_SALTSIZE];
389 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
394 #if DNS_RBTDB_USERWLOCK
399 isc_rwlock_t tree_lock;
400 unsigned int node_lock_count;
401 rbtdb_nodelock_t * node_locks;
402 dns_rbtnode_t * origin_node;
403 dns_stats_t * rrsetstats; /* cache DB only */
404 /* Locked by lock. */
406 isc_refcount_t references;
407 unsigned int attributes;
408 rbtdb_serial_t current_serial;
409 rbtdb_serial_t least_serial;
410 rbtdb_serial_t next_serial;
411 rbtdb_version_t * current_version;
412 rbtdb_version_t * future_version;
413 rbtdb_versionlist_t open_versions;
414 isc_boolean_t overmem;
416 dns_dbnode_t *soanode;
417 dns_dbnode_t *nsnode;
420 * This is a linked list used to implement the LRU cache. There will
421 * be node_lock_count linked lists here. Nodes in bucket 1 will be
422 * placed on the linked list rdatasets[1].
424 rdatasetheaderlist_t *rdatasets;
427 * Temporary storage for stale cache nodes and dynamically deleted
428 * nodes that await being cleaned up.
430 rbtnodelist_t *deadnodes;
433 * Heaps. Each of these is used for TTL based expiry.
437 /* Locked by tree_lock. */
442 unsigned int quantum;
445 #define RBTDB_ATTR_LOADED 0x01
446 #define RBTDB_ATTR_LOADING 0x02
453 rbtdb_version_t * rbtversion;
454 rbtdb_serial_t serial;
455 unsigned int options;
456 dns_rbtnodechain_t chain;
457 isc_boolean_t copy_name;
458 isc_boolean_t need_cleanup;
460 dns_rbtnode_t * zonecut;
461 rdatasetheader_t * zonecut_rdataset;
462 rdatasetheader_t * zonecut_sigrdataset;
463 dns_fixedname_t zonecut_name;
475 static void rdataset_disassociate(dns_rdataset_t *rdataset);
476 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
477 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
478 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
479 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
480 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
481 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
484 dns_rdataset_t *negsig);
485 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
488 dns_rdataset_t *negsig);
489 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
490 dns_rdatasetadditional_t type,
491 dns_rdatatype_t qtype,
492 dns_acache_t *acache,
495 dns_dbversion_t **versionp,
496 dns_dbnode_t **nodep,
500 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
501 dns_rdatasetadditional_t type,
502 dns_rdatatype_t qtype,
503 dns_acache_t *acache,
506 dns_dbversion_t *version,
509 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
510 dns_rdataset_t *rdataset,
511 dns_rdatasetadditional_t type,
512 dns_rdatatype_t qtype);
513 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
515 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
517 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
518 isc_boolean_t tree_locked);
519 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
520 isc_stdtime_t now, isc_boolean_t tree_locked);
521 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
522 rdatasetheader_t *newheader);
523 static void prune_tree(isc_task_t *task, isc_event_t *event);
524 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
525 static void rdataset_expire(dns_rdataset_t *rdataset);
527 static dns_rdatasetmethods_t rdataset_methods = {
528 rdataset_disassociate,
538 rdataset_getadditional,
539 rdataset_setadditional,
540 rdataset_putadditional,
545 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
546 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
547 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
548 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
549 dns_rdataset_t *rdataset);
551 static dns_rdatasetitermethods_t rdatasetiter_methods = {
552 rdatasetiter_destroy,
558 typedef struct rbtdb_rdatasetiter {
559 dns_rdatasetiter_t common;
560 rdatasetheader_t * current;
561 } rbtdb_rdatasetiter_t;
563 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
564 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
565 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
566 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
568 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
569 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
570 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
571 dns_dbnode_t **nodep,
573 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
574 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
577 static dns_dbiteratormethods_t dbiterator_methods = {
589 #define DELETION_BATCH_MAX 64
592 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
594 typedef struct rbtdb_dbiterator {
595 dns_dbiterator_t common;
596 isc_boolean_t paused;
597 isc_boolean_t new_origin;
598 isc_rwlocktype_t tree_locked;
600 dns_fixedname_t name;
601 dns_fixedname_t origin;
602 dns_rbtnodechain_t chain;
603 dns_rbtnodechain_t nsec3chain;
604 dns_rbtnodechain_t *current;
606 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
608 isc_boolean_t nsec3only;
609 isc_boolean_t nonsec3;
610 } rbtdb_dbiterator_t;
/*
 * Database-kind tests: non-zero when the DNS_DBATTR_STUB / DNS_DBATTR_CACHE
 * bit is set in rbtdb->common.attributes.
 */
613 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
614 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
616 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
618 static void overmem(dns_db_t *db, isc_boolean_t overmem);
619 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
620 isc_boolean_t *nsec3createflag);
623 * 'init_count' is used to initialize 'newheader->count' which in turn
624 * is used to determine where in the cycle rrset-order cyclic starts.
625 * We don't lock this as we don't care about simultaneous updates.
628 * Both init_count and header->count can be ISC_UINT32_MAX.
629 * The count on the returned rdataset however can't be as
630 * that indicates that the database does not implement cyclic
633 static unsigned int init_count;
638 * If a routine is going to lock more than one lock in this module, then
639 * the locking must be done in the following order:
643 * Node Lock (Only one from the set may be locked at one time by
648 * Failure to follow this hierarchy can result in deadlock.
654 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * attach: take an additional reference on the database 'source'.
 *
 * The visible code validates the rbtdb magic and increments
 * rbtdb->references.
 * NOTE(review): the return-type line, the store through 'targetp' and
 * the closing brace are not visible in this excerpt.
 */
663 attach(dns_db_t *source, dns_db_t **targetp) {
664 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
666 REQUIRE(VALID_RBTDB(rbtdb));
668 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * free_rbtdb_callback: task event handler that calls free_rbtdb() for the
 * database stored in event->ev_arg, with logging enabled, handing the
 * event itself on to free_rbtdb().
 * NOTE(review): 'task' is not referenced in the visible lines.
 */
674 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
675 dns_rbtdb_t *rbtdb = event->ev_arg;
679 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * update_rrsetstats: adjust the rdataset statistics counter that
 * corresponds to 'header' in rbtdb->rrsetstats.
 *
 * The counter key is derived from the header as follows:
 *   - NXDOMAIN headers: NXDOMAIN attribute, base type left at 0;
 *   - headers whose base type is 0: NXRRSET attribute, keyed by the
 *     extended half of the packed type;
 *   - everything else: keyed by the base half of the packed type.
 *
 * Only valid for cache databases (enforced by INSIST).
 * NOTE(review): the test selecting between the increment and decrement
 * calls is not visible in this excerpt; presumably it is 'increment'.
 */
683 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
684 isc_boolean_t increment)
686 dns_rdatastatstype_t statattributes = 0;
687 dns_rdatastatstype_t base = 0;
688 dns_rdatastatstype_t type;
690 /* At the moment we count statistics only for cache DB */
691 INSIST(IS_CACHE(rbtdb));
693 if (NXDOMAIN(header))
694 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
695 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
696 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
697 base = RBTDB_RDATATYPE_EXT(header->type);
699 base = RBTDB_RDATATYPE_BASE(header->type);
701 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
703 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
705 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * set_ttl: store 'newttl' in header->rdh_ttl and, for a cache database,
 * tell the TTL heap of the header's lock bucket (header->node->locknum)
 * that the element's priority changed.
 *
 * NOTE(review): the statements guarded by the three 'if' tests below and
 * the condition choosing between isc_heap_increased() and
 * isc_heap_decreased() are not visible in this excerpt; the tests are
 * presumably early returns.
 */
709 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
/* Remember the previous TTL so a no-op change can be detected below. */
714 oldttl = header->rdh_ttl;
715 header->rdh_ttl = newttl;
717 if (!IS_CACHE(rbtdb))
721 * It's possible the rbtdb is not a cache. If this is the case,
722 * we will not have a heap, and we move on. If we do, though,
723 * we might need to adjust things.
/* heap_index 0 is treated elsewhere in this file as "not in a heap". */
725 if (header->heap_index == 0 || newttl == oldttl)
727 idx = header->node->locknum;
728 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
730 heap = rbtdb->heaps[idx];
733 isc_heap_increased(heap, header->heap_index);
735 isc_heap_decreased(heap, header->heap_index);
739 * These functions allow the heap code to rank the priority of each
740 * element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * ttl_sooner: heap ordering callback; 'v1' and 'v2' are rdatasetheader_t
 * pointers compared by their rdh_ttl members.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
743 ttl_sooner(void *v1, void *v2) {
744 rdatasetheader_t *h1 = v1;
745 rdatasetheader_t *h2 = v2;
747 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * resign_sooner: heap ordering callback; 'v1' and 'v2' are
 * rdatasetheader_t pointers compared by their 'resign' members.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
753 resign_sooner(void *v1, void *v2) {
754 rdatasetheader_t *h1 = v1;
755 rdatasetheader_t *h2 = v2;
757 if (h1->resign < h2->resign)
763 * This function sets the heap index into the header.
/*
 * set_index: record 'index' in the heap_index member of the
 * rdatasetheader_t that 'what' points to.
 */
766 set_index(void *what, unsigned int index) {
767 rdatasetheader_t *h = what;
769 h->heap_index = index;
773 * Work out how many nodes can be deleted in the time between two
774 * requests to the nameserver. Smooth the resulting number and use it
775 * as an estimate for the number of nodes to be deleted in the next
/*
 * adjust_quantum: derive a new deletion quantum from the previous one
 * ('old') and the time elapsed since 'start'.
 *
 * Visible steps: the per-packet interval is 1000000 / dns_pps usec; the
 * elapsed time is measured with isc_time_microdiff(); the raw estimate
 * old * interval / usecs is then blended with the old value as
 * (new + 3 * old) / 4 and logged at debug level 1.
 *
 * NOTE(review): several lines are missing from this excerpt (the
 * declarations of 'new', 'usecs' and 'end', any guard on 'pps', the
 * zero-elapsed-time branch, any clamping and the return).
 * NOTE(review): the log format uses %d for 'new'; if 'new' is declared
 * unsigned (its declaration is not visible) %u would be the matching
 * conversion.
 */
779 adjust_quantum(unsigned int old, isc_time_t *start) {
780 unsigned int pps = dns_pps; /* packets per second */
781 unsigned int interval;
790 interval = 1000000 / pps; /* interval in usec */
793 usecs = isc_time_microdiff(&end, start);
796 * We were unable to measure the amount of time taken.
797 * Double the nodes deleted next time.
804 new = old * interval;
805 new /= (unsigned int)usecs;
/* Smooth: weight the previous quantum three times the new estimate. */
812 new = (new + old * 3) / 4;
814 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
815 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * free_rbtdb: tear down the database 'rbtdb' and release its memory.
 *
 * Order of work as far as it is visible here:
 *   1. drop the reference held on current_version and free it;
 *   2. unlink every node still on the per-bucket dead-node lists;
 *   3. destroy the main tree and then the NSEC3 tree with
 *      dns_rbt_destroy2(); on ISC_R_QUOTA a DNS_EVENT_FREESTORAGE event
 *      is sent to rbtdb->task;
 *   4. free the event, log, and release the origin name, node locks,
 *      LRU lists, dead-node lists, heaps, statistics, locks and task;
 *   5. clear the magic numbers, return the structure to its memory
 *      context and run the ondestroy notification.
 *
 * 'event' is an event that is either re-sent or freed by this function.
 * NOTE(review): many lines are missing from this excerpt (declarations,
 * the uses of 'log', the returns after isc_task_send(), closing braces);
 * the comments below describe only what is visible.
 */
821 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
823 isc_ondestroy_t ondest;
825 char buf[DNS_NAME_FORMATSIZE];
/* NOTE(review): the meaning of the -1 argument is defined by overmem(), which is outside this excerpt. */
828 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
829 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
/* Preconditions: no open versions without a current one, and no writer version outstanding. */
831 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
832 REQUIRE(rbtdb->future_version == NULL);
834 if (rbtdb->current_version != NULL) {
837 isc_refcount_decrement(&rbtdb->current_version->references,
840 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
841 isc_refcount_destroy(&rbtdb->current_version->references);
842 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
843 sizeof(rbtdb_version_t));
847 * We assume the number of remaining dead nodes is reasonably small;
848 * the overhead of unlinking all nodes here should be negligible.
850 for (i = 0; i < rbtdb->node_lock_count; i++) {
853 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
854 while (node != NULL) {
855 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
856 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* Quantum handed to dns_rbt_destroy2(): 100 when a task exists, otherwise 0. */
861 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
863 if (rbtdb->tree != NULL) {
864 isc_time_now(&start);
865 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
/* ISC_R_QUOTA: a task must exist; the quantum is re-tuned and an event is sent to that task. */
866 if (result == ISC_R_QUOTA) {
867 INSIST(rbtdb->task != NULL);
868 if (rbtdb->quantum != 0)
869 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
872 event = isc_event_allocate(rbtdb->common.mctx,
874 DNS_EVENT_FREESTORAGE,
877 sizeof(isc_event_t));
880 isc_task_send(rbtdb->task, &event);
883 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
/* Same procedure for the NSEC3 tree. */
886 if (rbtdb->nsec3 != NULL) {
887 isc_time_now(&start);
888 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
889 if (result == ISC_R_QUOTA) {
890 INSIST(rbtdb->task != NULL);
891 if (rbtdb->quantum != 0)
892 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
895 event = isc_event_allocate(rbtdb->common.mctx,
897 DNS_EVENT_FREESTORAGE,
900 sizeof(isc_event_t));
903 isc_task_send(rbtdb->task, &event);
906 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
910 isc_event_free(&event);
/* The origin is formatted for the log message only when it owns dynamic storage. */
912 if (dns_name_dynamic(&rbtdb->common.origin))
913 dns_name_format(&rbtdb->common.origin, buf,
916 strcpy(buf, "<UNKNOWN>");
917 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
918 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
919 "done free_rbtdb(%s)", buf);
921 if (dns_name_dynamic(&rbtdb->common.origin))
922 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
923 for (i = 0; i < rbtdb->node_lock_count; i++) {
924 isc_refcount_destroy(&rbtdb->node_locks[i].references);
925 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
929 * Clean up LRU / re-signing order lists.
931 if (rbtdb->rdatasets != NULL) {
932 for (i = 0; i < rbtdb->node_lock_count; i++)
933 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
934 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
935 rbtdb->node_lock_count *
936 sizeof(rdatasetheaderlist_t));
939 * Clean up dead node buckets.
941 if (rbtdb->deadnodes != NULL) {
942 for (i = 0; i < rbtdb->node_lock_count; i++)
943 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
944 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
945 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
948 * Clean up heap objects.
950 if (rbtdb->heaps != NULL) {
951 for (i = 0; i < rbtdb->node_lock_count; i++)
952 isc_heap_destroy(&rbtdb->heaps[i]);
953 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
954 rbtdb->node_lock_count *
955 sizeof(isc_heap_t *));
958 if (rbtdb->rrsetstats != NULL)
959 dns_stats_detach(&rbtdb->rrsetstats);
961 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
962 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
963 isc_rwlock_destroy(&rbtdb->tree_lock);
964 isc_refcount_destroy(&rbtdb->references);
965 if (rbtdb->task != NULL)
966 isc_task_detach(&rbtdb->task);
968 RBTDB_DESTROYLOCK(&rbtdb->lock);
/* Invalidate both magic numbers so later VALID_RBTDB() checks on this memory fail. */
969 rbtdb->common.magic = 0;
970 rbtdb->common.impmagic = 0;
/* Copy the ondestroy record out before the structure that contains it is released. */
971 ondest = rbtdb->common.ondest;
972 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
973 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * maybe_free_rbtdb: free the database if no node-lock bucket is still
 * in use.
 *
 * Visible steps: detach the cached soanode/nsnode references; mark every
 * node-lock bucket as exiting (under that bucket's lock) and inspect its
 * reference count; subtract the number of inactive buckets from
 * rbtdb->active under the database lock; when 'active' reaches 0, log
 * and call free_rbtdb().
 *
 * NOTE(review): the comparison applied to each bucket's reference count,
 * the increment of 'inactive' and the 'if (want_free)' test around the
 * final block are not visible in this excerpt.
 */
977 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
978 isc_boolean_t want_free = ISC_FALSE;
980 unsigned int inactive = 0;
982 /* XXX check for open versions here */
984 if (rbtdb->soanode != NULL)
985 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
986 if (rbtdb->nsnode != NULL)
987 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
990 * Even though there are no external direct references, there still
991 * may be nodes in use.
993 for (i = 0; i < rbtdb->node_lock_count; i++) {
994 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
995 rbtdb->node_locks[i].exiting = ISC_TRUE;
996 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
997 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1003 if (inactive != 0) {
1004 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1005 rbtdb->active -= inactive;
1006 if (rbtdb->active == 0)
1007 want_free = ISC_TRUE;
1008 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1010 char buf[DNS_NAME_FORMATSIZE];
1011 if (dns_name_dynamic(&rbtdb->common.origin))
1012 dns_name_format(&rbtdb->common.origin, buf,
1015 strcpy(buf, "<UNKNOWN>");
1016 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1017 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1018 "calling free_rbtdb(%s)", buf);
1019 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * detach: drop one reference on the database that '*dbp' points to.
 *
 * The visible code validates the rbtdb magic, decrements
 * rbtdb->references (the new count is stored in 'refs') and calls
 * maybe_free_rbtdb().
 * NOTE(review): the test on 'refs' guarding maybe_free_rbtdb() and any
 * reset of '*dbp' are not visible in this excerpt.
 */
1025 detach(dns_db_t **dbp) {
1026 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1029 REQUIRE(VALID_RBTDB(rbtdb));
1031 isc_refcount_decrement(&rbtdb->references, &refs);
1034 maybe_free_rbtdb(rbtdb);
/*
 * currentversion: return the database's current version in '*versionp'
 * with an extra reference taken on it.
 *
 * The version pointer is read and its reference count incremented while
 * the database lock is held for reading, so the two steps are done under
 * one lock acquisition.
 */
1040 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1041 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1042 rbtdb_version_t *version;
1045 REQUIRE(VALID_RBTDB(rbtdb));
1047 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1048 version = rbtdb->current_version;
1049 isc_refcount_increment(&version->references, &refs);
1050 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1052 *versionp = (dns_dbversion_t *)version;
/*
 * allocate_version: allocate and initialise a version object from 'mctx'.
 *
 * The new version gets the given 'serial', a reference count initialised
 * to 'references', the given 'writer' flag, commit_ok == ISC_FALSE, and
 * empty changed/resigned lists with an unlinked 'link'.  If initialising
 * the reference count fails, the allocation is returned to 'mctx'.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * the caller in this file tests the result against NULL.
 */
1055 static inline rbtdb_version_t *
1056 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1057 unsigned int references, isc_boolean_t writer)
1059 isc_result_t result;
1060 rbtdb_version_t *version;
1062 version = isc_mem_get(mctx, sizeof(*version));
1063 if (version == NULL)
1065 version->serial = serial;
1066 result = isc_refcount_init(&version->references, references);
1067 if (result != ISC_R_SUCCESS) {
1068 isc_mem_put(mctx, version, sizeof(*version));
1071 version->writer = writer;
1072 version->commit_ok = ISC_FALSE;
1073 ISC_LIST_INIT(version->changed_list);
1074 ISC_LIST_INIT(version->resigned_list);
1075 ISC_LINK_INIT(version, link);
/*
 * newversion: open a new version of the database and return it in
 * '*versionp'.
 *
 * Requires that '*versionp' is NULL and that no future version is
 * already open.  Under the database write lock a version with serial
 * rbtdb->next_serial and one reference is allocated, marked commit_ok,
 * and given the 'secure' and 'havensec3' state of the current version.
 * When havensec3 is set, the NSEC3 parameters (flags, iterations, hash,
 * salt) are copied as well; the remaining visible assignments clear
 * iterations and the salt.  next_serial is then incremented and the
 * version recorded as rbtdb->future_version.
 *
 * Returns ISC_R_NOMEMORY if the allocation failed, ISC_R_SUCCESS
 * otherwise.
 * NOTE(review): the 'else' separating the copy and clear branches and
 * the last argument of allocate_version() are not visible here.
 */
1081 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1082 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1083 rbtdb_version_t *version;
1085 REQUIRE(VALID_RBTDB(rbtdb));
1086 REQUIRE(versionp != NULL && *versionp == NULL);
1087 REQUIRE(rbtdb->future_version == NULL);
1089 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1090 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1091 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1093 if (version != NULL) {
1094 version->commit_ok = ISC_TRUE;
1095 version->secure = rbtdb->current_version->secure;
1096 version->havensec3 = rbtdb->current_version->havensec3;
1097 if (version->havensec3) {
1098 version->flags = rbtdb->current_version->flags;
1099 version->iterations =
1100 rbtdb->current_version->iterations;
1101 version->hash = rbtdb->current_version->hash;
1102 version->salt_length =
1103 rbtdb->current_version->salt_length;
/* Only salt_length bytes are copied; salt_length was just taken from the current version. */
1104 memcpy(version->salt, rbtdb->current_version->salt,
1105 version->salt_length);
1108 version->iterations = 0;
1110 version->salt_length = 0;
1111 memset(version->salt, 0, sizeof(version->salt));
1113 rbtdb->next_serial++;
1114 rbtdb->future_version = version;
1116 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1118 if (version == NULL)
1119 return (ISC_R_NOMEMORY);
1121 *versionp = version;
1123 return (ISC_R_SUCCESS);
/*
 * attachversion: take an additional reference on the version 'source'
 * and return it through '*targetp'.  'db' is only used to validate the
 * rbtdb magic in the visible code.
 */
1127 attachversion(dns_db_t *db, dns_dbversion_t *source,
1128 dns_dbversion_t **targetp)
1130 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1131 rbtdb_version_t *rbtversion = source;
1134 REQUIRE(VALID_RBTDB(rbtdb));
1136 isc_refcount_increment(&rbtversion->references, &refs);
1139 *targetp = rbtversion;
/*
 * add_changed: record that 'node' was modified in the writer version
 * 'version'.
 *
 * A change record is allocated before the database write lock is taken.
 * Under the lock, and provided the allocation succeeded, the node's
 * reference count is incremented and the record (dirty == ISC_FALSE) is
 * appended to version->changed_list.  'version' must be a writer.
 *
 * NOTE(review): the line between the append and the assignment
 * 'commit_ok = ISC_FALSE' is missing; the assignment presumably belongs
 * to the allocation-failure branch.  The return statement is not visible.
 */
1142 static rbtdb_changed_t *
1143 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1144 dns_rbtnode_t *node)
1146 rbtdb_changed_t *changed;
1150 * Caller must be holding the node lock if its reference must be
1151 * protected by the lock.
1154 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1156 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1158 REQUIRE(version->writer);
1160 if (changed != NULL) {
1161 dns_rbtnode_refincrement(node, &refs);
1163 changed->node = node;
1164 changed->dirty = ISC_FALSE;
1165 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1167 version->commit_ok = ISC_FALSE;
1169 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * free_acachearray: release the additional-cache control array 'array'
 * that belongs to 'header'.
 *
 * The element count is read from the two bytes that immediately follow
 * the header (most significant byte first).  Every element is asserted
 * to have NULL 'entry' and 'cbarg' before count * sizeof(acachectl_t)
 * bytes are returned to 'mctx'.
 *
 * NOTE(review): the remaining parameter(s) and any early exit for a NULL
 * array are not visible in this excerpt.
 */
1175 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1180 unsigned char *raw; /* RDATASLAB */
1183 * The caller must be holding the corresponding node lock.
1189 raw = (unsigned char *)header + sizeof(*header);
1190 count = raw[0] * 256 + raw[1];
1193 * Sanity check: since an additional cache entry has a reference to
1194 * the original DB node (in the callback arg), there should be no
1195 * acache entries when the node can be freed.
1197 for (i = 0; i < count; i++)
1198 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1200 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * free_noqname: release the noqname structure that '*noqname' points to.
 *
 * Frees the name if it owns dynamic storage, then the 'neg' and 'negsig'
 * slabs when present (their sizes come from dns_rdataslab_size(..., 0)),
 * and finally the structure itself.
 * NOTE(review): any reset of '*noqname' to NULL is not visible in this
 * excerpt.
 */
1204 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1206 if (dns_name_dynamic(&(*noqname)->name))
1207 dns_name_free(&(*noqname)->name, mctx);
1208 if ((*noqname)->neg != NULL)
1209 isc_mem_put(mctx, (*noqname)->neg,
1210 dns_rdataslab_size((*noqname)->neg, 0));
1211 if ((*noqname)->negsig != NULL)
1212 isc_mem_put(mctx, (*noqname)->negsig,
1213 dns_rdataslab_size((*noqname)->negsig, 0));
1214 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * init_rdataset: initialise the list link of header 'h'.
 *
 * The fprintf() below prints the header address for IN-class cache
 * databases.
 * NOTE(review): lines are missing around the fprintf(); it is presumably
 * compiled only in a tracing build, and further initialisation may exist.
 */
1219 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1221 ISC_LINK_INIT(h, link);
1225 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1226 fprintf(stderr, "initialized header: %p\n", h);
/*
 * new_rdataset: allocate an rdatasetheader_t from 'mctx' and initialise
 * it with init_rdataset().
 *
 * NOTE(review): the allocation-failure check, the guard around the
 * tracing fprintf() and the return statement are not visible in this
 * excerpt.
 */
1232 static inline rdatasetheader_t *
1233 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1235 rdatasetheader_t *h;
1237 h = isc_mem_get(mctx, sizeof(*h));
1242 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1243 fprintf(stderr, "allocated header: %p\n", h);
1245 init_rdataset(rbtdb, h);
/*
 * free_rdataset: detach the header 'rdataset' from every per-database
 * structure that refers to it and release its memory.
 *
 * Visible steps: decrement the rrset statistics if the header exists and
 * was counted (STATCOUNT); unlink it from the LRU list of its lock
 * bucket; delete it from that bucket's heap when heap_index is non-zero;
 * free the noqname/closest proofs and both additional-cache arrays; then
 * return the memory to 'mctx'.  A NONEXISTENT header occupies only
 * sizeof(*rdataset); otherwise the size comes from dns_rdataslab_size().
 *
 * NOTE(review): local declarations, the 'else' before the slab-size
 * computation and its second argument are not visible in this excerpt.
 */
1250 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1255 if (EXISTS(rdataset) &&
1256 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1257 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1260 idx = rdataset->node->locknum;
1261 if (ISC_LINK_LINKED(rdataset, link)) {
1262 INSIST(IS_CACHE(rbtdb));
1263 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1265 if (rdataset->heap_index != 0)
1266 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1267 rdataset->heap_index = 0;
1269 if (rdataset->noqname != NULL)
1270 free_noqname(mctx, &rdataset->noqname);
1271 if (rdataset->closest != NULL)
1272 free_noqname(mctx, &rdataset->closest);
1274 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1275 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1277 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1278 size = sizeof(*rdataset);
1280 size = dns_rdataslab_size((unsigned char *)rdataset,
1282 isc_mem_put(mctx, rdataset, size);
/*
 * rollback_node: flag every rdataset header of a node that carries the
 * given serial number so that it is ignored from now on.
 *
 * node   - node whose headers are scanned: the top-level list linked by
 *          next, and for each of those the chain linked by down.
 * serial - serial number to match.
 *
 * Matching headers get RDATASET_ATTR_IGNORE and make_dirty is set.
 * NOTE(review): what is done with make_dirty after the loops is not
 * visible in this span - presumably the node is marked dirty; confirm.
 */
1286 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1287 rdatasetheader_t *header, *dcurrent;
1288 isc_boolean_t make_dirty = ISC_FALSE;
1291 * Caller must hold the node lock.
1295 * We set the IGNORE attribute on rdatasets with serial number
1296 * 'serial'. When the reference count goes to zero, these rdatasets
1297 * will be cleaned up; until that time, they will be ignored.
1299 for (header = node->data; header != NULL; header = header->next) {
1300 if (header->serial == serial) {
1301 header->attributes |= RDATASET_ATTR_IGNORE;
1302 make_dirty = ISC_TRUE;
/* The same test is applied to every header further down the chain. */
1304 for (dcurrent = header->down;
1306 dcurrent = dcurrent->down) {
1307 if (dcurrent->serial == serial) {
1308 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1309 make_dirty = ISC_TRUE;
/*
 * clean_stale_headers: free every header on the down chain below top.
 *
 * rbtdb - owning database, passed to free_rdataset().
 * mctx  - memory context, passed to free_rdataset().
 * top   - top-level header; it is not freed here, only what hangs below
 *         it via down.
 *
 * NOTE(review): whether top->down is reset after the loop is not visible
 * in this span - confirm.
 */
1318 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1320 rdatasetheader_t *d, *down_next;
1322 for (d = top->down; d != NULL; d = down_next) {
/* Read the successor before d is released. */
1323 down_next = d->down;
1324 free_rdataset(rbtdb, mctx, d);
/*
 * clean_cache_node: reclaim obsolete rdataset headers of a cache node.
 *
 * rbtdb - owning database; its common.mctx is used for the releases.
 * node  - node whose top-level header list (node->data) is walked.
 *
 * For every top-level header the down chain is freed through
 * clean_stale_headers(). A top-level header marked NONEXISTENT or STALE
 * is then unlinked from the list (through top_prev->next, or node->data
 * when it is the first entry) and freed.
 * NOTE(review): the initialization and update of top_prev, and any
 * change of the dirty flag of the node, are not visible in this span -
 * confirm.
 */
1330 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1331 rdatasetheader_t *current, *top_prev, *top_next;
1332 isc_mem_t *mctx = rbtdb->common.mctx;
1335 * Caller must be holding the node lock.
1339 for (current = node->data; current != NULL; current = top_next) {
/* Remember the successor first; current may be freed below. */
1340 top_next = current->next;
1341 clean_stale_headers(rbtdb, mctx, current);
1343 * If current is nonexistent or stale, we can clean it up.
1345 if ((current->attributes &
1346 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1347 if (top_prev != NULL)
1348 top_prev->next = current->next;
1350 node->data = current->next;
1351 free_rdataset(rbtdb, mctx, current);
/*
 * clean_zone_node: reclaim rdataset versions of a zone node that no open
 * version can still need.
 *
 * rbtdb        - owning database; its common.mctx is used for releases.
 * node         - node whose header lists are cleaned.
 * least_serial - serial of the least open version; must not be zero.
 *
 * For each top-level header (current) the visible code does this:
 *  1. Walks the down chain and frees entries whose serial equals that of
 *     the entry above them (a second condition, per the existing comment
 *     the IGNORE attribute, sits on a line not visible here), relinking
 *     dparent->down around the freed entry.
 *  2. If current itself is marked IGNORE, either unlinks and frees it
 *     (empty down chain) or promotes current->down to the top-level
 *     list in its place.
 *  3. Looks for the first down entry with a serial below least_serial
 *     and frees that entry and everything below it, ending the chain at
 *     dparent.
 *  4. Sets still_dirty when a down chain remains, and unlinks and frees
 *     a current that is NONEXISTENT.
 * NOTE(review): the maintenance of top_prev and dparent, the loop exits,
 * the exact nesting of step 4 and the final use of still_dirty are on
 * lines not visible in this span - confirm against the full body.
 */
1359 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1360 rbtdb_serial_t least_serial)
1362 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1363 rdatasetheader_t *top_prev, *top_next;
1364 isc_mem_t *mctx = rbtdb->common.mctx;
1365 isc_boolean_t still_dirty = ISC_FALSE;
1368 * Caller must be holding the node lock.
1370 REQUIRE(least_serial != 0);
1373 for (current = node->data; current != NULL; current = top_next) {
/* Remember the successor first; current may be freed or replaced. */
1374 top_next = current->next;
1377 * First, we clean up any instances of multiple rdatasets
1378 * with the same serial number, or that have the IGNORE
1382 for (dcurrent = current->down;
1384 dcurrent = down_next) {
1385 down_next = dcurrent->down;
/* Serials never increase when moving down the chain. */
1386 INSIST(dcurrent->serial <= dparent->serial);
1387 if (dcurrent->serial == dparent->serial ||
1389 if (down_next != NULL)
1390 down_next->next = dparent;
1391 dparent->down = down_next;
1392 free_rdataset(rbtdb, mctx, dcurrent);
1398 * We've now eliminated all IGNORE datasets with the possible
1399 * exception of current, which we now check.
1401 if (IGNORE(current)) {
1402 down_next = current->down;
1403 if (down_next == NULL) {
1404 if (top_prev != NULL)
1405 top_prev->next = current->next;
1407 node->data = current->next;
1408 free_rdataset(rbtdb, mctx, current);
1410 * current no longer exists, so we can
1411 * just continue with the loop.
1416 * Pull up current->down, making it the new
1419 if (top_prev != NULL)
1420 top_prev->next = down_next;
1422 node->data = down_next;
1423 down_next->next = top_next;
1424 free_rdataset(rbtdb, mctx, current);
1425 current = down_next;
1430 * We now try to find the first down node less than the
1434 for (dcurrent = current->down;
1436 dcurrent = down_next) {
1437 down_next = dcurrent->down;
1438 if (dcurrent->serial < least_serial)
1444 * If there is a such an rdataset, delete it and any older
1447 if (dcurrent != NULL) {
1449 down_next = dcurrent->down;
1450 INSIST(dcurrent->serial <= least_serial);
1451 free_rdataset(rbtdb, mctx, dcurrent);
1452 dcurrent = down_next;
1453 } while (dcurrent != NULL);
1454 dparent->down = NULL;
1458 * Note. The serial number of 'current' might be less than
1459 * least_serial too, but we cannot delete it because it is
1460 * the most recent version, unless it is a NONEXISTENT
1463 if (current->down != NULL) {
1464 still_dirty = ISC_TRUE;
1468 * If this is a NONEXISTENT rdataset, we can delete it.
1470 if (NONEXISTENT(current)) {
1471 if (top_prev != NULL)
1472 top_prev->next = current->next;
1474 node->data = current->next;
1475 free_rdataset(rbtdb, mctx, current);
1485 * Clean up dead nodes. These are nodes which have no references, and
1486 * have no data. They are dead but we could not or chose not to delete
1487 * them when we deleted all the data at that node because we did not want
1488 * to wait for the tree write lock.
1490 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * cleanup_dead_nodes: remove a bounded batch of nodes from the dead-node
 * list of one lock bucket and delete them from their tree.
 *
 * rbtdb     - owning database.
 * bucketnum - index of the lock bucket whose deadnodes list is drained.
 *
 * The loop runs while the list is non-empty and count (initially 10) is
 * positive. Each node is unlinked, asserted to be unreferenced and
 * without data, and deleted with dns_rbt_deletenode() from either
 * rbtdb->nsec3 or rbtdb->tree. A failed deletion is only logged as a
 * warning.
 * NOTE(review): the condition selecting the nsec3 tree versus the main
 * tree and the decrement of count are on lines not visible in this
 * span - confirm.
 */
1493 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1494 dns_rbtnode_t *node;
1495 isc_result_t result;
1496 int count = 10; /* XXXJT: should be adjustable */
/* Always work on the current head of the per-bucket dead list. */
1498 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1499 while (node != NULL && count > 0) {
1500 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1503 * Since we're holding a tree write lock, it should be
1504 * impossible for this node to be referenced by others.
1506 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1507 node->data == NULL);
1509 INSIST(!ISC_LINK_LINKED(node, deadlink));
1511 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1514 result = dns_rbt_deletenode(rbtdb->tree, node,
1516 if (result != ISC_R_SUCCESS)
1517 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1518 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1519 "cleanup_dead_nodes: "
1520 "dns_rbt_deletenode: %s",
1521 isc_result_totext(result));
1522 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1528 * Caller must be holding the node lock if its reference must be protected
/*
 * new_reference: add one reference to a node.
 *
 * rbtdb - owning database (holds the per-bucket lock reference counts).
 * node  - node gaining a reference.
 *
 * The node counter is incremented with the variant that permits a
 * starting value of zero. When this is the first reference to the node,
 * the reference counter of the lock bucket selected by node->locknum is
 * incremented as well.
 */
1532 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1533 unsigned int lockrefs, noderefs;
1534 isc_refcount_t *lockref;
1536 dns_rbtnode_refincrement0(node, &noderefs);
1537 if (noderefs == 1) { /* this is the first reference to the node */
/* The bucket counter is bumped once per referenced node. */
1538 lockref = &rbtdb->node_locks[node->locknum].references;
1539 isc_refcount_increment0(lockref, &lockrefs);
1540 INSIST(lockrefs != 0);
1542 INSIST(noderefs != 0);
1546 * This function is assumed to be called when a node is newly referenced
1547 * and can be in the deadnode list. In that case the node must be retrieved
1548 * from the list because it is going to be used. In addition, if the caller
1549 * happens to hold a write lock on the tree, it's a good chance to purge dead
1551 * Note: while a new reference is gained in multiple places, there are only very
1552 * few cases where the node can be in the deadnode list (only empty nodes can
1553 * have been added to the list).
/*
 * reactivate_node: take a new reference on a node and make sure the node
 * is no longer queued on the dead-node list of its lock bucket.
 *
 * rbtdb        - owning database.
 * node         - node being brought back into use.
 * treelocktype - type of tree lock held by the caller; a value of
 *                isc_rwlocktype_write additionally triggers
 *                cleanup_dead_nodes() for the bucket.
 *
 * The whole operation runs between NODE_STRONGLOCK and
 * NODE_STRONGUNLOCK. A first pass under the weak read lock only decides
 * whether the second pass under the weak write lock is needed
 * (need_relock): the node is on the dead list, or the list is non-empty
 * and the caller holds the tree write lock.
 * NOTE(review): the test of need_relock that guards the write-locked
 * pass is on a line not visible in this span - confirm.
 */
1556 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1557 isc_rwlocktype_t treelocktype)
1559 isc_boolean_t need_relock = ISC_FALSE;
1561 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1562 new_reference(rbtdb, node);
/* Cheap check first: the read lock suffices to inspect the dead list. */
1564 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1565 isc_rwlocktype_read);
1566 if (ISC_LINK_LINKED(node, deadlink))
1567 need_relock = ISC_TRUE;
1568 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1569 treelocktype == isc_rwlocktype_write)
1570 need_relock = ISC_TRUE;
1571 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1572 isc_rwlocktype_read);
/* Modifying the dead list requires the write lock. */
1574 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1575 isc_rwlocktype_write);
/* Re-check: the link state may have changed while unlocked. */
1576 if (ISC_LINK_LINKED(node, deadlink))
1577 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1579 if (treelocktype == isc_rwlocktype_write)
1580 cleanup_dead_nodes(rbtdb, node->locknum);
1581 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1582 isc_rwlocktype_write);
1585 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1589 * Caller must be holding the node lock; either the "strong", read or write
1590 * lock. Note that the lock must be held even when node references are
1591 * atomically modified; in that case the decrement operation itself does not
1592 * have to be protected, but we must avoid a race condition where multiple
1593 * threads are decreasing the reference to zero simultaneously and at least
1594 * one of them is going to free the node.
1595 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * decrement_reference: drop one reference to a node and, when it was the
 * last one, clean the node and try to remove it from the tree.
 *
 * rbtdb        - owning database.
 * node         - node losing a reference.
 * least_serial - serial of the least open version, or 0 when the caller
 *                does not know it (it is then read from the database
 *                under the database read lock).
 * nlock        - type of node lock the caller holds; a read lock is
 *                exchanged for a write lock for the slow path and
 *                downgraded again before returning.
 * tlock        - type of tree lock the caller holds (none, read or
 *                write); anything short of write is upgraded or acquired
 *                with a try operation only.
 * pruning      - ISC_TRUE when the call comes from the pruning event
 *                handler; suppresses dispatching another pruning event.
 *
 * Returns ISC_TRUE on the fast path when the node counter reached zero;
 * on the slow path the value of no_reference is returned, which is
 * cleared when a pruning event took a fresh reference or when the node
 * turned out to be referenced again.
 *
 * Slow path outline: clean a dirty unreferenced node (cache or zone
 * flavour), drop the bucket reference, stop if the node still has data
 * or a down pointer, otherwise try to obtain the tree write lock. With
 * the lock the node is deleted from its tree, or a pruning event is sent
 * to rbtdb->task when the node is the sole entry of its level; without
 * the lock the node is queued on the bucket dead list.
 * NOTE(review): several early returns, else keywords and declarations
 * are on lines not visible in this span - confirm the exact branch
 * structure against the full body.
 */
1598 static isc_boolean_t
1599 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1600 rbtdb_serial_t least_serial,
1601 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1602 isc_boolean_t pruning)
1604 isc_result_t result;
1605 isc_boolean_t write_locked;
1606 rbtdb_nodelock_t *nodelock;
1607 unsigned int refs, nrefs;
1608 int bucket = node->locknum;
1609 isc_boolean_t no_reference;
/* Reference counts are also kept per lock bucket. */
1611 nodelock = &rbtdb->node_locks[bucket];
1613 /* Handle easy and typical case first. */
1614 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1615 dns_rbtnode_refdecrement(node, &nrefs);
1616 INSIST((int)nrefs >= 0);
1618 isc_refcount_decrement(&nodelock->references, &refs);
1619 INSIST((int)refs >= 0);
1621 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1624 /* Upgrade the lock? */
1625 if (nlock == isc_rwlocktype_read) {
1626 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1627 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1629 dns_rbtnode_refdecrement(node, &nrefs);
1630 INSIST((int)nrefs >= 0);
1632 /* Restore the lock? */
1633 if (nlock == isc_rwlocktype_read)
1634 NODE_WEAKDOWNGRADE(&nodelock->lock);
/* An unreferenced dirty node can have its stale data reclaimed now. */
1638 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1639 if (IS_CACHE(rbtdb))
1640 clean_cache_node(rbtdb, node);
1642 if (least_serial == 0) {
1644 * Caller doesn't know the least serial.
1647 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1648 least_serial = rbtdb->least_serial;
1649 RBTDB_UNLOCK(&rbtdb->lock,
1650 isc_rwlocktype_read);
1652 clean_zone_node(rbtdb, node, least_serial);
1656 isc_refcount_decrement(&nodelock->references, &refs);
1657 INSIST((int)refs >= 0);
1660 * XXXDCL should this only be done for cache zones?
1662 if (node->data != NULL || node->down != NULL) {
1663 /* Restore the lock? */
1664 if (nlock == isc_rwlocktype_read)
1665 NODE_WEAKDOWNGRADE(&nodelock->lock);
1670 * Attempt to switch to a write lock on the tree. If this fails,
1671 * we will add this node to a linked list of nodes in this locking
1672 * bucket which we will free later.
1674 if (tlock != isc_rwlocktype_write) {
1676 * Locking hierarchy notwithstanding, we don't need to free
1677 * the node lock before acquiring the tree write lock because
1678 * we only do a trylock.
1680 if (tlock == isc_rwlocktype_read)
1681 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1683 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1684 isc_rwlocktype_write);
1685 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1686 result == ISC_R_LOCKBUSY);
1688 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1690 write_locked = ISC_TRUE;
1692 no_reference = ISC_TRUE;
1693 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1695 * We can now delete the node if the reference counter is
1696 * zero. This should be typically the case, but a different
1697 * thread may still gain a (new) reference just before the
1698 * current thread locks the tree (e.g., in findnode()).
1702 * If this node is the only one in the level it's in, deleting
1703 * this node may recursively make its parent the only node in
1704 * the parent level; if so, and if no one is currently using
1705 * the parent node, this is almost the only opportunity to
1706 * clean it up. But the recursive cleanup is not that trivial
1707 * since the child and parent may be in different lock buckets,
1708 * which would cause a lock order reversal problem. To avoid
1709 * the trouble, we'll dispatch a separate event for batch
1710 * cleaning. We need to check whether we're deleting the node
1711 * as a result of pruning to avoid infinite dispatching.
1712 * Note: pruning happens only when a task has been set for the
1713 * rbtdb. If the user of the rbtdb chooses not to set a task,
1714 * it's their responsibility to purge stale leaves (e.g. by
1715 * periodic walk-through).
1717 if (!pruning && node->parent != NULL &&
1718 node->parent->down == node && node->left == NULL &&
1719 node->right == NULL && rbtdb->task != NULL) {
1723 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1726 sizeof(isc_event_t));
1728 new_reference(rbtdb, node);
1730 attach((dns_db_t *)rbtdb, &db);
1732 isc_task_send(rbtdb->task, &ev);
1733 no_reference = ISC_FALSE;
1736 * XXX: this is a weird situation. We could
1737 * ignore this error case, but then the stale
1738 * node will unlikely be purged except via a
1739 * rare condition such as manual cleanup. So
1740 * we queue it in the deadnodes list, hoping
1741 * the memory shortage is temporary and the node
1742 * will be deleted later.
1744 isc_log_write(dns_lctx,
1745 DNS_LOGCATEGORY_DATABASE,
1746 DNS_LOGMODULE_CACHE,
1748 "decrement_reference: failed to "
1749 "allocate pruning event");
1750 INSIST(!ISC_LINK_LINKED(node, deadlink));
1751 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1755 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1756 char printname[DNS_NAME_FORMATSIZE];
1758 isc_log_write(dns_lctx,
1759 DNS_LOGCATEGORY_DATABASE,
1760 DNS_LOGMODULE_CACHE,
1762 "decrement_reference: "
1763 "delete from rbt: %p %s",
1765 dns_rbt_formatnodename(node,
1767 sizeof(printname)));
1770 INSIST(!ISC_LINK_LINKED(node, deadlink));
1772 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1775 result = dns_rbt_deletenode(rbtdb->tree, node,
1777 if (result != ISC_R_SUCCESS) {
1778 isc_log_write(dns_lctx,
1779 DNS_LOGCATEGORY_DATABASE,
1780 DNS_LOGMODULE_CACHE,
1782 "decrement_reference: "
1783 "dns_rbt_deletenode: %s",
1784 isc_result_totext(result));
/*
 * Reached with an unreferenced node only when the tree write lock was
 * not obtained: queue the node on the bucket dead list for later.
 */
1787 } else if (dns_rbtnode_refcurrent(node) == 0) {
1788 INSIST(!ISC_LINK_LINKED(node, deadlink));
1789 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1791 no_reference = ISC_FALSE;
1793 /* Restore the lock? */
1794 if (nlock == isc_rwlocktype_read)
1795 NODE_WEAKDOWNGRADE(&nodelock->lock);
1798 * Relock a read lock, or unlock the write lock if no lock was held.
1800 if (tlock == isc_rwlocktype_none)
1802 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1804 if (tlock == isc_rwlocktype_read)
1806 isc_rwlock_downgrade(&rbtdb->tree_lock);
1808 return (no_reference);
1812 * Prune the tree by recursively cleaning-up single leaves. In the worst
1813 * case, the number of iterations is the number of tree levels, which is at
1814 * most the maximum number of domain name labels, i.e., 127. In practice, this
1815 * should be much smaller (only a few times), and even the worst case would be
1816 * acceptable for a single event.
/*
 * prune_tree: task event handler that removes a leaf node and then walks
 * up through parents that became childless.
 *
 * task  - task delivering the event.
 * event - event whose ev_sender is the rbtdb and whose ev_arg is the node
 *         to start with; it is freed at the start of the handler.
 *
 * With the tree write lock and the write lock of the node bucket held,
 * the node reference is dropped through decrement_reference() with
 * pruning set to ISC_TRUE. When the parent exists and its down pointer
 * is NULL afterwards, the handler switches to the lock bucket of the
 * parent if it differs, takes a reference on the parent and removes it
 * from the dead list, so that the parent can be handled the same way.
 * At the end both locks are released and the database is detached.
 * NOTE(review): the loop opener and the statement that advances node
 * (presumably to parent, or NULL to stop) are on lines not visible in
 * this span - confirm.
 */
1819 prune_tree(isc_task_t *task, isc_event_t *event) {
1820 dns_rbtdb_t *rbtdb = event->ev_sender;
1821 dns_rbtnode_t *node = event->ev_arg;
1822 dns_rbtnode_t *parent;
1823 unsigned int locknum;
/* Both pointers were copied out above, so the event can go right away. */
1827 isc_event_free(&event);
1829 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1830 locknum = node->locknum;
1831 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/* Read the parent before the node may be deleted. */
1833 parent = node->parent;
1834 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1835 isc_rwlocktype_write, ISC_TRUE);
1837 if (parent != NULL && parent->down == NULL) {
1839 * node was the only down child of the parent and has
1840 * just been removed. We'll then need to examine the
1841 * parent. Keep the lock if possible; otherwise,
1842 * release the old lock and acquire one for the parent.
1844 if (parent->locknum != locknum) {
1845 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1846 isc_rwlocktype_write);
1847 locknum = parent->locknum;
1848 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1849 isc_rwlocktype_write);
1853 * We need to gain a reference to the node before
1854 * decrementing it in the next iteration. In addition,
1855 * if the node is in the dead-nodes list, extract it
1856 * from the list beforehand as we do in
1857 * reactivate_node().
1859 new_reference(rbtdb, parent);
1860 if (ISC_LINK_LINKED(parent, deadlink)) {
1861 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1868 } while (node != NULL);
1869 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1870 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1872 detach((dns_db_t **)&rbtdb);
/*
 * make_least_version: record version as the least open version of the
 * database and hand its pending cleanups to the caller.
 *
 * rbtdb        - database whose least_serial is updated.
 * version      - version supplying the serial and the changed list.
 * cleanup_list - receives the changed list of the version by structure
 *                assignment; the list inside the version is then reset
 *                to empty.
 */
1876 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1877 rbtdb_changedlist_t *cleanup_list)
1880 * Caller must be holding the database lock.
1883 rbtdb->least_serial = version->serial;
/* Transfer the list head, then leave the version with an empty list. */
1884 *cleanup_list = version->changed_list;
1885 ISC_LIST_INIT(version->changed_list);
/*
 * cleanup_nondirty: move every changed record that is not dirty from the
 * changed list of a version to the cleanup list of the caller.
 *
 * version      - version whose changed_list is filtered in place.
 * cleanup_list - list that receives the non-dirty records.
 *
 * Dirty records stay on the list of the version. The successor of each
 * record is read before the record may be unlinked, so the walk is safe
 * against the removal.
 */
1889 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1890 rbtdb_changed_t *changed, *next_changed;
1893 * If the changed record is dirty, then
1894 * an update created multiple versions of
1895 * a given rdataset. We keep this list
1896 * until we're the least open version, at
1897 * which point it's safe to get rid of any
1900 * If the changed record isn't dirty, then
1901 * we don't need it anymore since we're
1902 * committing and not rolling back.
1904 * The caller must be holding the database lock.
1906 for (changed = HEAD(version->changed_list);
1908 changed = next_changed) {
1909 next_changed = NEXT(changed, link);
1910 if (!changed->dirty) {
1911 UNLINK(version->changed_list,
1913 APPEND(*cleanup_list,
/*
 * iszonesecure: work out the DNSSEC status of a zone version and store
 * it in version->secure (and version->havensec3).
 *
 * db      - database to query.
 * version - version that is examined and updated.
 * origin  - origin node of the zone, where DNSKEY and NSEC are looked up.
 *
 * Visible flow:
 *  - Look up the DNSKEY rdataset at the origin and note whether any key
 *    satisfies dns_zonekey_iszonekey().
 *  - Set the version to insecure without NSEC3 (NOTE(review): the
 *    condition guarding this, presumably the absence of a zone key, and
 *    the early return are on lines not visible here - confirm).
 *  - Look up the NSEC rdataset with its signatures; when signatures are
 *    associated, the first NSEC record is tested with
 *    dns_nsec_typepresent() to set hasoptbit.
 *  - Call setnsec3parameters() to cache NSEC3 parameters.
 *  - Final status: secure when NSEC3 is available or NSEC is present
 *    without the opt bit; partial when the opt bit is set or an NSEC3
 *    chain is flagged as being created; insecure otherwise.
 */
1920 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1921 dns_rdataset_t keyset;
1922 dns_rdataset_t nsecset, signsecset;
1923 dns_rdata_t rdata = DNS_RDATA_INIT;
1924 isc_boolean_t haszonekey = ISC_FALSE;
1925 isc_boolean_t hasnsec = ISC_FALSE;
1926 isc_boolean_t hasoptbit = ISC_FALSE;
1927 isc_boolean_t nsec3createflag = ISC_FALSE;
1928 isc_result_t result;
1930 dns_rdataset_init(&keyset);
1931 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1932 0, 0, &keyset, NULL);
1933 if (result == ISC_R_SUCCESS) {
1934 dns_rdata_t keyrdata = DNS_RDATA_INIT;
/* Scan the keys until one qualifies as a zone key. */
1935 result = dns_rdataset_first(&keyset);
1936 while (result == ISC_R_SUCCESS) {
1937 dns_rdataset_current(&keyset, &keyrdata);
1938 if (dns_zonekey_iszonekey(&keyrdata)) {
1939 haszonekey = ISC_TRUE;
1942 result = dns_rdataset_next(&keyset);
1944 dns_rdataset_disassociate(&keyset);
1947 version->secure = dns_db_insecure;
1948 version->havensec3 = ISC_FALSE;
1952 dns_rdataset_init(&nsecset);
1953 dns_rdataset_init(&signsecset);
1954 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1955 0, 0, &nsecset, &signsecset);
1956 if (result == ISC_R_SUCCESS) {
/* Only a signed NSEC rdataset is inspected further. */
1957 if (dns_rdataset_isassociated(&signsecset)) {
1959 result = dns_rdataset_first(&nsecset);
1960 if (result == ISC_R_SUCCESS) {
1961 dns_rdataset_current(&nsecset, &rdata);
1962 hasoptbit = dns_nsec_typepresent(&rdata,
1965 dns_rdataset_disassociate(&signsecset);
1967 dns_rdataset_disassociate(&nsecset);
1970 setnsec3parameters(db, version, &nsec3createflag);
1973 * Do we have a valid NSEC/NSEC3 chain?
1975 if (version->havensec3 || (hasnsec && !hasoptbit))
1976 version->secure = dns_db_secure;
1978 * Do we have a NSEC/NSEC3 chain under creation?
1980 else if (hasoptbit || nsec3createflag)
1981 version->secure = dns_db_partial;
1983 version->secure = dns_db_insecure;
1987 * Walk the origin node looking for NSEC3PARAM records.
1988 * Cache the nsec3 parameters.
/*
 * setnsec3parameters: scan the origin node for NSEC3PARAM records visible
 * in a version and cache the chosen parameters in that version.
 *
 * db              - database (an rbtdb) to scan.
 * version         - version whose serial bounds the visible headers and
 *                   which receives salt, hash, salt_length, iterations,
 *                   flags and havensec3.
 * nsec3createflag - set to ISC_TRUE when a record carries
 *                   DNS_NSEC3FLAG_CREATE.
 *
 * The tree read lock and the read lock of the origin node bucket are
 * held for the whole scan. havensec3 is cleared first. For each
 * top-level header the down chain is followed to find the header to use
 * for this version; when that header has type NSEC3PARAM, every record
 * in its slab is decoded with dns_rdata_tostruct(). Records are tested
 * for an unsupported hash and for their flag bits before the parameters
 * are copied into the version.
 * NOTE(review): the statements taken when those tests match (presumably
 * continue), the slab pointer arithmetic between records and the exit
 * taken once a hash other than DNS_NSEC3_UNKNOWNALG was stored are on
 * lines not visible in this span - confirm.
 */
1991 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1992 isc_boolean_t *nsec3createflag)
1994 dns_rbtnode_t *node;
1995 dns_rdata_nsec3param_t nsec3param;
1996 dns_rdata_t rdata = DNS_RDATA_INIT;
1997 isc_region_t region;
1998 isc_result_t result;
1999 rdatasetheader_t *header, *header_next;
2000 unsigned char *raw; /* RDATASLAB */
2001 unsigned int count, length;
2002 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2004 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2005 version->havensec3 = ISC_FALSE;
2006 node = rbtdb->origin_node;
2007 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2008 isc_rwlocktype_read);
2009 for (header = node->data;
2011 header = header_next) {
/* header is reused below for the descent, so save the successor. */
2012 header_next = header->next;
2014 if (header->serial <= version->serial &&
2016 if (NONEXISTENT(header))
2020 header = header->down;
2021 } while (header != NULL);
2023 if (header != NULL &&
2024 header->type == dns_rdatatype_nsec3param) {
2026 * Find A NSEC3PARAM with a supported algorithm.
/* The slab follows the header; it opens with a 16-bit record count. */
2028 raw = (unsigned char *)header + sizeof(*header);
2029 count = raw[0] * 256 + raw[1]; /* count */
2030 #if DNS_RDATASET_FIXED
2031 raw += count * 4 + 2;
2035 while (count-- > 0U) {
/* Each record is prefixed by its 16-bit length, high byte first. */
2036 length = raw[0] * 256 + raw[1];
2037 #if DNS_RDATASET_FIXED
2043 region.length = length;
2045 dns_rdata_fromregion(&rdata,
2046 rbtdb->common.rdclass,
2047 dns_rdatatype_nsec3param,
2049 result = dns_rdata_tostruct(&rdata,
2052 INSIST(result == ISC_R_SUCCESS);
2053 dns_rdata_reset(&rdata);
2055 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2056 !dns_nsec3_supportedhash(nsec3param.hash))
2059 #ifdef RFC5155_STRICT
2060 if (nsec3param.flags != 0)
2063 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2065 *nsec3createflag = ISC_TRUE;
2066 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
/* Cache the parameters of this record in the version. */
2071 memcpy(version->salt, nsec3param.salt,
2072 nsec3param.salt_length);
2073 version->hash = nsec3param.hash;
2074 version->salt_length = nsec3param.salt_length;
2075 version->iterations = nsec3param.iterations;
2076 version->flags = nsec3param.flags;
2077 version->havensec3 = ISC_TRUE;
2079 * Look for a better algorithm than the
2080 * unknown test algorithm.
2082 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2088 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2089 isc_rwlocktype_read);
2090 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * closeversion: drop the caller reference to *versionp and, when it was
 * the last one, retire the version.
 *
 * db       - the rbtdb the version belongs to.
 * versionp - address of the version handle being closed.
 * commit   - for a writer version, selects commit versus rollback; it is
 *            also required for the secure-status refresh near the end.
 *            NOTE(review): the branch that tests this flag inside the
 *            writer case is on a line not visible in this span - confirm.
 *
 * Visible flow:
 *  - If other references remain, only a sanity check is made that the
 *    version is not the writer.
 *  - Under the database write lock, a committed writer version becomes
 *    the current version: the reference of the database to the old
 *    current version is dropped and that version is unlinked, changed
 *    records go to make_least_version() or cleanup_nondirty(), and the
 *    new current version gains a reference and is put at the front of
 *    open_versions. A rolled back writer hands its changed and resigned
 *    lists over for cleanup and is scheduled to be freed.
 *  - A non-writer version that is not the current version is scheduled
 *    to be freed and unlinked from open_versions; its pending cleanups
 *    become the cleanup list when it was the least open version,
 *    otherwise they are appended to the next greater version.
 *  - After the database lock is released: the secure status is refreshed
 *    for a committed writer on a non-cache database, the retired version
 *    is freed, re-signed headers are processed one node lock at a time,
 *    and every node on the cleanup list is cleaned with the tree write
 *    lock held.
 * NOTE(review): several else keywords, early exits and the final reset
 * of *versionp are on lines not visible in this span - confirm the exact
 * branch structure against the full body.
 */
2094 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2095 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2096 rbtdb_version_t *version, *cleanup_version, *least_greater;
2097 isc_boolean_t rollback = ISC_FALSE;
2098 rbtdb_changedlist_t cleanup_list;
2099 rdatasetheaderlist_t resigned_list;
2100 rbtdb_changed_t *changed, *next_changed;
2101 rbtdb_serial_t serial, least_serial;
2102 dns_rbtnode_t *rbtnode;
2104 rdatasetheader_t *header;
2105 isc_boolean_t writer;
2107 REQUIRE(VALID_RBTDB(rbtdb));
2108 version = (rbtdb_version_t *)*versionp;
2110 cleanup_version = NULL;
2111 ISC_LIST_INIT(cleanup_list);
2112 ISC_LIST_INIT(resigned_list);
/* Drop the caller reference; refs receives the remaining count. */
2114 isc_refcount_decrement(&version->references, &refs);
2115 if (refs > 0) { /* typical and easy case first */
2117 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2118 INSIST(!version->writer);
2119 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2124 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
/* Snapshot both values; they are used after the lock is released. */
2125 serial = version->serial;
2126 writer = version->writer;
2127 if (version->writer) {
2130 rbtdb_version_t *cur_version;
2132 INSIST(version->commit_ok);
2133 INSIST(version == rbtdb->future_version);
2135 * The current version is going to be replaced.
2136 * Release the (likely last) reference to it from the
2137 * DB itself and unlink it from the open list.
2139 cur_version = rbtdb->current_version;
2140 isc_refcount_decrement(&cur_version->references,
2143 if (cur_version->serial == rbtdb->least_serial)
2144 INSIST(EMPTY(cur_version->changed_list));
2145 UNLINK(rbtdb->open_versions,
2148 if (EMPTY(rbtdb->open_versions)) {
2150 * We're going to become the least open
2153 make_least_version(rbtdb, version,
2157 * Some other open version is the
2158 * least version. We can't cleanup
2159 * records that were changed in this
2160 * version because the older versions
2161 * may still be in use by an open
2164 * We can, however, discard the
2165 * changed records for things that
2166 * we've added that didn't exist in
2169 cleanup_nondirty(version, &cleanup_list);
2172 * If the (soon to be former) current version
2173 * isn't being used by anyone, we can clean
2177 cleanup_version = cur_version;
2178 APPENDLIST(version->changed_list,
2179 cleanup_version->changed_list,
2183 * Become the current version.
2185 version->writer = ISC_FALSE;
2186 rbtdb->current_version = version;
2187 rbtdb->current_serial = version->serial;
2188 rbtdb->future_version = NULL;
2191 * Keep the current version in the open list, and
2192 * gain a reference for the DB itself (see the DB
2193 * creation function below). This must be the only
2194 * case where we need to increment the counter from
2195 * zero and need to use isc_refcount_increment0().
2197 isc_refcount_increment0(&version->references,
2199 INSIST(cur_ref == 1);
2200 PREPEND(rbtdb->open_versions,
2201 rbtdb->current_version, link);
2202 resigned_list = version->resigned_list;
2203 ISC_LIST_INIT(version->resigned_list);
2206 * We're rolling back this transaction.
2208 cleanup_list = version->changed_list;
2209 ISC_LIST_INIT(version->changed_list);
2210 resigned_list = version->resigned_list;
2211 ISC_LIST_INIT(version->resigned_list);
2212 rollback = ISC_TRUE;
2213 cleanup_version = version;
2214 rbtdb->future_version = NULL;
2217 if (version != rbtdb->current_version) {
2219 * There are no external or internal references
2220 * to this version and it can be cleaned up.
2222 cleanup_version = version;
2225 * Find the version with the least serial
2226 * number greater than ours.
2228 least_greater = PREV(version, link);
2229 if (least_greater == NULL)
2230 least_greater = rbtdb->current_version;
2232 INSIST(version->serial < least_greater->serial);
2234 * Is this the least open version?
2236 if (version->serial == rbtdb->least_serial) {
2238 * Yes. Install the new least open
2241 make_least_version(rbtdb,
2246 * Add any unexecuted cleanups to
2247 * those of the least greater version.
2249 APPENDLIST(least_greater->changed_list,
2250 version->changed_list,
2253 } else if (version->serial == rbtdb->least_serial)
2254 INSIST(EMPTY(version->changed_list));
2255 UNLINK(rbtdb->open_versions, version, link);
2257 least_serial = rbtdb->least_serial;
2258 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2261 * Update the zone's secure status.
2263 if (writer && commit && !IS_CACHE(rbtdb))
2264 iszonesecure(db, version, rbtdb->origin_node);
2266 if (cleanup_version != NULL) {
2267 INSIST(EMPTY(cleanup_version->changed_list));
2268 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2269 sizeof(*cleanup_version));
2273 * Commit/rollback re-signed headers.
2275 for (header = HEAD(resigned_list);
2277 header = HEAD(resigned_list)) {
2280 ISC_LIST_UNLINK(resigned_list, header, link);
2282 lock = &rbtdb->node_locks[header->node->locknum].lock;
2283 NODE_LOCK(lock, isc_rwlocktype_write);
2285 resign_insert(rbtdb, header->node->locknum, header);
2286 decrement_reference(rbtdb, header->node, least_serial,
2287 isc_rwlocktype_write, isc_rwlocktype_none,
2289 NODE_UNLOCK(lock, isc_rwlocktype_write);
2292 if (!EMPTY(cleanup_list)) {
2294 * We acquire a tree write lock here in order to make sure
2295 * that stale nodes will be removed in decrement_reference().
2296 * If we didn't have the lock, those nodes could miss the
2297 * chance to be removed until the server stops. The write lock
2298 * is expensive, but this event should be rare enough to justify
2301 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2302 for (changed = HEAD(cleanup_list);
2304 changed = next_changed) {
2307 next_changed = NEXT(changed, link);
2308 rbtnode = changed->node;
2309 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2311 NODE_LOCK(lock, isc_rwlocktype_write);
2313 * This is a good opportunity to purge any dead nodes,
2316 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2319 rollback_node(rbtnode, serial);
2320 decrement_reference(rbtdb, rbtnode, least_serial,
2321 isc_rwlocktype_write,
2322 isc_rwlocktype_write, ISC_FALSE);
2324 NODE_UNLOCK(lock, isc_rwlocktype_write);
2326 isc_mem_put(rbtdb->common.mctx, changed,
2329 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2337 * Add the necessary magic for the wildcard name 'name'
2338 * to be found in 'rbtdb'.
2340 * In order for wildcard matching to work correctly in
2341 * zone_find(), we must ensure that a node for the wildcarding
2342 * level exists in the database, and has its 'find_callback'
2343 * and 'wild' bits set.
2345 * E.g. if the wildcard name is "*.sub.example." then we
2346 * must ensure that "sub.example." exists and is marked as
/*
 * add_wildcard_magic: make sure the node one label above a wildcard name
 * exists in the main tree and is flagged for wildcard handling.
 *
 * rbtdb - database whose main tree is updated.
 * name  - the wildcard name; the labels from index 1 onward (that is,
 *         the name without its first label) form the node name.
 *
 * dns_rbt_addnode() results of ISC_R_SUCCESS and ISC_R_EXISTS are both
 * accepted; the node then gets find_callback set. Returns ISC_R_SUCCESS
 * in that case.
 * NOTE(review): the adjustment of n before the label sequence is taken,
 * the error return and the setting of the wild bit are on lines not
 * visible in this span - confirm.
 */
2350 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2351 isc_result_t result;
2352 dns_name_t foundname;
2353 dns_offsets_t offsets;
2355 dns_rbtnode_t *node = NULL;
2357 dns_name_init(&foundname, offsets);
2358 n = dns_name_countlabels(name);
/* Skip the first label of name to address the enclosing level. */
2361 dns_name_getlabelsequence(name, 1, n, &foundname);
2362 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2363 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2366 node->find_callback = 1;
2368 return (ISC_R_SUCCESS);
/*
 * add_empty_wildcards: for the suffixes of name that are wildcard names,
 * install the wildcard markers and make sure the suffix itself has a
 * node in the main tree.
 *
 * rbtdb - database whose main tree is updated.
 * name  - name whose trailing label sequences are examined.
 *
 * Each candidate is the sequence of the last i labels of name. For a
 * candidate that dns_name_iswildcard() accepts, add_wildcard_magic() is
 * called and the candidate is added with dns_rbt_addnode(), where
 * ISC_R_EXISTS counts as success. Returns ISC_R_SUCCESS when the loop
 * completes.
 * NOTE(review): the loop header (the range of i, presumably derived from
 * the label counts n and l) and the error returns are on lines not
 * visible in this span - confirm.
 */
2372 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2373 isc_result_t result;
2374 dns_name_t foundname;
2375 dns_offsets_t offsets;
2376 unsigned int n, l, i;
2378 dns_name_init(&foundname, offsets);
2379 n = dns_name_countlabels(name);
2380 l = dns_name_countlabels(&rbtdb->common.origin);
2383 dns_rbtnode_t *node = NULL; /* dummy */
/* Take the last i labels of name as the candidate. */
2384 dns_name_getlabelsequence(name, n - i, i, &foundname);
2385 if (dns_name_iswildcard(&foundname)) {
2386 result = add_wildcard_magic(rbtdb, &foundname);
2387 if (result != ISC_R_SUCCESS)
2389 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2391 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2397 return (ISC_R_SUCCESS);
/*
 * findnode: find the node for name in the main tree, optionally creating
 * it, and hand it back with a new reference.
 *
 * db     - database (an rbtdb) to search.
 * name   - name of the wanted node.
 * create - whether a missing node may be created.
 * nodep  - receives the node on success.
 *
 * The lookup runs under the tree read lock with DNS_RBTFIND_EMPTYDATA.
 * On a miss the read lock is released, DNS_R_PARTIALMATCH is mapped to
 * ISC_R_NOTFOUND, and the tree is locked again for writing before
 * dns_rbt_addnode() is called. A freshly added node gets its lock
 * number computed; add_empty_wildcards() is called, and for a wildcard
 * name add_wildcard_magic() as well. ISC_R_EXISTS from the add is
 * accepted. Finally reactivate_node() takes the reference, the tree
 * lock is released and ISC_R_SUCCESS is returned.
 * NOTE(review): the test of create and the return statements on the
 * failure paths are on lines not visible in this span - confirm.
 */
2401 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2402 dns_dbnode_t **nodep)
2404 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2405 dns_rbtnode_t *node = NULL;
2406 dns_name_t nodename;
2407 isc_result_t result;
2408 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2410 REQUIRE(VALID_RBTDB(rbtdb));
2412 dns_name_init(&nodename, NULL);
/* Start optimistically with the read lock. */
2413 RWLOCK(&rbtdb->tree_lock, locktype);
2414 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2415 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2416 if (result != ISC_R_SUCCESS) {
2417 RWUNLOCK(&rbtdb->tree_lock, locktype);
2419 if (result == DNS_R_PARTIALMATCH)
2420 result = ISC_R_NOTFOUND;
2424 * It would be nice to try to upgrade the lock instead of
2425 * unlocking then relocking.
2427 locktype = isc_rwlocktype_write;
2428 RWLOCK(&rbtdb->tree_lock, locktype);
/* The add also covers a node created by another thread meanwhile. */
2430 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2431 if (result == ISC_R_SUCCESS) {
2432 dns_rbt_namefromnode(node, &nodename);
2433 #ifdef DNS_RBT_USEHASH
2434 node->locknum = node->hashval % rbtdb->node_lock_count;
2436 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2437 rbtdb->node_lock_count;
2440 add_empty_wildcards(rbtdb, name);
2442 if (dns_name_iswildcard(name)) {
2443 result = add_wildcard_magic(rbtdb, name);
2444 if (result != ISC_R_SUCCESS) {
2445 RWUNLOCK(&rbtdb->tree_lock, locktype);
2449 } else if (result != ISC_R_EXISTS) {
2450 RWUNLOCK(&rbtdb->tree_lock, locktype);
/* locktype tells reactivate_node which tree lock is held. */
2454 reactivate_node(rbtdb, node, locktype);
2455 RWUNLOCK(&rbtdb->tree_lock, locktype);
2457 *nodep = (dns_dbnode_t *)node;
2459 return (ISC_R_SUCCESS);
/*
 * findnsec3node: find the node for name in the NSEC3 tree, optionally
 * creating it, and hand it back with a new reference.
 *
 * db     - database (an rbtdb) to search.
 * name   - name of the wanted node.
 * create - whether a missing node may be created.
 * nodep  - receives the node on success.
 *
 * The lookup runs on rbtdb->nsec3 under the tree read lock with
 * DNS_RBTFIND_EMPTYDATA. On a miss the read lock is released,
 * DNS_R_PARTIALMATCH is mapped to ISC_R_NOTFOUND, and the tree is locked
 * again for writing before dns_rbt_addnode() is called. A freshly added
 * node gets its lock number computed; ISC_R_EXISTS from the add is
 * accepted. The node must have its nsec3 flag set. The reference is
 * taken with new_reference() between NODE_STRONGLOCK and
 * NODE_STRONGUNLOCK, after which the tree lock is released and
 * ISC_R_SUCCESS is returned.
 * NOTE(review): the test of create, the setting of the nsec3 flag on a
 * new node and the return statements on the failure paths are on lines
 * not visible in this span - confirm.
 */
2463 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2464 dns_dbnode_t **nodep)
2466 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2467 dns_rbtnode_t *node = NULL;
2468 dns_name_t nodename;
2469 isc_result_t result;
2470 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2472 REQUIRE(VALID_RBTDB(rbtdb));
2474 dns_name_init(&nodename, NULL);
/* Start optimistically with the read lock. */
2475 RWLOCK(&rbtdb->tree_lock, locktype);
2476 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2477 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2478 if (result != ISC_R_SUCCESS) {
2479 RWUNLOCK(&rbtdb->tree_lock, locktype);
2481 if (result == DNS_R_PARTIALMATCH)
2482 result = ISC_R_NOTFOUND;
2486 * It would be nice to try to upgrade the lock instead of
2487 * unlocking then relocking.
2489 locktype = isc_rwlocktype_write;
2490 RWLOCK(&rbtdb->tree_lock, locktype);
/* The add also covers a node created by another thread meanwhile. */
2492 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2493 if (result == ISC_R_SUCCESS) {
2494 dns_rbt_namefromnode(node, &nodename);
2495 #ifdef DNS_RBT_USEHASH
2496 node->locknum = node->hashval % rbtdb->node_lock_count;
2498 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2499 rbtdb->node_lock_count;
2502 } else if (result != ISC_R_EXISTS) {
2503 RWUNLOCK(&rbtdb->tree_lock, locktype);
2507 INSIST(node->nsec3);
2508 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2509 new_reference(rbtdb, node);
2510 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2511 RWUNLOCK(&rbtdb->tree_lock, locktype);
2513 *nodep = (dns_dbnode_t *)node;
2515 return (ISC_R_SUCCESS);
/*
 * Tree-search callback: 'arg' is the rbtdb_search_t of the search in
 * progress, 'node' the node being passed and 'name' its name.
 *
 * Only the topmost zone cut is remembered, so when search->zonecut is
 * already set the callback returns DNS_R_CONTINUE immediately.  Otherwise,
 * with the node's bucket lock held for reading, the rdataset headers at
 * 'node' are scanned for NS, DNAME and RRSIG(DNAME) entries, following each
 * header's 'down' chain and checking header->serial against search->serial.
 * An NS rdataset only counts when 'node' is not the database origin node or
 * the database is a stub.  DNAME takes precedence over NS.
 *
 * When a cut is found, a reference is taken on 'node' and the search block
 * records it (zonecut, zonecut_rdataset, zonecut_sigrdataset,
 * need_cleanup), and search->wild is cleared.  If DNS_DBFIND_GLUEOK is not
 * set the result becomes DNS_R_PARTIALMATCH; the visible code also copies
 * 'name' into search->zonecut_name and sets search->copy_name for the case
 * where the search continues below the cut.
 *
 * When no cut is found and the node is marked 'wild' (and
 * DNS_DBFIND_NOWILD is not set), search->wild is set.
 *
 * NOTE(review): this listing omits some lines (for example the
 * initialisation of 'found'/'ns_header', parts of the version-selection
 * loop, and the final return); 'result' defaults to DNS_R_CONTINUE and is
 * presumably what is returned -- confirm against the full source.
 */
2519 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2520 rbtdb_search_t *search = arg;
2521 rdatasetheader_t *header, *header_next;
2522 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2523 rdatasetheader_t *found;
2524 isc_result_t result;
2525 dns_rbtnode_t *onode;
2528 * We only want to remember the topmost zone cut, since it's the one
2529 * that counts, so we'll just continue if we've already found a
2532 if (search->zonecut != NULL)
2533 return (DNS_R_CONTINUE);
2536 result = DNS_R_CONTINUE;
2537 onode = search->rbtdb->origin_node;
2539 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2540 isc_rwlocktype_read);
2543 * Look for an NS or DNAME rdataset active in our version.
2546 dname_header = NULL;
2547 sigdname_header = NULL;
2548 for (header = node->data; header != NULL; header = header_next) {
2549 header_next = header->next;
2550 if (header->type == dns_rdatatype_ns ||
2551 header->type == dns_rdatatype_dname ||
2552 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2554 if (header->serial <= search->serial &&
2557 * Is this a "this rdataset doesn't
2560 if (NONEXISTENT(header))
2564 header = header->down;
2565 } while (header != NULL);
2566 if (header != NULL) {
2567 if (header->type == dns_rdatatype_dname)
2568 dname_header = header;
2569 else if (header->type ==
2570 RBTDB_RDATATYPE_SIGDNAME)
2571 sigdname_header = header;
2572 else if (node != onode ||
2573 IS_STUB(search->rbtdb)) {
2575 * We've found an NS rdataset that
2576 * isn't at the origin node. We check
2577 * that they're not at the origin node,
2578 * because otherwise we'd erroneously
2579 * treat the zone top as if it were
2589 * Did we find anything?
2591 if (dname_header != NULL) {
2593 * Note that DNAME has precedence over NS if both exist.
2595 found = dname_header;
2596 search->zonecut_sigrdataset = sigdname_header;
2597 } else if (ns_header != NULL) {
2599 search->zonecut_sigrdataset = NULL;
2602 if (found != NULL) {
2604 * We increment the reference count on node to ensure that
2605 * search->zonecut_rdataset will still be valid later.
2607 new_reference(search->rbtdb, node);
2608 search->zonecut = node;
2609 search->zonecut_rdataset = found;
2610 search->need_cleanup = ISC_TRUE;
2612 * Since we've found a zonecut, anything beneath it is
2613 * glue and is not subject to wildcard matching, so we
2614 * may clear search->wild.
2616 search->wild = ISC_FALSE;
2617 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2619 * If the caller does not want to find glue, then
2620 * this is the best answer and the search should
2623 result = DNS_R_PARTIALMATCH;
2628 * The search will continue beneath the zone cut.
2629 * This may or may not be the best match. In case it
2630 * is, we need to remember the node name.
2632 zcname = dns_fixedname_name(&search->zonecut_name);
2633 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2635 search->copy_name = ISC_TRUE;
2639 * There is no zonecut at this node which is active in this
2642 * If this is a "wild" node and the caller hasn't disabled
2643 * wildcard matching, remember that we've seen a wild node
2644 * in case we need to go searching for wildcard matches
2647 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2648 search->wild = ISC_TRUE;
2651 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2652 isc_rwlocktype_read);
/*
 * Associate the caller's (currently disassociated) 'rdataset' with the
 * rdataset 'header' stored at 'node' in 'rbtdb'.
 *
 * A reference is taken on 'node', and 'rdataset' is filled in from the
 * header: class from the database, type and covered type decoded from
 * header->type, TTL as header->rdh_ttl minus 'now', trust, and attribute
 * bits (NXDOMAIN, OPTOUT, NOQNAME, CLOSEST, RESIGN).  private1/2/3 point
 * at the database, the node and the raw slab data that immediately follows
 * the header structure; private6/7 carry the header's noqname/closest
 * proofs.  The iterator state (privateuint4, private5) is reset.
 *
 * rdataset->count receives the header's counter, which is post-incremented
 * here; a value of ISC_UINT32_MAX is reported as 0.
 *
 * The caller must hold the node lock (see the locking remark in the body).
 *
 * NOTE(review): this listing omits a few lines, including the statement
 * executed when 'rdataset' is NULL (presumably an early return) and the
 * condition guarding the OPTOUT attribute -- confirm against the full
 * source.
 */
2658 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2659 rdatasetheader_t *header, isc_stdtime_t now,
2660 dns_rdataset_t *rdataset)
2662 unsigned char *raw; /* RDATASLAB */
2665 * Caller must be holding the node reader lock.
2666 * XXXJT: technically, we need a writer lock, since we'll increment
2667 * the header count below. However, since the actual counter value
2668 * doesn't matter, we prioritize performance here. (We may want to
2669 * use atomic increment when available).
2672 if (rdataset == NULL)
2675 new_reference(rbtdb, node);
2677 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2679 rdataset->methods = &rdataset_methods;
2680 rdataset->rdclass = rbtdb->common.rdclass;
2681 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2682 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2683 rdataset->ttl = header->rdh_ttl - now;
2684 rdataset->trust = header->trust;
2685 if (NXDOMAIN(header))
2686 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2688 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2689 rdataset->private1 = rbtdb;
2690 rdataset->private2 = node;
2691 raw = (unsigned char *)header + sizeof(*header);
2692 rdataset->private3 = raw;
2693 rdataset->count = header->count++;
2694 if (rdataset->count == ISC_UINT32_MAX)
2695 rdataset->count = 0;
2698 * Reset iterator state.
2700 rdataset->privateuint4 = 0;
2701 rdataset->private5 = NULL;
2704 * Add noqname proof.
2706 rdataset->private6 = header->noqname;
2707 if (rdataset->private6 != NULL)
2708 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2709 rdataset->private7 = header->closest;
2710 if (rdataset->private7 != NULL)
2711 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2714 * Copy out re-signing information.
2716 if (RESIGN(header)) {
2717 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2718 rdataset->resign = header->resign;
2720 rdataset->resign = 0;
/*
 * Turn the zone cut remembered in 'search' into the caller-visible result
 * of a find operation.
 *
 * If 'foundname' is supplied and search->copy_name is set, the saved
 * zonecut name is copied into it first, so that nothing has to be undone
 * should the copy fail.  If 'nodep' is supplied, the reference already held
 * by the search block is reused and search->need_cleanup is cleared.  If
 * 'rdataset' is supplied, the zonecut rdataset (and, when both are present,
 * its signature into 'sigrdataset') is bound under the node's read lock,
 * using search->now.
 *
 * The caller must not hold any node locks.
 *
 * Returns DNS_R_DNAME when the zonecut rdataset is a DNAME, otherwise
 * DNS_R_DELEGATION.
 *
 * NOTE(review): this listing omits some lines (for example the handling of
 * a failed dns_name_copy() and the assignment to '*nodep'); presumably the
 * copy failure is returned to the caller -- confirm against the full
 * source.
 */
2723 static inline isc_result_t
2724 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2725 dns_name_t *foundname, dns_rdataset_t *rdataset,
2726 dns_rdataset_t *sigrdataset)
2728 isc_result_t result;
2730 rbtdb_rdatatype_t type;
2731 dns_rbtnode_t *node;
2734 * The caller MUST NOT be holding any node locks.
2737 node = search->zonecut;
2738 type = search->zonecut_rdataset->type;
2741 * If we have to set foundname, we do it before anything else.
2742 * If we were to set foundname after we had set nodep or bound the
2743 * rdataset, then we'd have to undo that work if dns_name_copy()
2744 * failed. By setting foundname first, there's nothing to undo if
2747 if (foundname != NULL && search->copy_name) {
2748 zcname = dns_fixedname_name(&search->zonecut_name);
2749 result = dns_name_copy(zcname, foundname, NULL);
2750 if (result != ISC_R_SUCCESS)
2753 if (nodep != NULL) {
2755 * Note that we don't have to increment the node's reference
2756 * count here because we're going to use the reference we
2757 * already have in the search block.
2760 search->need_cleanup = ISC_FALSE;
2762 if (rdataset != NULL) {
2763 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2764 isc_rwlocktype_read);
2765 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2766 search->now, rdataset);
2767 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2768 bind_rdataset(search->rbtdb, node,
2769 search->zonecut_sigrdataset,
2770 search->now, sigrdataset);
2771 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2772 isc_rwlocktype_read);
2775 if (type == dns_rdatatype_dname)
2776 return (DNS_R_DNAME);
2777 return (DNS_R_DELEGATION);
/*
 * Decide whether data of 'type' at 'node', owned by 'name', may be treated
 * as glue for the zone cut recorded in 'search'.
 *
 * NS is only considered at the zone cut node itself; any other type must
 * be A, AAAA or A6.  The raw rdata slab that follows
 * search->zonecut_rdataset is then walked: a two-byte big-endian record
 * count, followed (when DNS_RDATASET_FIXED is set) by 4 bytes per record,
 * then records that each begin with a two-byte big-endian length.  Each
 * record is read as a domain name via 'region' and compared with 'name'.
 *
 * The address of the local 'region' is what dns_name_fromregion() must
 * receive; the argument had been corrupted into a non-C character
 * sequence and is restored here.
 *
 * NOTE(review): this listing omits several lines of the function (the
 * early returns, the loop header, the assignment of region.base and the
 * final return).  'valid' starts as ISC_FALSE and is presumably set on a
 * name match -- confirm against the full source.
 */
2780 static inline isc_boolean_t
2781 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2782 dns_rbtnode_t *node)
2784 unsigned char *raw; /* RDATASLAB */
2785 unsigned int count, size;
2787 isc_boolean_t valid = ISC_FALSE;
2788 dns_offsets_t offsets;
2789 isc_region_t region;
2790 rdatasetheader_t *header;
2793 * No additional locking is required.
2797 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2798 * if it occurs at a zone cut, but is not valid below it.
2800 if (type == dns_rdatatype_ns) {
2801 if (node != search->zonecut) {
2804 } else if (type != dns_rdatatype_a &&
2805 type != dns_rdatatype_aaaa &&
2806 type != dns_rdatatype_a6) {
2810 header = search->zonecut_rdataset;
2811 raw = (unsigned char *)header + sizeof(*header);
2812 count = raw[0] * 256 + raw[1];
2813 #if DNS_RDATASET_FIXED
2814 raw += 2 + (4 * count);
2821 size = raw[0] * 256 + raw[1];
2822 #if DNS_RDATASET_FIXED
2828 region.length = size;
2831 * XXX Until we have rdata structures, we have no choice but
2832 * to directly access the rdata format.
2834 dns_name_init(&ns_name, offsets);
2835 dns_name_fromregion(&ns_name, &region);
2836 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * Walk 'chain' forward looking for the next node that has at least one
 * rdataset header visible to this search (header->serial not newer than
 * search->serial, not IGNOREd, and EXISTS), then test whether that node's
 * full name is a subdomain of 'name'.
 *
 * Each candidate node is examined with its bucket lock held for reading.
 * After the walk, the node's prefix and origin are concatenated into
 * 'next', which is what dns_name_issubdomain() is applied to.
 *
 * Note that 'chain' is advanced by this function.
 *
 * NOTE(review): this listing omits some lines (the remaining parameter and
 * local declarations, the loop exits, and the final return).  'answer'
 * starts as ISC_FALSE and is presumably set when the subdomain test
 * succeeds -- confirm against the full source.
 */
2845 static inline isc_boolean_t
2846 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2849 dns_fixedname_t fnext;
2850 dns_fixedname_t forigin;
2855 dns_rbtnode_t *node;
2856 isc_result_t result;
2857 isc_boolean_t answer = ISC_FALSE;
2858 rdatasetheader_t *header;
2860 rbtdb = search->rbtdb;
2862 dns_name_init(&prefix, NULL);
2863 dns_fixedname_init(&fnext);
2864 next = dns_fixedname_name(&fnext);
2865 dns_fixedname_init(&forigin);
2866 origin = dns_fixedname_name(&forigin);
2868 result = dns_rbtnodechain_next(chain, NULL, NULL);
2869 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2871 result = dns_rbtnodechain_current(chain, &prefix,
2873 if (result != ISC_R_SUCCESS)
2875 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2876 isc_rwlocktype_read);
2877 for (header = node->data;
2879 header = header->next) {
2880 if (header->serial <= search->serial &&
2881 !IGNORE(header) && EXISTS(header))
2884 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2885 isc_rwlocktype_read);
2888 result = dns_rbtnodechain_next(chain, NULL, NULL);
2890 if (result == ISC_R_SUCCESS)
2891 result = dns_name_concatenate(&prefix, origin, next, NULL);
2892 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * Given a query name 'qname' and a candidate wildcard name 'wname', check
 * whether some ancestor of 'qname' below the wildcard's parent is covered
 * by an active neighbouring name in the tree.
 *
 * Working on a private copy of search->chain, the function first walks
 * backwards (dns_rbtnodechain_prev) and then forwards
 * (dns_rbtnodechain_next) until it meets a node with a header visible to
 * this search (serial not newer than search->serial, not IGNOREd, EXISTS),
 * building the full names 'prev' and 'next'.  If either name cannot be
 * built, the corresponding check_prev/check_next flag is cleared.
 *
 * It then strips the leftmost (wildcard) label from 'wname' to obtain the
 * terminal name, and repeatedly strips the leftmost label from a clone of
 * 'qname', testing at each step whether 'prev' or 'next' is a subdomain of
 * the shortened name, until the shortened name equals the terminal name.
 *
 * NOTE(review): this listing omits some lines (several local
 * declarations, the loop exits, the statement executed when the subdomain
 * test succeeds, and the final return).  'answer' starts as ISC_FALSE and
 * is presumably set on a successful test -- confirm against the full
 * source.
 */
2897 static inline isc_boolean_t
2898 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2899 dns_fixedname_t fnext;
2900 dns_fixedname_t forigin;
2901 dns_fixedname_t fprev;
2909 dns_rbtnode_t *node;
2910 dns_rbtnodechain_t chain;
2911 isc_boolean_t check_next = ISC_TRUE;
2912 isc_boolean_t check_prev = ISC_TRUE;
2913 isc_boolean_t answer = ISC_FALSE;
2914 isc_result_t result;
2915 rdatasetheader_t *header;
2918 rbtdb = search->rbtdb;
2920 dns_name_init(&name, NULL);
2921 dns_name_init(&tname, NULL);
2922 dns_name_init(&rname, NULL);
2923 dns_fixedname_init(&fnext);
2924 next = dns_fixedname_name(&fnext);
2925 dns_fixedname_init(&fprev);
2926 prev = dns_fixedname_name(&fprev);
2927 dns_fixedname_init(&forigin);
2928 origin = dns_fixedname_name(&forigin);
2931 * Find if qname is at or below a empty node.
2932 * Use our own copy of the chain.
2935 chain = search->chain;
2938 result = dns_rbtnodechain_current(&chain, &name,
2940 if (result != ISC_R_SUCCESS)
2942 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2943 isc_rwlocktype_read);
2944 for (header = node->data;
2946 header = header->next) {
2947 if (header->serial <= search->serial &&
2948 !IGNORE(header) && EXISTS(header))
2951 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2952 isc_rwlocktype_read);
2955 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2956 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2957 if (result == ISC_R_SUCCESS)
2958 result = dns_name_concatenate(&name, origin, prev, NULL);
2959 if (result != ISC_R_SUCCESS)
2960 check_prev = ISC_FALSE;
2962 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2963 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2965 result = dns_rbtnodechain_current(&chain, &name,
2967 if (result != ISC_R_SUCCESS)
2969 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2970 isc_rwlocktype_read);
2971 for (header = node->data;
2973 header = header->next) {
2974 if (header->serial <= search->serial &&
2975 !IGNORE(header) && EXISTS(header))
2978 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2979 isc_rwlocktype_read);
2982 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2984 if (result == ISC_R_SUCCESS)
2985 result = dns_name_concatenate(&name, origin, next, NULL);
2986 if (result != ISC_R_SUCCESS)
2987 check_next = ISC_FALSE;
2989 dns_name_clone(qname, &rname);
2992 * Remove the wildcard label to find the terminal name.
2994 n = dns_name_countlabels(wname);
2995 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2998 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2999 (check_next && dns_name_issubdomain(next, &rname))) {
3004 * Remove the left hand label.
3006 n = dns_name_countlabels(&rname);
3007 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3008 } while (!dns_name_equal(&rname, &tname));
/*
 * Search the ancestor levels recorded in search->chain for a wildcard that
 * matches the query name.
 *
 * Starting from search->chain.level_matches, each level node is examined
 * under its bucket read lock to see whether it has a header visible to
 * this search (serial not newer than search->serial, not IGNOREd, EXISTS).
 * For a level being considered, the wildcard name is built by
 * concatenating dns_wildcardname with the node's name and then with the
 * names of the enclosing levels, and looked up in rbtdb->tree with
 * DNS_RBTFIND_EMPTYDATA.
 *
 * If the wildcard node is found and is active (or activeempty() reports an
 * active name below it), activeemtpynode() is consulted; when it reports
 * true the function returns ISC_R_NOTFOUND, otherwise the wildcard node is
 * the match.  Lookup results other than ISC_R_NOTFOUND and
 * DNS_R_PARTIALMATCH are treated as errors.  When the level node itself is
 * active, wildcards at higher levels cannot apply and the result is
 * ISC_R_NOTFOUND.
 *
 * The caller must hold the tree lock and must not hold any node locks.
 *
 * NOTE(review): this listing omits some lines (remaining parameters and
 * locals, the outer loop header, the tests of the level's 'wild' bit, the
 * assignment to '*nodep', loop exits and the final return) -- confirm the
 * exact control flow against the full source.
 */
3012 static inline isc_result_t
3013 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3017 dns_rbtnode_t *node, *level_node, *wnode;
3018 rdatasetheader_t *header;
3019 isc_result_t result = ISC_R_NOTFOUND;
3022 dns_fixedname_t fwname;
3024 isc_boolean_t done, wild, active;
3025 dns_rbtnodechain_t wchain;
3028 * Caller must be holding the tree lock and MUST NOT be holding
3033 * Examine each ancestor level. If the level's wild bit
3034 * is set, then construct the corresponding wildcard name and
3035 * search for it. If the wildcard node exists, and is active in
3036 * this version, we're done. If not, then we next check to see
3037 * if the ancestor is active in this version. If so, then there
3038 * can be no possible wildcard match and again we're done. If not,
3039 * continue the search.
3042 rbtdb = search->rbtdb;
3043 i = search->chain.level_matches;
3047 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3048 isc_rwlocktype_read);
3051 * First we try to figure out if this node is active in
3052 * the search's version. We do this now, even though we
3053 * may not need the information, because it simplifies the
3054 * locking and code flow.
3056 for (header = node->data;
3058 header = header->next) {
3059 if (header->serial <= search->serial &&
3060 !IGNORE(header) && EXISTS(header))
3073 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3074 isc_rwlocktype_read);
3078 * Construct the wildcard name for this level.
3080 dns_name_init(&name, NULL);
3081 dns_rbt_namefromnode(node, &name);
3082 dns_fixedname_init(&fwname);
3083 wname = dns_fixedname_name(&fwname);
3084 result = dns_name_concatenate(dns_wildcardname, &name,
3087 while (result == ISC_R_SUCCESS && j != 0) {
3089 level_node = search->chain.levels[j];
3090 dns_name_init(&name, NULL);
3091 dns_rbt_namefromnode(level_node, &name);
3092 result = dns_name_concatenate(wname,
3097 if (result != ISC_R_SUCCESS)
3101 dns_rbtnodechain_init(&wchain, NULL);
3102 result = dns_rbt_findnode(rbtdb->tree, wname,
3103 NULL, &wnode, &wchain,
3104 DNS_RBTFIND_EMPTYDATA,
3106 if (result == ISC_R_SUCCESS) {
3110 * We have found the wildcard node. If it
3111 * is active in the search's version, we're
3114 lock = &rbtdb->node_locks[wnode->locknum].lock;
3115 NODE_LOCK(lock, isc_rwlocktype_read);
3116 for (header = wnode->data;
3118 header = header->next) {
3119 if (header->serial <= search->serial &&
3120 !IGNORE(header) && EXISTS(header))
3123 NODE_UNLOCK(lock, isc_rwlocktype_read);
3124 if (header != NULL ||
3125 activeempty(search, &wchain, wname)) {
3126 if (activeemtpynode(search, qname,
3128 return (ISC_R_NOTFOUND);
3131 * The wildcard node is active!
3133 * Note: result is still ISC_R_SUCCESS
3134 * so we don't have to set it.
3139 } else if (result != ISC_R_NOTFOUND &&
3140 result != DNS_R_PARTIALMATCH) {
3142 * An error has occurred. Bail out.
3150 * The level node is active. Any wildcarding
3151 * present at higher levels has no
3152 * effect and we're done.
3154 result = ISC_R_NOTFOUND;
3160 node = search->chain.levels[i];
/*
 * Check whether any NSEC3 record stored in 'header' uses the same NSEC3
 * parameters as the version being searched.
 *
 * 'header' must be an NSEC3 rdataset (enforced by REQUIRE).  The raw slab
 * that follows the header is walked: a two-byte big-endian record count,
 * followed (when DNS_RDATASET_FIXED is set) by 4 bytes per record, then
 * records that each begin with a two-byte big-endian length.  Each record
 * is converted to a dns_rdata_nsec3_t and its hash, iterations, salt
 * length and salt bytes are compared with those in search->rbtversion.
 *
 * The address of the local 'region' is what dns_rdata_fromregion() must
 * receive; the argument had been corrupted into a non-C character
 * sequence and is restored here.
 *
 * NOTE(review): this listing omits some lines (the assignment of
 * region.base, the pointer advances, and both return statements).
 * Presumably a parameter match yields ISC_TRUE and exhausting the records
 * yields ISC_FALSE -- confirm against the full source.
 */
3168 static isc_boolean_t
3169 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3171 dns_rdata_t rdata = DNS_RDATA_INIT;
3172 dns_rdata_nsec3_t nsec3;
3173 unsigned char *raw; /* RDATASLAB */
3174 unsigned int rdlen, count;
3175 isc_region_t region;
3176 isc_result_t result;
3178 REQUIRE(header->type == dns_rdatatype_nsec3);
3180 raw = (unsigned char *)header + sizeof(*header);
3181 count = raw[0] * 256 + raw[1]; /* count */
3182 #if DNS_RDATASET_FIXED
3183 raw += count * 4 + 2;
3187 while (count-- > 0) {
3188 rdlen = raw[0] * 256 + raw[1];
3189 #if DNS_RDATASET_FIXED
3195 region.length = rdlen;
3196 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3197 dns_rdatatype_nsec3, &region);
3199 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3200 INSIST(result == ISC_R_SUCCESS);
3201 if (nsec3.hash == search->rbtversion->hash &&
3202 nsec3.iterations == search->rbtversion->iterations &&
3203 nsec3.salt_length == search->rbtversion->salt_length &&
3204 memcmp(nsec3.salt, search->rbtversion->salt,
3205 nsec3.salt_length) == 0)
3207 dns_rdata_reset(&rdata);
/*
 * Starting at the current position of search->chain, walk backwards
 * through 'tree' to find the closest preceding node that carries the
 * denial-of-existence record for this search, and return it to the caller.
 *
 * The record type looked for is NSEC3 / RRSIG(NSEC3) when 'tree' is the
 * database's NSEC3 tree, and NSEC / RRSIG(NSEC) otherwise.  A signature is
 * required only when 'secure' equals dns_db_secure.
 *
 * For each node (examined under its bucket read lock) the active headers
 * are scanned:
 *  - an NSEC3 record whose parameters do not match the version
 *    (matchparams()) makes the node count as empty and the walk continues;
 *  - when the record (and its signature, if required) is present, the
 *    node's full name is built into 'foundname', a reference is taken if
 *    'nodep' is supplied, and the rdatasets are bound;
 *  - an active node with neither record nor signature is treated as empty
 *    (glue or otherwise obscured data) and the walk continues;
 *  - an active node with only one of the two yields DNS_R_BADDB.
 *
 * When the walk runs off the beginning of the tree and wrapping is enabled
 * ('wraps'), it restarts from the last node of the tree.  A final
 * ISC_R_NOMORE is converted to DNS_R_BADDB.
 *
 * NOTE(review): this listing omits some lines (loop headers, several
 * assignments including that of 'wraps', arguments of multi-line calls,
 * and the final return) -- confirm the exact control flow against the
 * full source.
 */
3212 static inline isc_result_t
3213 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3214 dns_name_t *foundname, dns_rdataset_t *rdataset,
3215 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3216 dns_db_secure_t secure)
3218 dns_rbtnode_t *node;
3219 rdatasetheader_t *header, *header_next, *found, *foundsig;
3220 isc_boolean_t empty_node;
3221 isc_result_t result;
3222 dns_fixedname_t fname, forigin;
3223 dns_name_t *name, *origin;
3224 dns_rdatatype_t type;
3225 rbtdb_rdatatype_t sigtype;
3226 isc_boolean_t wraps;
3227 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3229 if (tree == search->rbtdb->nsec3) {
3230 type = dns_rdatatype_nsec3;
3231 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3234 type = dns_rdatatype_nsec;
3235 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3242 dns_fixedname_init(&fname);
3243 name = dns_fixedname_name(&fname);
3244 dns_fixedname_init(&forigin);
3245 origin = dns_fixedname_name(&forigin);
3246 result = dns_rbtnodechain_current(&search->chain, name,
3248 if (result != ISC_R_SUCCESS)
3250 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3251 isc_rwlocktype_read);
3254 empty_node = ISC_TRUE;
3255 for (header = node->data;
3257 header = header_next) {
3258 header_next = header->next;
3260 * Look for an active, extant NSEC or RRSIG NSEC.
3263 if (header->serial <= search->serial &&
3266 * Is this a "this rdataset doesn't
3269 if (NONEXISTENT(header))
3273 header = header->down;
3274 } while (header != NULL);
3275 if (header != NULL) {
3277 * We now know that there is at least one
3278 * active rdataset at this node.
3280 empty_node = ISC_FALSE;
3281 if (header->type == type) {
3283 if (foundsig != NULL)
3285 } else if (header->type == sigtype) {
3293 if (found != NULL && search->rbtversion->havensec3 &&
3294 found->type == dns_rdatatype_nsec3 &&
3295 !matchparams(found, search)) {
3296 empty_node = ISC_TRUE;
3299 result = dns_rbtnodechain_prev(&search->chain,
3301 } else if (found != NULL &&
3302 (foundsig != NULL || !need_sig))
3305 * We've found the right NSEC/NSEC3 record.
3307 * Note: for this to really be the right
3308 * NSEC record, it's essential that the NSEC
3309 * records of any nodes obscured by a zone
3310 * cut have been removed; we assume this is
3313 result = dns_name_concatenate(name, origin,
3315 if (result == ISC_R_SUCCESS) {
3316 if (nodep != NULL) {
3317 new_reference(search->rbtdb,
3321 bind_rdataset(search->rbtdb, node,
3324 if (foundsig != NULL)
3325 bind_rdataset(search->rbtdb,
3331 } else if (found == NULL && foundsig == NULL) {
3333 * This node is active, but has no NSEC or
3334 * RRSIG NSEC. That means it's glue or
3335 * other obscured zone data that isn't
3336 * relevant for our search. Treat the
3337 * node as if it were empty and keep looking.
3339 empty_node = ISC_TRUE;
3340 result = dns_rbtnodechain_prev(&search->chain,
3344 * We found an active node, but either the
3345 * NSEC or the RRSIG NSEC is missing. This
3348 result = DNS_R_BADDB;
3352 * This node isn't active. We've got to keep
3355 result = dns_rbtnodechain_prev(&search->chain, NULL,
3358 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3359 isc_rwlocktype_read);
3360 } while (empty_node && result == ISC_R_SUCCESS);
3362 if (result == ISC_R_NOMORE && wraps) {
3363 result = dns_rbtnodechain_last(&search->chain, tree,
3365 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3372 * If the result is ISC_R_NOMORE, then we got to the beginning of
3373 * the database and didn't find a NSEC record. This shouldn't
3376 if (result == ISC_R_NOMORE)
3377 result = DNS_R_BADDB;
/*
 * Find the best match for 'name' and 'type' in a zone database.
 *
 * Set-up: if 'version' is NULL the current version is attached (and closed
 * again on exit); the search block is initialised from the version's
 * serial and 'options'.  The tree read lock is held for the whole lookup.
 * The tree searched is the NSEC3 tree when DNS_DBFIND_FORCENSEC3 is set;
 * zone_zonecut_callback() is invoked on callback nodes on the way down and
 * may record a zone cut in the search block.
 *
 * Partial match (the exact name is not in the tree):
 *  - if a zone cut was recorded, setup_delegation() produces the result;
 *  - otherwise, if a wildcard level was seen, find_wildcard() is tried;
 *  - otherwise the name does not exist: depending on the version's
 *    security state and the FORCENSEC/FORCENSEC3 options,
 *    find_closest_nsec() supplies the covering record, and the result is
 *    DNS_R_EMPTYNAME when activeempty() reports an active name below,
 *    DNS_R_NXDOMAIN otherwise.
 *
 * Exact (or wildcard) match: with the node's bucket lock held for reading,
 * the active headers at the node are scanned for the requested type, its
 * RRSIG, a CNAME (unless CNAME matching is disabled: below a zone cut, or
 * for KEY and NSEC queries), NSEC and its RRSIG, and -- when the node may
 * itself be a zone cut -- an NS rdataset.  NSEC3 headers whose parameters
 * do not match the version (matchparams()) cause the node to be handled as
 * if it were not there.
 *
 * Results visible in this code include DNS_R_NXRRSET (type absent),
 * DNS_R_BADDB (secure zone without the expected NSEC / signature),
 * DNS_R_EMPTYWILD, DNS_R_CNAME, DNS_R_ZONECUT, DNS_R_GLUE, the results of
 * setup_delegation(), and ISC_R_SUCCESS for an ordinary answer.  With
 * DNS_DBFIND_VALIDATEGLUE, glue rejected by valid_glue() is replaced by the
 * delegation.  For wildcard matches DNS_NAMEATTR_WILDCARD is set on
 * 'foundname'.
 *
 * Clean-up: the tree lock is released; if a zone cut reference was taken
 * but not handed to the caller (search.need_cleanup), it is dropped under
 * the node lock; the version is closed if it was attached here; and the
 * search chain is reset.
 *
 * 'now' is not used for zone lookups (rdatasets are bound with a 'now'
 * of 0).
 *
 * NOTE(review): this listing omits many lines of the function (labels,
 * gotos, several conditions and assignments, and the final return), so the
 * summary above describes only what the visible lines establish --
 * confirm the exact control flow against the full source.
 */
3383 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3384 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3385 dns_dbnode_t **nodep, dns_name_t *foundname,
3386 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3388 dns_rbtnode_t *node = NULL;
3389 isc_result_t result;
3390 rbtdb_search_t search;
3391 isc_boolean_t cname_ok = ISC_TRUE;
3392 isc_boolean_t close_version = ISC_FALSE;
3393 isc_boolean_t maybe_zonecut = ISC_FALSE;
3394 isc_boolean_t at_zonecut = ISC_FALSE;
3396 isc_boolean_t empty_node;
3397 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3398 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3399 rbtdb_rdatatype_t sigtype;
3400 isc_boolean_t active;
3401 dns_rbtnodechain_t chain;
3405 search.rbtdb = (dns_rbtdb_t *)db;
3407 REQUIRE(VALID_RBTDB(search.rbtdb));
3410 * We don't care about 'now'.
3415 * If the caller didn't supply a version, attach to the current
3418 if (version == NULL) {
3419 currentversion(db, &version);
3420 close_version = ISC_TRUE;
3423 search.rbtversion = version;
3424 search.serial = search.rbtversion->serial;
3425 search.options = options;
3426 search.copy_name = ISC_FALSE;
3427 search.need_cleanup = ISC_FALSE;
3428 search.wild = ISC_FALSE;
3429 search.zonecut = NULL;
3430 dns_fixedname_init(&search.zonecut_name);
3431 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3435 * 'wild' will be true iff. we've matched a wildcard.
3439 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3442 * Search down from the root of the tree. If, while going down, we
3443 * encounter a callback node, zone_zonecut_callback() will search the
3444 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3446 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3448 result = dns_rbt_findnode(tree, name, foundname, &node,
3449 &search.chain, DNS_RBTFIND_EMPTYDATA,
3450 zone_zonecut_callback, &search);
3452 if (result == DNS_R_PARTIALMATCH) {
3454 if (search.zonecut != NULL) {
3455 result = setup_delegation(&search, nodep, foundname,
3456 rdataset, sigrdataset);
3462 * At least one of the levels in the search chain
3463 * potentially has a wildcard. For each such level,
3464 * we must see if there's a matching wildcard active
3465 * in the current version.
3467 result = find_wildcard(&search, &node, name);
3468 if (result == ISC_R_SUCCESS) {
3469 result = dns_name_copy(name, foundname, NULL);
3470 if (result != ISC_R_SUCCESS)
3475 else if (result != ISC_R_NOTFOUND)
3479 chain = search.chain;
3480 active = activeempty(&search, &chain, name);
3483 * If we're here, then the name does not exist, is not
3484 * beneath a zonecut, and there's no matching wildcard.
3486 if ((search.rbtversion->secure == dns_db_secure &&
3487 !search.rbtversion->havensec3) ||
3488 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3489 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3491 result = find_closest_nsec(&search, nodep, foundname,
3492 rdataset, sigrdataset, tree,
3493 search.rbtversion->secure);
3494 if (result == ISC_R_SUCCESS)
3495 result = active ? DNS_R_EMPTYNAME :
3498 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3500 } else if (result != ISC_R_SUCCESS)
3505 * We have found a node whose name is the desired name, or we
3506 * have matched a wildcard.
3509 if (search.zonecut != NULL) {
3511 * If we're beneath a zone cut, we don't want to look for
3512 * CNAMEs because they're not legitimate zone glue.
3514 cname_ok = ISC_FALSE;
3517 * The node may be a zone cut itself. If it might be one,
3518 * make sure we check for it later.
3520 * DS records live above the zone cut in ordinary zone so
3521 * we want to ignore any referral.
3523 * Stub zones don't have anything "above" the delgation so
3524 * we always return a referral.
3526 if (node->find_callback &&
3527 ((node != search.rbtdb->origin_node &&
3528 !dns_rdatatype_atparent(type)) ||
3529 IS_STUB(search.rbtdb)))
3530 maybe_zonecut = ISC_TRUE;
3534 * Certain DNSSEC types are not subject to CNAME matching
3535 * (RFC4035, section 2.5 and RFC3007).
3537 * We don't check for RRSIG, because we don't store RRSIG records
3540 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3541 cname_ok = ISC_FALSE;
3544 * We now go looking for rdata...
3547 lock = &search.rbtdb->node_locks[node->locknum].lock;
3548 NODE_LOCK(lock, isc_rwlocktype_read);
3552 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3556 empty_node = ISC_TRUE;
3557 for (header = node->data; header != NULL; header = header_next) {
3558 header_next = header->next;
3560 * Look for an active, extant rdataset.
3563 if (header->serial <= search.serial &&
3566 * Is this a "this rdataset doesn't
3569 if (NONEXISTENT(header))
3573 header = header->down;
3574 } while (header != NULL);
3575 if (header != NULL) {
3577 * We now know that there is at least one active
3578 * rdataset at this node.
3580 empty_node = ISC_FALSE;
3583 * Do special zone cut handling, if requested.
3585 if (maybe_zonecut &&
3586 header->type == dns_rdatatype_ns) {
3588 * We increment the reference count on node to
3589 * ensure that search->zonecut_rdataset will
3590 * still be valid later.
3592 new_reference(search.rbtdb, node);
3593 search.zonecut = node;
3594 search.zonecut_rdataset = header;
3595 search.zonecut_sigrdataset = NULL;
3596 search.need_cleanup = ISC_TRUE;
3597 maybe_zonecut = ISC_FALSE;
3598 at_zonecut = ISC_TRUE;
3600 * It is not clear if KEY should still be
3601 * allowed at the parent side of the zone
3602 * cut or not. It is needed for RFC3007
3603 * validated updates.
3605 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3606 && type != dns_rdatatype_nsec
3607 && type != dns_rdatatype_key) {
3609 * Glue is not OK, but any answer we
3610 * could return would be glue. Return
3616 if (found != NULL && foundsig != NULL)
3622 * If the NSEC3 record doesn't match the chain
3623 * we are using behave as if it isn't here.
3625 if (header->type == dns_rdatatype_nsec3 &&
3626 !matchparams(header, &search)) {
3627 NODE_UNLOCK(lock, isc_rwlocktype_read);
3631 * If we found a type we were looking for,
3634 if (header->type == type ||
3635 type == dns_rdatatype_any ||
3636 (header->type == dns_rdatatype_cname &&
3639 * We've found the answer!
3642 if (header->type == dns_rdatatype_cname &&
3645 * We may be finding a CNAME instead
3646 * of the desired type.
3648 * If we've already got the CNAME RRSIG,
3649 * use it, otherwise change sigtype
3650 * so that we find it.
3652 if (cnamesig != NULL)
3653 foundsig = cnamesig;
3656 RBTDB_RDATATYPE_SIGCNAME;
3659 * If we've got all we need, end the search.
3661 if (!maybe_zonecut && foundsig != NULL)
3663 } else if (header->type == sigtype) {
3665 * We've found the RRSIG rdataset for our
3666 * target type. Remember it.
3670 * If we've got all we need, end the search.
3672 if (!maybe_zonecut && found != NULL)
3674 } else if (header->type == dns_rdatatype_nsec &&
3675 !search.rbtversion->havensec3) {
3677 * Remember a NSEC rdataset even if we're
3678 * not specifically looking for it, because
3679 * we might need it later.
3681 nsecheader = header;
3682 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3683 !search.rbtversion->havensec3) {
3685 * If we need the NSEC rdataset, we'll also
3686 * need its signature.
3689 } else if (cname_ok &&
3690 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3692 * If we get a CNAME match, we'll also need
3702 * We have an exact match for the name, but there are no
3703 * active rdatasets in the desired version. That means that
3704 * this node doesn't exist in the desired version, and that
3705 * we really have a partial match.
3708 NODE_UNLOCK(lock, isc_rwlocktype_read);
3714 * If we didn't find what we were looking for...
3716 if (found == NULL) {
3717 if (search.zonecut != NULL) {
3719 * We were trying to find glue at a node beneath a
3720 * zone cut, but didn't.
3722 * Return the delegation.
3724 NODE_UNLOCK(lock, isc_rwlocktype_read);
3725 result = setup_delegation(&search, nodep, foundname,
3726 rdataset, sigrdataset);
3730 * The desired type doesn't exist.
3732 result = DNS_R_NXRRSET;
3733 if (search.rbtversion->secure == dns_db_secure &&
3734 !search.rbtversion->havensec3 &&
3735 (nsecheader == NULL || nsecsig == NULL)) {
3737 * The zone is secure but there's no NSEC,
3738 * or the NSEC has no signature!
3741 result = DNS_R_BADDB;
3745 NODE_UNLOCK(lock, isc_rwlocktype_read);
3746 result = find_closest_nsec(&search, nodep, foundname,
3747 rdataset, sigrdataset,
3749 search.rbtversion->secure);
3750 if (result == ISC_R_SUCCESS)
3751 result = DNS_R_EMPTYWILD;
3754 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3758 * There's no NSEC record, and we were told
3761 result = DNS_R_BADDB;
3764 if (nodep != NULL) {
3765 new_reference(search.rbtdb, node);
3768 if ((search.rbtversion->secure == dns_db_secure &&
3769 !search.rbtversion->havensec3) ||
3770 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3772 bind_rdataset(search.rbtdb, node, nsecheader,
3774 if (nsecsig != NULL)
3775 bind_rdataset(search.rbtdb, node,
3776 nsecsig, 0, sigrdataset);
3779 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3784 * We found what we were looking for, or we found a CNAME.
3787 if (type != found->type &&
3788 type != dns_rdatatype_any &&
3789 found->type == dns_rdatatype_cname) {
3791 * We weren't doing an ANY query and we found a CNAME instead
3792 * of the type we were looking for, so we need to indicate
3793 * that result to the caller.
3795 result = DNS_R_CNAME;
3796 } else if (search.zonecut != NULL) {
3798 * If we're beneath a zone cut, we must indicate that the
3799 * result is glue, unless we're actually at the zone cut
3800 * and the type is NSEC or KEY.
3802 if (search.zonecut == node) {
3804 * It is not clear if KEY should still be
3805 * allowed at the parent side of the zone
3806 * cut or not. It is needed for RFC3007
3807 * validated updates.
3809 if (type == dns_rdatatype_nsec ||
3810 type == dns_rdatatype_nsec3 ||
3811 type == dns_rdatatype_key)
3812 result = ISC_R_SUCCESS;
3813 else if (type == dns_rdatatype_any)
3814 result = DNS_R_ZONECUT;
3816 result = DNS_R_GLUE;
3818 result = DNS_R_GLUE;
3820 * We might have found data that isn't glue, but was occluded
3821 * by a dynamic update. If the caller cares about this, they
3822 * will have told us to validate glue.
3824 * XXX We should cache the glue validity state!
3826 if (result == DNS_R_GLUE &&
3827 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3828 !valid_glue(&search, foundname, type, node)) {
3829 NODE_UNLOCK(lock, isc_rwlocktype_read);
3830 result = setup_delegation(&search, nodep, foundname,
3831 rdataset, sigrdataset);
3836 * An ordinary successful query!
3838 result = ISC_R_SUCCESS;
3841 if (nodep != NULL) {
3843 new_reference(search.rbtdb, node);
3845 search.need_cleanup = ISC_FALSE;
3849 if (type != dns_rdatatype_any) {
3850 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3851 if (foundsig != NULL)
3852 bind_rdataset(search.rbtdb, node, foundsig, 0,
3857 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3860 NODE_UNLOCK(lock, isc_rwlocktype_read);
3863 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3866 * If we found a zonecut but aren't going to use it, we have to
3869 if (search.need_cleanup) {
3870 node = search.zonecut;
3871 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3873 NODE_LOCK(lock, isc_rwlocktype_read);
3874 decrement_reference(search.rbtdb, node, 0,
3875 isc_rwlocktype_read, isc_rwlocktype_none,
3877 NODE_UNLOCK(lock, isc_rwlocktype_read);
3881 closeversion(db, &version, ISC_FALSE);
3883 dns_rbtnodechain_reset(&search.chain);
/*
 * zone_findzonecut() -- placeholder; judging by the name, the
 * zone-database counterpart of cache_findzonecut().
 *
 * The visible body performs no lookup: it reports
 * "zone_findzonecut() called!" through FATAL_ERROR(), and the statement
 * after that returns ISC_R_NOTIMPLEMENTED.
 *
 * NOTE(review): only UNUSED(sigrdataset) is visible in this extract;
 * presumably the other parameters are marked unused too -- confirm
 * against the full file.
 */
3889 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3890 isc_stdtime_t now, dns_dbnode_t **nodep,
3891 dns_name_t *foundname,
3892 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3901 UNUSED(sigrdataset);
3903 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3905 return (ISC_R_NOTIMPLEMENTED);
/*
 * cache_zonecut_callback() -- node callback handed to dns_rbt_findnode()
 * by cache_find(); 'arg' is that search's rbtdb_search_t.
 *
 * Requires that no zone cut has been recorded yet
 * (search->zonecut == NULL).  Under the node lock (taken in read mode
 * first) it walks node->data looking for a DNAME header and its RRSIG,
 * dealing with expired headers on the way.
 *
 * If a DNAME header was found and its trust is not "pending" (or the
 * caller set DNS_DBFIND_PENDINGOK), the node is referenced and stored in
 * 'search' as the zone cut, need_cleanup is set, and result becomes
 * DNS_R_PARTIALMATCH; otherwise result becomes DNS_R_CONTINUE.
 *
 * NOTE(review): this extract omits some lines (e.g. the declaration of
 * 'lock', the second half of the DNAME tests and the final return);
 * confirm the details against the full file.
 */
3909 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3910 rbtdb_search_t *search = arg;
3911 rdatasetheader_t *header, *header_prev, *header_next;
3912 rdatasetheader_t *dname_header, *sigdname_header;
3913 isc_result_t result;
3915 isc_rwlocktype_t locktype;
3919 REQUIRE(search->zonecut == NULL);
3922 * Keep compiler silent.
3926 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3927 locktype = isc_rwlocktype_read;
3928 NODE_LOCK(lock, locktype);
3931 * Look for a DNAME or RRSIG DNAME rdataset.
3933 dname_header = NULL;
3934 sigdname_header = NULL;
/* header_next is saved first because the current header may be freed. */
3936 for (header = node->data; header != NULL; header = header_next) {
3937 header_next = header->next;
3938 if (header->rdh_ttl <= search->now) {
3940 * This rdataset is stale. If no one else is
3941 * using the node, we can clean it up right
3942 * now, otherwise we mark it as stale, and
3943 * the node as dirty, so it will get cleaned
3946 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3947 (locktype == isc_rwlocktype_write ||
3948 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3950 * We update the node's status only when we
3951 * can get write access; otherwise, we leave
3952 * others to this work. Periodical cleaning
3953 * will eventually take the job as the last
3955 * We won't downgrade the lock, since other
3956 * rdatasets are probably stale, too.
3958 locktype = isc_rwlocktype_write;
3960 if (dns_rbtnode_refcurrent(node) == 0) {
3964 * header->down can be non-NULL if the
3965 * refcount has just decremented to 0
3966 * but decrement_reference() has not
3967 * performed clean_cache_node(), in
3968 * which case we need to purge the
3969 * stale headers first.
3971 mctx = search->rbtdb->common.mctx;
3972 clean_stale_headers(search->rbtdb,
3975 if (header_prev != NULL)
3979 node->data = header->next;
3980 free_rdataset(search->rbtdb, mctx,
3983 header->attributes |=
3984 RDATASET_ATTR_STALE;
3986 header_prev = header;
3989 header_prev = header;
3990 } else if (header->type == dns_rdatatype_dname &&
3992 dname_header = header;
3993 header_prev = header;
3994 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3996 sigdname_header = header;
3997 header_prev = header;
3999 header_prev = header;
/* Use the DNAME as a zone cut only if its trust level is acceptable. */
4002 if (dname_header != NULL &&
4003 (!DNS_TRUST_PENDING(dname_header->trust) ||
4004 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4006 * We increment the reference count on node to ensure that
4007 * search->zonecut_rdataset will still be valid later.
4009 new_reference(search->rbtdb, node);
4010 INSIST(!ISC_LINK_LINKED(node, deadlink));
4011 search->zonecut = node;
4012 search->zonecut_rdataset = dname_header;
4013 search->zonecut_sigrdataset = sigdname_header;
4014 search->need_cleanup = ISC_TRUE;
4015 result = DNS_R_PARTIALMATCH;
4017 result = DNS_R_CONTINUE;
/* Unlock in the mode tracked by 'locktype' (write if an upgrade happened). */
4019 NODE_UNLOCK(lock, locktype);
/*
 * find_deepest_zonecut() -- starting at 'node', look for an NS rdataset
 * (and its RRSIG) in the cache, moving on to search->chain.levels[i]
 * while nothing has been found and i > 0.
 *
 * For each node visited the node lock is taken (read mode first),
 * expired headers are dealt with, and extant NS / RRSIG NS headers are
 * examined.  When an NS header has been found:
 *   - 'foundname', if non-NULL, is built from the node's name
 *     concatenated with the names of the remaining chain levels;
 *   - result becomes DNS_R_DELEGATION;
 *   - the node is referenced when 'nodep' is non-NULL;
 *   - the NS (and RRSIG, if any) headers are bound to the caller's
 *     rdatasets and, when need_headerupdate() says so, passed to
 *     update_header() under the write lock.
 * 'result' starts out as ISC_R_NOTFOUND.
 *
 * NOTE(review): the enclosing loop statement, the assignments to
 * found/foundsig and the final return are not visible in this extract.
 */
4024 static inline isc_result_t
4025 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4026 dns_dbnode_t **nodep, dns_name_t *foundname,
4027 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4030 dns_rbtnode_t *level_node;
4031 rdatasetheader_t *header, *header_prev, *header_next;
4032 rdatasetheader_t *found, *foundsig;
4033 isc_result_t result = ISC_R_NOTFOUND;
4038 isc_rwlocktype_t locktype;
4041 * Caller must be holding the tree lock.
4044 rbtdb = search->rbtdb;
4045 i = search->chain.level_matches;
4048 locktype = isc_rwlocktype_read;
4049 lock = &rbtdb->node_locks[node->locknum].lock;
4050 NODE_LOCK(lock, locktype);
4053 * Look for NS and RRSIG NS rdatasets.
4058 for (header = node->data;
4060 header = header_next) {
4061 header_next = header->next;
4062 if (header->rdh_ttl <= search->now) {
4064 * This rdataset is stale. If no one else is
4065 * using the node, we can clean it up right
4066 * now, otherwise we mark it as stale, and
4067 * the node as dirty, so it will get cleaned
4070 if ((header->rdh_ttl <= search->now -
4072 (locktype == isc_rwlocktype_write ||
4073 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4075 * We update the node's status only
4076 * when we can get write access.
4078 locktype = isc_rwlocktype_write;
4080 if (dns_rbtnode_refcurrent(node)
4084 m = search->rbtdb->common.mctx;
4085 clean_stale_headers(
4088 if (header_prev != NULL)
4094 free_rdataset(rbtdb, m,
4097 header->attributes |=
4098 RDATASET_ATTR_STALE;
4100 header_prev = header;
4103 header_prev = header;
4104 } else if (EXISTS(header)) {
4106 * We've found an extant rdataset. See if
4107 * we're interested in it.
4109 if (header->type == dns_rdatatype_ns) {
4111 if (foundsig != NULL)
4113 } else if (header->type ==
4114 RBTDB_RDATATYPE_SIGNS) {
4119 header_prev = header;
4121 header_prev = header;
4124 if (found != NULL) {
4126 * If we have to set foundname, we do it before
4127 * anything else. If we were to set foundname after
4128 * we had set nodep or bound the rdataset, then we'd
4129 * have to undo that work if dns_name_concatenate()
4130 * failed. By setting foundname first, there's
4131 * nothing to undo if we have trouble.
4133 if (foundname != NULL) {
4134 dns_name_init(&name, NULL);
4135 dns_rbt_namefromnode(node, &name);
4136 result = dns_name_copy(&name, foundname, NULL);
/* Append one chain level's name per iteration while that succeeds. */
4137 while (result == ISC_R_SUCCESS && i > 0) {
4139 level_node = search->chain.levels[i];
4140 dns_name_init(&name, NULL);
4141 dns_rbt_namefromnode(level_node,
4144 dns_name_concatenate(foundname,
4149 if (result != ISC_R_SUCCESS) {
4154 result = DNS_R_DELEGATION;
4155 if (nodep != NULL) {
4156 new_reference(search->rbtdb, node);
4159 bind_rdataset(search->rbtdb, node, found, search->now,
4161 if (foundsig != NULL)
4162 bind_rdataset(search->rbtdb, node, foundsig,
4163 search->now, sigrdataset);
4164 if (need_headerupdate(found, search->now) ||
4165 (foundsig != NULL &&
4166 need_headerupdate(foundsig, search->now))) {
/*
 * update_header() is called with the write lock held: if only the
 * read lock is held, drop it and take the write lock.  The
 * need_headerupdate() tests are repeated after the lock switch.
 */
4167 if (locktype != isc_rwlocktype_write) {
4168 NODE_UNLOCK(lock, locktype);
4169 NODE_LOCK(lock, isc_rwlocktype_write);
4170 locktype = isc_rwlocktype_write;
4172 if (need_headerupdate(found, search->now))
4173 update_header(search->rbtdb, found,
4175 if (foundsig != NULL &&
4176 need_headerupdate(foundsig, search->now)) {
4177 update_header(search->rbtdb, foundsig,
4184 NODE_UNLOCK(lock, locktype);
/* Nothing found at this node: continue with the next chain level. */
4186 if (found == NULL && i > 0) {
4188 node = search->chain.levels[i];
/*
 * find_coveringnsec() -- look for an NSEC rdataset (and its RRSIG) at
 * the node the search chain currently points to, stepping to the
 * previous chain node while the examined node turns out to be empty.
 *
 * Each pass obtains the current node and its name/origin from
 * dns_rbtnodechain_current(), locks the node (read mode first), deals
 * with expired headers and skips headers that are NONEXISTENT or whose
 * base type is 0.  Then:
 *   - NSEC found: name and origin are concatenated, the NSEC (and
 *     RRSIG, if any) headers are bound, the node is referenced and
 *     result becomes DNS_R_COVERINGNSEC;
 *   - node not empty but no NSEC: result becomes ISC_R_NOTFOUND;
 *   - node empty: dns_rbtnodechain_prev() moves the chain back and the
 *     loop repeats while that call succeeds.
 *
 * NOTE(review): the assignments to found/foundsig, several call
 * arguments and the final return are not visible in this extract.
 */
4198 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4199 isc_stdtime_t now, dns_name_t *foundname,
4200 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4202 dns_rbtnode_t *node;
4203 rdatasetheader_t *header, *header_next, *header_prev;
4204 rdatasetheader_t *found, *foundsig;
4205 isc_boolean_t empty_node;
4206 isc_result_t result;
4207 dns_fixedname_t fname, forigin;
4208 dns_name_t *name, *origin;
4209 rbtdb_rdatatype_t matchtype, sigmatchtype;
4211 isc_rwlocktype_t locktype;
/* Header type values for NSEC and for RRSIG covering NSEC. */
4213 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4214 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4215 dns_rdatatype_nsec);
4219 dns_fixedname_init(&fname);
4220 name = dns_fixedname_name(&fname);
4221 dns_fixedname_init(&forigin);
4222 origin = dns_fixedname_name(&forigin);
4223 result = dns_rbtnodechain_current(&search->chain, name,
4225 if (result != ISC_R_SUCCESS)
4227 locktype = isc_rwlocktype_read;
4228 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4229 NODE_LOCK(lock, locktype);
/* Assume the node is empty until an extant, typed header is seen. */
4232 empty_node = ISC_TRUE;
4234 for (header = node->data;
4236 header = header_next) {
4237 header_next = header->next;
4238 if (header->rdh_ttl <= now) {
4240 * This rdataset is stale. If no one else is
4241 * using the node, we can clean it up right
4242 * now, otherwise we mark it as stale, and the
4243 * node as dirty, so it will get cleaned up
4246 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4247 (locktype == isc_rwlocktype_write ||
4248 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4250 * We update the node's status only
4251 * when we can get write access.
4253 locktype = isc_rwlocktype_write;
4255 if (dns_rbtnode_refcurrent(node)
4259 m = search->rbtdb->common.mctx;
4260 clean_stale_headers(
4263 if (header_prev != NULL)
4267 node->data = header->next;
4268 free_rdataset(search->rbtdb, m,
4271 header->attributes |=
4272 RDATASET_ATTR_STALE;
4274 header_prev = header;
4277 header_prev = header;
4280 if (NONEXISTENT(header) ||
4281 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4282 header_prev = header;
4285 empty_node = ISC_FALSE;
4286 if (header->type == matchtype)
4288 else if (header->type == sigmatchtype)
4290 header_prev = header;
4292 if (found != NULL) {
4293 result = dns_name_concatenate(name, origin,
4295 if (result != ISC_R_SUCCESS)
4297 bind_rdataset(search->rbtdb, node, found,
4299 if (foundsig != NULL)
4300 bind_rdataset(search->rbtdb, node, foundsig,
4302 new_reference(search->rbtdb, node);
4304 result = DNS_R_COVERINGNSEC;
4305 } else if (!empty_node) {
4306 result = ISC_R_NOTFOUND;
4308 result = dns_rbtnodechain_prev(&search->chain, NULL,
4311 NODE_UNLOCK(lock, locktype);
/* Keep going only past empty nodes, and only while the chain can move. */
4312 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * cache_find() -- look up 'name' / 'type' in a cache database.
 *
 * Requires a valid rbtdb and version == NULL.  The tree lock is held in
 * read mode for the lookup and released before the cleanup at the end.
 *
 * Outline of the visible logic:
 *   1. dns_rbt_findnode() is run with DNS_RBTFIND_EMPTYDATA and
 *      cache_zonecut_callback(), which may record a DNAME zone cut in
 *      'search'.
 *   2. On DNS_R_PARTIALMATCH: try find_coveringnsec() when
 *      DNS_DBFIND_COVERINGNSEC is set, then setup_delegation() when a
 *      zone cut was recorded, otherwise find_deepest_zonecut().
 *   3. On an exact match the node's headers are scanned under the node
 *      lock, dealing with expired headers and remembering the answer,
 *      its RRSIG, negative-cache entries, NS / RRSIG NS and RRSIG CNAME.
 *   4. With no usable answer (nothing found, or its trust level is not
 *      permitted by 'options'), an NS rdataset at the node yields
 *      DNS_R_DELEGATION.
 *   5. Otherwise result is DNS_R_NCACHENXDOMAIN, DNS_R_NCACHENXRRSET,
 *      DNS_R_CNAME or ISC_R_SUCCESS, and the headers are bound to the
 *      caller's rdatasets.
 *   6. Headers flagged by need_headerupdate() are passed to
 *      update_header() under the write lock.
 *   7. If the recorded zone cut was not used (search.need_cleanup), its
 *      reference is dropped; the node chain is reset.
 *
 * NOTE(review): many lines (labels, gotos, else branches, several
 * assignments, the test guarding isc_stdtime_get() and the final return)
 * are not visible in this extract; verify against the full file.
 */
4317 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4318 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4319 dns_dbnode_t **nodep, dns_name_t *foundname,
4320 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4322 dns_rbtnode_t *node = NULL;
4323 isc_result_t result;
4324 rbtdb_search_t search;
4325 isc_boolean_t cname_ok = ISC_TRUE;
4326 isc_boolean_t empty_node;
4328 isc_rwlocktype_t locktype;
4329 rdatasetheader_t *header, *header_prev, *header_next;
4330 rdatasetheader_t *found, *nsheader;
4331 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4332 rdatasetheader_t *update, *updatesig;
4333 rbtdb_rdatatype_t sigtype, negtype;
4337 search.rbtdb = (dns_rbtdb_t *)db;
4339 REQUIRE(VALID_RBTDB(search.rbtdb));
4340 REQUIRE(version == NULL);
4343 isc_stdtime_get(&now);
/* Set up the search state shared with the callback and helpers. */
4345 search.rbtversion = NULL;
4347 search.options = options;
4348 search.copy_name = ISC_FALSE;
4349 search.need_cleanup = ISC_FALSE;
4350 search.wild = ISC_FALSE;
4351 search.zonecut = NULL;
4352 dns_fixedname_init(&search.zonecut_name);
4353 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4358 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4361 * Search down from the root of the tree. If, while going down, we
4362 * encounter a callback node, cache_zonecut_callback() will search the
4363 * rdatasets at the zone cut for a DNAME rdataset.
4365 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4366 &search.chain, DNS_RBTFIND_EMPTYDATA,
4367 cache_zonecut_callback, &search);
/*
 * No exact match: try a covering NSEC (if requested), then a zone cut
 * recorded by the callback, then the deepest zone cut.
 */
4369 if (result == DNS_R_PARTIALMATCH) {
4370 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4371 result = find_coveringnsec(&search, nodep, now,
4372 foundname, rdataset,
4374 if (result == DNS_R_COVERINGNSEC)
4377 if (search.zonecut != NULL) {
4378 result = setup_delegation(&search, nodep, foundname,
4379 rdataset, sigrdataset);
4383 result = find_deepest_zonecut(&search, node, nodep,
4384 foundname, rdataset,
4388 } else if (result != ISC_R_SUCCESS)
4392 * Certain DNSSEC types are not subject to CNAME matching
4393 * (RFC4035, section 2.5 and RFC3007).
4395 * We don't check for RRSIG, because we don't store RRSIG records
4398 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4399 cname_ok = ISC_FALSE;
4402 * We now go looking for rdata...
4405 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4406 locktype = isc_rwlocktype_read;
4407 NODE_LOCK(lock, locktype);
/* Header type values for the RRSIG of 'type' and for its negative entry. */
4411 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4412 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4416 empty_node = ISC_TRUE;
4418 for (header = node->data; header != NULL; header = header_next) {
4419 header_next = header->next;
4420 if (header->rdh_ttl <= now) {
4422 * This rdataset is stale. If no one else is using the
4423 * node, we can clean it up right now, otherwise we
4424 * mark it as stale, and the node as dirty, so it will
4425 * get cleaned up later.
4427 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4428 (locktype == isc_rwlocktype_write ||
4429 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4431 * We update the node's status only when we
4432 * can get write access.
4434 locktype = isc_rwlocktype_write;
4436 if (dns_rbtnode_refcurrent(node) == 0) {
4439 mctx = search.rbtdb->common.mctx;
4440 clean_stale_headers(search.rbtdb, mctx,
4442 if (header_prev != NULL)
4446 node->data = header->next;
4447 free_rdataset(search.rbtdb, mctx,
4450 header->attributes |=
4451 RDATASET_ATTR_STALE;
4453 header_prev = header;
4456 header_prev = header;
4457 } else if (EXISTS(header)) {
4459 * We now know that there is at least one active
4460 * non-stale rdataset at this node.
4462 empty_node = ISC_FALSE;
4465 * If we found a type we were looking for, remember
4468 if (header->type == type ||
4469 (type == dns_rdatatype_any &&
4470 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4471 (cname_ok && header->type ==
4472 dns_rdatatype_cname)) {
4474 * We've found the answer.
4477 if (header->type == dns_rdatatype_cname &&
4481 * If we've already got the CNAME RRSIG,
4482 * use it, otherwise change sigtype
4483 * so that we find it.
4485 if (cnamesig != NULL)
4486 foundsig = cnamesig;
4489 RBTDB_RDATATYPE_SIGCNAME;
4490 foundsig = cnamesig;
4492 } else if (header->type == sigtype) {
4494 * We've found the RRSIG rdataset for our
4495 * target type. Remember it.
4498 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4499 header->type == negtype) {
4501 * We've found a negative cache entry.
4504 } else if (header->type == dns_rdatatype_ns) {
4506 * Remember a NS rdataset even if we're
4507 * not specifically looking for it, because
4508 * we might need it later.
4511 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4513 * If we need the NS rdataset, we'll also
4514 * need its signature.
4517 } else if (cname_ok &&
4518 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4520 * If we get a CNAME match, we'll also need
4525 header_prev = header;
4527 header_prev = header;
4532 * We have an exact match for the name, but there are no
4533 * extant rdatasets. That means that this node doesn't
4534 * meaningfully exist, and that we really have a partial match.
4536 NODE_UNLOCK(lock, locktype);
4541 * If we didn't find what we were looking for...
/*
 * An answer also counts as "not found" when its trust level
 * (additional, glue, pending) is not allowed by the matching
 * DNS_DBFIND_*OK option.
 */
4543 if (found == NULL ||
4544 (DNS_TRUST_ADDITIONAL(found->trust) &&
4545 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4546 (found->trust == dns_trust_glue &&
4547 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4548 (DNS_TRUST_PENDING(found->trust) &&
4549 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4551 * If there is an NS rdataset at this node, then this is the
4554 if (nsheader != NULL) {
4555 if (nodep != NULL) {
4556 new_reference(search.rbtdb, node);
4557 INSIST(!ISC_LINK_LINKED(node, deadlink));
4560 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4562 if (need_headerupdate(nsheader, search.now))
4564 if (nssig != NULL) {
4565 bind_rdataset(search.rbtdb, node, nssig,
4566 search.now, sigrdataset);
4567 if (need_headerupdate(nssig, search.now))
4570 result = DNS_R_DELEGATION;
4575 * Go find the deepest zone cut.
4577 NODE_UNLOCK(lock, locktype);
4582 * We found what we were looking for, or we found a CNAME.
4585 if (nodep != NULL) {
4586 new_reference(search.rbtdb, node);
4587 INSIST(!ISC_LINK_LINKED(node, deadlink));
4591 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4593 * We found a negative cache entry.
4595 if (NXDOMAIN(found))
4596 result = DNS_R_NCACHENXDOMAIN;
4598 result = DNS_R_NCACHENXRRSET;
4599 } else if (type != found->type &&
4600 type != dns_rdatatype_any &&
4601 found->type == dns_rdatatype_cname) {
4603 * We weren't doing an ANY query and we found a CNAME instead
4604 * of the type we were looking for, so we need to indicate
4605 * that result to the caller.
4607 result = DNS_R_CNAME;
4610 * An ordinary successful query!
4612 result = ISC_R_SUCCESS;
/* For ANY queries rdatasets are bound only for negative-cache results. */
4615 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4616 result == DNS_R_NCACHENXRRSET) {
4617 bind_rdataset(search.rbtdb, node, found, search.now,
4619 if (need_headerupdate(found, search.now))
4621 if (foundsig != NULL) {
4622 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4624 if (need_headerupdate(foundsig, search.now))
4625 updatesig = foundsig;
/*
 * update_header() is called with the write lock held: if only the
 * read lock is held, drop it and take the write lock.  The
 * need_headerupdate() tests are repeated after the lock switch.
 */
4630 if ((update != NULL || updatesig != NULL) &&
4631 locktype != isc_rwlocktype_write) {
4632 NODE_UNLOCK(lock, locktype);
4633 NODE_LOCK(lock, isc_rwlocktype_write);
4634 locktype = isc_rwlocktype_write;
4636 if (update != NULL && need_headerupdate(update, search.now))
4637 update_header(search.rbtdb, update, search.now);
4638 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4639 update_header(search.rbtdb, updatesig, search.now);
4641 NODE_UNLOCK(lock, locktype);
4644 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4647 * If we found a zonecut but aren't going to use it, we have to
4650 if (search.need_cleanup) {
4651 node = search.zonecut;
4652 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4654 NODE_LOCK(lock, isc_rwlocktype_read);
4655 decrement_reference(search.rbtdb, node, 0,
4656 isc_rwlocktype_read, isc_rwlocktype_none,
4658 NODE_UNLOCK(lock, isc_rwlocktype_read);
4661 dns_rbtnodechain_reset(&search.chain);
/*
 * cache_findzonecut() -- find an NS rdataset (and its RRSIG) for 'name'
 * in a cache database.
 *
 * The tree is searched under the tree read lock with
 * DNS_RBTFIND_EMPTYDATA, plus DNS_RBTFIND_NOEXACT when the caller set
 * DNS_DBFIND_NOEXACT; no node callback is installed.
 *   - On DNS_R_PARTIALMATCH the work is handed to
 *     find_deepest_zonecut().
 *   - On an exact match the node's headers are scanned under the node
 *     lock, dealing with expired headers.  If an NS header was found it
 *     is bound to 'rdataset' (and the RRSIG to 'sigrdataset'), the node
 *     is referenced when 'nodep' is non-NULL, and headers flagged by
 *     need_headerupdate() are passed to update_header() under the write
 *     lock.
 * At the end the tree lock is released, the node chain is reset and a
 * DNS_R_DELEGATION result is turned into ISC_R_SUCCESS.
 *
 * NOTE(review): labels, gotos, the assignments to found/foundsig, the
 * handling of the "no NS here" case beyond the unlock, and the final
 * return are not visible in this extract.
 */
4667 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4668 isc_stdtime_t now, dns_dbnode_t **nodep,
4669 dns_name_t *foundname,
4670 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4672 dns_rbtnode_t *node = NULL;
4674 isc_result_t result;
4675 rbtdb_search_t search;
4676 rdatasetheader_t *header, *header_prev, *header_next;
4677 rdatasetheader_t *found, *foundsig;
4678 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4679 isc_rwlocktype_t locktype;
4681 search.rbtdb = (dns_rbtdb_t *)db;
4683 REQUIRE(VALID_RBTDB(search.rbtdb));
4686 isc_stdtime_get(&now);
4688 search.rbtversion = NULL;
4690 search.options = options;
4691 search.copy_name = ISC_FALSE;
4692 search.need_cleanup = ISC_FALSE;
4693 search.wild = ISC_FALSE;
4694 search.zonecut = NULL;
4695 dns_fixedname_init(&search.zonecut_name);
4696 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
/* Translate the db-level "no exact match" option into the rbt one. */
4699 if ((options & DNS_DBFIND_NOEXACT) != 0)
4700 rbtoptions |= DNS_RBTFIND_NOEXACT;
4702 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4705 * Search down from the root of the tree.
4707 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4708 &search.chain, rbtoptions, NULL, &search);
4710 if (result == DNS_R_PARTIALMATCH) {
4712 result = find_deepest_zonecut(&search, node, nodep, foundname,
4713 rdataset, sigrdataset);
4715 } else if (result != ISC_R_SUCCESS)
4719 * We now go looking for an NS rdataset at the node.
4722 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4723 locktype = isc_rwlocktype_read;
4724 NODE_LOCK(lock, locktype);
4729 for (header = node->data; header != NULL; header = header_next) {
4730 header_next = header->next;
4731 if (header->rdh_ttl <= now) {
4733 * This rdataset is stale. If no one else is using the
4734 * node, we can clean it up right now, otherwise we
4735 * mark it as stale, and the node as dirty, so it will
4736 * get cleaned up later.
4738 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4739 (locktype == isc_rwlocktype_write ||
4740 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4742 * We update the node's status only when we
4743 * can get write access.
4745 locktype = isc_rwlocktype_write;
4747 if (dns_rbtnode_refcurrent(node) == 0) {
4750 mctx = search.rbtdb->common.mctx;
4751 clean_stale_headers(search.rbtdb, mctx,
4753 if (header_prev != NULL)
4757 node->data = header->next;
4758 free_rdataset(search.rbtdb, mctx,
4761 header->attributes |=
4762 RDATASET_ATTR_STALE;
4764 header_prev = header;
4767 header_prev = header;
4768 } else if (EXISTS(header)) {
4770 * If we found a type we were looking for, remember
4773 if (header->type == dns_rdatatype_ns) {
4775 * Remember a NS rdataset even if we're
4776 * not specifically looking for it, because
4777 * we might need it later.
4780 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4782 * If we need the NS rdataset, we'll also
4783 * need its signature.
4787 header_prev = header;
4789 header_prev = header;
4792 if (found == NULL) {
4794 * No NS records here.
4796 NODE_UNLOCK(lock, locktype);
4800 if (nodep != NULL) {
4801 new_reference(search.rbtdb, node);
4802 INSIST(!ISC_LINK_LINKED(node, deadlink));
4806 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4807 if (foundsig != NULL)
4808 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4811 if (need_headerupdate(found, search.now) ||
4812 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
/*
 * update_header() is called with the write lock held: if only the
 * read lock is held, drop it and take the write lock.  The
 * need_headerupdate() tests are repeated after the lock switch.
 */
4813 if (locktype != isc_rwlocktype_write) {
4814 NODE_UNLOCK(lock, locktype);
4815 NODE_LOCK(lock, isc_rwlocktype_write);
4816 locktype = isc_rwlocktype_write;
4818 if (need_headerupdate(found, search.now))
4819 update_header(search.rbtdb, found, search.now);
4820 if (foundsig != NULL &&
4821 need_headerupdate(foundsig, search.now)) {
4822 update_header(search.rbtdb, foundsig, search.now);
4826 NODE_UNLOCK(lock, locktype);
4829 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
/* No callback was installed, so no zone-cut reference can be pending. */
4831 INSIST(!search.need_cleanup);
4833 dns_rbtnodechain_reset(&search.chain);
/* Callers of this function see a found delegation as plain success. */
4835 if (result == DNS_R_DELEGATION)
4836 result = ISC_R_SUCCESS;
/*
 * attachnode() -- take an additional reference on database node
 * 'source'.
 *
 * Requires a valid rbtdb and targetp != NULL with *targetp == NULL.
 * The node's reference count is incremented while the node's lock
 * (selected by node->locknum) is held via NODE_STRONGLOCK().
 *
 * NOTE(review): the declaration of 'refs' and the store into *targetp
 * are not visible in this extract; presumably *targetp is set to
 * 'source' -- confirm against the full file.
 */
4842 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4843 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4844 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4847 REQUIRE(VALID_RBTDB(rbtdb));
4848 REQUIRE(targetp != NULL && *targetp == NULL);
4850 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4851 dns_rbtnode_refincrement(node, &refs);
4853 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * detachnode() -- release the reference held through *targetp.
 *
 * Requires a valid rbtdb and targetp != NULL with *targetp != NULL.
 * decrement_reference() is called under the node's lock in read mode.
 * If it returns true and the lock bucket has no references left while
 * marked 'exiting', the bucket is noted as inactive.  The later part
 * checks rbtdb->active under the database write lock and, when it is 0,
 * sets want_free; free_rbtdb() is then called after logging
 * "calling free_rbtdb(<origin>)" at debug level 1, with "<UNKNOWN>"
 * used when the origin name is not dynamic.
 *
 * NOTE(review): the tests on 'inactive' / 'want_free', the update of
 * rbtdb->active and the clearing of *targetp are not visible in this
 * extract; confirm against the full file.
 */
4859 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4860 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4861 dns_rbtnode_t *node;
4862 isc_boolean_t want_free = ISC_FALSE;
4863 isc_boolean_t inactive = ISC_FALSE;
4864 rbtdb_nodelock_t *nodelock;
4866 REQUIRE(VALID_RBTDB(rbtdb));
4867 REQUIRE(targetp != NULL && *targetp != NULL);
4869 node = (dns_rbtnode_t *)(*targetp);
4870 nodelock = &rbtdb->node_locks[node->locknum];
4872 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4874 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4875 isc_rwlocktype_none, ISC_FALSE)) {
4876 if (isc_refcount_current(&nodelock->references) == 0 &&
4877 nodelock->exiting) {
4878 inactive = ISC_TRUE;
4882 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4887 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4889 if (rbtdb->active == 0)
4890 want_free = ISC_TRUE;
4891 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/* Format the origin for the log message before freeing the database. */
4893 char buf[DNS_NAME_FORMATSIZE];
4894 if (dns_name_dynamic(&rbtdb->common.origin))
4895 dns_name_format(&rbtdb->common.origin, buf,
4898 strcpy(buf, "<UNKNOWN>");
4899 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4900 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4901 "calling free_rbtdb(%s)", buf);
4902 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * expirenode() -- mark expired rdatasets at 'node' as stale and, when
 * the database is over its memory limit, possibly force others to
 * expire.
 *
 * When rbtdb->overmem is set, a random value is drawn and force_expire
 * becomes true only if the node has no 'down' pointer and the value is
 * a multiple of 4; a debug-level-2 "overmem cache: ..." message is
 * logged if that level would be logged.
 *
 * Under the node's write lock every header in rbtnode->data is visited:
 *   - rdh_ttl <= now - RBTDB_VIRTUAL: RDATASET_ATTR_STALE is set;
 *   - else, if force_expire: unless RETAIN(header) holds, the TTL is set
 *     to 0 and the header is marked stale; a retained header is only
 *     logged ("reprieve by RETAIN()");
 *   - else, if overmem and logging: "overmem cache: saved" is logged.
 * Always returns ISC_R_SUCCESS.
 *
 * NOTE(review): the test guarding isc_stdtime_get(), the declaration of
 * 'val' and some log arguments are not visible in this extract.
 */
4908 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4909 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4910 dns_rbtnode_t *rbtnode = node;
4911 rdatasetheader_t *header;
4912 isc_boolean_t force_expire = ISC_FALSE;
4914 * These are the category and module used by the cache cleaner.
4916 isc_boolean_t log = ISC_FALSE;
4917 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4918 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4919 int level = ISC_LOG_DEBUG(2);
4920 char printname[DNS_NAME_FORMATSIZE];
4922 REQUIRE(VALID_RBTDB(rbtdb));
4925 * Caller must hold a tree lock.
4929 isc_stdtime_get(&now);
4931 if (rbtdb->overmem) {
4934 isc_random_get(&val);
4936 * XXXDCL Could stand to have a better policy, like LRU.
/* Force expiry for roughly one in four nodes that have no 'down' link. */
4938 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4941 * Note that 'log' can be true IFF rbtdb->overmem is also true.
4942 * rbtdb->overmem can currently only be true for cache
4943 * databases -- hence all of the "overmem cache" log strings.
4945 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4947 isc_log_write(dns_lctx, category, module, level,
4948 "overmem cache: %s %s",
4949 force_expire ? "FORCE" : "check",
4950 dns_rbt_formatnodename(rbtnode,
4952 sizeof(printname)));
4956 * We may not need write access, but this code path is not performance
4957 * sensitive, so it should be okay to always lock as a writer.
4959 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4960 isc_rwlocktype_write);
4962 for (header = rbtnode->data; header != NULL; header = header->next)
4963 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4965 * We don't check if refcurrent(rbtnode) == 0 and try
4966 * to free like we do in cache_find(), because
4967 * refcurrent(rbtnode) must be non-zero. This is so
4968 * because 'node' is an argument to the function.
4970 header->attributes |= RDATASET_ATTR_STALE;
4973 isc_log_write(dns_lctx, category, module,
4974 level, "overmem cache: stale %s",
4976 } else if (force_expire) {
4977 if (! RETAIN(header)) {
4978 set_ttl(rbtdb, header, 0);
4979 header->attributes |= RDATASET_ATTR_STALE;
4982 isc_log_write(dns_lctx, category, module,
4983 level, "overmem cache: "
4984 "reprieve by RETAIN() %s",
4987 } else if (rbtdb->overmem && log)
4988 isc_log_write(dns_lctx, category, module, level,
4989 "overmem cache: saved %s", printname);
4991 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4992 isc_rwlocktype_write);
4994 return (ISC_R_SUCCESS);
/*
 * overmem() -- record the over-memory state of the database.
 *
 * The flag is stored in rbtdb->overmem only when IS_CACHE(rbtdb) is
 * true; for any other database the call has no visible effect.
 */
4998 overmem(dns_db_t *db, isc_boolean_t overmem) {
4999 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5001 if (IS_CACHE(rbtdb))
5002 rbtdb->overmem = overmem;
/*
 * printnode() -- write a description of 'node' to 'out'.
 *
 * Under the node's read lock it prints the node address, its current
 * reference count and lock number.  If the node has data, each
 * top-level header (linked through ->next) is visited, and for each one
 * the chain linked through ->down is walked, printing the type and a
 * line with serial, ttl, trust and attributes.  The "(empty)" line is
 * presumably printed when the node has no data (the 'else' line is not
 * visible in this extract).
 *
 * NOTE(review): several fprintf() arguments and the 'do {' opener are
 * not visible in this extract.
 */
5006 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5007 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5008 dns_rbtnode_t *rbtnode = node;
5009 isc_boolean_t first;
5011 REQUIRE(VALID_RBTDB(rbtdb));
5013 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5014 isc_rwlocktype_read);
5016 fprintf(out, "node %p, %u references, locknum = %u\n",
5017 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5019 if (rbtnode->data != NULL) {
5020 rdatasetheader_t *current, *top_next;
/* top_next is saved because 'current' is reused to walk the ->down chain. */
5022 for (current = rbtnode->data; current != NULL;
5023 current = top_next) {
5024 top_next = current->next;
5026 fprintf(out, "\ttype %u", current->type);
5032 "\tserial = %lu, ttl = %u, "
5033 "trust = %u, attributes = %u, "
5035 (unsigned long)current->serial,
5038 current->attributes,
5040 current = current->down;
5041 } while (current != NULL);
5044 fprintf(out, "(empty)\n");
5046 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5047 isc_rwlocktype_read);
/*
 * createiterator() -- allocate and initialise a database iterator.
 *
 * The iterator is allocated from rbtdb->common.mctx; ISC_R_NOMEMORY is
 * returned if that fails.  It attaches to 'db', starts out paused with
 * no tree lock held, no current node and an empty deletion list, and
 * has both of its node chains initialised.
 *
 * 'options' bits used here:
 *   DNS_DB_RELATIVENAMES -> common.relative_names
 *   DNS_DB_NSEC3ONLY     -> nsec3only; 'current' is set to the nsec3
 *                           chain in that case
 *   DNS_DB_NONSEC3       -> nonsec3
 * Without nsec3only, 'current' is presumably set to the main chain (the
 * 'else' line is not visible in this extract).
 *
 * On success the iterator is stored in *iteratorp and ISC_R_SUCCESS is
 * returned.
 */
5051 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5053 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5054 rbtdb_dbiterator_t *rbtdbiter;
5056 REQUIRE(VALID_RBTDB(rbtdb));
5058 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5059 if (rbtdbiter == NULL)
5060 return (ISC_R_NOMEMORY);
5062 rbtdbiter->common.methods = &dbiterator_methods;
/* common.db is set to NULL first, then dns_db_attach() fills it in. */
5063 rbtdbiter->common.db = NULL;
5064 dns_db_attach(db, &rbtdbiter->common.db);
5065 rbtdbiter->common.relative_names =
5066 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5067 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5068 rbtdbiter->common.cleaning = ISC_FALSE;
5069 rbtdbiter->paused = ISC_TRUE;
5070 rbtdbiter->tree_locked = isc_rwlocktype_none;
5071 rbtdbiter->result = ISC_R_SUCCESS;
5072 dns_fixedname_init(&rbtdbiter->name);
5073 dns_fixedname_init(&rbtdbiter->origin);
5074 rbtdbiter->node = NULL;
5075 rbtdbiter->delete = 0;
5076 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5077 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5078 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5079 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5080 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5081 if (rbtdbiter->nsec3only)
5082 rbtdbiter->current = &rbtdbiter->nsec3chain;
5084 rbtdbiter->current = &rbtdbiter->chain;
5086 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5088 return (ISC_R_SUCCESS);
/*
 * zone_findrdataset() -- find the rdataset of 'type' / 'covers' (and
 * its RRSIG) at 'node' as seen by 'version'.
 *
 * Requires a valid rbtdb and type != dns_rdatatype_any.  When 'version'
 * is NULL the current version is obtained via currentversion() and
 * close_version is set so that it can be closed again before returning.
 *
 * Under the node's read lock each top-level header is visited and its
 * ->down chain is followed, comparing header->serial with the version's
 * serial and checking NONEXISTENT().  A header that survives is
 * remembered if its type equals matchtype or sigmatchtype.  A found
 * rdataset (and RRSIG) is bound to the caller's rdatasets.
 *
 * Returns ISC_R_NOTFOUND or ISC_R_SUCCESS.
 *
 * NOTE(review): the 'do {' opener, the IGNORE/break logic inside the
 * version walk, the assignments to found/foundsig, and the conditions in
 * front of closeversion() and the two returns are not visible in this
 * extract.
 */
5092 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5093 dns_rdatatype_t type, dns_rdatatype_t covers,
5094 isc_stdtime_t now, dns_rdataset_t *rdataset,
5095 dns_rdataset_t *sigrdataset)
5097 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5098 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5099 rdatasetheader_t *header, *header_next, *found, *foundsig;
5100 rbtdb_serial_t serial;
5101 rbtdb_version_t *rbtversion = version;
5102 isc_boolean_t close_version = ISC_FALSE;
5103 rbtdb_rdatatype_t matchtype, sigmatchtype;
5105 REQUIRE(VALID_RBTDB(rbtdb));
5106 REQUIRE(type != dns_rdatatype_any);
5108 if (rbtversion == NULL) {
5109 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5110 close_version = ISC_TRUE;
5112 serial = rbtversion->serial;
5115 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5116 isc_rwlocktype_read);
5120 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5122 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
/* header_next is saved because 'header' is reused to walk ->down. */
5126 for (header = rbtnode->data; header != NULL; header = header_next) {
5127 header_next = header->next;
5129 if (header->serial <= serial &&
5132 * Is this a "this rdataset doesn't
5135 if (NONEXISTENT(header))
5139 header = header->down;
5140 } while (header != NULL);
5141 if (header != NULL) {
5143 * We have an active, extant rdataset. If it's a
5144 * type we're looking for, remember it.
5146 if (header->type == matchtype) {
5148 if (foundsig != NULL)
5150 } else if (header->type == sigmatchtype) {
5157 if (found != NULL) {
5158 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5159 if (foundsig != NULL)
5160 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5164 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5165 isc_rwlocktype_read);
5168 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5172 return (ISC_R_NOTFOUND);
5174 return (ISC_R_SUCCESS);
/*
 * cache_findrdataset() -- find the rdataset of 'type' / 'covers' (and
 * its RRSIG) at 'node' in a cache database.
 *
 * Requires a valid rbtdb and type != dns_rdatatype_any.  Under the node
 * lock (read mode first) every header in rbtnode->data is visited:
 *   - an expired header (rdh_ttl <= now) is marked RDATASET_ATTR_STALE
 *     once it is also older than now - RBTDB_VIRTUAL and write access
 *     is available (already held, or NODE_TRYUPGRADE() succeeded);
 *   - an extant header is compared against matchtype, the negative-cache
 *     types (RBTDB_RDATATYPE_NCACHEANY or negtype) and sigmatchtype.
 * A found rdataset (and RRSIG) is bound to the caller's rdatasets.
 *
 * Results visible here: ISC_R_NOTFOUND, ISC_R_SUCCESS (the initial
 * value of 'result'), and DNS_R_NCACHENXDOMAIN / DNS_R_NCACHENXRRSET
 * when the header found is a negative-cache entry (base type 0).
 *
 * NOTE(review): the declaration of 'lock', the test guarding
 * isc_stdtime_get(), the assignments to found/foundsig, the condition in
 * front of the ISC_R_NOTFOUND return and the final return are not
 * visible in this extract.
 */
5178 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5179 dns_rdatatype_t type, dns_rdatatype_t covers,
5180 isc_stdtime_t now, dns_rdataset_t *rdataset,
5181 dns_rdataset_t *sigrdataset)
5183 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5184 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5185 rdatasetheader_t *header, *header_next, *found, *foundsig;
5186 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5187 isc_result_t result;
5189 isc_rwlocktype_t locktype;
5191 REQUIRE(VALID_RBTDB(rbtdb));
5192 REQUIRE(type != dns_rdatatype_any);
5196 result = ISC_R_SUCCESS;
5199 isc_stdtime_get(&now);
5201 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5202 locktype = isc_rwlocktype_read;
5203 NODE_LOCK(lock, locktype);
/* Header type values: the wanted type, its negative entry, its RRSIG. */
5207 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5208 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5210 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5214 for (header = rbtnode->data; header != NULL; header = header_next) {
5215 header_next = header->next;
5216 if (header->rdh_ttl <= now) {
5217 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5218 (locktype == isc_rwlocktype_write ||
5219 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5221 * We update the node's status only when we
5222 * can get write access.
5224 locktype = isc_rwlocktype_write;
5227 * We don't check if refcurrent(rbtnode) == 0
5228 * and try to free like we do in cache_find(),
5229 * because refcurrent(rbtnode) must be
5230 * non-zero. This is so because 'node' is an
5231 * argument to the function.
5233 header->attributes |= RDATASET_ATTR_STALE;
5236 } else if (EXISTS(header)) {
5237 if (header->type == matchtype)
5239 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5240 header->type == negtype)
5242 else if (header->type == sigmatchtype)
5246 if (found != NULL) {
5247 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5248 if (foundsig != NULL)
5249 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5253 NODE_UNLOCK(lock, locktype);
5256 return (ISC_R_NOTFOUND);
5258 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5260 * We found a negative cache entry.
5262 if (NXDOMAIN(found))
5263 result = DNS_R_NCACHENXDOMAIN;
5265 result = DNS_R_NCACHENXRRSET;
/*
 * Create an rdataset iterator for node and hand it back via iteratorp.
 *
 * For a non-cache database (DNS_DBATTR_CACHE clear) a version is
 * pinned: when no version was supplied one is obtained (that call is
 * only partly visible in this listing), otherwise the reference count
 * of the supplied version is incremented.
 * NOTE(review): the isc_stdtime_get() call presumably belongs to the
 * cache branch and is guarded by a test on now -- the guarding lines
 * are missing from this listing; confirm against the full source.
 *
 * A reference on the node is taken under the node strong lock and the
 * iterator starts out with current == NULL.
 *
 * Returns ISC_R_NOMEMORY if the iterator cannot be allocated and
 * ISC_R_SUCCESS otherwise.
 */
5272 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5273 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5275 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5276 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5277 rbtdb_version_t *rbtversion = version;
5278 rbtdb_rdatasetiter_t *iterator;
5281 REQUIRE(VALID_RBTDB(rbtdb));
5283 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5284 if (iterator == NULL)
5285 return (ISC_R_NOMEMORY);
/* Zone-style database: the iterator holds a reference on a version. */
5287 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5289 if (rbtversion == NULL)
5291 (dns_dbversion_t **) (void *)(&rbtversion));
5295 isc_refcount_increment(&rbtversion->references,
5301 isc_stdtime_get(&now);
/* Fill in the generic part of the iterator. */
5305 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5306 iterator->common.methods = &rdatasetiter_methods;
5307 iterator->common.db = db;
5308 iterator->common.node = node;
5309 iterator->common.version = (dns_dbversion_t *)rbtversion;
5310 iterator->common.now = now;
/* The node reference is taken while holding the node strong lock. */
5312 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5314 dns_rbtnode_refincrement(rbtnode, &refs);
5317 iterator->current = NULL;
5319 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5321 *iteratorp = (dns_rdatasetiter_t *)iterator;
5323 return (ISC_R_SUCCESS);
/*
 * Check whether the node holds both a CNAME rdataset and "other data"
 * that are active in the version identified by serial.
 *
 * Every type chain hanging off node->data is examined.  Within a chain
 * the down list is walked looking for a header whose serial is not
 * newer than serial; headers for which NONEXISTENT() is true get
 * special treatment (that statement is not visible in this listing).
 * Types KEY, SIG, NSEC and RRSIG are not counted as other data.
 *
 * NOTE(review): presumably returns ISC_TRUE when both kinds were found
 * and ISC_FALSE otherwise -- the return statements and the assignment
 * of cname are absent from this listing; confirm against the full
 * source.
 *
 * The caller must hold the node lock.
 */
5326 static isc_boolean_t
5327 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5328 rdatasetheader_t *header, *header_next;
5329 isc_boolean_t cname, other_data;
5330 dns_rdatatype_t rdtype;
5333 * The caller must hold the node lock.
5337 * Look for CNAME and "other data" rdatasets active in our version.
5340 other_data = ISC_FALSE;
5341 for (header = node->data; header != NULL; header = header_next) {
5342 header_next = header->next;
5343 if (header->type == dns_rdatatype_cname) {
5345 * Look for an active extant CNAME.
5348 if (header->serial <= serial &&
5351 * Is this a "this rdataset doesn't
5354 if (NONEXISTENT(header))
5358 header = header->down;
5359 } while (header != NULL);
5364 * Look for active extant "other data".
5366 * "Other data" is any rdataset whose type is not
5367 * KEY, NSEC, SIG or RRSIG.
5369 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5370 if (rdtype != dns_rdatatype_key &&
5371 rdtype != dns_rdatatype_sig &&
5372 rdtype != dns_rdatatype_nsec &&
5373 rdtype != dns_rdatatype_rrsig) {
5375 * Is it active and extant?
5378 if (header->serial <= serial &&
5381 * Is this a "this rdataset
5382 * doesn't exist" record?
5384 if (NONEXISTENT(header))
5388 header = header->down;
5389 } while (header != NULL);
5391 other_data = ISC_TRUE;
5396 if (cname && other_data)
5403 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5404 isc_result_t result;
5406 INSIST(!IS_CACHE(rbtdb));
5407 INSIST(newheader->heap_index == 0);
5408 INSIST(!ISC_LINK_LINKED(newheader, link));
5410 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * Link newheader into the rdataset chains of rbtnode.
 *
 * rbtversion names the version being modified; the code below treats
 * rbtversion == NULL as the cache case (trust comparison, staling of
 * superseded headers).  options is a mask of DNS_DBADD_* flags; MERGE,
 * FORCE, EXACT and EXACTTTL are consulted here, and MERGE requires a
 * non-NULL rbtversion.  With FORCE the trust used for comparisons is
 * dns_trust_ultimate instead of newheader->trust.  loading is true
 * while the database is being loaded; no changed record is created in
 * that case.  addedrdataset, when non-NULL, is bound to the header
 * that ends up representing the type.
 *
 * Ownership: the early-return paths visible here that do not link
 * newheader into the node release it with free_rdataset(), so callers
 * must not touch newheader after this call.
 *
 * Result codes visible in this function: ISC_R_SUCCESS,
 * ISC_R_NOMEMORY (add_changed() failed), DNS_R_UNCHANGED and
 * DNS_R_CNAMEANDOTHER.
 * NOTE(review): the merge path presumably also returns the failing
 * result of dns_rdataslab_merge() or DNS_R_NOTEXACT; that return
 * statement, like a number of other lines (loop conditions, braces,
 * some assignments), is absent from this listing.  Confirm against
 * the full source before changing anything here.
 *
 * The caller must hold the node lock.
 */
5415 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5416 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5417 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5419 rbtdb_changed_t *changed = NULL;
5420 rdatasetheader_t *topheader, *topheader_prev, *header;
5421 unsigned char *merged;
5422 isc_result_t result;
5423 isc_boolean_t header_nx;
5424 isc_boolean_t newheader_nx;
5425 isc_boolean_t merge;
5426 dns_rdatatype_t rdtype, covers;
5427 rbtdb_rdatatype_t negtype;
5432 * Add an rdatasetheader_t to a node.
5436 * Caller must be holding the node lock.
5439 if ((options & DNS_DBADD_MERGE) != 0) {
5440 REQUIRE(rbtversion != NULL);
5445 if ((options & DNS_DBADD_FORCE) != 0)
5446 trust = dns_trust_ultimate;
5448 trust = newheader->trust;
5450 if (rbtversion != NULL && !loading) {
5452 * We always add a changed record, even if no changes end up
5453 * being made to this node, because it's harmless and
5454 * simplifies the code.
5456 changed = add_changed(rbtdb, rbtversion, rbtnode);
5457 if (changed == NULL) {
5458 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5459 return (ISC_R_NOMEMORY);
5463 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5464 topheader_prev = NULL;
5467 if (rbtversion == NULL && !newheader_nx) {
5468 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5471 * We're adding a negative cache entry.
5473 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5474 if (covers == dns_rdatatype_any) {
5476 * We're adding an negative cache entry
5477 * which covers all types (NXDOMAIN,
5478 * NODATA(QTYPE=ANY)).
5480 * We make all other data stale so that the
5481 * only rdataset that can be found at this
5482 * node is the negative cache entry.
5484 for (topheader = rbtnode->data;
5486 topheader = topheader->next) {
5487 set_ttl(rbtdb, topheader, 0);
5488 topheader->attributes |=
5489 RDATASET_ATTR_STALE;
5494 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5497 * We're adding something that isn't a
5498 * negative cache entry. Look for an extant
5499 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5502 for (topheader = rbtnode->data;
5504 topheader = topheader->next) {
5505 if (topheader->type ==
5506 RBTDB_RDATATYPE_NCACHEANY)
5509 if (topheader != NULL && EXISTS(topheader) &&
5510 topheader->rdh_ttl > now) {
5514 if (trust < topheader->trust) {
5516 * The NXDOMAIN/NODATA(QTYPE=ANY)
5519 free_rdataset(rbtdb,
5522 if (addedrdataset != NULL)
5523 bind_rdataset(rbtdb, rbtnode,
5526 return (DNS_R_UNCHANGED);
5529 * The new rdataset is better. Expire the
5530 * NXDOMAIN/NODATA(QTYPE=ANY).
5532 set_ttl(rbtdb, topheader, 0);
5533 topheader->attributes |= RDATASET_ATTR_STALE;
5538 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5542 for (topheader = rbtnode->data;
5544 topheader = topheader->next) {
5545 if (topheader->type == newheader->type ||
5546 topheader->type == negtype)
5548 topheader_prev = topheader;
5553 * If header isn't NULL, we've found the right type. There may be
5554 * IGNORE rdatasets between the top of the chain and the first real
5555 * data. We skip over them.
5558 while (header != NULL && IGNORE(header))
5559 header = header->down;
5560 if (header != NULL) {
5561 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5564 * Deleting an already non-existent rdataset has no effect.
5566 if (header_nx && newheader_nx) {
5567 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5568 return (DNS_R_UNCHANGED);
5572 * Trying to add an rdataset with lower trust to a cache DB
5573 * has no effect, provided that the cache data isn't stale.
5575 if (rbtversion == NULL && trust < header->trust &&
5576 (header->rdh_ttl > now || header_nx)) {
5577 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5578 if (addedrdataset != NULL)
5579 bind_rdataset(rbtdb, rbtnode, header, now,
5581 return (DNS_R_UNCHANGED);
5585 * Don't merge if a nonexistent rdataset is involved.
5587 if (merge && (header_nx || newheader_nx))
5591 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5592 * that is the union of 'newheader' and 'header'.
5595 unsigned int flags = 0;
5596 INSIST(rbtversion->serial >= header->serial);
5598 result = ISC_R_SUCCESS;
5600 if ((options & DNS_DBADD_EXACT) != 0)
5601 flags |= DNS_RDATASLAB_EXACT;
5602 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5603 newheader->rdh_ttl != header->rdh_ttl)
5604 result = DNS_R_NOTEXACT;
5605 else if (newheader->rdh_ttl != header->rdh_ttl)
5606 flags |= DNS_RDATASLAB_FORCE;
5607 if (result == ISC_R_SUCCESS)
5608 result = dns_rdataslab_merge(
5609 (unsigned char *)header,
5610 (unsigned char *)newheader,
5611 (unsigned int)(sizeof(*newheader)),
5613 rbtdb->common.rdclass,
5614 (dns_rdatatype_t)header->type,
5616 if (result == ISC_R_SUCCESS) {
5618 * If 'header' has the same serial number as
5619 * we do, we could clean it up now if we knew
5620 * that our caller had no references to it.
5621 * We don't know this, however, so we leave it
5622 * alone. It will get cleaned up when
5623 * clean_zone_node() runs.
5625 free_rdataset(rbtdb, rbtdb->common.mctx,
5627 newheader = (rdatasetheader_t *)merged;
5628 if (loading && RESIGN(newheader) &&
5630 header->resign < newheader->resign)
5631 newheader->resign = header->resign;
5633 free_rdataset(rbtdb, rbtdb->common.mctx,
5639 * Don't replace existing NS, A and AAAA RRsets
5640 * in the cache if they are already exist. This
5641 * prevents named being locked to old servers.
5642 * Don't lower trust of existing record if the
5645 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5646 header->type == dns_rdatatype_ns &&
5647 !header_nx && !newheader_nx &&
5648 header->trust >= newheader->trust &&
5649 dns_rdataslab_equalx((unsigned char *)header,
5650 (unsigned char *)newheader,
5651 (unsigned int)(sizeof(*newheader)),
5652 rbtdb->common.rdclass,
5653 (dns_rdatatype_t)header->type)) {
5655 * Honour the new ttl if it is less than the
5658 if (header->rdh_ttl > newheader->rdh_ttl)
5659 set_ttl(rbtdb, header, newheader->rdh_ttl);
5660 if (header->noqname == NULL &&
5661 newheader->noqname != NULL) {
5662 header->noqname = newheader->noqname;
5663 newheader->noqname = NULL;
5665 if (header->closest == NULL &&
5666 newheader->closest != NULL) {
5667 header->closest = newheader->closest;
5668 newheader->closest = NULL;
5670 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5671 if (addedrdataset != NULL)
5672 bind_rdataset(rbtdb, rbtnode, header, now,
5674 return (ISC_R_SUCCESS);
5676 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5677 (header->type == dns_rdatatype_a ||
5678 header->type == dns_rdatatype_aaaa) &&
5679 !header_nx && !newheader_nx &&
5680 header->trust >= newheader->trust &&
5681 dns_rdataslab_equal((unsigned char *)header,
5682 (unsigned char *)newheader,
5683 (unsigned int)(sizeof(*newheader)))) {
5685 * Honour the new ttl if it is less than the
5688 if (header->rdh_ttl > newheader->rdh_ttl)
5689 set_ttl(rbtdb, header, newheader->rdh_ttl);
5690 if (header->noqname == NULL &&
5691 newheader->noqname != NULL) {
5692 header->noqname = newheader->noqname;
5693 newheader->noqname = NULL;
5695 if (header->closest == NULL &&
5696 newheader->closest != NULL) {
5697 header->closest = newheader->closest;
5698 newheader->closest = NULL;
5700 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5701 if (addedrdataset != NULL)
5702 bind_rdataset(rbtdb, rbtnode, header, now,
5704 return (ISC_R_SUCCESS);
5706 INSIST(rbtversion == NULL ||
5707 rbtversion->serial >= topheader->serial);
5708 if (topheader_prev != NULL)
5709 topheader_prev->next = newheader;
5711 rbtnode->data = newheader;
5712 newheader->next = topheader->next;
5715 * There are no other references to 'header' when
5716 * loading, so we MAY clean up 'header' now.
5717 * Since we don't generate changed records when
5718 * loading, we MUST clean up 'header' now.
5720 newheader->down = NULL;
5721 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5723 newheader->down = topheader;
5724 topheader->next = newheader;
5726 if (changed != NULL)
5727 changed->dirty = ISC_TRUE;
5728 if (rbtversion == NULL) {
5729 set_ttl(rbtdb, header, 0);
5730 header->attributes |= RDATASET_ATTR_STALE;
5732 idx = newheader->node->locknum;
5733 if (IS_CACHE(rbtdb)) {
5734 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5737 * XXXMLG We don't check the return value
5738 * here. If it fails, we will not do TTL
5739 * based expiry on this node. However, we
5740 * will do it on the LRU side, so memory
5741 * will not leak... for long.
5743 isc_heap_insert(rbtdb->heaps[idx], newheader);
5744 } else if (RESIGN(newheader))
5745 resign_insert(rbtdb, idx, newheader);
5749 * No non-IGNORED rdatasets of the given type exist at
5754 * If we're trying to delete the type, don't bother.
5757 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5758 return (DNS_R_UNCHANGED);
5761 if (topheader != NULL) {
5763 * We have an list of rdatasets of the given type,
5764 * but they're all marked IGNORE. We simply insert
5765 * the new rdataset at the head of the list.
5767 * Ignored rdatasets cannot occur during loading, so
5771 INSIST(rbtversion == NULL ||
5772 rbtversion->serial >= topheader->serial);
5773 if (topheader_prev != NULL)
5774 topheader_prev->next = newheader;
5776 rbtnode->data = newheader;
5777 newheader->next = topheader->next;
5778 newheader->down = topheader;
5779 topheader->next = newheader;
5781 if (changed != NULL)
5782 changed->dirty = ISC_TRUE;
5785 * No rdatasets of the given type exist at the node.
5787 newheader->next = rbtnode->data;
5788 newheader->down = NULL;
5789 rbtnode->data = newheader;
5791 idx = newheader->node->locknum;
5792 if (IS_CACHE(rbtdb)) {
5793 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5795 isc_heap_insert(rbtdb->heaps[idx], newheader);
5796 } else if (RESIGN(newheader)) {
5797 resign_insert(rbtdb, idx, newheader);
5802 * Check if the node now contains CNAME and other data.
5804 if (rbtversion != NULL &&
5805 cname_and_other_data(rbtnode, rbtversion->serial))
5806 return (DNS_R_CNAMEANDOTHER);
5808 if (addedrdataset != NULL)
5809 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5811 return (ISC_R_SUCCESS);
5814 static inline isc_boolean_t
5815 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5816 rbtdb_rdatatype_t type)
5818 if (IS_CACHE(rbtdb)) {
5819 if (type == dns_rdatatype_dname)
5823 } else if (type == dns_rdatatype_dname ||
5824 (type == dns_rdatatype_ns &&
5825 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5830 static inline isc_result_t
5831 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5832 dns_rdataset_t *rdataset)
5834 struct noqname *noqname;
5835 isc_mem_t *mctx = rbtdb->common.mctx;
5837 dns_rdataset_t neg, negsig;
5838 isc_result_t result;
5841 dns_name_init(&name, NULL);
5842 dns_rdataset_init(&neg);
5843 dns_rdataset_init(&negsig);
5845 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5846 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5848 noqname = isc_mem_get(mctx, sizeof(*noqname));
5849 if (noqname == NULL) {
5850 result = ISC_R_NOMEMORY;
5853 dns_name_init(&noqname->name, NULL);
5854 noqname->neg = NULL;
5855 noqname->negsig = NULL;
5856 noqname->type = neg.type;
5857 result = dns_name_dup(&name, mctx, &noqname->name);
5858 if (result != ISC_R_SUCCESS)
5860 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5861 if (result != ISC_R_SUCCESS)
5863 noqname->neg = r.base;
5864 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5865 if (result != ISC_R_SUCCESS)
5867 noqname->negsig = r.base;
5868 dns_rdataset_disassociate(&neg);
5869 dns_rdataset_disassociate(&negsig);
5870 newheader->noqname = noqname;
5871 return (ISC_R_SUCCESS);
5874 dns_rdataset_disassociate(&neg);
5875 dns_rdataset_disassociate(&negsig);
5876 free_noqname(mctx, &noqname);
5880 static inline isc_result_t
5881 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5882 dns_rdataset_t *rdataset)
5884 struct noqname *closest;
5885 isc_mem_t *mctx = rbtdb->common.mctx;
5887 dns_rdataset_t neg, negsig;
5888 isc_result_t result;
5891 dns_name_init(&name, NULL);
5892 dns_rdataset_init(&neg);
5893 dns_rdataset_init(&negsig);
5895 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5896 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5898 closest = isc_mem_get(mctx, sizeof(*closest));
5899 if (closest == NULL) {
5900 result = ISC_R_NOMEMORY;
5903 dns_name_init(&closest->name, NULL);
5904 closest->neg = NULL;
5905 closest->negsig = NULL;
5906 closest->type = neg.type;
5907 result = dns_name_dup(&name, mctx, &closest->name);
5908 if (result != ISC_R_SUCCESS)
5910 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5911 if (result != ISC_R_SUCCESS)
5913 closest->neg = r.base;
5914 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5915 if (result != ISC_R_SUCCESS)
5917 closest->negsig = r.base;
5918 dns_rdataset_disassociate(&neg);
5919 dns_rdataset_disassociate(&negsig);
5920 newheader->closest = closest;
5921 return (ISC_R_SUCCESS);
5924 dns_rdataset_disassociate(&neg);
5925 dns_rdataset_disassociate(&negsig);
5926 free_noqname(mctx, &closest);
/*
 * Forward declaration: addrdataset() and subtractrdataset() compare
 * rbtdb->common.methods against this table.
 */
5930 static dns_dbmethods_t zone_methods;
/*
 * Add rdataset to node in the given version of the database.
 *
 * When the database uses zone_methods, NSEC3 data (type or covers) may
 * only go to nodes flagged nsec3, and no other data may go there
 * (REQUIRE).
 *
 * The rdataset is converted into a slab with room reserved for an
 * rdatasetheader_t, the header is filled in (the stored TTL is
 * rdataset->ttl + now), and the result is handed to add() under the
 * node write lock.  With a non-NULL version the header takes the
 * serial of that version and may carry a resign time; the other
 * visible assignments set serial 1 and resign 0.  The NXDOMAIN,
 * OPTOUT, NOQNAME and CLOSEST attributes of rdataset are carried over
 * to the header.
 * NOTE(review): the branch structure around these assignments and the
 * guard on isc_stdtime_get() are partly missing from this listing;
 * confirm against the full source.
 *
 * Locking: the tree write lock is taken first when the type is a
 * delegating type or when a cache is in the overmem state; in the
 * latter case it is released again before add() unless the type is
 * delegating.  On a cache, dead nodes are cleaned up and the first
 * heap element is expired if its TTL is at or below
 * now - RBTDB_VIRTUAL.
 *
 * After a successful add() of a delegating type the node gets
 * find_callback set.  For a non-cache database with version == NULL
 * the secure status is refreshed via iszonesecure().
 */
5933 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5934 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5935 dns_rdataset_t *addedrdataset)
5937 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5938 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5939 rbtdb_version_t *rbtversion = version;
5940 isc_region_t region;
5941 rdatasetheader_t *newheader;
5942 rdatasetheader_t *header;
5943 isc_result_t result;
5944 isc_boolean_t delegating;
5945 isc_boolean_t tree_locked = ISC_FALSE;
5947 REQUIRE(VALID_RBTDB(rbtdb));
5949 if (rbtdb->common.methods == &zone_methods)
5950 REQUIRE(((rbtnode->nsec3 &&
5951 (rdataset->type == dns_rdatatype_nsec3 ||
5952 rdataset->covers == dns_rdatatype_nsec3)) ||
5954 rdataset->type != dns_rdatatype_nsec3 &&
5955 rdataset->covers != dns_rdatatype_nsec3)));
5957 if (rbtversion == NULL) {
5959 isc_stdtime_get(&now);
/* The slab is allocated with space for the header in front of it. */
5963 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5965 sizeof(rdatasetheader_t));
5966 if (result != ISC_R_SUCCESS)
5969 newheader = (rdatasetheader_t *)region.base;
5970 init_rdataset(rbtdb, newheader);
5971 set_ttl(rbtdb, newheader, rdataset->ttl + now);
5972 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5974 newheader->attributes = 0;
5975 newheader->noqname = NULL;
5976 newheader->closest = NULL;
5977 newheader->count = init_count++;
5978 newheader->trust = rdataset->trust;
5979 newheader->additional_auth = NULL;
5980 newheader->additional_glue = NULL;
5981 newheader->last_used = now;
5982 newheader->node = rbtnode;
5983 if (rbtversion != NULL) {
5984 newheader->serial = rbtversion->serial;
5987 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5988 newheader->attributes |= RDATASET_ATTR_RESIGN;
5989 newheader->resign = rdataset->resign;
5991 newheader->resign = 0;
5993 newheader->serial = 1;
5994 newheader->resign = 0;
5995 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5996 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
5997 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
5998 newheader->attributes |= RDATASET_ATTR_OPTOUT;
5999 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6000 result = addnoqname(rbtdb, newheader, rdataset);
6001 if (result != ISC_R_SUCCESS) {
6002 free_rdataset(rbtdb, rbtdb->common.mctx,
6007 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6008 result = addclosest(rbtdb, newheader, rdataset);
6009 if (result != ISC_R_SUCCESS) {
6010 free_rdataset(rbtdb, rbtdb->common.mctx,
6018 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6019 * just DNAME for the cache), then we need to set the callback bit
6022 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6023 delegating = ISC_TRUE;
6025 delegating = ISC_FALSE;
6028 * If we're adding a delegation type or the DB is a cache in an overmem
6029 * state, hold an exclusive lock on the tree. In the latter case
6030 * the lock does not necessarily have to be acquired but it will help
6031 * purge stale entries more effectively.
6033 if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6034 tree_locked = ISC_TRUE;
6035 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6038 if (IS_CACHE(rbtdb) && rbtdb->overmem)
6039 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6041 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6042 isc_rwlocktype_write);
6044 if (rbtdb->rrsetstats != NULL) {
6045 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6046 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6049 if (IS_CACHE(rbtdb)) {
6051 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6053 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6054 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6055 expire_header(rbtdb, header, tree_locked);
6058 * If we've been holding a write lock on the tree just for
6059 * cleaning, we can release it now. However, we still need the
6062 if (tree_locked && !delegating) {
6063 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6064 tree_locked = ISC_FALSE;
6068 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6069 addedrdataset, now);
6070 if (result == ISC_R_SUCCESS && delegating)
6071 rbtnode->find_callback = 1;
6073 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6074 isc_rwlocktype_write);
6077 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6080 * Update the zone's secure status. If version is non-NULL
6081 * this is deferred until closeversion() is called.
6083 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6084 iszonesecure(db, version, rbtdb->origin_node);
/*
 * Remove the rdata in rdataset from the rdataset of the same type at
 * node, in the given version.
 *
 * rdataset is first converted into a slab-backed header.  Under the
 * node write lock a changed record is added, the chain of the matching
 * type is located and leading IGNORE headers are skipped.  If an
 * existing header is found, dns_rdataslab_subtract() computes the
 * remainder:
 *   - on success the remainder replaces newheader and is linked in
 *     front of the old top header;
 *   - on DNS_R_NXRRSET (nothing would be left) a header flagged
 *     RDATASET_ATTR_NONEXISTENT is linked in instead;
 *   - any other result frees newheader.
 * With DNS_DBSUB_EXACT a TTL mismatch yields DNS_R_NOTEXACT.  If no
 * existing rdataset is found the result is DNS_R_NOTEXACT (with
 * DNS_DBSUB_EXACT) or DNS_R_UNCHANGED.  On success newrdataset, when
 * non-NULL, is bound to the new header.
 *
 * Other result codes visible here: ISC_R_NOMEMORY, and the failing
 * result of dns_rdataslab_fromrdataset().
 *
 * NOTE(review): rbtversion->serial is dereferenced unconditionally
 * below, so version has to be non-NULL; the version == NULL test that
 * guards iszonesecure() at the end therefore looks unreachable.
 * Several lines (braces, goto statements, loop condition) are absent
 * from this listing; confirm against the full source.
 */
6090 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6091 dns_rdataset_t *rdataset, unsigned int options,
6092 dns_rdataset_t *newrdataset)
6094 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6095 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6096 rbtdb_version_t *rbtversion = version;
6097 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6098 unsigned char *subresult;
6099 isc_region_t region;
6100 isc_result_t result;
6101 rbtdb_changed_t *changed;
6103 REQUIRE(VALID_RBTDB(rbtdb));
6105 if (rbtdb->common.methods == &zone_methods)
6106 REQUIRE(((rbtnode->nsec3 &&
6107 (rdataset->type == dns_rdatatype_nsec3 ||
6108 rdataset->covers == dns_rdatatype_nsec3)) ||
6110 rdataset->type != dns_rdatatype_nsec3 &&
6111 rdataset->covers != dns_rdatatype_nsec3)));
6113 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6115 sizeof(rdatasetheader_t));
6116 if (result != ISC_R_SUCCESS)
6118 newheader = (rdatasetheader_t *)region.base;
6119 init_rdataset(rbtdb, newheader);
6120 set_ttl(rbtdb, newheader, rdataset->ttl);
6121 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6123 newheader->attributes = 0;
6124 newheader->serial = rbtversion->serial;
6125 newheader->trust = 0;
6126 newheader->noqname = NULL;
6127 newheader->closest = NULL;
6128 newheader->count = init_count++;
6129 newheader->additional_auth = NULL;
6130 newheader->additional_glue = NULL;
6131 newheader->last_used = 0;
6132 newheader->node = rbtnode;
6133 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6134 newheader->attributes |= RDATASET_ATTR_RESIGN;
6135 newheader->resign = rdataset->resign;
6137 newheader->resign = 0;
6139 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6140 isc_rwlocktype_write);
6142 changed = add_changed(rbtdb, rbtversion, rbtnode);
6143 if (changed == NULL) {
6144 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6145 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6146 isc_rwlocktype_write);
6147 return (ISC_R_NOMEMORY);
/* Locate the chain for this type, remembering its predecessor. */
6150 topheader_prev = NULL;
6151 for (topheader = rbtnode->data;
6153 topheader = topheader->next) {
6154 if (topheader->type == newheader->type)
6156 topheader_prev = topheader;
6159 * If header isn't NULL, we've found the right type. There may be
6160 * IGNORE rdatasets between the top of the chain and the first real
6161 * data. We skip over them.
6164 while (header != NULL && IGNORE(header))
6165 header = header->down;
6166 if (header != NULL && EXISTS(header)) {
6167 unsigned int flags = 0;
6169 result = ISC_R_SUCCESS;
6170 if ((options & DNS_DBSUB_EXACT) != 0) {
6171 flags |= DNS_RDATASLAB_EXACT;
6172 if (newheader->rdh_ttl != header->rdh_ttl)
6173 result = DNS_R_NOTEXACT;
6175 if (result == ISC_R_SUCCESS)
6176 result = dns_rdataslab_subtract(
6177 (unsigned char *)header,
6178 (unsigned char *)newheader,
6179 (unsigned int)(sizeof(*newheader)),
6181 rbtdb->common.rdclass,
6182 (dns_rdatatype_t)header->type,
6184 if (result == ISC_R_SUCCESS) {
6185 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6186 newheader = (rdatasetheader_t *)subresult;
6187 init_rdataset(rbtdb, newheader);
6189 * We have to set the serial since the rdataslab
6190 * subtraction routine copies the reserved portion of
6191 * header, not newheader.
6193 newheader->serial = rbtversion->serial;
6195 * XXXJT: dns_rdataslab_subtract() copied the pointers
6196 * to additional info. We need to clear these fields
6197 * to avoid having duplicated references.
6199 newheader->additional_auth = NULL;
6200 newheader->additional_glue = NULL;
6201 } else if (result == DNS_R_NXRRSET) {
6203 * This subtraction would remove all of the rdata;
6204 * add a nonexistent header instead.
6206 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6207 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6208 if (newheader == NULL) {
6209 result = ISC_R_NOMEMORY;
6212 set_ttl(rbtdb, newheader, 0);
6213 newheader->type = topheader->type;
6214 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6215 newheader->trust = 0;
6216 newheader->serial = rbtversion->serial;
6217 newheader->noqname = NULL;
6218 newheader->closest = NULL;
6219 newheader->count = 0;
6220 newheader->additional_auth = NULL;
6221 newheader->additional_glue = NULL;
6222 newheader->node = rbtnode;
6223 newheader->resign = 0;
6224 newheader->last_used = 0;
6226 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6231 * If we're here, we want to link newheader in front of
6234 INSIST(rbtversion->serial >= topheader->serial);
6235 if (topheader_prev != NULL)
6236 topheader_prev->next = newheader;
6238 rbtnode->data = newheader;
6239 newheader->next = topheader->next;
6240 newheader->down = topheader;
6241 topheader->next = newheader;
6243 changed->dirty = ISC_TRUE;
6246 * The rdataset doesn't exist, so we don't need to do anything
6247 * to satisfy the deletion request.
6249 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6250 if ((options & DNS_DBSUB_EXACT) != 0)
6251 result = DNS_R_NOTEXACT;
6253 result = DNS_R_UNCHANGED;
6256 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6257 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6260 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6261 isc_rwlocktype_write);
6264 * Update the zone's secure status. If version is non-NULL
6265 * this is deferred until closeversion() is called.
6267 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6268 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * Delete the rdataset of the given type (and covers) at node.
 *
 * Deletion is expressed as an addition: a header flagged
 * RDATASET_ATTR_NONEXISTENT with TTL 0 and trust 0 is built and passed
 * to add() with DNS_DBADD_FORCE while the node write lock is held.
 * The header takes the serial of the version when one is supplied;
 * the other visible assignment sets serial 0.
 *
 * Returns ISC_R_NOTIMPLEMENTED for type ANY and for RRSIG with
 * covers == 0, ISC_R_NOMEMORY if the header cannot be allocated, and
 * otherwise presumably the result of add() (the final return is not
 * visible in this listing).
 *
 * For a non-cache database with version == NULL the secure status is
 * refreshed via iszonesecure() after a successful add().
 */
6274 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6275 dns_rdatatype_t type, dns_rdatatype_t covers)
6277 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6278 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6279 rbtdb_version_t *rbtversion = version;
6280 isc_result_t result;
6281 rdatasetheader_t *newheader;
6283 REQUIRE(VALID_RBTDB(rbtdb));
6285 if (type == dns_rdatatype_any)
6286 return (ISC_R_NOTIMPLEMENTED);
6287 if (type == dns_rdatatype_rrsig && covers == 0)
6288 return (ISC_R_NOTIMPLEMENTED);
/* Build the "does not exist" marker that add() will link in. */
6290 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6291 if (newheader == NULL)
6292 return (ISC_R_NOMEMORY);
6293 set_ttl(rbtdb, newheader, 0);
6294 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6295 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6296 newheader->trust = 0;
6297 newheader->noqname = NULL;
6298 newheader->closest = NULL;
6299 newheader->additional_auth = NULL;
6300 newheader->additional_glue = NULL;
6301 if (rbtversion != NULL)
6302 newheader->serial = rbtversion->serial;
6304 newheader->serial = 0;
6305 newheader->count = 0;
6306 newheader->last_used = 0;
6307 newheader->node = rbtnode;
6309 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6310 isc_rwlocktype_write);
6312 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6313 ISC_FALSE, NULL, 0);
6315 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6316 isc_rwlocktype_write);
6319 * Update the zone's secure status. If version is non-NULL
6320 * this is deferred until closeversion() is called.
6322 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6323 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * Load-time callback: add rdataset at name to the database that is
 * being loaded.  arg is the rbtdb_load_t load context.
 *
 * Checks performed before anything is stored:
 *   - SOA away from the origin of a non-cache database is rejected
 *     with DNS_R_NOTZONETOP;
 *   - at a wildcard owner name, NS is rejected with DNS_R_INVALIDNS
 *     and NSEC3 with DNS_R_INVALIDNSEC3.
 * For anything that is not NSEC3 data, add_empty_wildcards() is
 * called; for wildcard names add_wildcard_magic() is called as well.
 *
 * NSEC3 data (type or covers) goes into rbtdb->nsec3, everything else
 * into rbtdb->tree.  A node that did not exist before gets its locknum
 * assigned here.  The rdataset is converted to a slab with room for
 * the header, and add() is called against rbtdb->current_version with
 * DNS_DBADD_MERGE and loading set to ISC_TRUE.  After a successful
 * add() of a delegating type the node gets find_callback set;
 * DNS_R_UNCHANGED from add() is mapped to ISC_R_SUCCESS.
 *
 * The stored TTL is rdataset->ttl + loadctx->now; the addition is not
 * checked for overflow (see the XXX marker below).
 *
 * No node locking is done here.
 */
6329 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6330 rbtdb_load_t *loadctx = arg;
6331 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6332 dns_rbtnode_t *node;
6333 isc_result_t result;
6334 isc_region_t region;
6335 rdatasetheader_t *newheader;
6338 * This routine does no node locking. See comments in
6339 * 'load' below for more information on loading and
6345 * SOA records are only allowed at top of zone.
6347 if (rdataset->type == dns_rdatatype_soa &&
6348 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6349 return (DNS_R_NOTZONETOP);
6351 if (rdataset->type != dns_rdatatype_nsec3 &&
6352 rdataset->covers != dns_rdatatype_nsec3)
6353 add_empty_wildcards(rbtdb, name);
6355 if (dns_name_iswildcard(name)) {
6357 * NS record owners cannot legally be wild cards.
6359 if (rdataset->type == dns_rdatatype_ns)
6360 return (DNS_R_INVALIDNS);
6362 * NSEC3 record owners cannot legally be wild cards.
6364 if (rdataset->type == dns_rdatatype_nsec3)
6365 return (DNS_R_INVALIDNSEC3);
6366 result = add_wildcard_magic(rbtdb, name);
6367 if (result != ISC_R_SUCCESS)
/* NSEC3 data lives in its own tree, separate from the main one. */
6372 if (rdataset->type == dns_rdatatype_nsec3 ||
6373 rdataset->covers == dns_rdatatype_nsec3) {
6374 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6375 if (result == ISC_R_SUCCESS)
6378 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6379 if (result == ISC_R_SUCCESS)
6382 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
/* A node that was just created still needs its lock bucket. */
6384 if (result != ISC_R_EXISTS) {
6385 dns_name_t foundname;
6386 dns_name_init(&foundname, NULL);
6387 dns_rbt_namefromnode(node, &foundname);
6388 #ifdef DNS_RBT_USEHASH
6389 node->locknum = node->hashval % rbtdb->node_lock_count;
6391 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6392 rbtdb->node_lock_count;
6396 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6398 sizeof(rdatasetheader_t));
6399 if (result != ISC_R_SUCCESS)
6401 newheader = (rdatasetheader_t *)region.base;
6402 init_rdataset(rbtdb, newheader);
6403 set_ttl(rbtdb, newheader,
6404 rdataset->ttl + loadctx->now); /* XXX overflow check */
6405 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6407 newheader->attributes = 0;
6408 newheader->trust = rdataset->trust;
6409 newheader->serial = 1;
6410 newheader->noqname = NULL;
6411 newheader->closest = NULL;
6412 newheader->count = init_count++;
6413 newheader->additional_auth = NULL;
6414 newheader->additional_glue = NULL;
6415 newheader->last_used = 0;
6416 newheader->node = node;
6417 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6418 newheader->attributes |= RDATASET_ATTR_RESIGN;
6419 newheader->resign = rdataset->resign;
6421 newheader->resign = 0;
6423 result = add(rbtdb, node, rbtdb->current_version, newheader,
6424 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6425 if (result == ISC_R_SUCCESS &&
6426 delegating_type(rbtdb, node, rdataset->type))
6427 node->find_callback = 1;
6428 else if (result == DNS_R_UNCHANGED)
6429 result = ISC_R_SUCCESS;
/*
 * Start loading the database.
 *
 * Allocates an rbtdb_load_t load context that records the database
 * and, for a cache, the current time.  Under the database write lock
 * it is REQUIREd that the LOADED / LOADING attribute test holds (the
 * comparison itself is cut off in this listing), and
 * RBTDB_ATTR_LOADING is then set.  *addp is set to
 * loading_addrdataset.
 * NOTE(review): presumably the load context is handed back through
 * dbloadp and loadctx->now is set to 0 for non-cache databases; those
 * lines are absent from this listing.
 *
 * Returns ISC_R_NOMEMORY if the context cannot be allocated and
 * ISC_R_SUCCESS otherwise.
 */
6435 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6436 rbtdb_load_t *loadctx;
6439 rbtdb = (dns_rbtdb_t *)db;
6441 REQUIRE(VALID_RBTDB(rbtdb));
6443 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6444 if (loadctx == NULL)
6445 return (ISC_R_NOMEMORY);
6447 loadctx->rbtdb = rbtdb;
6448 if (IS_CACHE(rbtdb))
6449 isc_stdtime_get(&loadctx->now);
/* The attribute check and update happen under the database write lock. */
6453 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6455 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6457 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6459 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6461 *addp = loading_addrdataset;
6464 return (ISC_R_SUCCESS);
/*
 * endload: finish a load on 'db'.
 *
 * Requires a valid database, a non-NULL 'dbloadp', and a load context that
 * belongs to this database.  Under the database write lock the function
 * requires that LOADING is set and LOADED is clear, then clears
 * RBTDB_ATTR_LOADING and sets RBTDB_ATTR_LOADED.  For a non-cache database
 * iszonesecure() is then called on the current version and the origin
 * node.  The load context is returned to the memory context and
 * ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the statement that fetches 'loadctx' from '*dbloadp' is not
 * visible in this listing.
 */
6468 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6469 rbtdb_load_t *loadctx;
6470 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6472 REQUIRE(VALID_RBTDB(rbtdb));
6473 REQUIRE(dbloadp != NULL);
6475 REQUIRE(loadctx->rbtdb == rbtdb);
6477 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6479 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6480 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6482 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6483 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6485 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6488 * If there's a KEY rdataset at the zone origin containing a
6489 * zone key, we consider the zone secure.
6491 if (! IS_CACHE(rbtdb))
6492 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6496 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6498 return (ISC_R_SUCCESS);
/*
 * dump: write 'version' of 'db' to 'filename' in 'masterformat'.
 *
 * After validating the database this simply forwards to dns_master_dump2()
 * using the database's memory context and dns_master_style_default, and
 * returns whatever that call returns.
 */
6502 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6503 dns_masterformat_t masterformat) {
6506 rbtdb = (dns_rbtdb_t *)db;
6508 REQUIRE(VALID_RBTDB(rbtdb));
6510 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6511 &dns_master_style_default,
6512 filename, masterformat));
/*
 * delete_callback: free a chain of rdataset headers.
 *
 * 'arg' is the owning dns_rbtdb_t.  The node lock bucket of the first
 * header's node is held for writing while the chain is walked through the
 * 'next' pointers and each header is released with free_rdataset().
 * This function is the deleter passed to dns_rbt_create() by the database
 * constructor in this file.
 *
 * NOTE(review): the assignment of 'current' from 'data' and the loop
 * advance are not visible in this listing; presumably 'data' is the head of
 * the chain -- confirm against the full file.
 */
6516 delete_callback(void *data, void *arg) {
6517 dns_rbtdb_t *rbtdb = arg;
6518 rdatasetheader_t *current, *next;
6519 unsigned int locknum;
6522 locknum = current->node->locknum;
6523 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6524 while (current != NULL) {
/* Save the successor before the current header is freed. */
6525 next = current->next;
6526 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6529 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/*
 * issecure: report whether the current version of 'db' is marked
 * dns_db_secure.
 *
 * The 'secure' field of the current version is sampled while the tree lock
 * is held for reading and compared for equality with dns_db_secure.
 */
6532 static isc_boolean_t
6533 issecure(dns_db_t *db) {
6535 isc_boolean_t secure;
6537 rbtdb = (dns_rbtdb_t *)db;
6539 REQUIRE(VALID_RBTDB(rbtdb));
6541 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6542 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6543 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * isdnssec: report whether the current version of 'db' is anything other
 * than dns_db_insecure.
 *
 * Unlike issecure(), which tests for equality with dns_db_secure, this
 * tests for inequality with dns_db_insecure.  The field is read while the
 * tree lock is held for reading.
 */
6548 static isc_boolean_t
6549 isdnssec(dns_db_t *db) {
6551 isc_boolean_t dnssec;
6553 rbtdb = (dns_rbtdb_t *)db;
6555 REQUIRE(VALID_RBTDB(rbtdb));
6557 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6558 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6559 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * nodecount: obtain the node count of the main tree of 'db'.
 *
 * dns_rbt_nodecount() is called on rbtdb->tree while the tree lock is held
 * for reading.  Only rbtdb->tree is consulted on the visible lines; the
 * nsec3 tree is not.
 */
6565 nodecount(dns_db_t *db) {
6569 rbtdb = (dns_rbtdb_t *)db;
6571 REQUIRE(VALID_RBTDB(rbtdb));
6573 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6574 count = dns_rbt_nodecount(rbtdb->tree);
6575 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * settask: replace the task associated with 'db'.
 *
 * Under the database write lock, any task currently attached to the
 * database is detached and 'task' is attached in its place.
 *
 * NOTE(review): a line between the detach and the attach is missing from
 * this listing, so whether the attach is guarded (e.g. for a NULL 'task')
 * cannot be confirmed here.
 */
6581 settask(dns_db_t *db, isc_task_t *task) {
6584 rbtdb = (dns_rbtdb_t *)db;
6586 REQUIRE(VALID_RBTDB(rbtdb));
6588 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6589 if (rbtdb->task != NULL)
6590 isc_task_detach(&rbtdb->task);
6592 isc_task_attach(task, &rbtdb->task);
6593 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * ispersistent: takes a database handle and returns an isc_boolean_t.
 *
 * NOTE(review): the body of this function is not visible in this listing,
 * so the value it returns cannot be stated here -- confirm against the
 * full file.
 */
6596 static isc_boolean_t
6597 ispersistent(dns_db_t *db) {
/*
 * getoriginnode: hand out a referenced pointer to the origin node of 'db'.
 *
 * Requires a valid database and that '*nodep' is NULL on entry.  When the
 * database has an origin node, a new reference is taken on it while its
 * node lock bucket is strongly locked, and '*nodep' is set to that node.
 * When there is no origin node the database must be a cache (INSIST) and
 * the result is ISC_R_NOTFOUND.  'result' starts as ISC_R_SUCCESS.
 */
6603 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6604 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6605 dns_rbtnode_t *onode;
6606 isc_result_t result = ISC_R_SUCCESS;
6608 REQUIRE(VALID_RBTDB(rbtdb));
6609 REQUIRE(nodep != NULL && *nodep == NULL);
6611 /* Note that the access to origin_node doesn't require a DB lock */
6612 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6613 if (onode != NULL) {
6614 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6615 new_reference(rbtdb, onode);
6616 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6618 *nodep = rbtdb->origin_node;
6620 INSIST(IS_CACHE(rbtdb));
6621 result = ISC_R_NOTFOUND;
/*
 * getnsec3parameters: copy the NSEC3 parameters recorded for a version.
 *
 * 'version' may be NULL, in which case the current version is used.  The
 * tree lock is held for reading throughout.  'result' starts as
 * ISC_R_NOTFOUND and becomes ISC_R_SUCCESS only when the version has
 * 'havensec3' set.  In that case the hash, flags, iterations, salt and salt
 * length stored in the version are copied to the output arguments.
 *
 * The salt is copied only when both 'salt' and 'salt_length' are non-NULL,
 * and the caller's '*salt_length' must be at least as large as the stored
 * salt (REQUIRE).  'salt_length' and 'iterations' are individually
 * NULL-checked before being written.
 *
 * NOTE(review): the lines preceding the writes to '*hash' and '*flags' are
 * missing from this listing, so whether those writes are NULL-guarded
 * cannot be confirmed here.
 */
6628 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6629 isc_uint8_t *flags, isc_uint16_t *iterations,
6630 unsigned char *salt, size_t *salt_length)
6633 isc_result_t result = ISC_R_NOTFOUND;
6634 rbtdb_version_t *rbtversion = version;
6636 rbtdb = (dns_rbtdb_t *)db;
6638 REQUIRE(VALID_RBTDB(rbtdb));
6640 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6642 if (rbtversion == NULL)
6643 rbtversion = rbtdb->current_version;
6645 if (rbtversion->havensec3) {
6647 *hash = rbtversion->hash;
6648 if (salt != NULL && salt_length != NULL) {
6649 REQUIRE(*salt_length >= rbtversion->salt_length);
6650 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6652 if (salt_length != NULL)
6653 *salt_length = rbtversion->salt_length;
6654 if (iterations != NULL)
6655 *iterations = rbtversion->iterations;
6657 *flags = rbtversion->flags;
6658 result = ISC_R_SUCCESS;
6660 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * setsigningtime: change the re-signing time of the rdataset header behind
 * 'rdataset' and keep the per-bucket heap consistent with it.
 *
 * Requires a valid, non-cache database and a non-NULL 'rdataset'.  The
 * header is taken from rdataset->private3.  With the header's node lock
 * bucket held for writing, the old resign time is remembered and the new
 * one stored.  Then:
 *   - if the header is already in the heap (heap_index != 0) it must carry
 *     the RESIGN attribute; it is either removed from the heap (heap_index
 *     reset to 0) or repositioned with isc_heap_increased() /
 *     isc_heap_decreased(), the former when 'resign' is less than the old
 *     value;
 *   - otherwise, if 'resign' is non-zero, RDATASET_ATTR_RESIGN is set and
 *     the header is inserted with resign_insert(), whose result becomes the
 *     function result.
 *
 * NOTE(review): the condition that selects the heap-delete branch and the
 * 'else' before isc_heap_decreased() are not visible in this listing.
 */
6666 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6667 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6668 isc_stdtime_t oldresign;
6669 isc_result_t result = ISC_R_SUCCESS;
6670 rdatasetheader_t *header;
6672 REQUIRE(VALID_RBTDB(rbtdb));
6673 REQUIRE(!IS_CACHE(rbtdb));
6674 REQUIRE(rdataset != NULL);
6676 header = rdataset->private3;
6679 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6680 isc_rwlocktype_write);
6682 oldresign = header->resign;
6683 header->resign = resign;
6684 if (header->heap_index != 0) {
6685 INSIST(RESIGN(header));
6687 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6688 header->heap_index);
6689 header->heap_index = 0;
6690 } else if (resign < oldresign)
6691 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6692 header->heap_index);
6694 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6695 header->heap_index);
6696 } else if (resign && header->heap_index == 0) {
6697 header->attributes |= RDATASET_ATTR_RESIGN;
6698 result = resign_insert(rbtdb, header->node->locknum, header);
6700 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6701 isc_rwlocktype_write);
/*
 * getsigningtime: find the header with the earliest re-signing time across
 * all node lock buckets and bind it to 'rdataset'.
 *
 * The database lock is held for reading for the whole call.  Each bucket's
 * node lock is taken for reading and the element at heap position 1 of
 * that bucket's heap is examined.  When a candidate 'this' has a resign
 * time that is serial-less-than the best 'header' so far, the node lock of
 * the previous best header's bucket is released.  The visible unlock calls
 * imply that the lock of the bucket holding the current best header stays
 * held until the end, where it is released after binding.
 *
 * When a header was found it is bound to 'rdataset' with bind_rdataset()
 * (with 0 as the time argument), 'foundname' (if non-NULL) receives the
 * full name of the header's node, and ISC_R_SUCCESS is returned; otherwise
 * the initial ISC_R_NOTFOUND stands.
 *
 * NOTE(review): several branch lines of the selection loop are missing from
 * this listing; the description of the loop is limited to what the visible
 * lock/unlock calls show.
 */
6706 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6707 dns_name_t *foundname)
6709 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6710 rdatasetheader_t *header = NULL, *this;
6712 isc_result_t result = ISC_R_NOTFOUND;
6713 unsigned int locknum;
6715 REQUIRE(VALID_RBTDB(rbtdb));
6717 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6719 for (i = 0; i < rbtdb->node_lock_count; i++) {
6720 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6721 this = isc_heap_element(rbtdb->heaps[i], 1);
6723 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6724 isc_rwlocktype_read);
6729 else if (isc_serial_lt(this->resign, header->resign)) {
/* A better candidate was found: drop the lock guarding the previous best. */
6730 locknum = header->node->locknum;
6731 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6732 isc_rwlocktype_read);
6735 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6736 isc_rwlocktype_read);
6742 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6744 if (foundname != NULL)
6745 dns_rbt_fullnamefromnode(header->node, foundname);
6747 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6748 isc_rwlocktype_read);
6750 result = ISC_R_SUCCESS;
6753 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
/*
 * resigned: note that the rdataset has been re-signed in the writer
 * version 'version'.
 *
 * Requires a valid database, a non-NULL 'rdataset', that 'version' is the
 * database's future version, and that it is a writer version.  The node and
 * header come from rdataset->private2 and rdataset->private3.
 *
 * With the database lock and then the node's lock bucket both held for
 * writing, a reference is taken on the node, the header is removed from
 * the bucket's heap (heap_index reset to 0) and appended to the version's
 * resigned_list.  The locks are released in the reverse order.
 */
6759 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6761 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6762 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6763 dns_rbtnode_t *node;
6764 rdatasetheader_t *header;
6766 REQUIRE(VALID_RBTDB(rbtdb));
6767 REQUIRE(rdataset != NULL);
6768 REQUIRE(rbtdb->future_version == rbtversion);
6769 REQUIRE(rbtversion->writer);
6771 node = rdataset->private2;
6772 header = rdataset->private3;
6775 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6776 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6777 isc_rwlocktype_write);
6779 * Delete from heap and save to re-signed list so that it can
6780 * be restored if we backout of this change.
6782 new_reference(rbtdb, node);
6783 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6784 header->heap_index = 0;
6785 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6787 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6788 isc_rwlocktype_write);
6789 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * getrrsetstats: return the rrsetstats pointer stored in 'db'.
 *
 * Requires a valid database that is a cache.  The pointer is returned as
 * is; no reference is taken on the visible lines.
 */
6792 static dns_stats_t *
6793 getrrsetstats(dns_db_t *db) {
6794 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6796 REQUIRE(VALID_RBTDB(rbtdb));
6797 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6799 return (rbtdb->rrsetstats);
6802 static dns_dbmethods_t zone_methods = {
6841 static dns_dbmethods_t cache_methods = {
/*
 * Database constructor (the line carrying the function name is not visible
 * in this listing; it is selected by DNS_RBTDB_VERSION64).
 *
 * Allocates and zeroes a dns_rbtdb_t from 'mctx' and selects the method
 * table and common attributes from 'type' (cache, stub, or otherwise zone).
 * It then sets up, in order:
 *   - the database lock and the tree lock;
 *   - node_lock_count (a default depending on cache/zone when it is 0) and
 *     the node_locks array;
 *   - for a cache, rrset statistics and the per-bucket rdatasets lists;
 *   - the per-bucket heaps, ordered by ttl_sooner for a cache and
 *     resign_sooner otherwise;
 *   - the per-bucket deadnodes lists;
 *   - each node lock and its reference counter;
 *   - the attachment to 'mctx', the ondestroy list, a copy of 'origin',
 *     and the two red-black trees (tree and nsec3), both created with
 *     delete_callback as their deleter;
 *   - for a non-cache database, an explicit origin node with its locknum;
 *   - the database reference count (1), the serial numbers, and the
 *     initial version, which is prepended to open_versions.
 * Finally the magic numbers are stored, '*dbp' is set and ISC_R_SUCCESS is
 * returned.
 *
 * Failures up to and including node-lock initialisation unwind through the
 * cleanup section at the end of the function (only some of its labels are
 * visible); failures after the mctx attach call free_rbtdb() instead.
 *
 * NOTE(review): many short lines (labels, braces, 'else', returns) are
 * absent from this listing.
 */
6881 #ifdef DNS_RBTDB_VERSION64
6886 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6887 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6888 void *driverarg, dns_db_t **dbp)
6891 isc_result_t result;
6894 isc_boolean_t (*sooner)(void *, void *);
6896 /* Keep the compiler happy. */
6901 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6903 return (ISC_R_NOMEMORY);
6905 memset(rbtdb, '\0', sizeof(*rbtdb));
6906 dns_name_init(&rbtdb->common.origin, NULL);
6907 rbtdb->common.attributes = 0;
6908 if (type == dns_dbtype_cache) {
6909 rbtdb->common.methods = &cache_methods;
6910 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6911 } else if (type == dns_dbtype_stub) {
6912 rbtdb->common.methods = &zone_methods;
6913 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6915 rbtdb->common.methods = &zone_methods;
6916 rbtdb->common.rdclass = rdclass;
6917 rbtdb->common.mctx = NULL;
6919 result = RBTDB_INITLOCK(&rbtdb->lock);
6920 if (result != ISC_R_SUCCESS)
6923 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6924 if (result != ISC_R_SUCCESS)
6928 * Initialize node_lock_count in a generic way to support future
6929 * extension which allows the user to specify this value on creation.
6930 * Note that when specified for a cache DB it must be larger than 1
6931 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6933 if (rbtdb->node_lock_count == 0) {
6934 if (IS_CACHE(rbtdb))
6935 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6937 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6938 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6939 result = ISC_R_RANGE;
6940 goto cleanup_tree_lock;
6942 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6943 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6944 sizeof(rbtdb_nodelock_t));
6945 if (rbtdb->node_locks == NULL) {
6946 result = ISC_R_NOMEMORY;
6947 goto cleanup_tree_lock;
6950 rbtdb->rrsetstats = NULL;
6951 if (IS_CACHE(rbtdb)) {
6952 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6953 if (result != ISC_R_SUCCESS)
6954 goto cleanup_node_locks;
6955 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6956 sizeof(rdatasetheaderlist_t));
6957 if (rbtdb->rdatasets == NULL) {
6958 result = ISC_R_NOMEMORY;
6959 goto cleanup_rrsetstats;
6961 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6962 ISC_LIST_INIT(rbtdb->rdatasets[i]);
6964 rbtdb->rdatasets = NULL;
6969 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6970 sizeof(isc_heap_t *));
6971 if (rbtdb->heaps == NULL) {
6972 result = ISC_R_NOMEMORY;
6973 goto cleanup_rdatasets;
/*
 * Every slot is set to NULL first; the heap cleanup code near the end of
 * this function destroys only the non-NULL entries.
 */
6975 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6976 rbtdb->heaps[i] = NULL;
6977 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6978 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6979 result = isc_heap_create(mctx, sooner, set_index, 0,
6981 if (result != ISC_R_SUCCESS)
6986 * Create deadnode lists.
6988 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6989 sizeof(rbtnodelist_t));
6990 if (rbtdb->deadnodes == NULL) {
6991 result = ISC_R_NOMEMORY;
6994 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6995 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6997 rbtdb->active = rbtdb->node_lock_count;
6999 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7000 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7001 if (result == ISC_R_SUCCESS) {
7002 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7003 if (result != ISC_R_SUCCESS)
7004 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7006 if (result != ISC_R_SUCCESS) {
/*
 * NOTE(review): the counters being unwound here were initialised with 0
 * above, yet they are decremented before being destroyed -- confirm that
 * isc_refcount_decrement() tolerates a zero count on this error path.
 */
7008 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7009 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7010 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7012 goto cleanup_deadnodes;
7014 rbtdb->node_locks[i].exiting = ISC_FALSE;
7018 * Attach to the mctx. The database will persist so long as there
7019 * are references to it, and attaching to the mctx ensures that our
7020 * mctx won't disappear out from under us.
7022 isc_mem_attach(mctx, &rbtdb->common.mctx);
7025 * Must be initialized before free_rbtdb() is called.
7027 isc_ondestroy_init(&rbtdb->common.ondest);
7030 * Make a copy of the origin name.
7032 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7033 if (result != ISC_R_SUCCESS) {
7034 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7039 * Make the Red-Black Trees.
7041 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7042 if (result != ISC_R_SUCCESS) {
7043 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7047 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7048 if (result != ISC_R_SUCCESS) {
7049 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7054 * In order to set the node callback bit correctly in zone databases,
7055 * we need to know if the node has the origin name of the zone.
7056 * In loading_addrdataset() we could simply compare the new name
7057 * to the origin name, but this is expensive. Also, we don't know the
7058 * node name in addrdataset(), so we need another way of knowing the
7061 * We now explicitly create a node for the zone's origin, and then
7062 * we simply remember the node's address. This is safe, because
7063 * the top-of-zone node can never be deleted, nor can its address
7066 if (!IS_CACHE(rbtdb)) {
7067 rbtdb->origin_node = NULL;
7068 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7069 &rbtdb->origin_node);
7070 if (result != ISC_R_SUCCESS) {
7071 INSIST(result != ISC_R_EXISTS);
7072 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7075 rbtdb->origin_node->nsec3 = 0;
7077 * We need to give the origin node the right locknum.
7079 dns_name_init(&name, NULL);
7080 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7081 #ifdef DNS_RBT_USEHASH
7082 rbtdb->origin_node->locknum =
7083 rbtdb->origin_node->hashval %
7084 rbtdb->node_lock_count;
7086 rbtdb->origin_node->locknum =
7087 dns_name_hash(&name, ISC_TRUE) %
7088 rbtdb->node_lock_count;
7093 * Misc. Initialization.
7095 result = isc_refcount_init(&rbtdb->references, 1);
7096 if (result != ISC_R_SUCCESS) {
7097 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7100 rbtdb->attributes = 0;
7101 rbtdb->overmem = ISC_FALSE;
7105 * Version Initialization.
7107 rbtdb->current_serial = 1;
7108 rbtdb->least_serial = 1;
7109 rbtdb->next_serial = 2;
7110 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7111 if (rbtdb->current_version == NULL) {
/* Drop the single database reference taken above before tearing down. */
7112 isc_refcount_decrement(&rbtdb->references, NULL);
7113 isc_refcount_destroy(&rbtdb->references);
7114 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7115 return (ISC_R_NOMEMORY);
7117 rbtdb->current_version->secure = dns_db_insecure;
7118 rbtdb->current_version->havensec3 = ISC_FALSE;
7119 rbtdb->current_version->flags = 0;
7120 rbtdb->current_version->iterations = 0;
7121 rbtdb->current_version->hash = 0;
7122 rbtdb->current_version->salt_length = 0;
7123 memset(rbtdb->current_version->salt, 0,
7124 sizeof(rbtdb->current_version->salt));
7125 rbtdb->future_version = NULL;
7126 ISC_LIST_INIT(rbtdb->open_versions);
7128 * Keep the current version in the open list so that list operation
7129 * won't happen in normal lookup operations.
7131 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7133 rbtdb->common.magic = DNS_DB_MAGIC;
7134 rbtdb->common.impmagic = RBTDB_MAGIC;
7136 *dbp = (dns_db_t *)rbtdb;
7138 return (ISC_R_SUCCESS);
/*
 * Error unwinding: the visible statements release resources in the reverse
 * order of their creation above (labels are not visible in this listing).
 */
7141 isc_mem_put(mctx, rbtdb->deadnodes,
7142 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7145 if (rbtdb->heaps != NULL) {
7146 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7147 if (rbtdb->heaps[i] != NULL)
7148 isc_heap_destroy(&rbtdb->heaps[i]);
7149 isc_mem_put(mctx, rbtdb->heaps,
7150 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7154 if (rbtdb->rdatasets != NULL)
7155 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7156 sizeof(rdatasetheaderlist_t));
7158 if (rbtdb->rrsetstats != NULL)
7159 dns_stats_detach(&rbtdb->rrsetstats);
7162 isc_mem_put(mctx, rbtdb->node_locks,
7163 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7166 isc_rwlock_destroy(&rbtdb->tree_lock);
7169 RBTDB_DESTROYLOCK(&rbtdb->lock);
7172 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7178 * Slabbed Rdataset Methods
/*
 * rdataset_disassociate: release the node reference held by 'rdataset'.
 *
 * The database and node are taken from rdataset->private1 and
 * rdataset->private2 and passed to detachnode().
 */
7182 rdataset_disassociate(dns_rdataset_t *rdataset) {
7183 dns_db_t *db = rdataset->private1;
7184 dns_dbnode_t *node = rdataset->private2;
7186 detachnode(db, &node);
/*
 * rdataset_first: position the rdataset cursor on the first rdata.
 *
 * rdataset->private3 is treated as the raw slab.  The record count is read
 * from its first two bytes, most significant byte first.  On the visible
 * early-exit path private5 is cleared and ISC_R_NOMORE is returned (the
 * guarding condition is not visible in this listing; presumably a zero
 * count).  Otherwise the remaining count is stored in privateuint4, the
 * cursor in private5, and ISC_R_SUCCESS is returned.
 *
 * With DNS_RDATASET_FIXED, when DNS_RDATASETATTR_LOADORDER is not set the
 * cursor additionally skips 2 + 4 * count bytes.
 */
7190 rdataset_first(dns_rdataset_t *rdataset) {
7191 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7194 count = raw[0] * 256 + raw[1];
7196 rdataset->private5 = NULL;
7197 return (ISC_R_NOMORE);
7200 #if DNS_RDATASET_FIXED
7201 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7202 raw += 2 + (4 * count);
7208 * The privateuint4 field is the number of rdata beyond the
7209 * cursor position, so we decrement the total count by one
7210 * before storing it.
7212 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7213 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7214 * to the first entry in the offset table.
7217 rdataset->privateuint4 = count;
7218 rdataset->private5 = raw;
7220 return (ISC_R_SUCCESS);
/*
 * rdataset_next: advance the rdataset cursor by one rdata.
 *
 * The remaining count is read from privateuint4; the function can return
 * ISC_R_NOMORE before touching the cursor (the guarding condition and the
 * decrement are not visible in this listing).  Otherwise the count is
 * written back, the cursor in private5 is advanced past the current entry
 * and ISC_R_SUCCESS is returned.
 *
 * The record length is read from the first two bytes at the cursor, most
 * significant byte first.  Two alternative cursor updates are visible,
 * 'raw + 4' and 'raw + 2', with only the former under
 * DNS_RDATASET_FIXED; the intervening lines are missing from this listing.
 */
7224 rdataset_next(dns_rdataset_t *rdataset) {
7226 unsigned int length;
7227 unsigned char *raw; /* RDATASLAB */
7229 count = rdataset->privateuint4;
7231 return (ISC_R_NOMORE);
7233 rdataset->privateuint4 = count;
7236 * Skip forward one record (length + 4) or one offset (4).
7238 raw = rdataset->private5;
7239 #if DNS_RDATASET_FIXED
7240 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7242 length = raw[0] * 256 + raw[1];
7244 #if DNS_RDATASET_FIXED
7246 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7248 rdataset->private5 = raw + 2; /* length(2) */
7251 return (ISC_R_SUCCESS);
/*
 * rdataset_current: build 'rdata' from the record at the cursor.
 *
 * Requires a non-NULL cursor (rdataset->private5).  With
 * DNS_RDATASET_FIXED and DNS_RDATASETATTR_LOADORDER set, the cursor holds a
 * 4-byte big-endian offset and 'raw' is reset to the slab base
 * (rdataset->private3) before the record is read (the line applying the
 * offset is not visible in this listing).
 *
 * The record length is read from two bytes, most significant first.  For
 * RRSIG rdatasets the DNS_RDATASLAB_OFFLINE bit of the byte at 'raw' is
 * translated into DNS_RDATA_OFFLINE.  The rdata is built with
 * dns_rdata_fromregion() and the collected flags are OR-ed into
 * rdata->flags.
 */
7255 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7256 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7257 #if DNS_RDATASET_FIXED
7258 unsigned int offset;
7260 unsigned int length;
7262 unsigned int flags = 0;
7264 REQUIRE(raw != NULL);
7267 * Find the start of the record if not already in private5
7268 * then skip the length and order fields.
7270 #if DNS_RDATASET_FIXED
7271 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7272 offset = (raw[0] << 24) + (raw[1] << 16) +
7273 (raw[2] << 8) + raw[3];
7274 raw = rdataset->private3;
7278 length = raw[0] * 256 + raw[1];
7279 #if DNS_RDATASET_FIXED
7284 if (rdataset->type == dns_rdatatype_rrsig) {
7285 if (*raw & DNS_RDATASLAB_OFFLINE)
7286 flags |= DNS_RDATA_OFFLINE;
7292 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7293 rdata->flags |= flags;
/*
 * rdataset_clone: make 'target' refer to the same data as 'source'.
 *
 * An additional reference to the source's node (private2) is taken through
 * attachnode(), and the iterator state of 'target' (privateuint4 and
 * private5) is reset.
 *
 * NOTE(review): the line that copies 'source' into 'target' is not visible
 * in this listing.
 */
7297 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7298 dns_db_t *db = source->private1;
7299 dns_dbnode_t *node = source->private2;
7300 dns_dbnode_t *cloned_node = NULL;
7302 attachnode(db, node, &cloned_node);
7306 * Reset iterator state.
7308 target->privateuint4 = 0;
7309 target->private5 = NULL;
/*
 * rdataset_count: read the number of rdata in the slab.
 *
 * The count is decoded from the first two bytes of the slab at
 * rdataset->private3, most significant byte first.
 */
7313 rdataset_count(dns_rdataset_t *rdataset) {
7314 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7317 count = raw[0] * 256 + raw[1];
/*
 * rdataset_getnoqname: expose the "no qname" proof attached to 'rdataset'.
 *
 * The proof is the struct noqname stored in rdataset->private6.  'nsec' is
 * filled in to describe the proof's negative rdataset (noqname->neg) and
 * 'nsecsig' to describe its RRSIG rdataset (noqname->negsig, covering
 * noqname->type).  Both inherit the TTL, trust, database (private1) and
 * node (private2) of 'rdataset', start with a reset iterator
 * (privateuint4 = 0, private5 = NULL) and carry no proofs of their own
 * (private6 and private7 are NULL).  A node reference is taken with
 * attachnode() for each of the two output rdatasets.  The owner name of
 * the proof is cloned into 'name' and ISC_R_SUCCESS is returned.
 *
 * Fix: the trailing two assignments used to clear nsec->private6/7 a second
 * time, leaving nsecsig->private6/7 untouched; they now clear the fields
 * of 'nsecsig'.
 *
 * NOTE(review): short lines of this function are absent from this listing.
 */
7323 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7324 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7326 dns_db_t *db = rdataset->private1;
7327 dns_dbnode_t *node = rdataset->private2;
7328 dns_dbnode_t *cloned_node;
7329 struct noqname *noqname = rdataset->private6;
/* First output: the negative-proof rdataset itself. */
7332 attachnode(db, node, &cloned_node);
7333 nsec->methods = &rdataset_methods;
7334 nsec->rdclass = db->rdclass;
7335 nsec->type = noqname->type;
7337 nsec->ttl = rdataset->ttl;
7338 nsec->trust = rdataset->trust;
7339 nsec->private1 = rdataset->private1;
7340 nsec->private2 = rdataset->private2;
7341 nsec->private3 = noqname->neg;
7342 nsec->privateuint4 = 0;
7343 nsec->private5 = NULL;
7344 nsec->private6 = NULL;
7345 nsec->private7 = NULL;
/* Second output: the signatures covering that proof. */
7348 attachnode(db, node, &cloned_node);
7349 nsecsig->methods = &rdataset_methods;
7350 nsecsig->rdclass = db->rdclass;
7351 nsecsig->type = dns_rdatatype_rrsig;
7352 nsecsig->covers = noqname->type;
7353 nsecsig->ttl = rdataset->ttl;
7354 nsecsig->trust = rdataset->trust;
7355 nsecsig->private1 = rdataset->private1;
7356 nsecsig->private2 = rdataset->private2;
7357 nsecsig->private3 = noqname->negsig;
7358 nsecsig->privateuint4 = 0;
7359 nsecsig->private5 = NULL;
/* The signature rdataset has no nested proofs of its own. */
7360 nsecsig->private6 = NULL;
7361 nsecsig->private7 = NULL;
7363 dns_name_clone(&noqname->name, name);
7365 return (ISC_R_SUCCESS);
/*
 * rdataset_getclosest: expose the "closest encloser" proof attached to
 * 'rdataset'.
 *
 * The proof is the struct noqname stored in rdataset->private7.  'nsec' is
 * filled in to describe the proof's negative rdataset (closest->neg) and
 * 'nsecsig' to describe its RRSIG rdataset (closest->negsig, covering
 * closest->type).  Both inherit the TTL, trust, database (private1) and
 * node (private2) of 'rdataset', start with a reset iterator
 * (privateuint4 = 0, private5 = NULL) and carry no proofs of their own
 * (private6 and private7 are NULL).  A node reference is taken with
 * attachnode() for each of the two output rdatasets.  The owner name of
 * the proof is cloned into 'name' and ISC_R_SUCCESS is returned.
 *
 * Fix: the trailing two assignments used to clear nsec->private6/7 a second
 * time, leaving nsecsig->private6/7 untouched; they now clear the fields
 * of 'nsecsig'.
 *
 * NOTE(review): short lines of this function are absent from this listing.
 */
7369 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7370 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7372 dns_db_t *db = rdataset->private1;
7373 dns_dbnode_t *node = rdataset->private2;
7374 dns_dbnode_t *cloned_node;
7375 struct noqname *closest = rdataset->private7;
/* First output: the negative-proof rdataset itself. */
7378 attachnode(db, node, &cloned_node);
7379 nsec->methods = &rdataset_methods;
7380 nsec->rdclass = db->rdclass;
7381 nsec->type = closest->type;
7383 nsec->ttl = rdataset->ttl;
7384 nsec->trust = rdataset->trust;
7385 nsec->private1 = rdataset->private1;
7386 nsec->private2 = rdataset->private2;
7387 nsec->private3 = closest->neg;
7388 nsec->privateuint4 = 0;
7389 nsec->private5 = NULL;
7390 nsec->private6 = NULL;
7391 nsec->private7 = NULL;
/* Second output: the signatures covering that proof. */
7394 attachnode(db, node, &cloned_node);
7395 nsecsig->methods = &rdataset_methods;
7396 nsecsig->rdclass = db->rdclass;
7397 nsecsig->type = dns_rdatatype_rrsig;
7398 nsecsig->covers = closest->type;
7399 nsecsig->ttl = rdataset->ttl;
7400 nsecsig->trust = rdataset->trust;
7401 nsecsig->private1 = rdataset->private1;
7402 nsecsig->private2 = rdataset->private2;
7403 nsecsig->private3 = closest->negsig;
7404 nsecsig->privateuint4 = 0;
7405 nsecsig->private5 = NULL;
/* The signature rdataset has no nested proofs of its own. */
7406 nsecsig->private6 = NULL;
7407 nsecsig->private7 = NULL;
7409 dns_name_clone(&closest->name, name);
7411 return (ISC_R_SUCCESS);
/*
 * rdataset_settrust: set the trust level of an rdataset.
 *
 * 'trust' is stored both in the caller's 'rdataset' and in the stored
 * header it refers to (rdataset->private3).  The update is made while the
 * node's lock bucket is held for writing.
 */
7415 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7416 dns_rbtdb_t *rbtdb = rdataset->private1;
7417 dns_rbtnode_t *rbtnode = rdataset->private2;
7418 rdatasetheader_t *header = rdataset->private3;
7421 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7422 isc_rwlocktype_write);
7423 header->trust = rdataset->trust = trust;
7424 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7425 isc_rwlocktype_write);
/*
 * rdataset_expire: expire the stored header behind 'rdataset'.
 *
 * expire_header() is called on the header (rdataset->private3) with
 * ISC_FALSE as its last argument, while the node's lock bucket is held for
 * writing.
 */
7429 rdataset_expire(dns_rdataset_t *rdataset) {
7430 dns_rbtdb_t *rbtdb = rdataset->private1;
7431 dns_rbtnode_t *rbtnode = rdataset->private2;
7432 rdatasetheader_t *header = rdataset->private3;
7435 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7436 isc_rwlocktype_write);
7437 expire_header(rbtdb, header, ISC_FALSE);
7438 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7439 isc_rwlocktype_write);
7443 * Rdataset Iterator Methods
/*
 * rdatasetiter_destroy: tear down an rdataset iterator.
 *
 * If the iterator holds a version it is closed with closeversion()
 * (ISC_FALSE as the last argument).  The node reference is released with
 * detachnode() and the iterator memory is returned to the database's
 * memory context.
 */
7447 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7448 rbtdb_rdatasetiter_t *rbtiterator;
7450 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7452 if (rbtiterator->common.version != NULL)
7453 closeversion(rbtiterator->common.db,
7454 &rbtiterator->common.version, ISC_FALSE);
7455 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7456 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7457 sizeof(*rbtiterator));
/*
 * rdatasetiter_first: position the iterator on the first usable rdataset
 * header of its node.
 *
 * For a cache database 'now' is taken from the iterator; 'serial' is taken
 * from the iterator's version (the surrounding branch lines are not fully
 * visible in this listing).  With the node's lock bucket held for reading,
 * the top-level header list is walked via 'next', and for each entry its
 * 'down' chain is searched for the first header whose serial is not newer
 * than 'serial' and which is not marked IGNORE.  Such a header is tested
 * for being NONEXISTENT or, in a cache, expired (now > rdh_ttl).
 *
 * The chosen header (or NULL) is stored in rbtiterator->current.  The
 * function returns ISC_R_NOMORE or ISC_R_SUCCESS; presumably NOMORE when no
 * header was found -- the guarding condition is not visible.
 */
7463 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7464 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7465 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7466 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7467 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7468 rdatasetheader_t *header, *top_next;
7469 rbtdb_serial_t serial;
7472 if (IS_CACHE(rbtdb)) {
7474 now = rbtiterator->common.now;
7476 serial = rbtversion->serial;
7480 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7481 isc_rwlocktype_read);
7483 for (header = rbtnode->data; header != NULL; header = top_next) {
/* Remember the next top-level entry; 'header' is reused to walk 'down'. */
7484 top_next = header->next;
7486 if (header->serial <= serial && !IGNORE(header)) {
7488 * Is this a "this rdataset doesn't exist"
7489 * record? Or is it too old in the cache?
7491 * Note: unlike everywhere else, we
7492 * check for now > header->rdh_ttl instead
7493 * of now >= header->rdh_ttl. This allows
7494 * ANY and RRSIG queries for 0 TTL
7495 * rdatasets to work.
7497 if (NONEXISTENT(header) ||
7498 (now != 0 && now > header->rdh_ttl))
7502 header = header->down;
7503 } while (header != NULL);
7508 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7509 isc_rwlocktype_read);
7511 rbtiterator->current = header;
7514 return (ISC_R_NOMORE);
7516 return (ISC_R_SUCCESS);
/*
 * rdatasetiter_next: advance the iterator to the next usable rdataset
 * header of its node.
 *
 * Starts from rbtiterator->current and can return ISC_R_NOMORE before
 * taking any lock (presumably when there is no current header -- the
 * guarding condition is not visible in this listing).  'now' and 'serial'
 * are obtained as in rdatasetiter_first().
 *
 * With the node's lock bucket held for reading, the type of the current
 * header and a second, related type value ('negtype', built with
 * RBTDB_RDATATYPE_VALUE from the current type's base/ext parts) are
 * computed.  Subsequent top-level headers whose type equals either value
 * are skipped; for the others the 'down' chain is searched for a header
 * with serial <= 'serial', which is then tested for
 * RDATASET_ATTR_NONEXISTENT or, in a cache, expiry (now > rdh_ttl).
 *
 * The chosen header (or NULL) is stored in rbtiterator->current and the
 * function returns ISC_R_NOMORE or ISC_R_SUCCESS.
 */
7520 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7521 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7522 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7523 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7524 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7525 rdatasetheader_t *header, *top_next;
7526 rbtdb_serial_t serial;
7528 rbtdb_rdatatype_t type, negtype;
7529 dns_rdatatype_t rdtype, covers;
7531 header = rbtiterator->current;
7533 return (ISC_R_NOMORE);
7535 if (IS_CACHE(rbtdb)) {
7537 now = rbtiterator->common.now;
7539 serial = rbtversion->serial;
7543 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7544 isc_rwlocktype_read);
7546 type = header->type;
7547 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7549 covers = RBTDB_RDATATYPE_EXT(header->type);
7550 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7552 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7553 for (header = header->next; header != NULL; header = top_next) {
7554 top_next = header->next;
7556 * If not walking back up the down list.
7558 if (header->type != type && header->type != negtype) {
7560 if (header->serial <= serial &&
7563 * Is this a "this rdataset doesn't
7566 * Note: unlike everywhere else, we
7567 * check for now > header->ttl instead
7568 * of now >= header->ttl. This allows
7569 * ANY and RRSIG queries for 0 TTL
7570 * rdatasets to work.
7572 if ((header->attributes &
7573 RDATASET_ATTR_NONEXISTENT) != 0 ||
7574 (now != 0 && now > header->rdh_ttl))
7578 header = header->down;
7579 } while (header != NULL);
7585 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7586 isc_rwlocktype_read);
7588 rbtiterator->current = header;
7591 return (ISC_R_NOMORE);
7593 return (ISC_R_SUCCESS);
/*
 * rdatasetiter_current: bind the iterator's current header to 'rdataset'.
 *
 * Requires that the iterator has a current header.  bind_rdataset() is
 * called with the iterator's 'now' value while the node's lock bucket is
 * held for reading.
 */
7597 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7598 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7599 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7600 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7601 rdatasetheader_t *header;
7603 header = rbtiterator->current;
7604 REQUIRE(header != NULL);
7606 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7607 isc_rwlocktype_read);
7609 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7612 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7613 isc_rwlocktype_read);
7618 * Database Iterator Methods
/*
 * reference_iter_node: take a reference on the iterator's current node.
 *
 * The iterator must hold the tree lock in some mode (INSIST); the node is
 * passed to reactivate_node() together with that lock mode.
 *
 * NOTE(review): lines between the declarations and the INSIST are missing
 * from this listing (presumably an early return for a NULL node).
 */
7622 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7623 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7624 dns_rbtnode_t *node = rbtdbiter->node;
7629 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7630 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
/*
 * dereference_iter_node: drop the iterator's reference on its current node
 * and forget the node.
 *
 * decrement_reference() is called with the node's lock bucket held for
 * reading and with the iterator's current tree-lock mode; afterwards
 * rbtdbiter->node is set to NULL.
 *
 * NOTE(review): lines between the declarations and the lock lookup are
 * missing from this listing (presumably an early return for a NULL node).
 */
7634 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7635 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7636 dns_rbtnode_t *node = rbtdbiter->node;
7642 lock = &rbtdb->node_locks[node->locknum].lock;
7643 NODE_LOCK(lock, isc_rwlocktype_read);
7644 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7645 rbtdbiter->tree_locked, ISC_FALSE);
7646 NODE_UNLOCK(lock, isc_rwlocktype_read);
7648 rbtdbiter->node = NULL;
/*
 * flush_deletions: release the references held on the iterator's queued
 * deletions.
 *
 * Nothing is done unless rbtdbiter->delete is non-zero.  Otherwise a debug
 * message is logged, the tree lock is taken for writing (a read lock held
 * by the iterator is released first and remembered), and for every queued
 * node decrement_reference() is called under that node's lock bucket held
 * for reading.  The queue length is then reset to 0 and the write lock is
 * released.  If the iterator held a read lock on entry it is re-acquired;
 * otherwise the iterator's lock state is recorded as none.
 */
7652 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7653 dns_rbtnode_t *node;
7654 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7655 isc_boolean_t was_read_locked = ISC_FALSE;
7659 if (rbtdbiter->delete != 0) {
7661 * Note that "%d node of %d in tree" can report things like
7662 * "flush_deletions: 59 nodes of 41 in tree". This means
7663 * That some nodes appear on the deletions list more than
7664 * once. Only the last occurence will actually be deleted.
7666 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7667 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7668 "flush_deletions: %d nodes of %d in tree",
7670 dns_rbt_nodecount(rbtdb->tree));
/* The read lock is dropped before the write lock is taken below. */
7672 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7673 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7674 was_read_locked = ISC_TRUE;
7676 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7677 rbtdbiter->tree_locked = isc_rwlocktype_write;
7679 for (i = 0; i < rbtdbiter->delete; i++) {
7680 node = rbtdbiter->deletions[i];
7681 lock = &rbtdb->node_locks[node->locknum].lock;
7683 NODE_LOCK(lock, isc_rwlocktype_read);
7684 decrement_reference(rbtdb, node, 0,
7685 isc_rwlocktype_read,
7686 rbtdbiter->tree_locked, ISC_FALSE);
7687 NODE_UNLOCK(lock, isc_rwlocktype_read);
7690 rbtdbiter->delete = 0;
7692 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7693 if (was_read_locked) {
7694 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7695 rbtdbiter->tree_locked = isc_rwlocktype_read;
7698 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * resume_iteration: re-acquire the tree lock for a paused iterator.
 *
 * Requires that the iterator is paused and holds no tree lock.  The tree
 * lock is taken for reading, the iterator's lock state is updated to match
 * and the paused flag is cleared.
 */
7704 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7705 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7707 REQUIRE(rbtdbiter->paused);
7708 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7710 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7711 rbtdbiter->tree_locked = isc_rwlocktype_read;
7713 rbtdbiter->paused = ISC_FALSE;
/*
 * dbiterator_destroy: tear down a database iterator.
 *
 * A tree read lock held by the iterator is released first.  The reference
 * on the current node is dropped, queued deletions are flushed, both node
 * chains are reset and the iterator memory is returned to the database's
 * memory context.
 *
 * A temporary reference to the database is taken in 'db' before the
 * iterator's own reference is detached, so that db->mctx is still usable
 * for the final isc_mem_put().
 *
 * NOTE(review): the trailing lines of the function (after the isc_mem_put)
 * are not visible in this listing.
 */
7717 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7718 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7719 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7720 dns_db_t *db = NULL;
7722 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7723 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7724 rbtdbiter->tree_locked = isc_rwlocktype_none;
7726 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7728 dereference_iter_node(rbtdbiter);
7730 flush_deletions(rbtdbiter);
7732 dns_db_attach(rbtdbiter->common.db, &db);
7733 dns_db_detach(&rbtdbiter->common.db);
7735 dns_rbtnodechain_reset(&rbtdbiter->chain);
7736 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7737 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * dbiterator_first: move the iterator to the first node.
 *
 * If the iterator's stored result is neither ISC_R_SUCCESS nor
 * ISC_R_NOMORE, that result is returned unchanged.  A paused iterator is
 * resumed, the reference on the current node is dropped and both node
 * chains are reset.
 *
 * For an nsec3-only iterator the search starts in the nsec3 tree.
 * Otherwise it starts in the main tree and, unless 'nonsec3' is set,
 * switches to the nsec3 chain when the main tree yields ISC_R_NOTFOUND.
 *
 * On ISC_R_SUCCESS or DNS_R_NEWORIGIN the current node is fetched from the
 * active chain; if that succeeds 'new_origin' is set and a reference is
 * taken on the node.  Otherwise the result must be ISC_R_NOTFOUND (INSIST)
 * and is converted to ISC_R_NOMORE.  The final result is stored in the
 * iterator.
 */
7744 dbiterator_first(dns_dbiterator_t *iterator) {
7745 isc_result_t result;
7746 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7747 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7748 dns_name_t *name, *origin;
7750 if (rbtdbiter->result != ISC_R_SUCCESS &&
7751 rbtdbiter->result != ISC_R_NOMORE)
7752 return (rbtdbiter->result);
7754 if (rbtdbiter->paused)
7755 resume_iteration(rbtdbiter);
7757 dereference_iter_node(rbtdbiter);
7759 name = dns_fixedname_name(&rbtdbiter->name);
7760 origin = dns_fixedname_name(&rbtdbiter->origin);
7761 dns_rbtnodechain_reset(&rbtdbiter->chain);
7762 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7764 if (rbtdbiter->nsec3only) {
7765 rbtdbiter->current = &rbtdbiter->nsec3chain;
7766 result = dns_rbtnodechain_first(rbtdbiter->current,
7767 rbtdb->nsec3, name, origin);
7769 rbtdbiter->current = &rbtdbiter->chain;
7770 result = dns_rbtnodechain_first(rbtdbiter->current,
7771 rbtdb->tree, name, origin);
7772 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7773 rbtdbiter->current = &rbtdbiter->nsec3chain;
7774 result = dns_rbtnodechain_first(rbtdbiter->current,
7779 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7780 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7781 NULL, &rbtdbiter->node);
7782 if (result == ISC_R_SUCCESS) {
7783 rbtdbiter->new_origin = ISC_TRUE;
7784 reference_iter_node(rbtdbiter);
7787 INSIST(result == ISC_R_NOTFOUND);
7788 result = ISC_R_NOMORE; /* The tree is empty. */
7791 rbtdbiter->result = result;
/*
 * dbiterator_last: move the iterator to the last node.
 *
 * If the iterator's stored result is neither ISC_R_SUCCESS nor
 * ISC_R_NOMORE, that result is returned unchanged.  A paused iterator is
 * resumed, the reference on the current node is dropped and both node
 * chains are reset.
 *
 * 'result' starts as ISC_R_NOTFOUND.  The nsec3 tree is searched when
 * 'nsec3only' is set and 'nonsec3' is clear; the main tree is searched
 * when 'nsec3only' is clear and the result is still ISC_R_NOTFOUND.
 *
 * On ISC_R_SUCCESS or DNS_R_NEWORIGIN the current node is fetched from the
 * active chain; if that succeeds 'new_origin' is set and a reference is
 * taken on the node.  Otherwise the result must be ISC_R_NOTFOUND (INSIST)
 * and is converted to ISC_R_NOMORE.  The final result is stored in the
 * iterator.
 *
 * NOTE(review): as written on the visible lines, an iterator with neither
 * 'nsec3only' nor 'nonsec3' set never consults the nsec3 tree here, unlike
 * dbiterator_first() -- confirm this asymmetry is intended.
 */
7797 dbiterator_last(dns_dbiterator_t *iterator) {
7798 isc_result_t result;
7799 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7800 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7801 dns_name_t *name, *origin;
7803 if (rbtdbiter->result != ISC_R_SUCCESS &&
7804 rbtdbiter->result != ISC_R_NOMORE)
7805 return (rbtdbiter->result);
7807 if (rbtdbiter->paused)
7808 resume_iteration(rbtdbiter);
7810 dereference_iter_node(rbtdbiter);
7812 name = dns_fixedname_name(&rbtdbiter->name);
7813 origin = dns_fixedname_name(&rbtdbiter->origin);
7814 dns_rbtnodechain_reset(&rbtdbiter->chain);
7815 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7817 result = ISC_R_NOTFOUND;
7818 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7819 rbtdbiter->current = &rbtdbiter->nsec3chain;
7820 result = dns_rbtnodechain_last(rbtdbiter->current,
7821 rbtdb->nsec3, name, origin);
7823 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7824 rbtdbiter->current = &rbtdbiter->chain;
7825 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7828 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7829 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7830 NULL, &rbtdbiter->node);
7831 if (result == ISC_R_SUCCESS) {
7832 rbtdbiter->new_origin = ISC_TRUE;
7833 reference_iter_node(rbtdbiter);
7836 INSIST(result == ISC_R_NOTFOUND);
7837 result = ISC_R_NOMORE; /* The tree is empty. */
7840 rbtdbiter->result = result;
/*
 * Position the iterator at 'name'.
 *
 * A stored result other than ISC_R_SUCCESS, ISC_R_NOTFOUND or
 * ISC_R_NOMORE is returned unchanged.  Otherwise a paused iteration is
 * resumed, dereference_iter_node() is called and both chains are reset.
 *
 * The lookup uses dns_rbt_findnode() with DNS_RBTFIND_EMPTYDATA:
 *  - 'nsec3only': NSEC3 tree, cursor on 'nsec3chain';
 *  - 'nonsec3':   main tree, cursor on 'chain';
 *  - neither:     main tree first; on DNS_R_PARTIALMATCH the NSEC3 tree
 *                 is searched and, on success there, the iterator node
 *                 is set and the cursor switches to 'nsec3chain'.
 *
 * NOTE(review): two different result-handling sequences follow the
 * lookup in this listing (one maps DNS_R_PARTIALMATCH to
 * ISC_R_NOTFOUND, the other stores it as ISC_R_SUCCESS).  They look
 * like alternatives selected by conditional compilation that is not
 * visible here -- confirm which one is active.
 */
7846 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7847 isc_result_t result;
7848 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7849 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7850 dns_name_t *iname, *origin;
/* Unlike first/last, a previous ISC_R_NOTFOUND does not block a new seek. */
7852 if (rbtdbiter->result != ISC_R_SUCCESS &&
7853 rbtdbiter->result != ISC_R_NOTFOUND &&
7854 rbtdbiter->result != ISC_R_NOMORE)
7855 return (rbtdbiter->result);
7857 if (rbtdbiter->paused)
7858 resume_iteration(rbtdbiter);
7860 dereference_iter_node(rbtdbiter);
/* 'iname' and 'origin' are the iterator's own name buffers. */
7862 iname = dns_fixedname_name(&rbtdbiter->name);
7863 origin = dns_fixedname_name(&rbtdbiter->origin);
7864 dns_rbtnodechain_reset(&rbtdbiter->chain);
7865 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7867 if (rbtdbiter->nsec3only) {
7868 rbtdbiter->current = &rbtdbiter->nsec3chain;
7869 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7872 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7873 } else if (rbtdbiter->nonsec3) {
7874 rbtdbiter->current = &rbtdbiter->chain;
7875 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7878 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7881 * Stay on main chain if not found on either chain.
7883 rbtdbiter->current = &rbtdbiter->chain;
7884 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7887 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7888 if (result == DNS_R_PARTIALMATCH) {
7889 dns_rbtnode_t *node = NULL;
7890 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7891 &node, &rbtdbiter->nsec3chain,
7892 DNS_RBTFIND_EMPTYDATA,
7894 if (result == ISC_R_SUCCESS) {
7895 rbtdbiter->node = node;
7896 rbtdbiter->current = &rbtdbiter->nsec3chain;
/* First visible result-handling sequence: partial match => not found. */
7902 if (result == ISC_R_SUCCESS) {
7903 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7905 if (result == ISC_R_SUCCESS) {
7906 rbtdbiter->new_origin = ISC_TRUE;
7907 reference_iter_node(rbtdbiter);
7909 } else if (result == DNS_R_PARTIALMATCH) {
7910 result = ISC_R_NOTFOUND;
7911 rbtdbiter->node = NULL;
7914 rbtdbiter->result = result;
/* Second visible result-handling sequence: partial match => success. */
7916 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7917 isc_result_t tresult;
7918 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7920 if (tresult == ISC_R_SUCCESS) {
7921 rbtdbiter->new_origin = ISC_TRUE;
7922 reference_iter_node(rbtdbiter);
7925 rbtdbiter->node = NULL;
7928 rbtdbiter->node = NULL;
7930 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7931 ISC_R_SUCCESS : result;
/*
 * Step the iterator back to the previous node.
 *
 * Requires a non-NULL current node.  A stored result other than
 * ISC_R_SUCCESS is returned unchanged; a paused iteration is resumed.
 *
 * dns_rbtnodechain_prev() moves the active chain.  When it reports
 * ISC_R_NOMORE while the iterator covers both trees (neither
 * 'nsec3only' nor 'nonsec3' set) and the active chain is the NSEC3
 * chain, iteration continues at the last node of the main tree; an
 * ISC_R_NOTFOUND from that lookup is reported as ISC_R_NOMORE.
 *
 * dereference_iter_node() is then called.  On DNS_R_NEWORIGIN or
 * ISC_R_SUCCESS 'new_origin' records whether the origin changed and the
 * node under the cursor is fetched into rbtdbiter->node;
 * reference_iter_node() is called when the result is ISC_R_SUCCESS.
 * The outcome is saved in rbtdbiter->result.
 */
7938 dbiterator_prev(dns_dbiterator_t *iterator) {
7939 isc_result_t result;
7940 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7941 dns_name_t *name, *origin;
7942 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7944 REQUIRE(rbtdbiter->node != NULL);
/* Stepping is only possible from a successfully positioned iterator. */
7946 if (rbtdbiter->result != ISC_R_SUCCESS)
7947 return (rbtdbiter->result);
7949 if (rbtdbiter->paused)
7950 resume_iteration(rbtdbiter);
7952 name = dns_fixedname_name(&rbtdbiter->name);
7953 origin = dns_fixedname_name(&rbtdbiter->origin);
7954 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
7955 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7956 !rbtdbiter->nonsec3 &&
7957 &rbtdbiter->nsec3chain == rbtdbiter->current) {
7958 rbtdbiter->current = &rbtdbiter->chain;
7959 dns_rbtnodechain_reset(rbtdbiter->current);
7960 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7962 if (result == ISC_R_NOTFOUND)
7963 result = ISC_R_NOMORE;
7966 dereference_iter_node(rbtdbiter);
/* DNS_R_NEWORIGIN is remembered in 'new_origin' and then treated as success. */
7968 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7969 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7970 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7971 NULL, &rbtdbiter->node);
7974 if (result == ISC_R_SUCCESS)
7975 reference_iter_node(rbtdbiter);
7977 rbtdbiter->result = result;
/*
 * Advance the iterator to the next node.
 *
 * Requires a non-NULL current node.  A stored result other than
 * ISC_R_SUCCESS is returned unchanged; a paused iteration is resumed.
 *
 * dns_rbtnodechain_next() moves the active chain.  When it reports
 * ISC_R_NOMORE while the iterator covers both trees (neither
 * 'nsec3only' nor 'nonsec3' set) and the active chain is the main
 * chain, iteration continues at the first node of the NSEC3 tree; an
 * ISC_R_NOTFOUND from that lookup is reported as ISC_R_NOMORE.
 *
 * dereference_iter_node() is then called.  On DNS_R_NEWORIGIN or
 * ISC_R_SUCCESS 'new_origin' records whether the origin changed and the
 * node under the cursor is fetched into rbtdbiter->node;
 * reference_iter_node() is called when the result is ISC_R_SUCCESS.
 * The outcome is saved in rbtdbiter->result.
 */
7983 dbiterator_next(dns_dbiterator_t *iterator) {
7984 isc_result_t result;
7985 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7986 dns_name_t *name, *origin;
7987 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7989 REQUIRE(rbtdbiter->node != NULL);
/* Stepping is only possible from a successfully positioned iterator. */
7991 if (rbtdbiter->result != ISC_R_SUCCESS)
7992 return (rbtdbiter->result);
7994 if (rbtdbiter->paused)
7995 resume_iteration(rbtdbiter);
7997 name = dns_fixedname_name(&rbtdbiter->name);
7998 origin = dns_fixedname_name(&rbtdbiter->origin);
7999 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8000 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8001 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8002 rbtdbiter->current = &rbtdbiter->nsec3chain;
8003 dns_rbtnodechain_reset(rbtdbiter->current);
8004 result = dns_rbtnodechain_first(rbtdbiter->current,
8005 rbtdb->nsec3, name, origin);
8006 if (result == ISC_R_NOTFOUND)
8007 result = ISC_R_NOMORE;
8010 dereference_iter_node(rbtdbiter);
/* DNS_R_NEWORIGIN is remembered in 'new_origin' and then treated as success. */
8012 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8013 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8014 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8015 NULL, &rbtdbiter->node);
8017 if (result == ISC_R_SUCCESS)
8018 reference_iter_node(rbtdbiter);
8020 rbtdbiter->result = result;
/*
 * Return the node the iterator is positioned on, together with its name.
 *
 * Requires a stored result of ISC_R_SUCCESS and a non-NULL current
 * node.  A paused iteration is resumed first.
 *
 * The node name and origin kept in the iterator are combined into
 * 'name' with dns_name_concatenate(); when relative names are in use
 * and 'new_origin' is set the result becomes DNS_R_NEWORIGIN.
 * NOTE(review): the statements guarded by several of the conditions in
 * this part are not visible in this listing -- confirm against the
 * full source.
 *
 * A new reference is taken on the node under its node lock before the
 * node is handed back through '*nodep'.
 *
 * In cleaning mode (iterator->cleaning) with an ISC_R_SUCCESS result the
 * node is passed to expirenode(); if that succeeds and the node has no
 * 'down' pointer it is appended to the iterator's deletion batch (which
 * is flushed first when it already holds DELETION_BATCH_MAX entries)
 * and its reference count is incremented under the node lock.
 */
8026 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8029 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8030 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8031 dns_rbtnode_t *node = rbtdbiter->node;
8032 isc_result_t result;
8033 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8034 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8036 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8037 REQUIRE(rbtdbiter->node != NULL);
8039 if (rbtdbiter->paused)
8040 resume_iteration(rbtdbiter);
8043 if (rbtdbiter->common.relative_names)
8045 result = dns_name_concatenate(nodename, origin, name, NULL);
8046 if (result != ISC_R_SUCCESS)
8048 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8049 result = DNS_R_NEWORIGIN;
8051 result = ISC_R_SUCCESS;
/* The caller receives its own reference, taken under the node lock. */
8053 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8054 new_reference(rbtdb, node);
8055 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8057 *nodep = rbtdbiter->node;
8059 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8060 isc_result_t expire_result;
8063 * If the deletion array is full, flush it before trying
8064 * to expire the current node. The current node can't
8065 * fully deleted while the iteration cursor is still on it.
8067 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8068 flush_deletions(rbtdbiter);
8070 expire_result = expirenode(iterator->db, *nodep, 0);
8073 * expirenode() currently always returns success.
8075 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8078 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8079 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8080 dns_rbtnode_refincrement(node, &refs);
8082 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the iteration.
 *
 * A stored result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Pausing an already paused iterator is a no-op that
 * returns ISC_R_SUCCESS.
 *
 * Otherwise the iterator is marked paused, the tree lock is released if
 * the iterator holds it (it must be held for reading), and
 * flush_deletions() is called before returning ISC_R_SUCCESS.
 */
8090 dbiterator_pause(dns_dbiterator_t *iterator) {
8091 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8092 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8094 if (rbtdbiter->result != ISC_R_SUCCESS &&
8095 rbtdbiter->result != ISC_R_NOMORE)
8096 return (rbtdbiter->result);
8098 if (rbtdbiter->paused)
8099 return (ISC_R_SUCCESS);
8101 rbtdbiter->paused = ISC_TRUE;
/* Only a read lock is expected here; anything else trips the INSIST. */
8103 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8104 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8105 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8106 rbtdbiter->tree_locked = isc_rwlocktype_none;
8109 flush_deletions(rbtdbiter);
8111 return (ISC_R_SUCCESS);
/*
 * Copy the iterator's current origin into 'name'.
 *
 * Returns the stored result unchanged when it is not ISC_R_SUCCESS;
 * otherwise returns whatever dns_name_copy() reports.
 */
8115 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8116 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8117 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8119 if (rbtdbiter->result != ISC_R_SUCCESS)
8120 return (rbtdbiter->result);
8122 return (dns_name_copy(origin, name, NULL));
8126 * Additional cache routines.
/*
 * Look up the additional-section cache entry attached to the current
 * rdata of 'rdataset'.
 *
 * The rdataset header is located immediately before the raw slab
 * ('raw - sizeof(*header)'); the slab's first two bytes hold the total
 * rdata count (high byte first).  The slot index is
 * total_count - current_count - 1.
 *
 * With the node lock held for reading, the array matching 'type' is
 * picked from the header.  ISC_R_NOTFOUND is returned when there is no
 * array (a query miss is counted unless 'type' is
 * dns_rdatasetadditional_fromcache) or when the slot is empty (a query
 * miss is counted).  Otherwise the entry is attached, the lock is
 * released, dns_acache_getentry() is called with the caller's output
 * arguments, and the entry is detached again.
 *
 * NOTE(review): the handling of dns_rdatasetadditional_fromcache inside
 * the switch and the final return are not visible in this listing.
 */
8129 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8130 dns_rdatatype_t qtype, dns_acache_t *acache,
8131 dns_zone_t **zonep, dns_db_t **dbp,
8132 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8133 dns_name_t *fname, dns_message_t *msg,
8136 dns_rbtdb_t *rbtdb = rdataset->private1;
8137 dns_rbtnode_t *rbtnode = rdataset->private2;
8138 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8139 unsigned int current_count = rdataset->privateuint4;
8141 rdatasetheader_t *header;
8142 nodelock_t *nodelock;
8143 unsigned int total_count;
8144 acachectl_t *acarray;
8145 dns_acacheentry_t *entry;
8146 isc_result_t result;
8148 UNUSED(qtype); /* we do not use this value at least for now */
8151 header = (struct rdatasetheader *)(raw - sizeof(*header));
8153 total_count = raw[0] * 256 + raw[1];
8154 INSIST(total_count > current_count);
8155 count = total_count - current_count - 1;
8159 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8160 NODE_LOCK(nodelock, isc_rwlocktype_read);
8163 case dns_rdatasetadditional_fromauth:
8164 acarray = header->additional_auth;
8166 case dns_rdatasetadditional_fromcache:
8169 case dns_rdatasetadditional_fromglue:
8170 acarray = header->additional_glue;
/* No array at all: nothing was ever cached for this header and type. */
8176 if (acarray == NULL) {
8177 if (type != dns_rdatasetadditional_fromcache)
8178 dns_acache_countquerymiss(acache);
8179 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8180 return (ISC_R_NOTFOUND);
/* The array exists but this rdata's slot is empty. */
8183 if (acarray[count].entry == NULL) {
8184 dns_acache_countquerymiss(acache);
8185 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8186 return (ISC_R_NOTFOUND);
/* Attach before unlocking so the entry can be used without the node lock. */
8190 dns_acache_attachentry(acarray[count].entry, &entry);
8192 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8194 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8195 nodep, fname, msg, now);
8197 dns_acache_detachentry(&entry);
/*
 * Callback registered with the additional cache for an entry created by
 * rdataset_setadditional().
 *
 * Using the database, node, header, type and slot index stored in the
 * callback argument, the matching array is looked up under the node
 * write lock.  If the slot still refers to 'entry', the slot is cleared
 * (entry and cbarg set to NULL), the callback argument is freed and the
 * entry is detached.  The node lock is then released and the node and
 * database references taken from the callback argument are detached.
 *
 * NOTE(review): two isc_mem_put() calls on 'cbarg' are visible; the
 * branch keyword between them is not, so the second one is presumably
 * the path taken when the slot does not match -- confirm.
 */
8203 acache_callback(dns_acacheentry_t *entry, void **arg) {
8205 dns_rbtnode_t *rbtnode;
8206 nodelock_t *nodelock;
8207 acachectl_t *acarray = NULL;
8208 acache_cbarg_t *cbarg;
8211 REQUIRE(arg != NULL);
8215 * The caller must hold the entry lock.
8218 rbtdb = (dns_rbtdb_t *)cbarg->db;
8219 rbtnode = (dns_rbtnode_t *)cbarg->node;
8221 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8222 NODE_LOCK(nodelock, isc_rwlocktype_write);
8224 switch (cbarg->type) {
8225 case dns_rdatasetadditional_fromauth:
8226 acarray = cbarg->header->additional_auth;
8228 case dns_rdatasetadditional_fromglue:
8229 acarray = cbarg->header->additional_glue;
8235 count = cbarg->count;
8236 if (acarray != NULL && acarray[count].entry == entry) {
8237 acarray[count].entry = NULL;
8238 INSIST(acarray[count].cbarg == cbarg);
8239 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8240 acarray[count].cbarg = NULL;
8242 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8244 dns_acache_detachentry(&entry);
8246 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* Release the node and database references the callback argument held. */
8248 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8249 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an additional-cache entry and dispose of its callback argument.
 *
 * Requires non-NULL 'mctx', 'entry', 'cbargp' and '*cbargp'.  The entry
 * is cancelled with dns_acache_cancelentry(), the node and database
 * references stored in the callback argument are detached, and the
 * callback argument is returned to 'mctx'.
 *
 * NOTE(review): the statements that load 'cbarg' from '*cbargp' (and
 * any that clear '*cbargp') are not visible in this listing.
 */
8255 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8256 acache_cbarg_t **cbargp)
8258 acache_cbarg_t *cbarg;
8260 REQUIRE(mctx != NULL);
8261 REQUIRE(entry != NULL);
8262 REQUIRE(cbargp != NULL && *cbargp != NULL);
8266 dns_acache_cancelentry(entry);
8267 dns_db_detachnode(cbarg->db, &cbarg->node);
8268 dns_db_detach(&cbarg->db);
8270 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Store an additional-section cache entry for the current rdata of
 * 'rdataset'.
 *
 * Nothing is done (ISC_R_SUCCESS) for dns_rdatasetadditional_fromcache.
 *
 * Steps visible here:
 *  1. Locate the rdataset header just before the raw slab and compute
 *     the slot index total_count - current_count - 1.
 *  2. Allocate a callback argument recording type, slot, header, and
 *     attached references to the database and node
 *     (ISC_R_NOMEMORY if the allocation fails).
 *  3. Create the acache entry with acache_callback() as its callback
 *     and fill it with dns_acache_setentry().
 *  4. Under the node write lock, fetch the array for 'type', creating
 *     and zero-filling one with 'total_count' slots if the header has
 *     none, and install the new entry/cbarg in the slot.  A previous
 *     occupant is remembered and cancelled/detached only after the
 *     lock has been released.
 *
 * The lines after 'return (ISC_R_SUCCESS)' are the failure cleanup: the
 * new entry is cancelled and detached if it exists, and the callback
 * argument's node/database references and memory are released.
 *
 * NOTE(review): no assignment to 'result' is visible on the path where
 * allocating the array fails -- confirm that this path does not end up
 * reporting the ISC_R_SUCCESS left over from dns_acache_setentry().
 */
8276 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8277 dns_rdatatype_t qtype, dns_acache_t *acache,
8278 dns_zone_t *zone, dns_db_t *db,
8279 dns_dbversion_t *version, dns_dbnode_t *node,
8282 dns_rbtdb_t *rbtdb = rdataset->private1;
8283 dns_rbtnode_t *rbtnode = rdataset->private2;
8284 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8285 unsigned int current_count = rdataset->privateuint4;
8286 rdatasetheader_t *header;
8287 unsigned int total_count, count;
8288 nodelock_t *nodelock;
8289 isc_result_t result;
8290 acachectl_t *acarray;
8291 dns_acacheentry_t *newentry, *oldentry = NULL;
8292 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8296 if (type == dns_rdatasetadditional_fromcache)
8297 return (ISC_R_SUCCESS);
8299 header = (struct rdatasetheader *)(raw - sizeof(*header));
8301 total_count = raw[0] * 256 + raw[1];
8302 INSIST(total_count > current_count);
8303 count = total_count - current_count - 1; /* should be private data */
8305 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8306 if (newcbarg == NULL)
8307 return (ISC_R_NOMEMORY);
8308 newcbarg->type = type;
8309 newcbarg->count = count;
8310 newcbarg->header = header;
8311 newcbarg->db = NULL;
8312 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8313 newcbarg->node = NULL;
8314 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8317 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8318 acache_callback, newcbarg, &newentry);
8319 if (result != ISC_R_SUCCESS)
8321 /* Set cache data in the new entry. */
8322 result = dns_acache_setentry(acache, newentry, zone, db,
8323 version, node, fname);
8324 if (result != ISC_R_SUCCESS)
8327 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8328 NODE_LOCK(nodelock, isc_rwlocktype_write);
8332 case dns_rdatasetadditional_fromauth:
8333 acarray = header->additional_auth;
8335 case dns_rdatasetadditional_fromglue:
8336 acarray = header->additional_glue;
8342 if (acarray == NULL) {
8345 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8346 sizeof(acachectl_t));
8348 if (acarray == NULL) {
8349 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8353 for (i = 0; i < total_count; i++) {
8354 acarray[i].entry = NULL;
8355 acarray[i].cbarg = NULL;
8359 case dns_rdatasetadditional_fromauth:
8360 header->additional_auth = acarray;
8362 case dns_rdatasetadditional_fromglue:
8363 header->additional_glue = acarray;
8369 if (acarray[count].entry != NULL) {
8371 * Swap the entry. Delay cleaning-up the old entry since
8372 * it would require a node lock.
8374 oldentry = acarray[count].entry;
8375 INSIST(acarray[count].cbarg != NULL);
8376 oldcbarg = acarray[count].cbarg;
8378 acarray[count].entry = newentry;
8379 acarray[count].cbarg = newcbarg;
8381 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8383 if (oldentry != NULL) {
8384 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8385 dns_acache_detachentry(&oldentry);
8388 return (ISC_R_SUCCESS);
8391 if (newcbarg != NULL) {
8392 if (newentry != NULL) {
8393 acache_cancelentry(rbtdb->common.mctx, newentry,
8395 dns_acache_detachentry(&newentry);
8397 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8398 dns_db_detach(&newcbarg->db);
8399 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Remove the additional-section cache entry attached to the current
 * rdata of 'rdataset'.
 *
 * Nothing is done (ISC_R_SUCCESS) for dns_rdatasetadditional_fromcache.
 *
 * The rdataset header is located just before the raw slab and the slot
 * index is total_count - current_count - 1.  Under the node write lock
 * the array for 'type' is fetched; ISC_R_NOTFOUND is returned when
 * there is no array or the slot is empty.  Otherwise the slot's entry
 * and callback argument are taken out (both slot fields set to NULL),
 * the lock is released, and the entry is cancelled through
 * acache_cancelentry() and detached.  Returns ISC_R_SUCCESS.
 */
8408 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8409 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8411 dns_rbtdb_t *rbtdb = rdataset->private1;
8412 dns_rbtnode_t *rbtnode = rdataset->private2;
8413 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8414 unsigned int current_count = rdataset->privateuint4;
8415 rdatasetheader_t *header;
8416 nodelock_t *nodelock;
8417 unsigned int total_count, count;
8418 acachectl_t *acarray;
8419 dns_acacheentry_t *entry;
8420 acache_cbarg_t *cbarg;
8422 UNUSED(qtype); /* we do not use this value at least for now */
8425 if (type == dns_rdatasetadditional_fromcache)
8426 return (ISC_R_SUCCESS);
8428 header = (struct rdatasetheader *)(raw - sizeof(*header));
8430 total_count = raw[0] * 256 + raw[1];
8431 INSIST(total_count > current_count);
8432 count = total_count - current_count - 1;
8437 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8438 NODE_LOCK(nodelock, isc_rwlocktype_write);
8441 case dns_rdatasetadditional_fromauth:
8442 acarray = header->additional_auth;
8444 case dns_rdatasetadditional_fromglue:
8445 acarray = header->additional_glue;
8451 if (acarray == NULL) {
8452 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8453 return (ISC_R_NOTFOUND);
8456 entry = acarray[count].entry;
8457 if (entry == NULL) {
8458 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8459 return (ISC_R_NOTFOUND);
/* Empty the slot while locked; the entry itself is cancelled after unlocking. */
8462 acarray[count].entry = NULL;
8463 cbarg = acarray[count].cbarg;
8464 acarray[count].cbarg = NULL;
8466 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8468 if (entry != NULL) {
8470 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8471 dns_acache_detachentry(&entry);
8474 return (ISC_R_SUCCESS);
8478 * Routines for LRU-based cache management.
8482 * See if a given cache entry that is being reused needs to be updated
8483 * in the LRU-list. From the LRU management point of view, this function is
8484 * expected to return true for almost all cases. When used with threads,
8485 * however, this may cause a non-negligible performance penalty because a
8486 * writer lock will have to be acquired before updating the list.
8487 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8488 * function returns true if the entry has not been updated for some period of
8489 * time. We differentiate the NS or glue address case and the others since
8490 * experiments have shown that the former tends to be accessed relatively
8491 * infrequently and the cost of a cache miss is higher (e.g., a missing NS record
8492 * may cause external queries at a higher level zone, involving more
8495 * Caller must hold the node (read or write) lock.
/*
 * Decide whether 'header' should have its LRU position refreshed at
 * time 'now'.
 *
 * Headers carrying RDATASET_ATTR_NONEXISTENT or RDATASET_ATTR_STALE are
 * tested first.  When DNS_RBTDB_LIMITLRUUPDATE is non-zero, NS headers
 * and glue-trust A/AAAA headers qualify once 60 seconds have passed
 * since 'last_used', and all other headers once 300 seconds have
 * passed.
 *
 * NOTE(review): the statement guarded by the attribute test and the
 * code used when DNS_RBTDB_LIMITLRUUPDATE is zero are not visible in
 * this listing.
 */
8497 static inline isc_boolean_t
8498 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8499 if ((header->attributes &
8500 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8503 #if DNS_RBTDB_LIMITLRUUPDATE
8504 if (header->type == dns_rdatatype_ns ||
8505 (header->trust == dns_trust_glue &&
8506 (header->type == dns_rdatatype_a ||
8507 header->type == dns_rdatatype_aaaa))) {
8509 * Glue records are updated if at least 60 seconds have passed
8510 * since the previous update time.
8512 return (header->last_used + 60 <= now);
8515 /* Other records are updated if 5 minutes have passed. */
8516 return (header->last_used + 300 <= now);
8525 * Update the timestamp of a given cache entry and move it to the head
8526 * of the corresponding LRU list.
8528 * Caller must hold the node (write) lock.
8530 * Note that we do NOT touch the heap here, as the TTL has not changed.
/*
 * Refresh the LRU position of 'header'.
 *
 * Insists that the database is a cache and that the header is currently
 * linked.  The header is unlinked from the rdataset list of its node's
 * lock bucket, stamped with 'now' as its last-used time, and put back
 * at the head of the same list.
 */
8533 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8536 INSIST(IS_CACHE(rbtdb));
8538 /* To be checked: can we really assume this? XXXMLG */
8539 INSIST(ISC_LINK_LINKED(header, link));
8541 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8542 header->last_used = now;
8543 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8547 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8548 * under an overmem condition. To recover from this condition quickly, up to
8549 * 2 entries will be purged. This process is triggered while adding a new
8550 * entry, and we specifically avoid purging entries in the same LRU bucket as
8551 * the one to which the new entry will belong. Otherwise, we might purge
8552 * entries of the same name of different RR types while adding RRsets from a
8553 * single response (consider the case where we're adding A and AAAA glue records
8554 * of the same NS name).
/*
 * Expire a few cache entries from lock buckets other than
 * 'locknum_start'.
 *
 * Buckets are visited starting at locknum_start + 1, wrapping around
 * modulo node_lock_count, until the walk returns to 'locknum_start' or
 * 'purgecount' reaches zero; the bucket 'locknum_start' itself is never
 * visited.  Each visited bucket is handled under its node write lock:
 *  - the first element of the bucket's heap is expired if its TTL is
 *    at or before now - RBTDB_VIRTUAL;
 *  - the bucket's rdataset list is then walked from the tail while
 *    'purgecount' is positive, unlinking and expiring each header.
 *
 * NOTE(review): the statements that decrement 'purgecount' are not
 * visible in this listing.
 */
8557 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8558 isc_stdtime_t now, isc_boolean_t tree_locked)
8560 rdatasetheader_t *header, *header_prev;
8561 unsigned int locknum;
8564 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8565 locknum != locknum_start && purgecount > 0;
8566 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8567 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8568 isc_rwlocktype_write);
8570 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8571 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8572 expire_header(rbtdb, header, tree_locked);
8576 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8577 header != NULL && purgecount > 0;
8578 header = header_prev) {
8579 header_prev = ISC_LIST_PREV(header, link);
8581 * Unlink the entry at this point to avoid checking it
8582 * again even if it's currently used someone else and
8583 * cannot be purged at this moment. This entry won't be
8584 * referenced any more (so unlinking is safe) since the
8585 * TTL was reset to 0.
8587 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8589 expire_header(rbtdb, header, tree_locked);
8593 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8594 isc_rwlocktype_write);
/*
 * Force 'header' to expire.
 *
 * The header's TTL is set to 0 through set_ttl(), the header is flagged
 * RDATASET_ATTR_STALE and its node is marked dirty.  If the node's
 * current reference count is zero, a reference is taken and immediately
 * dropped again through decrement_reference(), passing a write node
 * lock and, depending on 'tree_locked', either a write tree lock or no
 * tree lock.
 */
8599 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8600 isc_boolean_t tree_locked)
8602 set_ttl(rbtdb, header, 0);
8603 header->attributes |= RDATASET_ATTR_STALE;
8604 header->node->dirty = 1;
8607 * Caller must hold the node (write) lock.
8610 if (dns_rbtnode_refcurrent(header->node) == 0) {
8612 * If no one else is using the node, we can clean it up now.
8613 * We first need to gain a new reference to the node to meet a
8614 * requirement of decrement_reference().
8616 new_reference(rbtdb, header->node);
8617 decrement_reference(rbtdb, header->node, 0,
8618 isc_rwlocktype_write,
8619 tree_locked ? isc_rwlocktype_write :
8620 isc_rwlocktype_none, ISC_FALSE);