2 * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.270.12.16.10.6 2010/11/16 07:46:23 marka Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
65 #include <dns/zonekey.h>
67 #ifdef DNS_RBTDB_VERSION64
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
80 * Note that "impmagic" is not the first four bytes of the struct, so
81 * ISC_MAGIC_VALID cannot be used.
83 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
84 (rbtdb)->common.impmagic == RBTDB_MAGIC)
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t rbtdb_serial_t;
89 * Make casting easier in symbolic debuggers by using different names
90 * for the 64 bit version.
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
96 typedef isc_uint32_t rbtdb_serial_t;
99 typedef isc_uint32_t rbtdb_rdatatype_t;
101 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
102 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
/*
 * Combine a base type b (low 16 bits) with an extension type e (high 16
 * bits) into a single rbtdb_rdatatype_t; this is the inverse of
 * RBTDB_RDATATYPE_BASE() and RBTDB_RDATATYPE_EXT().
 *
 * Both operands are converted to rbtdb_rdatatype_t before any arithmetic.
 * A 16-bit operand is promoted to signed int, and shifting a value of
 * 0x8000 or more left by 16 in signed int is undefined behaviour, so the
 * cast has to come before the shift rather than after it.
 * The base is masked so that it can never spill into the extension half.
 */
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((((rbtdb_rdatatype_t)(e)) << 16) | (((rbtdb_rdatatype_t)(b)) & 0xFFFF))
105 #define RBTDB_RDATATYPE_SIGNSEC \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
119 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120 * Using rwlock is effective with regard to lookup performance only when
121 * it is implemented in an efficient way.
122 * Otherwise, it is generally wise to stick to the simple locking since rwlock
123 * would require more memory or can even make lookups slower due to its own
124 * overhead (when it internally calls mutex locks).
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
129 #define DNS_RBTDB_USERWLOCK 0
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
138 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t) LOCK(l)
141 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
145 * Since node locking is sensitive to both performance and memory footprint,
146 * we need some trick here. If we have both high-performance rwlock and
147 * high performance and small-memory reference counters, we use rwlock for
148 * node lock and isc_refcount for node references. In this case, we don't have
149 * to protect the access to the counters by locks.
150 * Otherwise, we simply use ordinary mutex lock for node locking, and use
151 * simple integers as reference counters which is protected by the lock.
152 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153 * NODE_UNLOCK. In some other cases, however, we need to protect reference
154 * counters first and then protect other parts of a node as read-only data.
155 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156 * provided for these special cases. When we can use the efficient backend
157 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159 * section including the access to the reference counter.
160 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161 * section is also protected by NODE_STRONGLOCK().
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
166 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t) RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
172 #define NODE_STRONGLOCK(l) ((void)0)
173 #define NODE_STRONGUNLOCK(l) ((void)0)
174 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
178 typedef isc_mutex_t nodelock_t;
180 #define NODE_INITLOCK(l) isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
182 #define NODE_LOCK(l, t) LOCK(l)
183 #define NODE_UNLOCK(l, t) UNLOCK(l)
184 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
186 #define NODE_STRONGLOCK(l) LOCK(l)
187 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t) ((void)0)
189 #define NODE_WEAKUNLOCK(l, t) ((void)0)
190 #define NODE_WEAKDOWNGRADE(l) ((void)0)
194 * Whether to rate-limit updating the LRU to avoid possible thread contention.
195 * Our performance measurement has shown the cost is marginal, so it's defined
196 * to be 0 by default either with or without threads.
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 * Allow clients with a virtual time of up to 5 minutes in the past to see
204 * records that would otherwise have expired.
206 #define RBTDB_VIRTUAL 300
212 dns_rdatatype_t type;
215 typedef struct acachectl acachectl_t;
217 typedef struct rdatasetheader {
219 * Locked by the owning node's lock.
221 rbtdb_serial_t serial;
223 rbtdb_rdatatype_t type;
224 isc_uint16_t attributes;
226 struct noqname *noqname;
227 struct noqname *closest;
229 * We don't use the LIST macros, because the LIST structure has
230 * both head and tail pointers, and is doubly linked.
233 struct rdatasetheader *next;
235 * If this is the top header for an rdataset, 'next' points
236 * to the top header for the next rdataset (i.e., the next type).
237 * Otherwise, it points up to the header whose down pointer points
241 struct rdatasetheader *down;
243 * Points to the header for the next older version of
249 * Monotonically increased every time this rdataset is bound so that
250 * it is used as the base of the starting point in DNS responses
251 * when the "cyclic" rrset-order is required. Since the ordering
252 * should not be so crucial, no lock is set for the counter for
253 * performance reasons.
256 acachectl_t *additional_auth;
257 acachectl_t *additional_glue;
260 isc_stdtime_t last_used;
261 ISC_LINK(struct rdatasetheader) link;
263 unsigned int heap_index;
265 * Used for TTL-based cache cleaning.
267 isc_stdtime_t resign;
270 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
273 #define RDATASET_ATTR_NONEXISTENT 0x0001
274 #define RDATASET_ATTR_STALE 0x0002
275 #define RDATASET_ATTR_IGNORE 0x0004
276 #define RDATASET_ATTR_RETAIN 0x0008
277 #define RDATASET_ATTR_NXDOMAIN 0x0010
278 #define RDATASET_ATTR_RESIGN 0x0020
279 #define RDATASET_ATTR_STATCOUNT 0x0040
280 #define RDATASET_ATTR_OPTOUT 0x0080
281 #define RDATASET_ATTR_NEGATIVE 0x0100
283 typedef struct acache_cbarg {
284 dns_rdatasetadditional_t type;
288 rdatasetheader_t *header;
292 dns_acacheentry_t *entry;
293 acache_cbarg_t *cbarg;
298 * When the cache will pre-expire data (due to memory low or other
299 * situations) before the rdataset's TTL has expired, it MUST
300 * respect the RETAIN bit and not expire the data until its TTL is
304 #undef IGNORE /* WIN32 winbase.h defines this. */
306 #define EXISTS(header) \
307 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
308 #define NONEXISTENT(header) \
309 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
310 #define IGNORE(header) \
311 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
312 #define RETAIN(header) \
313 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
314 #define NXDOMAIN(header) \
315 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
316 #define RESIGN(header) \
317 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
318 #define OPTOUT(header) \
319 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
320 #define NEGATIVE(header) \
321 (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
323 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
326 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
327 * There is a tradeoff issue about configuring this value: if this is too
328 * small, it may cause heavier contention between threads; if this is too large,
329 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
330 * The default value should work well for most environments, but this can
331 * also be configurable at compilation time via the
332 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
333 * 1 due to the assumption of overmem_purge().
335 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
336 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
337 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
342 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
343 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
347 /* Protected in the refcount routines. */
348 isc_refcount_t references;
349 /* Locked by lock. */
350 isc_boolean_t exiting;
353 typedef struct rbtdb_changed {
354 dns_rbtnode_t * node;
356 ISC_LINK(struct rbtdb_changed) link;
359 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
/*
 * State kept for one version of the database: its serial, a reference
 * counter, writer and commit flags, the lists of changed nodes and
 * resigned headers, its link on a version list, and the security and
 * NSEC3 settings (iterations, salt length, salt).
 * NOTE(review): the embedded line numbers skip 368, 370, 373-374, 385-386
 * and 390, so some members and the closing line of the struct are not
 * visible in this listing.
 */
367 typedef struct rbtdb_version {
369 rbtdb_serial_t serial;
371 * Protected in the refcount routines.
372 * XXXJT: should we change the lock policy based on the refcount
375 isc_refcount_t references;
376 /* Locked by database lock. */
377 isc_boolean_t writer;
378 isc_boolean_t commit_ok;
379 rbtdb_changedlist_t changed_list;
380 rdatasetheaderlist_t resigned_list;
381 ISC_LINK(struct rbtdb_version) link;
382 dns_db_secure_t secure;
383 isc_boolean_t havensec3;
384 /* NSEC3 parameters */
387 isc_uint16_t iterations;
388 isc_uint8_t salt_length;
389 unsigned char salt[DNS_NSEC3_SALTSIZE];
392 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
397 #if DNS_RBTDB_USERWLOCK
402 isc_rwlock_t tree_lock;
403 unsigned int node_lock_count;
404 rbtdb_nodelock_t * node_locks;
405 dns_rbtnode_t * origin_node;
406 dns_stats_t * rrsetstats; /* cache DB only */
407 /* Locked by lock. */
409 isc_refcount_t references;
410 unsigned int attributes;
411 rbtdb_serial_t current_serial;
412 rbtdb_serial_t least_serial;
413 rbtdb_serial_t next_serial;
414 rbtdb_version_t * current_version;
415 rbtdb_version_t * future_version;
416 rbtdb_versionlist_t open_versions;
418 dns_dbnode_t *soanode;
419 dns_dbnode_t *nsnode;
422 * This is a linked list used to implement the LRU cache. There will
423 * be node_lock_count linked lists here. Nodes in bucket 1 will be
424 * placed on the linked list rdatasets[1].
426 rdatasetheaderlist_t *rdatasets;
429 * Temporary storage for stale cache nodes and dynamically deleted
430 * nodes that await being cleaned up.
432 rbtnodelist_t *deadnodes;
435 * Heaps. Each of these is used for TTL based expiry.
439 /* Locked by tree_lock. */
444 unsigned int quantum;
447 #define RBTDB_ATTR_LOADED 0x01
448 #define RBTDB_ATTR_LOADING 0x02
455 rbtdb_version_t * rbtversion;
456 rbtdb_serial_t serial;
457 unsigned int options;
458 dns_rbtnodechain_t chain;
459 isc_boolean_t copy_name;
460 isc_boolean_t need_cleanup;
462 dns_rbtnode_t * zonecut;
463 rdatasetheader_t * zonecut_rdataset;
464 rdatasetheader_t * zonecut_sigrdataset;
465 dns_fixedname_t zonecut_name;
477 static void rdataset_disassociate(dns_rdataset_t *rdataset);
478 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
479 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
480 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
481 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
482 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
483 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
486 dns_rdataset_t *negsig);
487 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
490 dns_rdataset_t *negsig);
491 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
492 dns_rdatasetadditional_t type,
493 dns_rdatatype_t qtype,
494 dns_acache_t *acache,
497 dns_dbversion_t **versionp,
498 dns_dbnode_t **nodep,
502 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
503 dns_rdatasetadditional_t type,
504 dns_rdatatype_t qtype,
505 dns_acache_t *acache,
508 dns_dbversion_t *version,
511 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
512 dns_rdataset_t *rdataset,
513 dns_rdatasetadditional_t type,
514 dns_rdatatype_t qtype);
515 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
517 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
519 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
520 isc_boolean_t tree_locked);
521 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
522 isc_stdtime_t now, isc_boolean_t tree_locked);
523 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
524 rdatasetheader_t *newheader);
525 static void prune_tree(isc_task_t *task, isc_event_t *event);
526 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
527 static void rdataset_expire(dns_rdataset_t *rdataset);
529 static dns_rdatasetmethods_t rdataset_methods = {
530 rdataset_disassociate,
540 rdataset_getadditional,
541 rdataset_setadditional,
542 rdataset_putadditional,
547 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
548 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
549 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
550 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
551 dns_rdataset_t *rdataset);
553 static dns_rdatasetitermethods_t rdatasetiter_methods = {
554 rdatasetiter_destroy,
560 typedef struct rbtdb_rdatasetiter {
561 dns_rdatasetiter_t common;
562 rdatasetheader_t * current;
563 } rbtdb_rdatasetiter_t;
565 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
566 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
567 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
568 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
570 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
571 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
572 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
573 dns_dbnode_t **nodep,
575 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
576 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
579 static dns_dbiteratormethods_t dbiterator_methods = {
591 #define DELETION_BATCH_MAX 64
594 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
/*
 * Iterator state for walking the database.
 * The generic dns_dbiterator_t is embedded as the member named common.
 * Two node chains are kept (chain and nsec3chain) together with a pointer
 * named current that refers to a chain; deletions is a fixed array of
 * DELETION_BATCH_MAX node pointers.
 * NOTE(review): lines 601, 607 and 609 of the original are not in this
 * listing, so further members (for example a counter that goes with the
 * deletions array) may exist -- confirm against the full file.
 */
596 typedef struct rbtdb_dbiterator {
597 dns_dbiterator_t common;
598 isc_boolean_t paused;
599 isc_boolean_t new_origin;
600 isc_rwlocktype_t tree_locked;
602 dns_fixedname_t name;
603 dns_fixedname_t origin;
604 dns_rbtnodechain_t chain;
605 dns_rbtnodechain_t nsec3chain;
606 dns_rbtnodechain_t *current;
608 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
610 isc_boolean_t nsec3only;
611 isc_boolean_t nonsec3;
612 } rbtdb_dbiterator_t;
615 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
616 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
618 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
620 static void overmem(dns_db_t *db, isc_boolean_t overmem);
621 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
622 isc_boolean_t *nsec3createflag);
625 * 'init_count' is used to initialize 'newheader->count' which in turn
626 * is used to determine where in the cycle rrset-order cyclic starts.
627 * We don't lock this as we don't care about simultaneous updates.
630 * Both init_count and header->count can be ISC_UINT32_MAX.
631 * The count on the returned rdataset however can't be as
632 * that indicates that the database does not implement cyclic
635 static unsigned int init_count;
640 * If a routine is going to lock more than one lock in this module, then
641 * the locking must be done in the following order:
645 * Node Lock (Only one from the set may be locked at one time by
650 * Failure to follow this hierarchy can result in deadlock.
656 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * Take an additional reference on the database passed as source.
 * The handle is checked with VALID_RBTDB and its reference counter is
 * incremented; NULL is passed where other callers in this file pass a
 * pointer that receives the resulting count.
 * NOTE(review): the embedded line numbers skip 664 and 671-673, so the
 * return type and any use of targetp are not visible in this listing.
 */
665 attach(dns_db_t *source, dns_db_t **targetp) {
666 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
668 REQUIRE(VALID_RBTDB(rbtdb));
670 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * Task event handler that re-enters free_rbtdb().
 * The database is taken from the ev_arg member of the event, and
 * free_rbtdb() is called with logging enabled and the same event.
 * NOTE(review): presumably this handles the DNS_EVENT_FREESTORAGE event
 * that free_rbtdb() sends when tree destruction hits its quota -- the
 * lines that register the handler are not in this listing.
 */
676 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
677 dns_rbtdb_t *rbtdb = event->ev_arg;
681 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * Adjust the rdataset statistics counter that corresponds to header.
 *
 * The counter is chosen from the header as follows:
 *  - NXDOMAIN attribute set: attribute DNS_RDATASTATSTYPE_ATTR_NXDOMAIN,
 *    base left at 0;
 *  - base half of the type is 0: attribute
 *    DNS_RDATASTATSTYPE_ATTR_NXRRSET, base taken from the extension half;
 *  - otherwise the base half of the type is used (the else that guards
 *    this assignment is on a line missing from the listing).
 *
 * Only cache databases are accepted (enforced by INSIST).
 * NOTE(review): lines 704 and 706 are missing; presumably the increment
 * flag selects between the increment and decrement calls below.
 */
685 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
686 isc_boolean_t increment)
688 dns_rdatastatstype_t statattributes = 0;
689 dns_rdatastatstype_t base = 0;
690 dns_rdatastatstype_t type;
692 /* At the moment we count statistics only for cache DB */
693 INSIST(IS_CACHE(rbtdb));
695 if (NXDOMAIN(header))
696 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
697 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
698 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
699 base = RBTDB_RDATATYPE_EXT(header->type);
701 base = RBTDB_RDATATYPE_BASE(header->type);
/* Fold base type and attribute bits into the statistics key. */
703 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
705 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
707 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * Store newttl in header->rdh_ttl, remembering the previous value, and
 * tell the TTL heap of the bucket header->node->locknum that the element
 * at header->heap_index has moved.
 *
 * The heap is only touched when the database is a cache, the header has
 * a non-zero heap_index, the TTL really changed and the heap for the
 * bucket exists.
 * NOTE(review): the statements under the three guard conditions (lines
 * 720, 728, 731) and the test that picks isc_heap_increased versus
 * isc_heap_decreased (lines 734, 736) are missing from this listing;
 * presumably the guards return early and the choice depends on how newttl
 * compares with oldttl.
 */
711 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
716 oldttl = header->rdh_ttl;
717 header->rdh_ttl = newttl;
719 if (!IS_CACHE(rbtdb))
723 * It's possible the rbtdb is not a cache. If this is the case,
724 * we will not have a heap, and we move on. If we do, though,
725 * we might need to adjust things.
727 if (header->heap_index == 0 || newttl == oldttl)
729 idx = header->node->locknum;
730 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
732 heap = rbtdb->heaps[idx];
735 isc_heap_increased(heap, header->heap_index);
737 isc_heap_decreased(heap, header->heap_index);
741 * These functions allow the heap code to rank the priority of each
742 * element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * Heap ordering callback keyed on TTL.
 * Both arguments are treated as rdatasetheader_t pointers and their
 * rdh_ttl members are compared with the less-than operator.
 * NOTE(review): the return statements (lines 750-751) are missing from
 * this listing.
 */
745 ttl_sooner(void *v1, void *v2) {
746 rdatasetheader_t *h1 = v1;
747 rdatasetheader_t *h2 = v2;
749 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * Heap ordering callback keyed on re-signing time.
 * Both arguments are treated as rdatasetheader_t pointers and their
 * resign members are compared with the less-than operator.
 * NOTE(review): the return statements (lines 760-761) are missing from
 * this listing.
 */
755 resign_sooner(void *v1, void *v2) {
756 rdatasetheader_t *h1 = v1;
757 rdatasetheader_t *h2 = v2;
759 if (h1->resign < h2->resign)
765 * This function sets the heap index into the header.
/*
 * Record a heap position in a header.
 * The argument what is treated as a rdatasetheader_t pointer and index is
 * stored in its heap_index member. Elsewhere in this file a heap_index
 * of 0 is tested to mean that the header is not on a heap.
 */
768 set_index(void *what, unsigned int index) {
769 rdatasetheader_t *h = what;
771 h->heap_index = index;
775 * Work out how many nodes can be deleted in the time between two
776 * requests to the nameserver. Smooth the resulting number and use it
777 * as an estimate for the number of nodes to be deleted in the next
/*
 * Compute the next deletion quantum from the previous one (old) and the
 * time at which the last batch started (start).
 *
 * The visible steps are:
 *  - interval is the number of microseconds per packet, from dns_pps;
 *  - usecs is the time elapsed between start and a timestamp named end;
 *  - the candidate value is old * interval / usecs;
 *  - the candidate is smoothed as (new + 3 * old) / 4 and logged at
 *    debug level 1.
 *
 * NOTE(review): many lines are missing from this listing (784-791,
 * 793-794, 796-797, 800-805, 808-813, 818-820), including the local
 * declarations, the clamping of pps and of the result, the handling of
 * usecs being 0 and the return statements.
 * NOTE(review): the logged value is formatted with %d although it is
 * computed from unsigned operands; presumably new is unsigned int, in
 * which case %u would be the matching conversion -- confirm.
 */
781 adjust_quantum(unsigned int old, isc_time_t *start) {
782 unsigned int pps = dns_pps; /* packets per second */
783 unsigned int interval;
792 interval = 1000000 / pps; /* interval in usec */
795 usecs = isc_time_microdiff(&end, start);
798 * We were unable to measure the amount of time taken.
799 * Double the nodes deleted next time.
/* Scale the old quantum by (time per packet) / (time actually spent). */
806 new = old * interval;
807 new /= (unsigned int)usecs;
/* Weighted average: one part new estimate, three parts old value. */
814 new = (new + old * 3) / 4;
816 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
817 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * Tear down a database and release everything it owns.
 *
 * Parameters:
 *  rbtdb - the database to destroy.
 *  log   - NOTE(review): presumably enables the debug message near the
 *          end; the test on it (line 913) is missing from this listing.
 *  event - an event from an earlier pass, reused when the work has to be
 *          continued from a task, or freed near the end.
 *
 * The two trees (tree and nsec3) are destroyed with dns_rbt_destroy2()
 * under a quantum. When that call returns ISC_R_QUOTA the quantum is
 * re-estimated with adjust_quantum() and an event is sent to rbtdb->task.
 * NOTE(review): presumably the function then returns and is re-entered
 * through free_rbtdb_callback(); the return statements (around lines
 * 883 and 906) and several argument lines of the event allocation are
 * missing from this listing.
 *
 * Once both trees are gone, the origin name, node locks, LRU lists, dead
 * node lists, heaps, statistics, task, locks and finally the structure
 * itself are released, and the ondestroy notification is delivered.
 */
823 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
825 isc_ondestroy_t ondest;
827 char buf[DNS_NAME_FORMATSIZE];
830 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
831 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
/* No writer may be pending, and open versions imply a current one. */
833 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
834 REQUIRE(rbtdb->future_version == NULL);
/* Drop and free the current version, if there is one. */
836 if (rbtdb->current_version != NULL) {
839 isc_refcount_decrement(&rbtdb->current_version->references,
842 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
843 isc_refcount_destroy(&rbtdb->current_version->references);
844 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
845 sizeof(rbtdb_version_t));
849 * We assume the number of remaining dead nodes is reasonably small;
850 * the overhead of unlinking all nodes here should be negligible.
/* Empty every per-bucket dead node list by unlinking from the head. */
852 for (i = 0; i < rbtdb->node_lock_count; i++) {
855 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
856 while (node != NULL) {
857 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
858 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* A quantum of 0 is used when there is no task to continue from. */
863 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
/* Destroy the main tree, possibly only partially in this pass. */
865 if (rbtdb->tree != NULL) {
866 isc_time_now(&start);
867 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
868 if (result == ISC_R_QUOTA) {
869 INSIST(rbtdb->task != NULL);
870 if (rbtdb->quantum != 0)
871 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
874 event = isc_event_allocate(rbtdb->common.mctx,
876 DNS_EVENT_FREESTORAGE,
879 sizeof(isc_event_t));
882 isc_task_send(rbtdb->task, &event);
885 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
/* Same procedure for the NSEC3 tree. */
888 if (rbtdb->nsec3 != NULL) {
889 isc_time_now(&start);
890 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
891 if (result == ISC_R_QUOTA) {
892 INSIST(rbtdb->task != NULL);
893 if (rbtdb->quantum != 0)
894 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
897 event = isc_event_allocate(rbtdb->common.mctx,
899 DNS_EVENT_FREESTORAGE,
902 sizeof(isc_event_t));
905 isc_task_send(rbtdb->task, &event);
908 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
/* NOTE(review): the guard for this call (line 911) is not shown. */
912 isc_event_free(&event);
/* Format the origin for the log line, or a placeholder if unset. */
914 if (dns_name_dynamic(&rbtdb->common.origin))
915 dns_name_format(&rbtdb->common.origin, buf,
918 strcpy(buf, "<UNKNOWN>");
919 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
920 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
921 "done free_rbtdb(%s)", buf);
923 if (dns_name_dynamic(&rbtdb->common.origin))
924 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
/* Destroy the reference counter and lock of every node lock bucket. */
925 for (i = 0; i < rbtdb->node_lock_count; i++) {
926 isc_refcount_destroy(&rbtdb->node_locks[i].references);
927 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
931 * Clean up LRU / re-signing order lists.
933 if (rbtdb->rdatasets != NULL) {
934 for (i = 0; i < rbtdb->node_lock_count; i++)
935 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
936 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
937 rbtdb->node_lock_count *
938 sizeof(rdatasetheaderlist_t));
941 * Clean up dead node buckets.
943 if (rbtdb->deadnodes != NULL) {
944 for (i = 0; i < rbtdb->node_lock_count; i++)
945 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
946 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
947 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
950 * Clean up heap objects.
952 if (rbtdb->heaps != NULL) {
953 for (i = 0; i < rbtdb->node_lock_count; i++)
954 isc_heap_destroy(&rbtdb->heaps[i]);
955 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
956 rbtdb->node_lock_count *
957 sizeof(isc_heap_t *));
960 if (rbtdb->rrsetstats != NULL)
961 dns_stats_detach(&rbtdb->rrsetstats);
/* The bucket array itself goes only after its locks were destroyed. */
963 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
964 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
965 isc_rwlock_destroy(&rbtdb->tree_lock);
966 isc_refcount_destroy(&rbtdb->references);
967 if (rbtdb->task != NULL)
968 isc_task_detach(&rbtdb->task);
970 RBTDB_DESTROYLOCK(&rbtdb->lock);
/* Invalidate both magic numbers so stale handles fail validation. */
971 rbtdb->common.magic = 0;
972 rbtdb->common.impmagic = 0;
/* Copy ondest out first: the structure is freed before notification. */
973 ondest = rbtdb->common.ondest;
974 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
975 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * Start shutting the database down and free it if nothing uses it.
 *
 * The cached soanode and nsnode references are detached, every node lock
 * bucket is flagged as exiting under its write lock, and the reference
 * counter of each bucket is inspected. The number of buckets counted as
 * inactive is subtracted from rbtdb->active under the database lock; when
 * active reaches 0 the database is to be freed with free_rbtdb().
 *
 * NOTE(review): lines 1000-1003 and 1011 are missing from this listing.
 * Presumably a bucket counts as inactive when its reference counter is 0
 * and the log-and-free section at the end runs only when want_free is
 * set -- confirm against the full file.
 */
979 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
980 isc_boolean_t want_free = ISC_FALSE;
982 unsigned int inactive = 0;
984 /* XXX check for open versions here */
986 if (rbtdb->soanode != NULL)
987 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
988 if (rbtdb->nsnode != NULL)
989 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
992 * Even though there are no external direct references, there still
993 * may be nodes in use.
995 for (i = 0; i < rbtdb->node_lock_count; i++) {
996 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
997 rbtdb->node_locks[i].exiting = ISC_TRUE;
998 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
999 if (isc_refcount_current(&rbtdb->node_locks[i].references)
/* The active count is only changed while holding the database lock. */
1005 if (inactive != 0) {
1006 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1007 rbtdb->active -= inactive;
1008 if (rbtdb->active == 0)
1009 want_free = ISC_TRUE;
1010 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1012 char buf[DNS_NAME_FORMATSIZE];
1013 if (dns_name_dynamic(&rbtdb->common.origin))
1014 dns_name_format(&rbtdb->common.origin, buf,
1017 strcpy(buf, "<UNKNOWN>");
1018 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1019 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1020 "calling free_rbtdb(%s)", buf);
1021 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Release one reference on the database that *dbp points to.
 * The handle is checked with VALID_RBTDB, the reference counter is
 * decremented with the resulting count stored in refs, and
 * maybe_free_rbtdb() is called.
 * NOTE(review): lines 1029-1030, 1034-1035 and 1037-1039 are missing
 * from this listing; presumably maybe_free_rbtdb() is only reached when
 * refs is 0 and *dbp is cleared afterwards -- confirm.
 */
1027 detach(dns_db_t **dbp) {
1028 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1031 REQUIRE(VALID_RBTDB(rbtdb));
1033 isc_refcount_decrement(&rbtdb->references, &refs);
1036 maybe_free_rbtdb(rbtdb);
/*
 * Hand out a new reference to the current version of the database.
 * The current_version pointer is read and its reference counter is
 * incremented while the database lock is held for reading, so both
 * happen under the same lock. The version is then stored in *versionp.
 */
1042 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1043 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1044 rbtdb_version_t *version;
1047 REQUIRE(VALID_RBTDB(rbtdb));
1049 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1050 version = rbtdb->current_version;
1051 isc_refcount_increment(&version->references, &refs);
1052 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1054 *versionp = (dns_dbversion_t *)version;
/*
 * Allocate a version structure from mctx and initialise it.
 *
 * Parameters:
 *  mctx       - memory context the structure is taken from.
 *  serial     - stored in the serial member.
 *  references - initial value for the reference counter.
 *  writer     - stored in the writer member.
 *
 * commit_ok starts as ISC_FALSE, and the changed list, resigned list and
 * list link are initialised empty. If initialising the reference counter
 * fails, the memory is given back to mctx.
 * NOTE(review): the return statements (lines 1066, 1071, 1079) are
 * missing from this listing; callers in this file test the result
 * against NULL, so presumably NULL is returned on either failure.
 */
1057 static inline rbtdb_version_t *
1058 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1059 unsigned int references, isc_boolean_t writer)
1061 isc_result_t result;
1062 rbtdb_version_t *version;
1064 version = isc_mem_get(mctx, sizeof(*version));
1065 if (version == NULL)
1067 version->serial = serial;
1068 result = isc_refcount_init(&version->references, references);
1069 if (result != ISC_R_SUCCESS) {
1070 isc_mem_put(mctx, version, sizeof(*version));
1073 version->writer = writer;
1074 version->commit_ok = ISC_FALSE;
1075 ISC_LIST_INIT(version->changed_list);
1076 ISC_LIST_INIT(version->resigned_list);
1077 ISC_LINK_INIT(version, link);
/*
 * Open a new version of the database and return it through *versionp.
 *
 * Requires a valid database, a versionp that points to NULL, and no
 * future version already open. Under the database write lock a version
 * is allocated with serial next_serial and one reference, marked
 * commit_ok, and given the secure and havensec3 settings of the current
 * version. The NSEC3 parameters are copied when havensec3 is set; the
 * zeroing statements further down are presumably the alternative branch
 * (the else on line 1108 is missing from this listing). next_serial is
 * then advanced and the version becomes rbtdb->future_version.
 *
 * Returns ISC_R_NOMEMORY when the allocation failed, ISC_R_SUCCESS
 * otherwise.
 */
1083 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1084 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1085 rbtdb_version_t *version;
1087 REQUIRE(VALID_RBTDB(rbtdb));
1088 REQUIRE(versionp != NULL && *versionp == NULL);
1089 REQUIRE(rbtdb->future_version == NULL);
1091 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1092 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1093 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1095 if (version != NULL) {
1096 version->commit_ok = ISC_TRUE;
1097 version->secure = rbtdb->current_version->secure;
1098 version->havensec3 = rbtdb->current_version->havensec3;
1099 if (version->havensec3) {
1100 version->flags = rbtdb->current_version->flags;
1101 version->iterations =
1102 rbtdb->current_version->iterations;
1103 version->hash = rbtdb->current_version->hash;
1104 version->salt_length =
1105 rbtdb->current_version->salt_length;
/* Only salt_length bytes of the salt buffer are meaningful. */
1106 memcpy(version->salt, rbtdb->current_version->salt,
1107 version->salt_length);
1110 version->iterations = 0;
1112 version->salt_length = 0;
1113 memset(version->salt, 0, sizeof(version->salt));
1115 rbtdb->next_serial++;
1116 rbtdb->future_version = version;
1118 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/* The failure is reported only after the lock has been released. */
1120 if (version == NULL)
1121 return (ISC_R_NOMEMORY);
1123 *versionp = version;
1125 return (ISC_R_SUCCESS);
/*
 * Take an additional reference on an existing version.
 * The database handle is validated, the reference counter of the version
 * passed as source is incremented, and the same version is stored in
 * *targetp.
 * NOTE(review): lines 1134, 1137 and 1139 are missing from this listing
 * (the declaration of refs and possibly further checks).
 */
1129 attachversion(dns_db_t *db, dns_dbversion_t *source,
1130 dns_dbversion_t **targetp)
1132 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1133 rbtdb_version_t *rbtversion = source;
1136 REQUIRE(VALID_RBTDB(rbtdb));
1138 isc_refcount_increment(&rbtversion->references, &refs);
1141 *targetp = rbtversion;
/*
 * Record that node was modified in the writer version.
 *
 * A rbtdb_changed_t is allocated before the database lock is taken.
 * Under the write lock the version must be a writer (REQUIRE). On
 * successful allocation a reference is taken on the node, the record is
 * filled in with dirty cleared, and it is appended to the changed_list
 * of the version.
 * NOTE(review): line 1168 is missing from this listing; presumably the
 * reset of commit_ok to ISC_FALSE belongs to the allocation failure
 * branch, so that a version with an untracked change cannot be
 * committed. The return statement is also not shown; the declared return
 * type suggests the new record (or NULL) is returned -- confirm.
 */
1144 static rbtdb_changed_t *
1145 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1146 dns_rbtnode_t *node)
1148 rbtdb_changed_t *changed;
1152 * Caller must be holding the node lock if its reference must be
1153 * protected by the lock.
1156 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1158 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1160 REQUIRE(version->writer);
1162 if (changed != NULL) {
1163 dns_rbtnode_refincrement(node, &refs);
1165 changed->node = node;
1166 changed->dirty = ISC_FALSE;
1167 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1169 version->commit_ok = ISC_FALSE;
1171 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Free the additional-cache control array that belongs to header.
 *
 * The number of entries is read from the first two bytes that follow the
 * header structure in memory, as a big-endian 16-bit value. Every entry
 * must already have NULL entry and cbarg members (INSIST), after which
 * the array of count acachectl_t elements is returned to mctx.
 * NOTE(review): lines 1178-1181, 1183 and 1186-1190 are missing from
 * this listing, including the array parameter declaration and presumably
 * an early exit when array is NULL -- confirm.
 */
1177 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1182 unsigned char *raw; /* RDATASLAB */
1185 * The caller must be holding the corresponding node lock.
/* The slab data starts immediately after the header structure. */
1191 raw = (unsigned char *)header + sizeof(*header);
1192 count = raw[0] * 256 + raw[1];
1195 * Sanity check: since an additional cache entry has a reference to
1196 * the original DB node (in the callback arg), there should be no
1197 * acache entries when the node can be freed.
1199 for (i = 0; i < count; i++)
1200 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1202 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * Free a noqname structure together with what it owns.
 * The name is freed if it is dynamically allocated, the neg and negsig
 * slabs are freed when present using the size reported by
 * dns_rdataslab_size() with a reserve of 0, and finally the structure
 * itself is returned to mctx.
 * NOTE(review): line 1217 is missing from this listing; presumably the
 * pointer *noqname is cleared there -- confirm.
 */
1206 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1208 if (dns_name_dynamic(&(*noqname)->name))
1209 dns_name_free(&(*noqname)->name, mctx);
1210 if ((*noqname)->neg != NULL)
1211 isc_mem_put(mctx, (*noqname)->neg,
1212 dns_rdataslab_size((*noqname)->neg, 0));
1213 if ((*noqname)->negsig != NULL)
1214 isc_mem_put(mctx, (*noqname)->negsig,
1215 dns_rdataslab_size((*noqname)->negsig, 0));
1216 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * Initialise a freshly obtained rdataset header.
 * The list link of the header is initialised; the remaining visible
 * statement prints the header address to stderr for class IN caches.
 * NOTE(review): lines 1224-1226 and 1229-1231 are missing from this
 * listing; the fprintf is presumably inside a debugging section that is
 * normally compiled out, and further members are presumably initialised
 * on the missing lines -- confirm against the full file.
 */
1221 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1223 ISC_LINK_INIT(h, link);
1227 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1228 fprintf(stderr, "initialized header: %p\n", h);
/*
 * Allocate a rdataset header from mctx and initialise it with
 * init_rdataset().
 * NOTE(review): lines 1240-1243, 1246 and 1248-1249 are missing from
 * this listing, so the handling of a failed allocation, the guard around
 * the debugging fprintf and the return statement are not visible;
 * presumably the new header (or NULL) is returned -- confirm.
 */
1234 static inline rdatasetheader_t *
1235 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1237 rdatasetheader_t *h;
1239 h = isc_mem_get(mctx, sizeof(*h));
1244 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1245 fprintf(stderr, "allocated header: %p\n", h);
1247 init_rdataset(rbtdb, h);
/*
 * Release a rdataset header and everything attached to it.
 *
 * In order: the statistics counter is decremented for an existing header
 * that carries RDATASET_ATTR_STATCOUNT; the header is unlinked from the
 * LRU list of its bucket if it is linked (only valid for caches); it is
 * removed from the heap of its bucket when heap_index is non-zero; the
 * noqname and closest structures and both additional-cache arrays are
 * freed; finally the header memory is returned to mctx.
 *
 * The size given back is sizeof(*rdataset) for a header marked
 * NONEXISTENT, otherwise the size reported by dns_rdataslab_size().
 * NOTE(review): the else between the two size computations (line 1281)
 * and the second argument of dns_rdataslab_size (line 1283) are missing
 * from this listing.
 */
1252 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1257 if (EXISTS(rdataset) &&
1258 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1259 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
/* LRU lists and heaps are both indexed by the lock number of the node. */
1262 idx = rdataset->node->locknum;
1263 if (ISC_LINK_LINKED(rdataset, link)) {
1264 INSIST(IS_CACHE(rbtdb));
1265 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1267 if (rdataset->heap_index != 0)
1268 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1269 rdataset->heap_index = 0;
1271 if (rdataset->noqname != NULL)
1272 free_noqname(mctx, &rdataset->noqname);
1273 if (rdataset->closest != NULL)
1274 free_noqname(mctx, &rdataset->closest);
1276 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1277 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1279 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1280 size = sizeof(*rdataset);
1282 size = dns_rdataslab_size((unsigned char *)rdataset,
1284 isc_mem_put(mctx, rdataset, size);
/*
 * Flag every rdataset header at 'node' whose serial equals 'serial' with
 * RDATASET_ATTR_IGNORE.  Both the top-level headers (linked through
 * 'next') and the older versions below each of them (linked through
 * 'down') are examined.  'make_dirty' records whether at least one
 * header was flagged.
 */
1288 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1289 rdatasetheader_t *header, *dcurrent;
1290 isc_boolean_t make_dirty = ISC_FALSE;
1293 * Caller must hold the node lock.
1297 * We set the IGNORE attribute on rdatasets with serial number
1298 * 'serial'. When the reference count goes to zero, these rdatasets
1299 * will be cleaned up; until that time, they will be ignored.
1301 for (header = node->data; header != NULL; header = header->next) {
1302 if (header->serial == serial) {
1303 header->attributes |= RDATASET_ATTR_IGNORE;
1304 make_dirty = ISC_TRUE;
1306 for (dcurrent = header->down;
1308 dcurrent = dcurrent->down) {
1309 if (dcurrent->serial == serial) {
1310 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1311 make_dirty = ISC_TRUE;
/*
 * Free every header found on the 'down' chain of 'top' with
 * free_rdataset().  The successor is fetched before each header is
 * freed so the walk never reads released memory.
 */
1320 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1322 rdatasetheader_t *d, *down_next;
1324 for (d = top->down; d != NULL; d = down_next) {
1325 down_next = d->down;
1326 free_rdataset(rbtdb, mctx, d);
/*
 * Clean the rdataset headers of a cache node.  For every top-level
 * header the 'down' chain is released with clean_stale_headers(); a
 * top-level header carrying RDATASET_ATTR_NONEXISTENT or
 * RDATASET_ATTR_STALE is then removed from the 'next' chain (through
 * 'top_prev', or through node->data when it is the first one) and
 * freed.  Memory is returned to the database memory context.
 */
1332 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1333 rdatasetheader_t *current, *top_prev, *top_next;
1334 isc_mem_t *mctx = rbtdb->common.mctx;
1337 * Caller must be holding the node lock.
1341 for (current = node->data; current != NULL; current = top_next) {
1342 top_next = current->next;
1343 clean_stale_headers(rbtdb, mctx, current);
1345 * If current is nonexistent or stale, we can clean it up.
1347 if ((current->attributes &
1348 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1349 if (top_prev != NULL)
1350 top_prev->next = current->next;
1352 node->data = current->next;
1353 free_rdataset(rbtdb, mctx, current);
/*
 * Clean the rdataset headers of a zone node.  'least_serial' must be
 * non-zero.  For every top-level header the function works through the
 * phases described by the comments in the body:
 *   1. free entries on the 'down' chain that duplicate the serial of
 *      the entry above them;
 *   2. if the top-level header itself is marked IGNORE, free it and,
 *      when it has a 'down' entry, promote that entry to top level;
 *   3. free the first 'down' entry older than 'least_serial' together
 *      with everything below it;
 *   4. note in 'still_dirty' when a 'down' chain remains, and free a
 *      top-level header that is NONEXISTENT.
 * Memory is returned to the database memory context.
 */
1361 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1362 rbtdb_serial_t least_serial)
1364 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1365 rdatasetheader_t *top_prev, *top_next;
1366 isc_mem_t *mctx = rbtdb->common.mctx;
1367 isc_boolean_t still_dirty = ISC_FALSE;
1370 * Caller must be holding the node lock.
1372 REQUIRE(least_serial != 0);
1375 for (current = node->data; current != NULL; current = top_next) {
1376 top_next = current->next;
1379 * First, we clean up any instances of multiple rdatasets
1380 * with the same serial number, or that have the IGNORE
1384 for (dcurrent = current->down;
1386 dcurrent = down_next) {
1387 down_next = dcurrent->down;
1388 INSIST(dcurrent->serial <= dparent->serial);
1389 if (dcurrent->serial == dparent->serial ||
1391 if (down_next != NULL)
1392 down_next->next = dparent;
1393 dparent->down = down_next;
1394 free_rdataset(rbtdb, mctx, dcurrent);
1400 * We've now eliminated all IGNORE datasets with the possible
1401 * exception of current, which we now check.
1403 if (IGNORE(current)) {
1404 down_next = current->down;
1405 if (down_next == NULL) {
1406 if (top_prev != NULL)
1407 top_prev->next = current->next;
1409 node->data = current->next;
1410 free_rdataset(rbtdb, mctx, current);
1412 * current no longer exists, so we can
1413 * just continue with the loop.
1418 * Pull up current->down, making it the new
1421 if (top_prev != NULL)
1422 top_prev->next = down_next;
1424 node->data = down_next;
1425 down_next->next = top_next;
1426 free_rdataset(rbtdb, mctx, current);
1427 current = down_next;
1432 * We now try to find the first down node less than the
1436 for (dcurrent = current->down;
1438 dcurrent = down_next) {
1439 down_next = dcurrent->down;
1440 if (dcurrent->serial < least_serial)
1446 * If there is a such an rdataset, delete it and any older
1449 if (dcurrent != NULL) {
1451 down_next = dcurrent->down;
1452 INSIST(dcurrent->serial <= least_serial);
1453 free_rdataset(rbtdb, mctx, dcurrent);
1454 dcurrent = down_next;
1455 } while (dcurrent != NULL);
1456 dparent->down = NULL;
1460 * Note. The serial number of 'current' might be less than
1461 * least_serial too, but we cannot delete it because it is
1462 * the most recent version, unless it is a NONEXISTENT
1465 if (current->down != NULL) {
1466 still_dirty = ISC_TRUE;
1470 * If this is a NONEXISTENT rdataset, we can delete it.
1472 if (NONEXISTENT(current)) {
1473 if (top_prev != NULL)
1474 top_prev->next = current->next;
1476 node->data = current->next;
1477 free_rdataset(rbtdb, mctx, current);
1487 * Clean up dead nodes. These are nodes which have no references, and
1488 * have no data. They are dead but we could not or chose not to delete
1489 * them when we deleted all the data at that node because we did not want
1490 * to wait for the tree write lock.
1492 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * Process nodes from the head of rbtdb->deadnodes[bucketnum] while the
 * list is non-empty and 'count' (initially 10) is positive.  Each node
 * is unlinked from the list, asserted to have neither references nor
 * data, and deleted with dns_rbt_deletenode() from rbtdb->nsec3 or
 * rbtdb->tree.  A failed deletion is only logged, at warning level.
 */
1495 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1496 dns_rbtnode_t *node;
1497 isc_result_t result;
1498 int count = 10; /* XXXJT: should be adjustable */
1500 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1501 while (node != NULL && count > 0) {
1502 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1505 * Since we're holding a tree write lock, it should be
1506 * impossible for this node to be referenced by others.
1508 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1509 node->data == NULL);
1511 INSIST(!ISC_LINK_LINKED(node, deadlink));
1513 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1516 result = dns_rbt_deletenode(rbtdb->tree, node,
1518 if (result != ISC_R_SUCCESS)
1519 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1520 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1521 "cleanup_dead_nodes: "
1522 "dns_rbt_deletenode: %s",
1523 isc_result_totext(result));
1524 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1530 * Caller must be holding the node lock if its reference must be protected
/*
 * Add one reference to 'node'.  When this is the node's first reference
 * the reference counter of the node's lock bucket
 * (rbtdb->node_locks[node->locknum]) is incremented as well.
 */
1534 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1535 unsigned int lockrefs, noderefs;
1536 isc_refcount_t *lockref;
1538 dns_rbtnode_refincrement0(node, &noderefs);
1539 if (noderefs == 1) { /* this is the first reference to the node */
1540 lockref = &rbtdb->node_locks[node->locknum].references;
1541 isc_refcount_increment0(lockref, &lockrefs);
1542 INSIST(lockrefs != 0);
1544 INSIST(noderefs != 0);
1548 * This function is assumed to be called when a node is newly referenced
1549 * and can be in the deadnode list. In that case the node must be retrieved
1550 * from the list because it is going to be used. In addition, if the caller
1551 * happens to hold a write lock on the tree, it's a good chance to purge dead
1553 * Note: while a new reference is gained in multiple places, there are only very
1554 * few cases where the node can be in the deadnode list (only empty nodes can
1555 * have been added to the list).
/*
 * Take a new reference to 'node' and make sure it is no longer on the
 * dead-node list of its lock bucket.  'treelocktype' is the type of tree
 * lock the caller holds.
 *
 * The whole operation runs under the bucket's "strong" lock.  A short
 * read-locked check sets 'need_relock' when the node is on the dead-node
 * list, or when that list is non-empty and the caller holds the tree
 * write lock.  The write-locked section then unlinks the node from the
 * list if needed and, with the tree write lock held, calls
 * cleanup_dead_nodes() for the bucket.
 * NOTE(review): the write-locked section is presumably entered only
 * when 'need_relock' is set -- confirm.
 */
1558 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1559 isc_rwlocktype_t treelocktype)
1561 isc_boolean_t need_relock = ISC_FALSE;
1563 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1564 new_reference(rbtdb, node);
1566 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1567 isc_rwlocktype_read);
1568 if (ISC_LINK_LINKED(node, deadlink))
1569 need_relock = ISC_TRUE;
1570 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1571 treelocktype == isc_rwlocktype_write)
1572 need_relock = ISC_TRUE;
1573 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1574 isc_rwlocktype_read);
1576 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1577 isc_rwlocktype_write);
1578 if (ISC_LINK_LINKED(node, deadlink))
1579 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1581 if (treelocktype == isc_rwlocktype_write)
1582 cleanup_dead_nodes(rbtdb, node->locknum);
1583 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1584 isc_rwlocktype_write);
1587 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1591 * Caller must be holding the node lock; either the "strong", read or write
1592 * lock. Note that the lock must be held even when node references are
1593 * atomically modified; in that case the decrement operation itself does not
1594 * have to be protected, but we must avoid a race condition where multiple
1595 * threads are decreasing the reference to zero simultaneously and at least
1596 * one of them is going to free the node.
1597 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * Drop one reference to 'node' and clean up or remove the node when it
 * becomes unused.
 *
 *	least_serial	serial handed to clean_zone_node(); when it is 0
 *			the value is read from rbtdb->least_serial under
 *			the database read lock.
 *	nlock		type of node lock held by the caller; a read lock
 *			is exchanged for a write lock here and downgraded
 *			again before returning.
 *	tlock		type of tree lock held by the caller; unless it is
 *			already a write lock, a write lock is attempted
 *			without blocking (tryupgrade or trylock).
 *	pruning		when true, no pruning event is sent for the node
 *			(see the comment in the body).
 *
 * The return value is the final value of 'no_reference'; the fast path
 * at the top returns whether the node's reference count reached zero.
 */
1600 static isc_boolean_t
1601 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1602 rbtdb_serial_t least_serial,
1603 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1604 isc_boolean_t pruning)
1606 isc_result_t result;
1607 isc_boolean_t write_locked;
1608 rbtdb_nodelock_t *nodelock;
1609 unsigned int refs, nrefs;
1610 int bucket = node->locknum;
1611 isc_boolean_t no_reference;
1613 nodelock = &rbtdb->node_locks[bucket];
1615 /* Handle easy and typical case first. */
1616 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1617 dns_rbtnode_refdecrement(node, &nrefs);
1618 INSIST((int)nrefs >= 0);
1620 isc_refcount_decrement(&nodelock->references, &refs);
1621 INSIST((int)refs >= 0);
1623 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1626 /* Upgrade the lock? */
1627 if (nlock == isc_rwlocktype_read) {
1628 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1629 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1631 dns_rbtnode_refdecrement(node, &nrefs);
1632 INSIST((int)nrefs >= 0);
1634 /* Restore the lock? */
1635 if (nlock == isc_rwlocktype_read)
1636 NODE_WEAKDOWNGRADE(&nodelock->lock);
1640 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1641 if (IS_CACHE(rbtdb))
1642 clean_cache_node(rbtdb, node);
1644 if (least_serial == 0) {
1646 * Caller doesn't know the least serial.
1649 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1650 least_serial = rbtdb->least_serial;
1651 RBTDB_UNLOCK(&rbtdb->lock,
1652 isc_rwlocktype_read);
1654 clean_zone_node(rbtdb, node, least_serial);
1658 isc_refcount_decrement(&nodelock->references, &refs);
1659 INSIST((int)refs >= 0);
1662 * XXXDCL should this only be done for cache zones?
1664 if (node->data != NULL || node->down != NULL) {
1665 /* Restore the lock? */
1666 if (nlock == isc_rwlocktype_read)
1667 NODE_WEAKDOWNGRADE(&nodelock->lock);
1672 * Attempt to switch to a write lock on the tree. If this fails,
1673 * we will add this node to a linked list of nodes in this locking
1674 * bucket which we will free later.
1676 if (tlock != isc_rwlocktype_write) {
1678 * Locking hierarchy notwithstanding, we don't need to free
1679 * the node lock before acquiring the tree write lock because
1680 * we only do a trylock.
1682 if (tlock == isc_rwlocktype_read)
1683 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1685 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1686 isc_rwlocktype_write);
1687 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1688 result == ISC_R_LOCKBUSY);
1690 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1692 write_locked = ISC_TRUE;
/*
 * Start from "no reference left"; the assignments further down reset
 * this to ISC_FALSE.
 */
1694 no_reference = ISC_TRUE;
1695 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1697 * We can now delete the node if the reference counter is
1698 * zero. This should be typically the case, but a different
1699 * thread may still gain a (new) reference just before the
1700 * current thread locks the tree (e.g., in findnode()).
1704 * If this node is the only one in the level it's in, deleting
1705 * this node may recursively make its parent the only node in
1706 * the parent level; if so, and if no one is currently using
1707 * the parent node, this is almost the only opportunity to
1708 * clean it up. But the recursive cleanup is not that trivial
1709 * since the child and parent may be in different lock buckets,
1710 * which would cause a lock order reversal problem. To avoid
1711 * the trouble, we'll dispatch a separate event for batch
1712 * cleaning. We need to check whether we're deleting the node
1713 * as a result of pruning to avoid infinite dispatching.
1714 * Note: pruning happens only when a task has been set for the
1715 * rbtdb. If the user of the rbtdb chooses not to set a task,
1716 * it's their responsibility to purge stale leaves (e.g. by
1717 * periodic walk-through).
1719 if (!pruning && node->parent != NULL &&
1720 node->parent->down == node && node->left == NULL &&
1721 node->right == NULL && rbtdb->task != NULL) {
1725 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1728 sizeof(isc_event_t));
1730 new_reference(rbtdb, node);
1732 attach((dns_db_t *)rbtdb, &db);
1734 isc_task_send(rbtdb->task, &ev);
1735 no_reference = ISC_FALSE;
1738 * XXX: this is a weird situation. We could
1739 * ignore this error case, but then the stale
1740 * node will unlikely be purged except via a
1741 * rare condition such as manual cleanup. So
1742 * we queue it in the deadnodes list, hoping
1743 * the memory shortage is temporary and the node
1744 * will be deleted later.
1746 isc_log_write(dns_lctx,
1747 DNS_LOGCATEGORY_DATABASE,
1748 DNS_LOGMODULE_CACHE,
1750 "decrement_reference: failed to "
1751 "allocate pruning event");
1752 INSIST(!ISC_LINK_LINKED(node, deadlink));
1753 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1757 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1758 char printname[DNS_NAME_FORMATSIZE];
1760 isc_log_write(dns_lctx,
1761 DNS_LOGCATEGORY_DATABASE,
1762 DNS_LOGMODULE_CACHE,
1764 "decrement_reference: "
1765 "delete from rbt: %p %s",
1767 dns_rbt_formatnodename(node,
1769 sizeof(printname)));
1772 INSIST(!ISC_LINK_LINKED(node, deadlink));
1774 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1777 result = dns_rbt_deletenode(rbtdb->tree, node,
1779 if (result != ISC_R_SUCCESS) {
1780 isc_log_write(dns_lctx,
1781 DNS_LOGCATEGORY_DATABASE,
1782 DNS_LOGMODULE_CACHE,
1784 "decrement_reference: "
1785 "dns_rbt_deletenode: %s",
1786 isc_result_totext(result));
1789 } else if (dns_rbtnode_refcurrent(node) == 0) {
1790 INSIST(!ISC_LINK_LINKED(node, deadlink));
1791 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1793 no_reference = ISC_FALSE;
1795 /* Restore the lock? */
1796 if (nlock == isc_rwlocktype_read)
1797 NODE_WEAKDOWNGRADE(&nodelock->lock);
1800 * Relock a read lock, or unlock the write lock if no lock was held.
1802 if (tlock == isc_rwlocktype_none)
1804 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1806 if (tlock == isc_rwlocktype_read)
1808 isc_rwlock_downgrade(&rbtdb->tree_lock);
1810 return (no_reference);
1814 * Prune the tree by recursively cleaning up single leaves. In the worst
1815 * case, the number of iterations is the number of tree levels, which is at
1816 * most the maximum number of domain name labels, i.e., 127. In practice, this
1817 * should be much smaller (only a few times), and even the worst case would be
1818 * acceptable for a single event.
/*
 * Event handler for a pruning event.  The event sender is the database
 * and the event argument is the node to prune; the event itself is freed
 * right away.
 *
 * With the tree write lock and the write lock of the node's bucket held,
 * the node's reference is released through decrement_reference() in
 * pruning mode.  If the parent is then left without a 'down' child, the
 * bucket lock is switched to the parent's bucket when it differs, a
 * reference is taken on the parent, and the parent is unlinked from the
 * dead-node list if it is on it.  The loop ends once 'node' is NULL.
 * Finally the locks are released and the database is detached.
 */
1821 prune_tree(isc_task_t *task, isc_event_t *event) {
1822 dns_rbtdb_t *rbtdb = event->ev_sender;
1823 dns_rbtnode_t *node = event->ev_arg;
1824 dns_rbtnode_t *parent;
1825 unsigned int locknum;
1829 isc_event_free(&event);
1831 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1832 locknum = node->locknum;
1833 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1835 parent = node->parent;
1836 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1837 isc_rwlocktype_write, ISC_TRUE);
1839 if (parent != NULL && parent->down == NULL) {
1841 * node was the only down child of the parent and has
1842 * just been removed. We'll then need to examine the
1843 * parent. Keep the lock if possible; otherwise,
1844 * release the old lock and acquire one for the parent.
1846 if (parent->locknum != locknum) {
1847 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1848 isc_rwlocktype_write);
1849 locknum = parent->locknum;
1850 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1851 isc_rwlocktype_write);
1855 * We need to gain a reference to the node before
1856 * decrementing it in the next iteration. In addition,
1857 * if the node is in the dead-nodes list, extract it
1858 * from the list beforehand as we do in
1859 * reactivate_node().
1861 new_reference(rbtdb, parent);
1862 if (ISC_LINK_LINKED(parent, deadlink)) {
1863 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1870 } while (node != NULL);
1871 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1872 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1874 detach((dns_db_t **)&rbtdb);
/*
 * Make 'version' the least open version of 'rbtdb': its serial becomes
 * rbtdb->least_serial, and its list of changed records is handed over
 * to '*cleanup_list', leaving version->changed_list empty.
 */
1878 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1879 rbtdb_changedlist_t *cleanup_list)
1882 * Caller must be holding the database lock.
1885 rbtdb->least_serial = version->serial;
1886 *cleanup_list = version->changed_list;
1887 ISC_LIST_INIT(version->changed_list);
/*
 * Move every changed record of 'version' that is not dirty from
 * version->changed_list to '*cleanup_list'.  Dirty records stay where
 * they are.  The successor is fetched before a record is unlinked so
 * the walk can continue.
 */
1891 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1892 rbtdb_changed_t *changed, *next_changed;
1895 * If the changed record is dirty, then
1896 * an update created multiple versions of
1897 * a given rdataset. We keep this list
1898 * until we're the least open version, at
1899 * which point it's safe to get rid of any
1902 * If the changed record isn't dirty, then
1903 * we don't need it anymore since we're
1904 * committing and not rolling back.
1906 * The caller must be holding the database lock.
1908 for (changed = HEAD(version->changed_list);
1910 changed = next_changed) {
1911 next_changed = NEXT(changed, link);
1912 if (!changed->dirty) {
1913 UNLINK(version->changed_list,
1915 APPEND(*cleanup_list,
/*
 * Work out the DNSSEC status of 'version' and store it in
 * version->secure (dns_db_secure, dns_db_partial or dns_db_insecure).
 *
 * The DNSKEY rdataset at 'origin' is scanned for a zone key, the NSEC
 * rdataset at 'origin' is looked up together with its signature, and
 * setnsec3parameters() is called to refresh the NSEC3 state cached in
 * 'version'.  The final decision is:
 *   - secure   when NSEC3 parameters were found, or an NSEC chain is
 *              present and 'hasoptbit' is false;
 *   - partial  when 'hasoptbit' is true or an NSEC3 chain is being
 *              created;
 *   - insecure otherwise.
 */
1922 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1923 dns_rdataset_t keyset;
1924 dns_rdataset_t nsecset, signsecset;
1925 dns_rdata_t rdata = DNS_RDATA_INIT;
1926 isc_boolean_t haszonekey = ISC_FALSE;
1927 isc_boolean_t hasnsec = ISC_FALSE;
1928 isc_boolean_t hasoptbit = ISC_FALSE;
1929 isc_boolean_t nsec3createflag = ISC_FALSE;
1930 isc_result_t result;
1932 dns_rdataset_init(&keyset);
1933 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1934 0, 0, &keyset, NULL);
1935 if (result == ISC_R_SUCCESS) {
1936 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1937 result = dns_rdataset_first(&keyset);
1938 while (result == ISC_R_SUCCESS) {
1939 dns_rdataset_current(&keyset, &keyrdata);
1940 if (dns_zonekey_iszonekey(&keyrdata)) {
1941 haszonekey = ISC_TRUE;
1944 result = dns_rdataset_next(&keyset);
1946 dns_rdataset_disassociate(&keyset);
1949 version->secure = dns_db_insecure;
1950 version->havensec3 = ISC_FALSE;
1954 dns_rdataset_init(&nsecset);
1955 dns_rdataset_init(&signsecset);
1956 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1957 0, 0, &nsecset, &signsecset);
1958 if (result == ISC_R_SUCCESS) {
1959 if (dns_rdataset_isassociated(&signsecset)) {
1961 result = dns_rdataset_first(&nsecset);
1962 if (result == ISC_R_SUCCESS) {
1963 dns_rdataset_current(&nsecset, &rdata);
1964 hasoptbit = dns_nsec_typepresent(&rdata,
1967 dns_rdataset_disassociate(&signsecset);
1969 dns_rdataset_disassociate(&nsecset);
1972 setnsec3parameters(db, version, &nsec3createflag);
1975 * Do we have a valid NSEC/NSEC3 chain?
1977 if (version->havensec3 || (hasnsec && !hasoptbit))
1978 version->secure = dns_db_secure;
1980 * Do we have a NSEC/NSEC3 chain under creation?
1982 else if (hasoptbit || nsec3createflag)
1983 version->secure = dns_db_partial;
1985 version->secure = dns_db_insecure;
1989 * Walk the origin node looking for NSEC3PARAM records.
1990 * Cache the nsec3 parameters.
/*
 * Scan the rdataset headers of the origin node of 'db' for an
 * NSEC3PARAM rdataset that is visible in 'version', i.e. the newest
 * entry whose serial does not exceed version->serial.  Each record of
 * that rdataset is converted to a dns_rdata_nsec3param_t.  For a record
 * that is accepted, its hash, salt, iteration count and flags are copied
 * into 'version' and version->havensec3 is set; the flag is cleared on
 * entry.
 *
 * '*nsec3createflag' may be set to ISC_TRUE when a record carrying
 * DNS_NSEC3FLAG_CREATE is examined; it is never cleared here.
 *
 * The tree read lock and the read lock of the origin node's bucket are
 * held for the duration of the scan.
 */
1993 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1994 isc_boolean_t *nsec3createflag)
1996 dns_rbtnode_t *node;
1997 dns_rdata_nsec3param_t nsec3param;
1998 dns_rdata_t rdata = DNS_RDATA_INIT;
1999 isc_region_t region;
2000 isc_result_t result;
2001 rdatasetheader_t *header, *header_next;
2002 unsigned char *raw; /* RDATASLAB */
2003 unsigned int count, length;
2004 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2006 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2007 version->havensec3 = ISC_FALSE;
2008 node = rbtdb->origin_node;
2009 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2010 isc_rwlocktype_read);
2011 for (header = node->data;
2013 header = header_next) {
2014 header_next = header->next;
2016 if (header->serial <= version->serial &&
2018 if (NONEXISTENT(header))
2022 header = header->down;
2023 } while (header != NULL);
2025 if (header != NULL &&
2026 header->type == dns_rdatatype_nsec3param) {
2028 * Find A NSEC3PARAM with a supported algorithm.
2030 raw = (unsigned char *)header + sizeof(*header);
2031 count = raw[0] * 256 + raw[1]; /* count */
2032 #if DNS_RDATASET_FIXED
2033 raw += count * 4 + 2;
2037 while (count-- > 0U) {
2038 length = raw[0] * 256 + raw[1];
2039 #if DNS_RDATASET_FIXED
2045 region.length = length;
2047 dns_rdata_fromregion(&rdata,
2048 rbtdb->common.rdclass,
2049 dns_rdatatype_nsec3param,
2051 result = dns_rdata_tostruct(&rdata,
2054 INSIST(result == ISC_R_SUCCESS);
2055 dns_rdata_reset(&rdata);
2057 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2058 !dns_nsec3_supportedhash(nsec3param.hash))
2061 #ifdef RFC5155_STRICT
2062 if (nsec3param.flags != 0)
2065 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2067 *nsec3createflag = ISC_TRUE;
2068 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2073 memcpy(version->salt, nsec3param.salt,
2074 nsec3param.salt_length);
2075 version->hash = nsec3param.hash;
2076 version->salt_length = nsec3param.salt_length;
2077 version->iterations = nsec3param.iterations;
2078 version->flags = nsec3param.flags;
2079 version->havensec3 = ISC_TRUE;
2081 * Look for a better algorithm than the
2082 * unknown test algorithm.
2084 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2090 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2091 isc_rwlocktype_read);
2092 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Close the version '*versionp' of 'db', dropping one reference to it.
 * 'commit' is used together with the writer flag to decide whether the
 * zone's secure status is refreshed (see below).
 *
 * When other references remain, only the writer flag is sanity-checked
 * under the database read lock.  Otherwise, under the database write
 * lock:
 *   - a writer version either becomes rbtdb->current_version (the
 *     database's reference to the previous current version is dropped
 *     and the new one gains a reference and is prepended to
 *     open_versions), or the transaction is rolled back and its changed
 *     and resigned lists are taken over for cleanup;
 *   - a non-writer version is unlinked from open_versions; if it is not
 *     the current version it is scheduled to be freed and its pending
 *     cleanups are either taken over here or appended to the next
 *     greater version.
 *
 * After the database lock is released: the zone's secure status is
 * refreshed for a committed writer on a non-cache database, the version
 * scheduled for cleanup is freed, the re-signed headers are processed,
 * and the collected changed records are processed under the tree write
 * lock.
 * NOTE(review): resign_insert() and rollback_node() are presumably
 * called only when 'rollback' is set -- confirm.
 */
2096 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2097 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2098 rbtdb_version_t *version, *cleanup_version, *least_greater;
2099 isc_boolean_t rollback = ISC_FALSE;
2100 rbtdb_changedlist_t cleanup_list;
2101 rdatasetheaderlist_t resigned_list;
2102 rbtdb_changed_t *changed, *next_changed;
2103 rbtdb_serial_t serial, least_serial;
2104 dns_rbtnode_t *rbtnode;
2106 rdatasetheader_t *header;
2107 isc_boolean_t writer;
2109 REQUIRE(VALID_RBTDB(rbtdb));
2110 version = (rbtdb_version_t *)*versionp;
2112 cleanup_version = NULL;
2113 ISC_LIST_INIT(cleanup_list);
2114 ISC_LIST_INIT(resigned_list);
2116 isc_refcount_decrement(&version->references, &refs);
2117 if (refs > 0) { /* typical and easy case first */
2119 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2120 INSIST(!version->writer);
2121 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2126 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2127 serial = version->serial;
2128 writer = version->writer;
2129 if (version->writer) {
2132 rbtdb_version_t *cur_version;
2134 INSIST(version->commit_ok);
2135 INSIST(version == rbtdb->future_version);
2137 * The current version is going to be replaced.
2138 * Release the (likely last) reference to it from the
2139 * DB itself and unlink it from the open list.
2141 cur_version = rbtdb->current_version;
2142 isc_refcount_decrement(&cur_version->references,
2145 if (cur_version->serial == rbtdb->least_serial)
2146 INSIST(EMPTY(cur_version->changed_list));
2147 UNLINK(rbtdb->open_versions,
2150 if (EMPTY(rbtdb->open_versions)) {
2152 * We're going to become the least open
2155 make_least_version(rbtdb, version,
2159 * Some other open version is the
2160 * least version. We can't cleanup
2161 * records that were changed in this
2162 * version because the older versions
2163 * may still be in use by an open
2166 * We can, however, discard the
2167 * changed records for things that
2168 * we've added that didn't exist in
2171 cleanup_nondirty(version, &cleanup_list);
2174 * If the (soon to be former) current version
2175 * isn't being used by anyone, we can clean
2179 cleanup_version = cur_version;
2180 APPENDLIST(version->changed_list,
2181 cleanup_version->changed_list,
2185 * Become the current version.
2187 version->writer = ISC_FALSE;
2188 rbtdb->current_version = version;
2189 rbtdb->current_serial = version->serial;
2190 rbtdb->future_version = NULL;
2193 * Keep the current version in the open list, and
2194 * gain a reference for the DB itself (see the DB
2195 * creation function below). This must be the only
2196 * case where we need to increment the counter from
2197 * zero and need to use isc_refcount_increment0().
2199 isc_refcount_increment0(&version->references,
2201 INSIST(cur_ref == 1);
2202 PREPEND(rbtdb->open_versions,
2203 rbtdb->current_version, link);
2204 resigned_list = version->resigned_list;
2205 ISC_LIST_INIT(version->resigned_list);
2208 * We're rolling back this transaction.
2210 cleanup_list = version->changed_list;
2211 ISC_LIST_INIT(version->changed_list);
2212 resigned_list = version->resigned_list;
2213 ISC_LIST_INIT(version->resigned_list);
2214 rollback = ISC_TRUE;
2215 cleanup_version = version;
2216 rbtdb->future_version = NULL;
2219 if (version != rbtdb->current_version) {
2221 * There are no external or internal references
2222 * to this version and it can be cleaned up.
2224 cleanup_version = version;
2227 * Find the version with the least serial
2228 * number greater than ours.
2230 least_greater = PREV(version, link);
2231 if (least_greater == NULL)
2232 least_greater = rbtdb->current_version;
2234 INSIST(version->serial < least_greater->serial);
2236 * Is this the least open version?
2238 if (version->serial == rbtdb->least_serial) {
2240 * Yes. Install the new least open
2243 make_least_version(rbtdb,
2248 * Add any unexecuted cleanups to
2249 * those of the least greater version.
2251 APPENDLIST(least_greater->changed_list,
2252 version->changed_list,
2255 } else if (version->serial == rbtdb->least_serial)
2256 INSIST(EMPTY(version->changed_list));
2257 UNLINK(rbtdb->open_versions, version, link);
/* Snapshot the least serial while the database lock is still held. */
2259 least_serial = rbtdb->least_serial;
2260 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2263 * Update the zone's secure status.
2265 if (writer && commit && !IS_CACHE(rbtdb))
2266 iszonesecure(db, version, rbtdb->origin_node);
2268 if (cleanup_version != NULL) {
2269 INSIST(EMPTY(cleanup_version->changed_list));
2270 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2271 sizeof(*cleanup_version));
2275 * Commit/rollback re-signed headers.
2277 for (header = HEAD(resigned_list);
2279 header = HEAD(resigned_list)) {
2282 ISC_LIST_UNLINK(resigned_list, header, link);
2284 lock = &rbtdb->node_locks[header->node->locknum].lock;
2285 NODE_LOCK(lock, isc_rwlocktype_write);
2287 resign_insert(rbtdb, header->node->locknum, header);
2288 decrement_reference(rbtdb, header->node, least_serial,
2289 isc_rwlocktype_write, isc_rwlocktype_none,
2291 NODE_UNLOCK(lock, isc_rwlocktype_write);
2294 if (!EMPTY(cleanup_list)) {
2296 * We acquire a tree write lock here in order to make sure
2297 * that stale nodes will be removed in decrement_reference().
2298 * If we didn't have the lock, those nodes could miss the
2299 * chance to be removed until the server stops. The write lock
2300 * is expensive, but this event should be rare enough to justify
2303 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2304 for (changed = HEAD(cleanup_list);
2306 changed = next_changed) {
2309 next_changed = NEXT(changed, link);
2310 rbtnode = changed->node;
2311 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2313 NODE_LOCK(lock, isc_rwlocktype_write);
2315 * This is a good opportunity to purge any dead nodes,
2318 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2321 rollback_node(rbtnode, serial);
2322 decrement_reference(rbtdb, rbtnode, least_serial,
2323 isc_rwlocktype_write,
2324 isc_rwlocktype_write, ISC_FALSE);
2326 NODE_UNLOCK(lock, isc_rwlocktype_write);
2328 isc_mem_put(rbtdb->common.mctx, changed,
2331 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2339 * Add the necessary magic for the wildcard name 'name'
2340 * to be found in 'rbtdb'.
2342 * In order for wildcard matching to work correctly in
2343 * zone_find(), we must ensure that a node for the wildcarding
2344 * level exists in the database, and has its 'find_callback'
2345 * and 'wild' bits set.
2347 * E.g. if the wildcard name is "*.sub.example." then we
2348 * must ensure that "sub.example." exists and is marked as
/*
 * Make sure the node that anchors the wildcard 'name' exists in
 * rbtdb->tree and is flagged for wildcard processing.  The anchoring
 * name is taken from 'name' starting at label index 1, i.e. without its
 * leftmost label.  It is added with dns_rbt_addnode(); an already
 * existing node is not an error.  'find_callback' is then set on the
 * node.  Returns ISC_R_SUCCESS on the path ending at the final return.
 */
2352 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2353 isc_result_t result;
2354 dns_name_t foundname;
2355 dns_offsets_t offsets;
2357 dns_rbtnode_t *node = NULL;
2359 dns_name_init(&foundname, offsets);
2360 n = dns_name_countlabels(name);
2363 dns_name_getlabelsequence(name, 1, n, &foundname);
2364 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2365 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2368 node->find_callback = 1;
2370 return (ISC_R_SUCCESS);
/*
 * Examine trailing label sequences of 'name' (the last 'i' labels, built
 * with dns_name_getlabelsequence(name, n - i, i, ...)).  For each one
 * that is a wildcard name, add_wildcard_magic() is applied and a node
 * for the sequence itself is added to rbtdb->tree; an already existing
 * node is not an error.  'n' is the label count of 'name' and 'l' the
 * label count of the database origin.
 * Returns ISC_R_SUCCESS on the path ending at the final return.
 */
2374 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2375 isc_result_t result;
2376 dns_name_t foundname;
2377 dns_offsets_t offsets;
2378 unsigned int n, l, i;
2380 dns_name_init(&foundname, offsets);
2381 n = dns_name_countlabels(name);
2382 l = dns_name_countlabels(&rbtdb->common.origin);
2385 dns_rbtnode_t *node = NULL; /* dummy */
2386 dns_name_getlabelsequence(name, n - i, i, &foundname);
2387 if (dns_name_iswildcard(&foundname)) {
2388 result = add_wildcard_magic(rbtdb, &foundname);
2389 if (result != ISC_R_SUCCESS)
2391 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2393 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2399 return (ISC_R_SUCCESS);
/*
 * Look up the node for 'name' in the main tree of 'db' and return it,
 * with a new reference, in '*nodep'.
 *
 * The lookup runs under the tree read lock and also accepts nodes
 * without data (DNS_RBTFIND_EMPTYDATA).  When it fails, the read lock is
 * dropped and a partial match is reported as ISC_R_NOTFOUND.  On the
 * creation path the tree is re-locked for writing and the node is added;
 * a freshly added node gets its lock bucket number from a hash reduced
 * modulo rbtdb->node_lock_count, empty wildcard nodes are added for
 * 'name', and wildcard magic is applied when 'name' itself is a
 * wildcard.  The reference is taken through reactivate_node() before
 * the tree lock is released.
 * NOTE(review): the creation path is presumably taken only when 'create'
 * is true -- confirm.
 */
2403 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2404 dns_dbnode_t **nodep)
2406 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2407 dns_rbtnode_t *node = NULL;
2408 dns_name_t nodename;
2409 isc_result_t result;
2410 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2412 REQUIRE(VALID_RBTDB(rbtdb));
2414 dns_name_init(&nodename, NULL);
2415 RWLOCK(&rbtdb->tree_lock, locktype);
2416 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2417 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2418 if (result != ISC_R_SUCCESS) {
2419 RWUNLOCK(&rbtdb->tree_lock, locktype);
2421 if (result == DNS_R_PARTIALMATCH)
2422 result = ISC_R_NOTFOUND;
2426 * It would be nice to try to upgrade the lock instead of
2427 * unlocking then relocking.
2429 locktype = isc_rwlocktype_write;
2430 RWLOCK(&rbtdb->tree_lock, locktype);
2432 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2433 if (result == ISC_R_SUCCESS) {
2434 dns_rbt_namefromnode(node, &nodename);
2435 #ifdef DNS_RBT_USEHASH
2436 node->locknum = node->hashval % rbtdb->node_lock_count;
2438 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2439 rbtdb->node_lock_count;
2442 add_empty_wildcards(rbtdb, name);
2444 if (dns_name_iswildcard(name)) {
2445 result = add_wildcard_magic(rbtdb, name);
2446 if (result != ISC_R_SUCCESS) {
2447 RWUNLOCK(&rbtdb->tree_lock, locktype);
2451 } else if (result != ISC_R_EXISTS) {
2452 RWUNLOCK(&rbtdb->tree_lock, locktype);
2456 reactivate_node(rbtdb, node, locktype);
2457 RWUNLOCK(&rbtdb->tree_lock, locktype);
2459 *nodep = (dns_dbnode_t *)node;
2461 return (ISC_R_SUCCESS);
/*
 * Look up the node for 'name' in the NSEC3 tree of 'db' (rbtdb->nsec3)
 * and return it, with a new reference, in '*nodep'.
 *
 * The lookup runs under the tree read lock and also accepts nodes
 * without data (DNS_RBTFIND_EMPTYDATA).  When it fails, the read lock is
 * dropped and a partial match is reported as ISC_R_NOTFOUND.  On the
 * creation path the tree is re-locked for writing and the node is added;
 * a freshly added node gets its lock bucket number from a hash reduced
 * modulo rbtdb->node_lock_count.  The node is asserted to be flagged as
 * an NSEC3 node, and the reference is taken with new_reference() under
 * the bucket's strong lock before the tree lock is released.
 * NOTE(review): the creation path is presumably taken only when 'create'
 * is true -- confirm.
 */
2465 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2466 dns_dbnode_t **nodep)
2468 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2469 dns_rbtnode_t *node = NULL;
2470 dns_name_t nodename;
2471 isc_result_t result;
2472 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2474 REQUIRE(VALID_RBTDB(rbtdb));
2476 dns_name_init(&nodename, NULL);
2477 RWLOCK(&rbtdb->tree_lock, locktype);
2478 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2479 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2480 if (result != ISC_R_SUCCESS) {
2481 RWUNLOCK(&rbtdb->tree_lock, locktype);
2483 if (result == DNS_R_PARTIALMATCH)
2484 result = ISC_R_NOTFOUND;
2488 * It would be nice to try to upgrade the lock instead of
2489 * unlocking then relocking.
2491 locktype = isc_rwlocktype_write;
2492 RWLOCK(&rbtdb->tree_lock, locktype);
2494 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2495 if (result == ISC_R_SUCCESS) {
2496 dns_rbt_namefromnode(node, &nodename);
2497 #ifdef DNS_RBT_USEHASH
2498 node->locknum = node->hashval % rbtdb->node_lock_count;
2500 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2501 rbtdb->node_lock_count;
2504 } else if (result != ISC_R_EXISTS) {
2505 RWUNLOCK(&rbtdb->tree_lock, locktype);
2509 INSIST(node->nsec3);
2510 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2511 new_reference(rbtdb, node);
2512 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2513 RWUNLOCK(&rbtdb->tree_lock, locktype);
2515 *nodep = (dns_dbnode_t *)node;
2517 return (ISC_R_SUCCESS);
/*
 * zone_zonecut_callback: tree-search callback that records the first zone
 * cut met on the way down.  arg is the rbtdb_search_t of the running search.
 *
 * Visible behaviour:
 *  - If search->zonecut is already set, DNS_R_CONTINUE is returned at once,
 *    so only the topmost cut is remembered.
 *  - Otherwise the node read lock is taken and node->data is scanned for
 *    NS, DNAME and RRSIG(DNAME) headers, following header->down to a
 *    version with serial <= search->serial.
 *  - An NS header is only considered when node is not the origin node or
 *    the database is a stub (IS_STUB).
 *  - DNAME takes precedence over NS.  With a DNAME the signature header is
 *    stored in zonecut_sigrdataset; with NS that field is set to NULL.
 *  - When something was found: a new reference is taken on node, zonecut,
 *    zonecut_rdataset and need_cleanup are set and search->wild is cleared.
 *    If DNS_DBFIND_GLUEOK is not set the result becomes DNS_R_PARTIALMATCH;
 *    the code that copies name into search->zonecut_name and sets copy_name
 *    presumably forms the alternative branch (its else line is not visible).
 *  - When node->wild is set and DNS_DBFIND_NOWILD is not, search->wild is
 *    set.
 *  - The node lock is released before leaving.
 *
 * NOTE(review): short lines are missing from this listing, including the
 * initialisation of ns_header and found, the declaration of zcname, loop
 * exits and the final return (presumably return (result)); confirm against
 * the complete file.
 */
2521 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2522 rbtdb_search_t *search = arg;
2523 rdatasetheader_t *header, *header_next;
2524 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2525 rdatasetheader_t *found;
2526 isc_result_t result;
2527 dns_rbtnode_t *onode;
2530 * We only want to remember the topmost zone cut, since it's the one
2531 * that counts, so we'll just continue if we've already found a
2534 if (search->zonecut != NULL)
2535 return (DNS_R_CONTINUE);
2538 result = DNS_R_CONTINUE;
2539 onode = search->rbtdb->origin_node;
2541 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2542 isc_rwlocktype_read);
2545 * Look for an NS or DNAME rdataset active in our version.
2548 dname_header = NULL;
2549 sigdname_header = NULL;
2550 for (header = node->data; header != NULL; header = header_next) {
2551 header_next = header->next;
2552 if (header->type == dns_rdatatype_ns ||
2553 header->type == dns_rdatatype_dname ||
2554 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2556 if (header->serial <= search->serial &&
2559 * Is this a "this rdataset doesn't
2562 if (NONEXISTENT(header))
2566 header = header->down;
2567 } while (header != NULL);
2568 if (header != NULL) {
2569 if (header->type == dns_rdatatype_dname)
2570 dname_header = header;
2571 else if (header->type ==
2572 RBTDB_RDATATYPE_SIGDNAME)
2573 sigdname_header = header;
2574 else if (node != onode ||
2575 IS_STUB(search->rbtdb)) {
2577 * We've found an NS rdataset that
2578 * isn't at the origin node. We check
2579 * that they're not at the origin node,
2580 * because otherwise we'd erroneously
2581 * treat the zone top as if it were
2591 * Did we find anything?
2593 if (dname_header != NULL) {
2595 * Note that DNAME has precedence over NS if both exist.
2597 found = dname_header;
2598 search->zonecut_sigrdataset = sigdname_header;
2599 } else if (ns_header != NULL) {
2601 search->zonecut_sigrdataset = NULL;
2604 if (found != NULL) {
2606 * We increment the reference count on node to ensure that
2607 * search->zonecut_rdataset will still be valid later.
2609 new_reference(search->rbtdb, node);
2610 search->zonecut = node;
2611 search->zonecut_rdataset = found;
2612 search->need_cleanup = ISC_TRUE;
2614 * Since we've found a zonecut, anything beneath it is
2615 * glue and is not subject to wildcard matching, so we
2616 * may clear search->wild.
2618 search->wild = ISC_FALSE;
2619 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2621 * If the caller does not want to find glue, then
2622 * this is the best answer and the search should
2625 result = DNS_R_PARTIALMATCH;
2630 * The search will continue beneath the zone cut.
2631 * This may or may not be the best match. In case it
2632 * is, we need to remember the node name.
2634 zcname = dns_fixedname_name(&search->zonecut_name);
2635 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2637 search->copy_name = ISC_TRUE;
2641 * There is no zonecut at this node which is active in this
2644 * If this is a "wild" node and the caller hasn't disabled
2645 * wildcard matching, remember that we've seen a wild node
2646 * in case we need to go searching for wildcard matches
2649 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2650 search->wild = ISC_TRUE;
2653 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2654 isc_rwlocktype_read);
/*
 * bind_rdataset: associate header (a slab header stored at node) with the
 * caller-supplied rdataset.
 *
 * Visible effects:
 *  - rdataset is compared against NULL first (the statement guarded by
 *    that test is not visible here; presumably an early return, to be
 *    confirmed);
 *  - a new reference is taken on node via new_reference();
 *  - rdataset->methods must be NULL, i.e. the rdataset is disassociated,
 *    and is then pointed at rdataset_methods;
 *  - class, base type, covered type and trust are copied from
 *    rbtdb/header, and ttl is set to header->rdh_ttl - now;
 *  - the NXDOMAIN, OPTOUT, NOQNAME, CLOSEST and RESIGN attribute bits are
 *    set as applicable (the condition guarding OPTOUT is not visible in
 *    this listing);
 *  - rbtdb, node and the raw slab pointer (the bytes immediately after the
 *    header) are stored in private1, private2 and private3;
 *  - header->count is post-incremented into rdataset->count, and a value
 *    of ISC_UINT32_MAX is replaced by 0;
 *  - the iterator state (privateuint4, private5) is cleared;
 *  - header->noqname and header->closest are copied to private6/private7;
 *  - resign is copied from header when RESIGN(header) holds; the
 *    assignment of 0 is presumably the else arm.
 *
 * Locking: per the note in the body the caller holds the node reader lock,
 * and the counter increment is deliberately performed without a writer
 * lock.
 */
2660 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2661 rdatasetheader_t *header, isc_stdtime_t now,
2662 dns_rdataset_t *rdataset)
2664 unsigned char *raw; /* RDATASLAB */
2667 * Caller must be holding the node reader lock.
2668 * XXXJT: technically, we need a writer lock, since we'll increment
2669 * the header count below. However, since the actual counter value
2670 * doesn't matter, we prioritize performance here. (We may want to
2671 * use atomic increment when available).
2674 if (rdataset == NULL)
2677 new_reference(rbtdb, node);
2679 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2681 rdataset->methods = &rdataset_methods;
2682 rdataset->rdclass = rbtdb->common.rdclass;
2683 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2684 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2685 rdataset->ttl = header->rdh_ttl - now;
2686 rdataset->trust = header->trust;
2687 if (NXDOMAIN(header))
2688 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2690 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2691 rdataset->private1 = rbtdb;
2692 rdataset->private2 = node;
2693 raw = (unsigned char *)header + sizeof(*header);
2694 rdataset->private3 = raw;
2695 rdataset->count = header->count++;
2696 if (rdataset->count == ISC_UINT32_MAX)
2697 rdataset->count = 0;
2700 * Reset iterator state.
2702 rdataset->privateuint4 = 0;
2703 rdataset->private5 = NULL;
2706 * Add noqname proof.
2708 rdataset->private6 = header->noqname;
2709 if (rdataset->private6 != NULL)
2710 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2711 rdataset->private7 = header->closest;
2712 if (rdataset->private7 != NULL)
2713 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2716 * Copy out re-signing information.
2718 if (RESIGN(header)) {
2719 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2720 rdataset->resign = header->resign;
2722 rdataset->resign = 0;
/*
 * setup_delegation: turn the zone cut remembered in search into the
 * results handed back to the caller.
 *
 * Visible behaviour:
 *  - node is search->zonecut and type is the type of
 *    search->zonecut_rdataset.
 *  - When foundname is non-NULL and search->copy_name is set, the saved
 *    zone cut name is copied into foundname first; this is done before any
 *    other output is produced so that a copy failure leaves nothing to
 *    undo.
 *  - When nodep is non-NULL, search->need_cleanup is cleared; per the
 *    in-body note the reference already held by the search block is the
 *    one that is handed over (the assignment to *nodep itself is on a line
 *    not visible in this listing).
 *  - When rdataset is non-NULL, the zone cut rdataset, and the signature
 *    rdataset if both sigrdataset and search->zonecut_sigrdataset are
 *    non-NULL, are bound with bind_rdataset() under the node read lock,
 *    using search->now.
 *  - Returns DNS_R_DNAME when the cut is a DNAME, otherwise
 *    DNS_R_DELEGATION.
 *
 * Locking: the caller must not hold any node lock (see the note in the
 * body); this function takes the node read lock itself.
 *
 * NOTE(review): short lines are missing from this listing (the zcname
 * declaration, the failure return after dns_name_copy(), closing braces);
 * confirm against the complete file.
 */
2725 static inline isc_result_t
2726 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2727 dns_name_t *foundname, dns_rdataset_t *rdataset,
2728 dns_rdataset_t *sigrdataset)
2730 isc_result_t result;
2732 rbtdb_rdatatype_t type;
2733 dns_rbtnode_t *node;
2736 * The caller MUST NOT be holding any node locks.
2739 node = search->zonecut;
2740 type = search->zonecut_rdataset->type;
2743 * If we have to set foundname, we do it before anything else.
2744 * If we were to set foundname after we had set nodep or bound the
2745 * rdataset, then we'd have to undo that work if dns_name_copy()
2746 * failed. By setting foundname first, there's nothing to undo if
2749 if (foundname != NULL && search->copy_name) {
2750 zcname = dns_fixedname_name(&search->zonecut_name);
2751 result = dns_name_copy(zcname, foundname, NULL);
2752 if (result != ISC_R_SUCCESS)
2755 if (nodep != NULL) {
2757 * Note that we don't have to increment the node's reference
2758 * count here because we're going to use the reference we
2759 * already have in the search block.
2762 search->need_cleanup = ISC_FALSE;
2764 if (rdataset != NULL) {
2765 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2766 isc_rwlocktype_read);
2767 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2768 search->now, rdataset);
2769 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2770 bind_rdataset(search->rbtdb, node,
2771 search->zonecut_sigrdataset,
2772 search->now, sigrdataset);
2773 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2774 isc_rwlocktype_read);
2777 if (type == dns_rdatatype_dname)
2778 return (DNS_R_DNAME);
2779 return (DNS_R_DELEGATION);
/*
 * valid_glue: decide whether a record of the given type at node, owned by
 * name, is acceptable glue for the zone cut stored in search.
 *
 * Visible behaviour:
 *  - NS is only accepted when node is the zone cut itself; any other type
 *    must be A, AAAA or A6.
 *  - The raw slab that follows search->zonecut_rdataset is then walked:
 *    the first two bytes give the record count, each record starts with a
 *    two byte length, and with DNS_RDATASET_FIXED extra per-record bytes
 *    are skipped.
 *  - Each record is viewed as a name through an isc_region_t and compared
 *    with name using dns_name_compare().
 *
 * Fix in this revision: the call to dns_name_fromregion() had its second
 * argument corrupted into a registered-trademark sign followed by "ion"
 * (an HTML entity decoding accident).  It is restored to the address of
 * the local region variable.
 *
 * NOTE(review): short lines are missing from this listing (the ns_name
 * declaration, the early returns, the loop header, the assignments to
 * region.base and valid, the final return).  Presumably the function
 * returns valid, set to true on the first matching name; confirm against
 * the complete file.
 */
2782 static inline isc_boolean_t
2783 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2784 dns_rbtnode_t *node)
2786 unsigned char *raw; /* RDATASLAB */
2787 unsigned int count, size;
2789 isc_boolean_t valid = ISC_FALSE;
2790 dns_offsets_t offsets;
2791 isc_region_t region;
2792 rdatasetheader_t *header;
2795 * No additional locking is required.
2799 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2800 * if it occurs at a zone cut, but is not valid below it.
2802 if (type == dns_rdatatype_ns) {
2803 if (node != search->zonecut) {
2806 } else if (type != dns_rdatatype_a &&
2807 type != dns_rdatatype_aaaa &&
2808 type != dns_rdatatype_a6) {
2812 header = search->zonecut_rdataset;
2813 raw = (unsigned char *)header + sizeof(*header);
2814 count = raw[0] * 256 + raw[1];
2815 #if DNS_RDATASET_FIXED
2816 raw += 2 + (4 * count);
2823 size = raw[0] * 256 + raw[1];
2824 #if DNS_RDATASET_FIXED
2830 region.length = size;
2833 * XXX Until we have rdata structures, we have no choice but
2834 * to directly access the rdata format.
2836 dns_name_init(&ns_name, offsets);
2837 dns_name_fromregion(&ns_name, &region);
2838 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * activeempty: walk forward along chain looking for the next node that has
 * at least one header visible to this search (serial <= search->serial,
 * not IGNORE, EXISTS).  Each candidate node is inspected under its node
 * read lock.  When the walk ends with ISC_R_SUCCESS, the node name is
 * assembled from prefix and origin into next and tested with
 * dns_name_issubdomain(next, name).
 *
 * The chain passed in is advanced by this function.
 *
 * NOTE(review): short lines are missing from this listing: the third
 * parameter (used in the body as name), several declarations, the loop
 * exits and the final return.  Presumably answer is set to true when the
 * subdomain test succeeds and is then returned; confirm against the
 * complete file.
 */
2847 static inline isc_boolean_t
2848 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2851 dns_fixedname_t fnext;
2852 dns_fixedname_t forigin;
2857 dns_rbtnode_t *node;
2858 isc_result_t result;
2859 isc_boolean_t answer = ISC_FALSE;
2860 rdatasetheader_t *header;
2862 rbtdb = search->rbtdb;
2864 dns_name_init(&prefix, NULL);
2865 dns_fixedname_init(&fnext);
2866 next = dns_fixedname_name(&fnext);
2867 dns_fixedname_init(&forigin);
2868 origin = dns_fixedname_name(&forigin);
/* Step to the successor and keep stepping until an active node is seen. */
2870 result = dns_rbtnodechain_next(chain, NULL, NULL);
2871 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2873 result = dns_rbtnodechain_current(chain, &prefix,
2875 if (result != ISC_R_SUCCESS)
2877 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2878 isc_rwlocktype_read);
2879 for (header = node->data;
2881 header = header->next) {
2882 if (header->serial <= search->serial &&
2883 !IGNORE(header) && EXISTS(header))
2886 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2887 isc_rwlocktype_read);
2890 result = dns_rbtnodechain_next(chain, NULL, NULL);
/* Build the full name of the node reached and compare it with name. */
2892 if (result == ISC_R_SUCCESS)
2893 result = dns_name_concatenate(&prefix, origin, next, NULL);
2894 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode: check whether qname lies at or below an empty node,
 * using the nearest active neighbours of the position in search->chain.
 * (The spelling of the function name is kept as it is used by callers.)
 *
 * Visible behaviour:
 *  - Works on a private copy of search->chain, so the chain in the search
 *    block is not moved.
 *  - First loop: starting at the current chain position and stepping with
 *    dns_rbtnodechain_prev(), looks for a node with a header visible to
 *    this search (serial <= search->serial, not IGNORE, EXISTS) and builds
 *    its full name into prev.  If that fails, check_prev is cleared.
 *  - Second loop: the same search in the forward direction with
 *    dns_rbtnodechain_next(), building next; on failure check_next is
 *    cleared.
 *  - rname starts as a clone of qname; tname is wname with its leftmost
 *    (wildcard) label removed.
 *  - Final loop: tests whether prev or next is a subdomain of rname, then
 *    strips the leftmost label of rname, until rname equals tname.
 *
 * NOTE(review): short lines are missing from this listing (several
 * declarations, loop exits, the do keyword, the assignment inside the
 * final test and the return).  Presumably answer is set to true when one
 * of the subdomain tests succeeds and is returned; confirm against the
 * complete file.
 */
2899 static inline isc_boolean_t
2900 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2901 dns_fixedname_t fnext;
2902 dns_fixedname_t forigin;
2903 dns_fixedname_t fprev;
2911 dns_rbtnode_t *node;
2912 dns_rbtnodechain_t chain;
2913 isc_boolean_t check_next = ISC_TRUE;
2914 isc_boolean_t check_prev = ISC_TRUE;
2915 isc_boolean_t answer = ISC_FALSE;
2916 isc_result_t result;
2917 rdatasetheader_t *header;
2920 rbtdb = search->rbtdb;
2922 dns_name_init(&name, NULL);
2923 dns_name_init(&tname, NULL);
2924 dns_name_init(&rname, NULL);
2925 dns_fixedname_init(&fnext);
2926 next = dns_fixedname_name(&fnext);
2927 dns_fixedname_init(&fprev);
2928 prev = dns_fixedname_name(&fprev);
2929 dns_fixedname_init(&forigin);
2930 origin = dns_fixedname_name(&forigin);
2933 * Find if qname is at or below a empty node.
2934 * Use our own copy of the chain.
2937 chain = search->chain;
2940 result = dns_rbtnodechain_current(&chain, &name,
2942 if (result != ISC_R_SUCCESS)
2944 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2945 isc_rwlocktype_read);
2946 for (header = node->data;
2948 header = header->next) {
2949 if (header->serial <= search->serial &&
2950 !IGNORE(header) && EXISTS(header))
2953 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2954 isc_rwlocktype_read);
2957 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2958 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2959 if (result == ISC_R_SUCCESS)
2960 result = dns_name_concatenate(&name, origin, prev, NULL);
2961 if (result != ISC_R_SUCCESS)
2962 check_prev = ISC_FALSE;
/* Now look in the forward direction for the next active node. */
2964 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2965 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2967 result = dns_rbtnodechain_current(&chain, &name,
2969 if (result != ISC_R_SUCCESS)
2971 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2972 isc_rwlocktype_read);
2973 for (header = node->data;
2975 header = header->next) {
2976 if (header->serial <= search->serial &&
2977 !IGNORE(header) && EXISTS(header))
2980 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2981 isc_rwlocktype_read);
2984 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2986 if (result == ISC_R_SUCCESS)
2987 result = dns_name_concatenate(&name, origin, next, NULL);
2988 if (result != ISC_R_SUCCESS)
2989 check_next = ISC_FALSE;
2991 dns_name_clone(qname, &rname);
2994 * Remove the wildcard label to find the terminal name.
2996 n = dns_name_countlabels(wname);
2997 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3000 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3001 (check_next && dns_name_issubdomain(next, &rname))) {
3006 * Remove the left hand label.
3008 n = dns_name_countlabels(&rname);
3009 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3010 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard: after a partial match, look for a wildcard that covers
 * the query name, examining the ancestor levels recorded in search->chain.
 *
 * Visible behaviour:
 *  - result starts as ISC_R_NOTFOUND and the walk starts at
 *    search->chain.level_matches.
 *  - For each level node, under its node read lock, the headers are
 *    scanned for one visible to this search (serial <= search->serial,
 *    not IGNORE, EXISTS) to learn whether the level is active.
 *  - The wildcard name for the level is built by concatenating
 *    dns_wildcardname with the node name and then with the names of the
 *    levels in search->chain.levels.
 *  - That name is looked up in rbtdb->tree with DNS_RBTFIND_EMPTYDATA
 *    using a separate chain (wchain).
 *  - If the wildcard node is found and is active (a visible header exists
 *    or activeempty() reports true), activeemtpynode() is consulted; when
 *    it reports true ISC_R_NOTFOUND is returned, otherwise the wildcard
 *    node is the answer and result stays ISC_R_SUCCESS.
 *  - A lookup result other than ISC_R_NOTFOUND or DNS_R_PARTIALMATCH is
 *    treated as an error and ends the search.
 *  - If the level node itself is active, wildcards at higher levels cannot
 *    apply and result is set to ISC_R_NOTFOUND.
 *
 * Locking: per the note in the body the caller holds the tree lock; the
 * rest of that note is cut off in this listing (presumably it says no
 * node locks may be held).
 *
 * NOTE(review): short lines are missing from this listing (the remaining
 * parameters, several declarations, the loop header, assignments to done,
 * wild, active and *nodep, and the final return); confirm the details
 * against the complete file.
 */
3014 static inline isc_result_t
3015 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3019 dns_rbtnode_t *node, *level_node, *wnode;
3020 rdatasetheader_t *header;
3021 isc_result_t result = ISC_R_NOTFOUND;
3024 dns_fixedname_t fwname;
3026 isc_boolean_t done, wild, active;
3027 dns_rbtnodechain_t wchain;
3030 * Caller must be holding the tree lock and MUST NOT be holding
3035 * Examine each ancestor level. If the level's wild bit
3036 * is set, then construct the corresponding wildcard name and
3037 * search for it. If the wildcard node exists, and is active in
3038 * this version, we're done. If not, then we next check to see
3039 * if the ancestor is active in this version. If so, then there
3040 * can be no possible wildcard match and again we're done. If not,
3041 * continue the search.
3044 rbtdb = search->rbtdb;
3045 i = search->chain.level_matches;
3049 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3050 isc_rwlocktype_read);
3053 * First we try to figure out if this node is active in
3054 * the search's version. We do this now, even though we
3055 * may not need the information, because it simplifies the
3056 * locking and code flow.
3058 for (header = node->data;
3060 header = header->next) {
3061 if (header->serial <= search->serial &&
3062 !IGNORE(header) && EXISTS(header))
3075 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3076 isc_rwlocktype_read);
3080 * Construct the wildcard name for this level.
3082 dns_name_init(&name, NULL);
3083 dns_rbt_namefromnode(node, &name);
3084 dns_fixedname_init(&fwname);
3085 wname = dns_fixedname_name(&fwname);
3086 result = dns_name_concatenate(dns_wildcardname, &name,
3089 while (result == ISC_R_SUCCESS && j != 0) {
3091 level_node = search->chain.levels[j];
3092 dns_name_init(&name, NULL);
3093 dns_rbt_namefromnode(level_node, &name);
3094 result = dns_name_concatenate(wname,
3099 if (result != ISC_R_SUCCESS)
3103 dns_rbtnodechain_init(&wchain, NULL);
3104 result = dns_rbt_findnode(rbtdb->tree, wname,
3105 NULL, &wnode, &wchain,
3106 DNS_RBTFIND_EMPTYDATA,
3108 if (result == ISC_R_SUCCESS) {
3112 * We have found the wildcard node. If it
3113 * is active in the search's version, we're
3116 lock = &rbtdb->node_locks[wnode->locknum].lock;
3117 NODE_LOCK(lock, isc_rwlocktype_read);
3118 for (header = wnode->data;
3120 header = header->next) {
3121 if (header->serial <= search->serial &&
3122 !IGNORE(header) && EXISTS(header))
3125 NODE_UNLOCK(lock, isc_rwlocktype_read);
3126 if (header != NULL ||
3127 activeempty(search, &wchain, wname)) {
3128 if (activeemtpynode(search, qname,
3130 return (ISC_R_NOTFOUND);
3133 * The wildcard node is active!
3135 * Note: result is still ISC_R_SUCCESS
3136 * so we don't have to set it.
3141 } else if (result != ISC_R_NOTFOUND &&
3142 result != DNS_R_PARTIALMATCH) {
3144 * An error has occurred. Bail out.
3152 * The level node is active. Any wildcarding
3153 * present at higher levels has no
3154 * effect and we're done.
3156 result = ISC_R_NOTFOUND;
3162 node = search->chain.levels[i];
/*
 * matchparams: check whether an NSEC3 rdataset slab contains a record
 * whose parameters equal those of the version being searched.
 *
 * header must be of type dns_rdatatype_nsec3 (enforced with REQUIRE).
 *
 * Visible behaviour:
 *  - The raw slab that follows header is walked: the first two bytes give
 *    the record count, each record starts with a two byte length, and with
 *    DNS_RDATASET_FIXED extra per-record bytes are skipped.
 *  - Each record is wrapped in an isc_region_t, turned into a dns_rdata_t
 *    with dns_rdata_fromregion() and decoded with dns_rdata_tostruct();
 *    decoding must succeed (INSIST).
 *  - hash, iterations, salt_length and the salt bytes are compared with
 *    the values stored in search->rbtversion.
 *  - rdata is reset before the next record is examined.
 *
 * Fix in this revision: the last argument of dns_rdata_fromregion() had
 * been corrupted into a registered-trademark sign followed by "ion" (an
 * HTML entity decoding accident).  It is restored to the address of the
 * local region variable.
 *
 * NOTE(review): short lines are missing from this listing (the #else arms,
 * the assignment to region.base, the pointer advance and both return
 * statements).  Presumably the function returns true on the first matching
 * record and false otherwise; confirm against the complete file.
 */
3170 static isc_boolean_t
3171 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3173 dns_rdata_t rdata = DNS_RDATA_INIT;
3174 dns_rdata_nsec3_t nsec3;
3175 unsigned char *raw; /* RDATASLAB */
3176 unsigned int rdlen, count;
3177 isc_region_t region;
3178 isc_result_t result;
3180 REQUIRE(header->type == dns_rdatatype_nsec3);
3182 raw = (unsigned char *)header + sizeof(*header);
3183 count = raw[0] * 256 + raw[1]; /* count */
3184 #if DNS_RDATASET_FIXED
3185 raw += count * 4 + 2;
3189 while (count-- > 0) {
3190 rdlen = raw[0] * 256 + raw[1];
3191 #if DNS_RDATASET_FIXED
3197 region.length = rdlen;
3198 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3199 dns_rdatatype_nsec3, &region);
3201 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3202 INSIST(result == ISC_R_SUCCESS);
3203 if (nsec3.hash == search->rbtversion->hash &&
3204 nsec3.iterations == search->rbtversion->iterations &&
3205 nsec3.salt_length == search->rbtversion->salt_length &&
3206 memcmp(nsec3.salt, search->rbtversion->salt,
3207 nsec3.salt_length) == 0)
3209 dns_rdata_reset(&rdata);
/*
 * find_closest_nsec: starting at the current position of search->chain,
 * walk backwards through tree until a node carrying a usable NSEC (or,
 * for the NSEC3 tree, NSEC3) record is found, and hand that node, its
 * name and its rdatasets back to the caller.
 *
 * Visible behaviour:
 *  - When tree is search->rbtdb->nsec3 the wanted types are NSEC3 and
 *    RRSIG(NSEC3); otherwise NSEC and RRSIG(NSEC).
 *  - need_sig is true only when secure equals dns_db_secure.
 *  - Each node is examined under its node read lock.  Headers are followed
 *    through header->down to a version with serial <= search->serial, and
 *    any active header clears empty_node.
 *  - An NSEC3 record that fails matchparams() while the version has
 *    havensec3 set makes the node count as empty and the walk continues
 *    with dns_rbtnodechain_prev().
 *  - When the record is found, and its signature too if one is needed,
 *    the node name is concatenated into the output name, a new reference
 *    is taken if nodep is non-NULL, and the rdatasets are bound with
 *    bind_rdataset().
 *  - A node that is active but has neither record is treated as empty and
 *    the walk continues.
 *  - A node with only one of the two records yields DNS_R_BADDB.
 *  - An inactive node makes the walk continue.
 *  - The loop runs while empty_node is true and result is ISC_R_SUCCESS.
 *  - On ISC_R_NOMORE with wraps set, the chain is repositioned with
 *    dns_rbtnodechain_last(); a final ISC_R_NOMORE is turned into
 *    DNS_R_BADDB.
 *
 * NOTE(review): short lines are missing from this listing (the assignment
 * of wraps, the loop headers, several assignments to found and foundsig,
 * some call arguments and the final return); confirm the details against
 * the complete file.
 */
3215 * Find node of the NSEC/NSEC3 record that is 'name'.
3217 static inline isc_result_t
3218 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3219 dns_name_t *foundname, dns_rdataset_t *rdataset,
3220 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3221 dns_db_secure_t secure)
3223 dns_rbtnode_t *node;
3224 rdatasetheader_t *header, *header_next, *found, *foundsig;
3225 isc_boolean_t empty_node;
3226 isc_result_t result;
3227 dns_fixedname_t fname, forigin;
3228 dns_name_t *name, *origin;
3229 dns_rdatatype_t type;
3230 rbtdb_rdatatype_t sigtype;
3231 isc_boolean_t wraps;
3232 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3234 if (tree == search->rbtdb->nsec3) {
3235 type = dns_rdatatype_nsec3;
3236 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3239 type = dns_rdatatype_nsec;
3240 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3247 dns_fixedname_init(&fname);
3248 name = dns_fixedname_name(&fname);
3249 dns_fixedname_init(&forigin);
3250 origin = dns_fixedname_name(&forigin);
3251 result = dns_rbtnodechain_current(&search->chain, name,
3253 if (result != ISC_R_SUCCESS)
3255 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3256 isc_rwlocktype_read);
3259 empty_node = ISC_TRUE;
3260 for (header = node->data;
3262 header = header_next) {
3263 header_next = header->next;
3265 * Look for an active, extant NSEC or RRSIG NSEC.
3268 if (header->serial <= search->serial &&
3271 * Is this a "this rdataset doesn't
3274 if (NONEXISTENT(header))
3278 header = header->down;
3279 } while (header != NULL);
3280 if (header != NULL) {
3282 * We now know that there is at least one
3283 * active rdataset at this node.
3285 empty_node = ISC_FALSE;
3286 if (header->type == type) {
3288 if (foundsig != NULL)
3290 } else if (header->type == sigtype) {
3298 if (found != NULL && search->rbtversion->havensec3 &&
3299 found->type == dns_rdatatype_nsec3 &&
3300 !matchparams(found, search)) {
3301 empty_node = ISC_TRUE;
3304 result = dns_rbtnodechain_prev(&search->chain,
3306 } else if (found != NULL &&
3307 (foundsig != NULL || !need_sig))
3310 * We've found the right NSEC/NSEC3 record.
3312 * Note: for this to really be the right
3313 * NSEC record, it's essential that the NSEC
3314 * records of any nodes obscured by a zone
3315 * cut have been removed; we assume this is
3318 result = dns_name_concatenate(name, origin,
3320 if (result == ISC_R_SUCCESS) {
3321 if (nodep != NULL) {
3322 new_reference(search->rbtdb,
3326 bind_rdataset(search->rbtdb, node,
3329 if (foundsig != NULL)
3330 bind_rdataset(search->rbtdb,
3336 } else if (found == NULL && foundsig == NULL) {
3338 * This node is active, but has no NSEC or
3339 * RRSIG NSEC. That means it's glue or
3340 * other obscured zone data that isn't
3341 * relevant for our search. Treat the
3342 * node as if it were empty and keep looking.
3344 empty_node = ISC_TRUE;
3345 result = dns_rbtnodechain_prev(&search->chain,
3349 * We found an active node, but either the
3350 * NSEC or the RRSIG NSEC is missing. This
3353 result = DNS_R_BADDB;
3357 * This node isn't active. We've got to keep
3360 result = dns_rbtnodechain_prev(&search->chain, NULL,
3363 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3364 isc_rwlocktype_read);
3365 } while (empty_node && result == ISC_R_SUCCESS);
3367 if (result == ISC_R_NOMORE && wraps) {
3368 result = dns_rbtnodechain_last(&search->chain, tree,
3370 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3377 * If the result is ISC_R_NOMORE, then we got to the beginning of
3378 * the database and didn't find a NSEC record. This shouldn't
3381 if (result == ISC_R_NOMORE)
3382 result = DNS_R_BADDB;
/*
 * zone_find: look up name/type in a zone database version and report the
 * answer, a delegation, glue, a CNAME, or the reason the data is absent.
 *
 * Parameters as used by the visible code:
 *  - db: the database; must pass VALID_RBTDB().
 *  - version: version to search; when NULL the current version is attached
 *    with currentversion() and released again with closeversion() at the
 *    end.
 *  - type: wanted rdata type; dns_rdatatype_any is handled specially.
 *  - options: DNS_DBFIND_* flags.  The flags tested here are GLUEOK,
 *    VALIDATEGLUE, FORCENSEC and FORCENSEC3.
 *  - now: not used for the decision (see the note in the body).
 *  - nodep, foundname, rdataset, sigrdataset: outputs.
 *
 * Visible flow:
 *  1. The search block is initialised and the tree lock is taken for
 *     reading.  The tree searched is the NSEC3 tree when
 *     DNS_DBFIND_FORCENSEC3 is set.  dns_rbt_findnode() runs with
 *     zone_zonecut_callback(), which records zone cuts met on the way
 *     down.
 *  2. On DNS_R_PARTIALMATCH: a recorded zone cut is turned into a
 *     delegation with setup_delegation(); otherwise find_wildcard() is
 *     tried, and failing that activeempty() decides between
 *     DNS_R_EMPTYNAME and DNS_R_NXDOMAIN, with find_closest_nsec()
 *     supplying the covering record when the version is secure without
 *     NSEC3 or one of the FORCENSEC options is set.
 *  3. On an exact (or wildcard) match: CNAME matching is disabled beneath
 *     a zone cut and for KEY and NSEC queries.  A node with find_callback
 *     set may itself be a zone cut, unless it is the origin node or the
 *     type lives at the parent; stub databases always qualify.
 *  4. Under the node read lock the headers are scanned, following
 *     header->down to a version with serial <= search.serial, to collect
 *     the wanted rdataset, its RRSIG, a CNAME and its RRSIG, and the
 *     NSEC and its RRSIG.  An NS header at a possible zone cut records the
 *     cut and takes a node reference.  An NSEC3 header that fails
 *     matchparams() releases the node lock; the statement that follows is
 *     not visible.
 *  5. If nothing was found: beneath a zone cut the delegation is returned;
 *     otherwise the result is DNS_R_NXRRSET, DNS_R_BADDB when a secure
 *     version lacks the NSEC or its signature, or DNS_R_EMPTYWILD via
 *     find_closest_nsec().  The NSEC rdatasets are bound when the version
 *     is secure without NSEC3 or FORCENSEC is set.
 *  6. If something was found the result is DNS_R_CNAME, DNS_R_ZONECUT,
 *     DNS_R_GLUE or ISC_R_SUCCESS as the visible tests select.  Glue that
 *     fails valid_glue() under DNS_DBFIND_VALIDATEGLUE is replaced by the
 *     delegation.  The rdatasets are bound unless type is
 *     dns_rdatatype_any.
 *  7. Cleanup: the node lock and tree lock are released; a zone cut
 *     reference that was not handed to the caller is dropped with
 *     decrement_reference(); the version is closed via closeversion()
 *     (presumably only when it was attached here); the chain is reset.
 *
 * NOTE(review): short lines are missing from this listing (labels, goto
 * statements, braces, some declarations and assignments, the final
 * return), so the exact control transfers between the steps above cannot
 * be read from here; confirm them against the complete file.
 */
3388 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3389 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3390 dns_dbnode_t **nodep, dns_name_t *foundname,
3391 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3393 dns_rbtnode_t *node = NULL;
3394 isc_result_t result;
3395 rbtdb_search_t search;
3396 isc_boolean_t cname_ok = ISC_TRUE;
3397 isc_boolean_t close_version = ISC_FALSE;
3398 isc_boolean_t maybe_zonecut = ISC_FALSE;
3399 isc_boolean_t at_zonecut = ISC_FALSE;
3401 isc_boolean_t empty_node;
3402 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3403 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3404 rbtdb_rdatatype_t sigtype;
3405 isc_boolean_t active;
3406 dns_rbtnodechain_t chain;
3410 search.rbtdb = (dns_rbtdb_t *)db;
3412 REQUIRE(VALID_RBTDB(search.rbtdb));
3415 * We don't care about 'now'.
3420 * If the caller didn't supply a version, attach to the current
3423 if (version == NULL) {
3424 currentversion(db, &version);
3425 close_version = ISC_TRUE;
3428 search.rbtversion = version;
3429 search.serial = search.rbtversion->serial;
3430 search.options = options;
3431 search.copy_name = ISC_FALSE;
3432 search.need_cleanup = ISC_FALSE;
3433 search.wild = ISC_FALSE;
3434 search.zonecut = NULL;
3435 dns_fixedname_init(&search.zonecut_name);
3436 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3440 * 'wild' will be true iff. we've matched a wildcard.
3444 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3447 * Search down from the root of the tree. If, while going down, we
3448 * encounter a callback node, zone_zonecut_callback() will search the
3449 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3451 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3453 result = dns_rbt_findnode(tree, name, foundname, &node,
3454 &search.chain, DNS_RBTFIND_EMPTYDATA,
3455 zone_zonecut_callback, &search);
3457 if (result == DNS_R_PARTIALMATCH) {
3459 if (search.zonecut != NULL) {
3460 result = setup_delegation(&search, nodep, foundname,
3461 rdataset, sigrdataset);
3467 * At least one of the levels in the search chain
3468 * potentially has a wildcard. For each such level,
3469 * we must see if there's a matching wildcard active
3470 * in the current version.
3472 result = find_wildcard(&search, &node, name);
3473 if (result == ISC_R_SUCCESS) {
3474 result = dns_name_copy(name, foundname, NULL);
3475 if (result != ISC_R_SUCCESS)
3480 else if (result != ISC_R_NOTFOUND)
3484 chain = search.chain;
3485 active = activeempty(&search, &chain, name);
3488 * If we're here, then the name does not exist, is not
3489 * beneath a zonecut, and there's no matching wildcard.
3491 if ((search.rbtversion->secure == dns_db_secure &&
3492 !search.rbtversion->havensec3) ||
3493 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3494 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3496 result = find_closest_nsec(&search, nodep, foundname,
3497 rdataset, sigrdataset, tree,
3498 search.rbtversion->secure);
3499 if (result == ISC_R_SUCCESS)
3500 result = active ? DNS_R_EMPTYNAME :
3503 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3505 } else if (result != ISC_R_SUCCESS)
3510 * We have found a node whose name is the desired name, or we
3511 * have matched a wildcard.
3514 if (search.zonecut != NULL) {
3516 * If we're beneath a zone cut, we don't want to look for
3517 * CNAMEs because they're not legitimate zone glue.
3519 cname_ok = ISC_FALSE;
3522 * The node may be a zone cut itself. If it might be one,
3523 * make sure we check for it later.
3525 * DS records live above the zone cut in ordinary zone so
3526 * we want to ignore any referral.
3528 * Stub zones don't have anything "above" the delgation so
3529 * we always return a referral.
3531 if (node->find_callback &&
3532 ((node != search.rbtdb->origin_node &&
3533 !dns_rdatatype_atparent(type)) ||
3534 IS_STUB(search.rbtdb)))
3535 maybe_zonecut = ISC_TRUE;
3539 * Certain DNSSEC types are not subject to CNAME matching
3540 * (RFC4035, section 2.5 and RFC3007).
3542 * We don't check for RRSIG, because we don't store RRSIG records
3545 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3546 cname_ok = ISC_FALSE;
3549 * We now go looking for rdata...
3552 lock = &search.rbtdb->node_locks[node->locknum].lock;
3553 NODE_LOCK(lock, isc_rwlocktype_read);
3557 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3561 empty_node = ISC_TRUE;
3562 for (header = node->data; header != NULL; header = header_next) {
3563 header_next = header->next;
3565 * Look for an active, extant rdataset.
3568 if (header->serial <= search.serial &&
3571 * Is this a "this rdataset doesn't
3574 if (NONEXISTENT(header))
3578 header = header->down;
3579 } while (header != NULL);
3580 if (header != NULL) {
3582 * We now know that there is at least one active
3583 * rdataset at this node.
3585 empty_node = ISC_FALSE;
3588 * Do special zone cut handling, if requested.
3590 if (maybe_zonecut &&
3591 header->type == dns_rdatatype_ns) {
3593 * We increment the reference count on node to
3594 * ensure that search->zonecut_rdataset will
3595 * still be valid later.
3597 new_reference(search.rbtdb, node);
3598 search.zonecut = node;
3599 search.zonecut_rdataset = header;
3600 search.zonecut_sigrdataset = NULL;
3601 search.need_cleanup = ISC_TRUE;
3602 maybe_zonecut = ISC_FALSE;
3603 at_zonecut = ISC_TRUE;
3605 * It is not clear if KEY should still be
3606 * allowed at the parent side of the zone
3607 * cut or not. It is needed for RFC3007
3608 * validated updates.
3610 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3611 && type != dns_rdatatype_nsec
3612 && type != dns_rdatatype_key) {
3614 * Glue is not OK, but any answer we
3615 * could return would be glue. Return
3621 if (found != NULL && foundsig != NULL)
3627 * If the NSEC3 record doesn't match the chain
3628 * we are using behave as if it isn't here.
3630 if (header->type == dns_rdatatype_nsec3 &&
3631 !matchparams(header, &search)) {
3632 NODE_UNLOCK(lock, isc_rwlocktype_read);
3636 * If we found a type we were looking for,
3639 if (header->type == type ||
3640 type == dns_rdatatype_any ||
3641 (header->type == dns_rdatatype_cname &&
3644 * We've found the answer!
3647 if (header->type == dns_rdatatype_cname &&
3650 * We may be finding a CNAME instead
3651 * of the desired type.
3653 * If we've already got the CNAME RRSIG,
3654 * use it, otherwise change sigtype
3655 * so that we find it.
3657 if (cnamesig != NULL)
3658 foundsig = cnamesig;
3661 RBTDB_RDATATYPE_SIGCNAME;
3664 * If we've got all we need, end the search.
3666 if (!maybe_zonecut && foundsig != NULL)
3668 } else if (header->type == sigtype) {
3670 * We've found the RRSIG rdataset for our
3671 * target type. Remember it.
3675 * If we've got all we need, end the search.
3677 if (!maybe_zonecut && found != NULL)
3679 } else if (header->type == dns_rdatatype_nsec &&
3680 !search.rbtversion->havensec3) {
3682 * Remember a NSEC rdataset even if we're
3683 * not specifically looking for it, because
3684 * we might need it later.
3686 nsecheader = header;
3687 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3688 !search.rbtversion->havensec3) {
3690 * If we need the NSEC rdataset, we'll also
3691 * need its signature.
3694 } else if (cname_ok &&
3695 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3697 * If we get a CNAME match, we'll also need
3707 * We have an exact match for the name, but there are no
3708 * active rdatasets in the desired version. That means that
3709 * this node doesn't exist in the desired version, and that
3710 * we really have a partial match.
3713 NODE_UNLOCK(lock, isc_rwlocktype_read);
3719 * If we didn't find what we were looking for...
3721 if (found == NULL) {
3722 if (search.zonecut != NULL) {
3724 * We were trying to find glue at a node beneath a
3725 * zone cut, but didn't.
3727 * Return the delegation.
3729 NODE_UNLOCK(lock, isc_rwlocktype_read);
3730 result = setup_delegation(&search, nodep, foundname,
3731 rdataset, sigrdataset);
3735 * The desired type doesn't exist.
3737 result = DNS_R_NXRRSET;
3738 if (search.rbtversion->secure == dns_db_secure &&
3739 !search.rbtversion->havensec3 &&
3740 (nsecheader == NULL || nsecsig == NULL)) {
3742 * The zone is secure but there's no NSEC,
3743 * or the NSEC has no signature!
3746 result = DNS_R_BADDB;
3750 NODE_UNLOCK(lock, isc_rwlocktype_read);
3751 result = find_closest_nsec(&search, nodep, foundname,
3752 rdataset, sigrdataset,
3754 search.rbtversion->secure);
3755 if (result == ISC_R_SUCCESS)
3756 result = DNS_R_EMPTYWILD;
3759 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3763 * There's no NSEC record, and we were told
3766 result = DNS_R_BADDB;
3769 if (nodep != NULL) {
3770 new_reference(search.rbtdb, node);
3773 if ((search.rbtversion->secure == dns_db_secure &&
3774 !search.rbtversion->havensec3) ||
3775 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3777 bind_rdataset(search.rbtdb, node, nsecheader,
3779 if (nsecsig != NULL)
3780 bind_rdataset(search.rbtdb, node,
3781 nsecsig, 0, sigrdataset);
3784 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3789 * We found what we were looking for, or we found a CNAME.
3792 if (type != found->type &&
3793 type != dns_rdatatype_any &&
3794 found->type == dns_rdatatype_cname) {
3796 * We weren't doing an ANY query and we found a CNAME instead
3797 * of the type we were looking for, so we need to indicate
3798 * that result to the caller.
3800 result = DNS_R_CNAME;
3801 } else if (search.zonecut != NULL) {
3803 * If we're beneath a zone cut, we must indicate that the
3804 * result is glue, unless we're actually at the zone cut
3805 * and the type is NSEC or KEY.
3807 if (search.zonecut == node) {
3809 * It is not clear if KEY should still be
3810 * allowed at the parent side of the zone
3811 * cut or not. It is needed for RFC3007
3812 * validated updates.
3814 if (type == dns_rdatatype_nsec ||
3815 type == dns_rdatatype_nsec3 ||
3816 type == dns_rdatatype_key)
3817 result = ISC_R_SUCCESS;
3818 else if (type == dns_rdatatype_any)
3819 result = DNS_R_ZONECUT;
3821 result = DNS_R_GLUE;
3823 result = DNS_R_GLUE;
3825 * We might have found data that isn't glue, but was occluded
3826 * by a dynamic update. If the caller cares about this, they
3827 * will have told us to validate glue.
3829 * XXX We should cache the glue validity state!
3831 if (result == DNS_R_GLUE &&
3832 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3833 !valid_glue(&search, foundname, type, node)) {
3834 NODE_UNLOCK(lock, isc_rwlocktype_read);
3835 result = setup_delegation(&search, nodep, foundname,
3836 rdataset, sigrdataset);
3841 * An ordinary successful query!
3843 result = ISC_R_SUCCESS;
3846 if (nodep != NULL) {
3848 new_reference(search.rbtdb, node);
3850 search.need_cleanup = ISC_FALSE;
3854 if (type != dns_rdatatype_any) {
3855 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3856 if (foundsig != NULL)
3857 bind_rdataset(search.rbtdb, node, foundsig, 0,
3862 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3865 NODE_UNLOCK(lock, isc_rwlocktype_read);
3868 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3871 * If we found a zonecut but aren't going to use it, we have to
3874 if (search.need_cleanup) {
3875 node = search.zonecut;
3876 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3878 NODE_LOCK(lock, isc_rwlocktype_read);
3879 decrement_reference(search.rbtdb, node, 0,
3880 isc_rwlocktype_read, isc_rwlocktype_none,
3882 NODE_UNLOCK(lock, isc_rwlocktype_read);
3886 closeversion(db, &version, ISC_FALSE);
3888 dns_rbtnodechain_reset(&search.chain);
/*
 * Zone-database implementation of the "find zone cut" method.
 *
 * The visible body performs no lookup: it raises FATAL_ERROR with the
 * message "zone_findzonecut() called!" and is followed by a return of
 * ISC_R_NOTIMPLEMENTED.  The parameters are unused (at least 'sigrdataset'
 * is explicitly marked so).
 *
 * NOTE(review): presumably zone cuts are only looked up this way on cache
 * databases (see cache_findzonecut()) -- confirm against the method table.
 */
3894 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3895 isc_stdtime_t now, dns_dbnode_t **nodep,
3896 dns_name_t *foundname,
3897 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3906 UNUSED(sigrdataset);
3908 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3910 return (ISC_R_NOTIMPLEMENTED);
/*
 * Tree-search callback used by cache_find(): inspect 'node' for a DNAME
 * (and RRSIG DNAME) rdataset while the search descends the tree.
 *
 * 'arg' is the rbtdb_search_t describing the search in progress; no zone
 * cut may have been recorded in it yet (search->zonecut == NULL).
 *
 * The node's header list is walked under the node lock (initially a read
 * lock).  Headers whose TTL is at or before search->now are treated as
 * stale: when the TTL is also older than search->now - RBTDB_VIRTUAL and
 * the write lock is held or can be obtained with NODE_TRYUPGRADE, the
 * header is either unlinked and freed (node reference count is 0) or
 * flagged RDATASET_ATTR_STALE.
 *
 * If a DNAME header is found and its trust is acceptable, a reference is
 * taken on the node, the node and headers are recorded in 'search',
 * search->need_cleanup is set, and the result is DNS_R_PARTIALMATCH.
 * Otherwise the result is DNS_R_CONTINUE.
 */
3914 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3915 rbtdb_search_t *search = arg;
3916 rdatasetheader_t *header, *header_prev, *header_next;
3917 rdatasetheader_t *dname_header, *sigdname_header;
3918 isc_result_t result;
3920 isc_rwlocktype_t locktype;
3924 REQUIRE(search->zonecut == NULL);
3927 * Keep compiler silent.
3931 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3932 locktype = isc_rwlocktype_read;
3933 NODE_LOCK(lock, locktype);
3936 * Look for a DNAME or RRSIG DNAME rdataset.
3938 dname_header = NULL;
3939 sigdname_header = NULL;
3941 for (header = node->data; header != NULL; header = header_next) {
3942 header_next = header->next;
3943 if (header->rdh_ttl <= search->now) {
3945 * This rdataset is stale. If no one else is
3946 * using the node, we can clean it up right
3947 * now, otherwise we mark it as stale, and
3948 * the node as dirty, so it will get cleaned
3951 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3952 (locktype == isc_rwlocktype_write ||
3953 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3955 * We update the node's status only when we
3956 * can get write access; otherwise, we leave
3957 * others to this work. Periodical cleaning
3958 * will eventually take the job as the last
3960 * We won't downgrade the lock, since other
3961 * rdatasets are probably stale, too.
3963 locktype = isc_rwlocktype_write;
3965 if (dns_rbtnode_refcurrent(node) == 0) {
3969 * header->down can be non-NULL if the
3970 * refcount has just decremented to 0
3971 * but decrement_reference() has not
3972 * performed clean_cache_node(), in
3973 * which case we need to purge the
3974 * stale headers first.
3976 mctx = search->rbtdb->common.mctx;
3977 clean_stale_headers(search->rbtdb,
3980 if (header_prev != NULL)
3984 node->data = header->next;
3985 free_rdataset(search->rbtdb, mctx,
3988 header->attributes |=
3989 RDATASET_ATTR_STALE;
3991 header_prev = header;
3994 header_prev = header;
3995 } else if (header->type == dns_rdatatype_dname &&
3997 dname_header = header;
3998 header_prev = header;
3999 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4001 sigdname_header = header;
4002 header_prev = header;
4004 header_prev = header;
/*
 * Use the DNAME only when its trust level is not "pending", or when the
 * caller explicitly allowed pending data with DNS_DBFIND_PENDINGOK.
 */
4007 if (dname_header != NULL &&
4008 (!DNS_TRUST_PENDING(dname_header->trust) ||
4009 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4011 * We increment the reference count on node to ensure that
4012 * search->zonecut_rdataset will still be valid later.
4014 new_reference(search->rbtdb, node);
4015 INSIST(!ISC_LINK_LINKED(node, deadlink));
4016 search->zonecut = node;
4017 search->zonecut_rdataset = dname_header;
4018 search->zonecut_sigrdataset = sigdname_header;
4019 search->need_cleanup = ISC_TRUE;
4020 result = DNS_R_PARTIALMATCH;
4022 result = DNS_R_CONTINUE;
4024 NODE_UNLOCK(lock, locktype);
/*
 * Starting at 'node' and moving up through the levels recorded in
 * search->chain, find the deepest node that holds an extant NS rdataset.
 *
 * The caller must hold the tree lock.  Each candidate node is examined
 * under its node lock (initially a read lock); stale headers encountered
 * on the way receive the same cleanup/mark-stale treatment used by the
 * other cache lookup routines in this file.
 *
 * When an NS rdataset is found:
 *  - 'foundname' (if non-NULL) is built first, from the node's name plus
 *    the names of the chain levels;
 *  - the result becomes DNS_R_DELEGATION;
 *  - a node reference is taken when 'nodep' is non-NULL;
 *  - the NS rdataset, and its RRSIG when present, are bound;
 *  - headers flagged by need_headerupdate() are passed to update_header()
 *    under the node write lock.
 *
 * 'result' starts as ISC_R_NOTFOUND.  While nothing has been found and
 * levels remain (i > 0), the search moves to search->chain.levels[i].
 */
4029 static inline isc_result_t
4030 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4031 dns_dbnode_t **nodep, dns_name_t *foundname,
4032 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4035 dns_rbtnode_t *level_node;
4036 rdatasetheader_t *header, *header_prev, *header_next;
4037 rdatasetheader_t *found, *foundsig;
4038 isc_result_t result = ISC_R_NOTFOUND;
4043 isc_rwlocktype_t locktype;
4046 * Caller must be holding the tree lock.
4049 rbtdb = search->rbtdb;
4050 i = search->chain.level_matches;
4053 locktype = isc_rwlocktype_read;
4054 lock = &rbtdb->node_locks[node->locknum].lock;
4055 NODE_LOCK(lock, locktype);
4058 * Look for NS and RRSIG NS rdatasets.
4063 for (header = node->data;
4065 header = header_next) {
4066 header_next = header->next;
4067 if (header->rdh_ttl <= search->now) {
4069 * This rdataset is stale. If no one else is
4070 * using the node, we can clean it up right
4071 * now, otherwise we mark it as stale, and
4072 * the node as dirty, so it will get cleaned
4075 if ((header->rdh_ttl <= search->now -
4077 (locktype == isc_rwlocktype_write ||
4078 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4080 * We update the node's status only
4081 * when we can get write access.
4083 locktype = isc_rwlocktype_write;
4085 if (dns_rbtnode_refcurrent(node)
4089 m = search->rbtdb->common.mctx;
4090 clean_stale_headers(
4093 if (header_prev != NULL)
4099 free_rdataset(rbtdb, m,
4102 header->attributes |=
4103 RDATASET_ATTR_STALE;
4105 header_prev = header;
4108 header_prev = header;
4109 } else if (EXISTS(header)) {
4111 * We've found an extant rdataset. See if
4112 * we're interested in it.
4114 if (header->type == dns_rdatatype_ns) {
4116 if (foundsig != NULL)
4118 } else if (header->type ==
4119 RBTDB_RDATATYPE_SIGNS) {
4124 header_prev = header;
4126 header_prev = header;
4129 if (found != NULL) {
4131 * If we have to set foundname, we do it before
4132 * anything else. If we were to set foundname after
4133 * we had set nodep or bound the rdataset, then we'd
4134 * have to undo that work if dns_name_concatenate()
4135 * failed. By setting foundname first, there's
4136 * nothing to undo if we have trouble.
4138 if (foundname != NULL) {
4139 dns_name_init(&name, NULL);
4140 dns_rbt_namefromnode(node, &name);
4141 result = dns_name_copy(&name, foundname, NULL);
4142 while (result == ISC_R_SUCCESS && i > 0) {
4144 level_node = search->chain.levels[i];
4145 dns_name_init(&name, NULL);
4146 dns_rbt_namefromnode(level_node,
4149 dns_name_concatenate(foundname,
4154 if (result != ISC_R_SUCCESS) {
4159 result = DNS_R_DELEGATION;
4160 if (nodep != NULL) {
4161 new_reference(search->rbtdb, node);
4164 bind_rdataset(search->rbtdb, node, found, search->now,
4166 if (foundsig != NULL)
4167 bind_rdataset(search->rbtdb, node, foundsig,
4168 search->now, sigrdataset);
/*
 * Headers flagged by need_headerupdate() are refreshed with
 * update_header(); the read lock is swapped for the write lock first
 * if the write lock is not already held.  The check is repeated after
 * relocking because the lock was released in between.
 */
4169 if (need_headerupdate(found, search->now) ||
4170 (foundsig != NULL &&
4171 need_headerupdate(foundsig, search->now))) {
4172 if (locktype != isc_rwlocktype_write) {
4173 NODE_UNLOCK(lock, locktype);
4174 NODE_LOCK(lock, isc_rwlocktype_write);
4175 locktype = isc_rwlocktype_write;
4177 if (need_headerupdate(found, search->now))
4178 update_header(search->rbtdb, found,
4180 if (foundsig != NULL &&
4181 need_headerupdate(foundsig, search->now)) {
4182 update_header(search->rbtdb, foundsig,
4189 NODE_UNLOCK(lock, locktype);
4191 if (found == NULL && i > 0) {
4193 node = search->chain.levels[i];
/*
 * Look for an NSEC rdataset (and its RRSIG) at the current position of
 * search->chain, stepping to the previous node in the chain for as long
 * as the node examined turns out to be empty.
 *
 * For each node the name and origin are obtained with
 * dns_rbtnodechain_current() and the header list is scanned under the
 * node lock.  Stale headers get the usual cleanup/mark-stale treatment.
 * Headers that are NONEXISTENT or whose base type is 0 are skipped; any
 * other header marks the node as non-empty.
 *
 * Outcomes, per the visible code:
 *  - NSEC found: name and origin are concatenated, the NSEC (and RRSIG if
 *    present) are bound, a node reference is taken, and the result is
 *    DNS_R_COVERINGNSEC.
 *  - Node non-empty but no NSEC: ISC_R_NOTFOUND.
 *  - Node empty: move to the previous node with dns_rbtnodechain_prev()
 *    and repeat while that succeeds.
 */
4203 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4204 isc_stdtime_t now, dns_name_t *foundname,
4205 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4207 dns_rbtnode_t *node;
4208 rdatasetheader_t *header, *header_next, *header_prev;
4209 rdatasetheader_t *found, *foundsig;
4210 isc_boolean_t empty_node;
4211 isc_result_t result;
4212 dns_fixedname_t fname, forigin;
4213 dns_name_t *name, *origin;
4214 rbtdb_rdatatype_t matchtype, sigmatchtype;
4216 isc_rwlocktype_t locktype;
4218 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4219 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4220 dns_rdatatype_nsec);
4224 dns_fixedname_init(&fname);
4225 name = dns_fixedname_name(&fname);
4226 dns_fixedname_init(&forigin);
4227 origin = dns_fixedname_name(&forigin);
4228 result = dns_rbtnodechain_current(&search->chain, name,
4230 if (result != ISC_R_SUCCESS)
4232 locktype = isc_rwlocktype_read;
4233 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4234 NODE_LOCK(lock, locktype);
4237 empty_node = ISC_TRUE;
4239 for (header = node->data;
4241 header = header_next) {
4242 header_next = header->next;
4243 if (header->rdh_ttl <= now) {
4245 * This rdataset is stale. If no one else is
4246 * using the node, we can clean it up right
4247 * now, otherwise we mark it as stale, and the
4248 * node as dirty, so it will get cleaned up
4251 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4252 (locktype == isc_rwlocktype_write ||
4253 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4255 * We update the node's status only
4256 * when we can get write access.
4258 locktype = isc_rwlocktype_write;
4260 if (dns_rbtnode_refcurrent(node)
4264 m = search->rbtdb->common.mctx;
4265 clean_stale_headers(
4268 if (header_prev != NULL)
4272 node->data = header->next;
4273 free_rdataset(search->rbtdb, m,
4276 header->attributes |=
4277 RDATASET_ATTR_STALE;
4279 header_prev = header;
4282 header_prev = header;
4285 if (NONEXISTENT(header) ||
4286 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4287 header_prev = header;
4290 empty_node = ISC_FALSE;
4291 if (header->type == matchtype)
4293 else if (header->type == sigmatchtype)
4295 header_prev = header;
4297 if (found != NULL) {
4298 result = dns_name_concatenate(name, origin,
4300 if (result != ISC_R_SUCCESS)
4302 bind_rdataset(search->rbtdb, node, found,
4304 if (foundsig != NULL)
4305 bind_rdataset(search->rbtdb, node, foundsig,
4307 new_reference(search->rbtdb, node);
4309 result = DNS_R_COVERINGNSEC;
4310 } else if (!empty_node) {
4311 result = ISC_R_NOTFOUND;
4313 result = dns_rbtnodechain_prev(&search->chain, NULL,
4316 NODE_UNLOCK(lock, locktype);
4317 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * Cache-database implementation of the find method: look up 'name' and
 * 'type' in the cache tree.
 *
 * Outline, as far as the visible code shows:
 *  - 'db' must be a valid rbtdb and 'version' must be NULL.
 *  - 'now' is set from isc_stdtime_get().  NOTE(review): presumably only
 *    when the caller passed 0; the guarding condition is not visible here.
 *  - The tree is searched under the tree read lock, with
 *    cache_zonecut_callback() checking callback nodes for DNAME rdatasets.
 *  - On DNS_R_PARTIALMATCH: if DNS_DBFIND_COVERINGNSEC is set,
 *    find_coveringnsec() is tried first; otherwise a zone cut recorded by
 *    the callback is turned into a result by setup_delegation(), and
 *    failing that find_deepest_zonecut() is used.
 *  - On an exact match the node's headers are scanned under the node lock.
 *    Stale headers are freed or flagged RDATASET_ATTR_STALE.  Extant
 *    headers are classified as: the answer (or a CNAME, unless 'type' is
 *    KEY or NSEC), the RRSIG for the target type, a negative cache entry,
 *    NS, RRSIG NS, or RRSIG CNAME.
 *  - If there is no usable answer (none found, or its trust level is not
 *    permitted by 'options'), an NS rdataset at this node yields
 *    DNS_R_DELEGATION; otherwise the deepest zone cut is looked up.
 *  - A usable answer yields DNS_R_NCACHENXDOMAIN, DNS_R_NCACHENXRRSET,
 *    DNS_R_CNAME, or ISC_R_SUCCESS.
 *  - Headers flagged by need_headerupdate() are passed to update_header()
 *    under the node write lock before the node is unlocked.
 *  - On exit, if search.need_cleanup is still set, the reference held on
 *    the recorded zone cut node is released.
 *
 * NOTE(review): in the CNAME branch below 'foundsig = cnamesig' appears
 * both inside the 'cnamesig != NULL' test and again afterwards; the lines
 * around it are not fully visible here, so confirm whether the second
 * assignment is intentional.
 */
4322 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4323 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4324 dns_dbnode_t **nodep, dns_name_t *foundname,
4325 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4327 dns_rbtnode_t *node = NULL;
4328 isc_result_t result;
4329 rbtdb_search_t search;
4330 isc_boolean_t cname_ok = ISC_TRUE;
4331 isc_boolean_t empty_node;
4333 isc_rwlocktype_t locktype;
4334 rdatasetheader_t *header, *header_prev, *header_next;
4335 rdatasetheader_t *found, *nsheader;
4336 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4337 rdatasetheader_t *update, *updatesig;
4338 rbtdb_rdatatype_t sigtype, negtype;
4342 search.rbtdb = (dns_rbtdb_t *)db;
4344 REQUIRE(VALID_RBTDB(search.rbtdb));
4345 REQUIRE(version == NULL);
4348 isc_stdtime_get(&now);
4350 search.rbtversion = NULL;
4352 search.options = options;
4353 search.copy_name = ISC_FALSE;
4354 search.need_cleanup = ISC_FALSE;
4355 search.wild = ISC_FALSE;
4356 search.zonecut = NULL;
4357 dns_fixedname_init(&search.zonecut_name);
4358 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4363 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4366 * Search down from the root of the tree. If, while going down, we
4367 * encounter a callback node, cache_zonecut_callback() will search the
4368 * rdatasets at the zone cut for a DNAME rdataset.
4370 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4371 &search.chain, DNS_RBTFIND_EMPTYDATA,
4372 cache_zonecut_callback, &search);
4374 if (result == DNS_R_PARTIALMATCH) {
4375 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4376 result = find_coveringnsec(&search, nodep, now,
4377 foundname, rdataset,
4379 if (result == DNS_R_COVERINGNSEC)
4382 if (search.zonecut != NULL) {
4383 result = setup_delegation(&search, nodep, foundname,
4384 rdataset, sigrdataset);
4388 result = find_deepest_zonecut(&search, node, nodep,
4389 foundname, rdataset,
4393 } else if (result != ISC_R_SUCCESS)
4397 * Certain DNSSEC types are not subject to CNAME matching
4398 * (RFC4035, section 2.5 and RFC3007).
4400 * We don't check for RRSIG, because we don't store RRSIG records
4403 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4404 cname_ok = ISC_FALSE;
4407 * We now go looking for rdata...
4410 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4411 locktype = isc_rwlocktype_read;
4412 NODE_LOCK(lock, locktype);
4416 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4417 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4421 empty_node = ISC_TRUE;
4423 for (header = node->data; header != NULL; header = header_next) {
4424 header_next = header->next;
4425 if (header->rdh_ttl <= now) {
4427 * This rdataset is stale. If no one else is using the
4428 * node, we can clean it up right now, otherwise we
4429 * mark it as stale, and the node as dirty, so it will
4430 * get cleaned up later.
4432 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4433 (locktype == isc_rwlocktype_write ||
4434 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4436 * We update the node's status only when we
4437 * can get write access.
4439 locktype = isc_rwlocktype_write;
4441 if (dns_rbtnode_refcurrent(node) == 0) {
4444 mctx = search.rbtdb->common.mctx;
4445 clean_stale_headers(search.rbtdb, mctx,
4447 if (header_prev != NULL)
4451 node->data = header->next;
4452 free_rdataset(search.rbtdb, mctx,
4455 header->attributes |=
4456 RDATASET_ATTR_STALE;
4458 header_prev = header;
4461 header_prev = header;
4462 } else if (EXISTS(header)) {
4464 * We now know that there is at least one active
4465 * non-stale rdataset at this node.
4467 empty_node = ISC_FALSE;
4470 * If we found a type we were looking for, remember
4473 if (header->type == type ||
4474 (type == dns_rdatatype_any &&
4475 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4476 (cname_ok && header->type ==
4477 dns_rdatatype_cname)) {
4479 * We've found the answer.
4482 if (header->type == dns_rdatatype_cname &&
4486 * If we've already got the CNAME RRSIG,
4487 * use it, otherwise change sigtype
4488 * so that we find it.
4490 if (cnamesig != NULL)
4491 foundsig = cnamesig;
4494 RBTDB_RDATATYPE_SIGCNAME;
4495 foundsig = cnamesig;
4497 } else if (header->type == sigtype) {
4499 * We've found the RRSIG rdataset for our
4500 * target type. Remember it.
4503 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4504 header->type == negtype) {
4506 * We've found a negative cache entry.
4509 } else if (header->type == dns_rdatatype_ns) {
4511 * Remember a NS rdataset even if we're
4512 * not specifically looking for it, because
4513 * we might need it later.
4516 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4518 * If we need the NS rdataset, we'll also
4519 * need its signature.
4522 } else if (cname_ok &&
4523 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4525 * If we get a CNAME match, we'll also need
4530 header_prev = header;
4532 header_prev = header;
4537 * We have an exact match for the name, but there are no
4538 * extant rdatasets. That means that this node doesn't
4539 * meaningfully exist, and that we really have a partial match.
4541 NODE_UNLOCK(lock, locktype);
4546 * If we didn't find what we were looking for...
4548 if (found == NULL ||
4549 (DNS_TRUST_ADDITIONAL(found->trust) &&
4550 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4551 (found->trust == dns_trust_glue &&
4552 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4553 (DNS_TRUST_PENDING(found->trust) &&
4554 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4556 * If there is an NS rdataset at this node, then this is the
4559 if (nsheader != NULL) {
4560 if (nodep != NULL) {
4561 new_reference(search.rbtdb, node);
4562 INSIST(!ISC_LINK_LINKED(node, deadlink));
4565 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4567 if (need_headerupdate(nsheader, search.now))
4569 if (nssig != NULL) {
4570 bind_rdataset(search.rbtdb, node, nssig,
4571 search.now, sigrdataset);
4572 if (need_headerupdate(nssig, search.now))
4575 result = DNS_R_DELEGATION;
4580 * Go find the deepest zone cut.
4582 NODE_UNLOCK(lock, locktype);
4587 * We found what we were looking for, or we found a CNAME.
4590 if (nodep != NULL) {
4591 new_reference(search.rbtdb, node);
4592 INSIST(!ISC_LINK_LINKED(node, deadlink));
4596 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4598 * We found a negative cache entry.
4600 if (NXDOMAIN(found))
4601 result = DNS_R_NCACHENXDOMAIN;
4603 result = DNS_R_NCACHENXRRSET;
4604 } else if (type != found->type &&
4605 type != dns_rdatatype_any &&
4606 found->type == dns_rdatatype_cname) {
4608 * We weren't doing an ANY query and we found a CNAME instead
4609 * of the type we were looking for, so we need to indicate
4610 * that result to the caller.
4612 result = DNS_R_CNAME;
4615 * An ordinary successful query!
4617 result = ISC_R_SUCCESS;
4620 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4621 result == DNS_R_NCACHENXRRSET) {
4622 bind_rdataset(search.rbtdb, node, found, search.now,
4624 if (need_headerupdate(found, search.now))
4626 if (!NEGATIVE(found) && foundsig != NULL) {
4627 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4629 if (need_headerupdate(foundsig, search.now))
4630 updatesig = foundsig;
/*
 * If any header was flagged for update, swap the read lock for the node
 * write lock (when not already held) and re-check need_headerupdate()
 * before calling update_header(), since the lock was dropped in between.
 */
4635 if ((update != NULL || updatesig != NULL) &&
4636 locktype != isc_rwlocktype_write) {
4637 NODE_UNLOCK(lock, locktype);
4638 NODE_LOCK(lock, isc_rwlocktype_write);
4639 locktype = isc_rwlocktype_write;
4641 if (update != NULL && need_headerupdate(update, search.now))
4642 update_header(search.rbtdb, update, search.now);
4643 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4644 update_header(search.rbtdb, updatesig, search.now);
4646 NODE_UNLOCK(lock, locktype);
4649 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4652 * If we found a zonecut but aren't going to use it, we have to
4655 if (search.need_cleanup) {
4656 node = search.zonecut;
4657 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4659 NODE_LOCK(lock, isc_rwlocktype_read);
4660 decrement_reference(search.rbtdb, node, 0,
4661 isc_rwlocktype_read, isc_rwlocktype_none,
4663 NODE_UNLOCK(lock, isc_rwlocktype_read);
4666 dns_rbtnodechain_reset(&search.chain);
/*
 * Cache-database implementation of the "find zone cut" method: find the
 * NS rdataset (and RRSIG NS, if cached) at 'name' or at its deepest
 * known ancestor in the cache.
 *
 * Outline, as far as the visible code shows:
 *  - 'now' is set from isc_stdtime_get().  NOTE(review): presumably only
 *    when the caller passed 0; the guarding condition is not visible here.
 *  - The tree is searched under the tree read lock without a callback;
 *    DNS_DBFIND_NOEXACT is mapped onto DNS_RBTFIND_NOEXACT.
 *  - On DNS_R_PARTIALMATCH, find_deepest_zonecut() does the work.
 *  - On an exact match the node's headers are scanned under the node lock
 *    for NS and RRSIG NS, with stale headers freed or flagged
 *    RDATASET_ATTR_STALE as in cache_find().
 *  - If NS is found, a node reference is taken when 'nodep' is non-NULL,
 *    the rdatasets are bound, and headers flagged by need_headerupdate()
 *    are passed to update_header() under the node write lock.
 *  - search.need_cleanup must still be false at exit (INSIST).
 *  - A DNS_R_DELEGATION result is reported to the caller as
 *    ISC_R_SUCCESS.
 */
4672 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4673 isc_stdtime_t now, dns_dbnode_t **nodep,
4674 dns_name_t *foundname,
4675 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4677 dns_rbtnode_t *node = NULL;
4679 isc_result_t result;
4680 rbtdb_search_t search;
4681 rdatasetheader_t *header, *header_prev, *header_next;
4682 rdatasetheader_t *found, *foundsig;
4683 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4684 isc_rwlocktype_t locktype;
4686 search.rbtdb = (dns_rbtdb_t *)db;
4688 REQUIRE(VALID_RBTDB(search.rbtdb));
4691 isc_stdtime_get(&now);
4693 search.rbtversion = NULL;
4695 search.options = options;
4696 search.copy_name = ISC_FALSE;
4697 search.need_cleanup = ISC_FALSE;
4698 search.wild = ISC_FALSE;
4699 search.zonecut = NULL;
4700 dns_fixedname_init(&search.zonecut_name);
4701 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4704 if ((options & DNS_DBFIND_NOEXACT) != 0)
4705 rbtoptions |= DNS_RBTFIND_NOEXACT;
4707 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4710 * Search down from the root of the tree.
4712 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4713 &search.chain, rbtoptions, NULL, &search);
4715 if (result == DNS_R_PARTIALMATCH) {
4717 result = find_deepest_zonecut(&search, node, nodep, foundname,
4718 rdataset, sigrdataset);
4720 } else if (result != ISC_R_SUCCESS)
4724 * We now go looking for an NS rdataset at the node.
4727 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4728 locktype = isc_rwlocktype_read;
4729 NODE_LOCK(lock, locktype);
4734 for (header = node->data; header != NULL; header = header_next) {
4735 header_next = header->next;
4736 if (header->rdh_ttl <= now) {
4738 * This rdataset is stale. If no one else is using the
4739 * node, we can clean it up right now, otherwise we
4740 * mark it as stale, and the node as dirty, so it will
4741 * get cleaned up later.
4743 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4744 (locktype == isc_rwlocktype_write ||
4745 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4747 * We update the node's status only when we
4748 * can get write access.
4750 locktype = isc_rwlocktype_write;
4752 if (dns_rbtnode_refcurrent(node) == 0) {
4755 mctx = search.rbtdb->common.mctx;
4756 clean_stale_headers(search.rbtdb, mctx,
4758 if (header_prev != NULL)
4762 node->data = header->next;
4763 free_rdataset(search.rbtdb, mctx,
4766 header->attributes |=
4767 RDATASET_ATTR_STALE;
4769 header_prev = header;
4772 header_prev = header;
4773 } else if (EXISTS(header)) {
4775 * If we found a type we were looking for, remember
4778 if (header->type == dns_rdatatype_ns) {
4780 * Remember a NS rdataset even if we're
4781 * not specifically looking for it, because
4782 * we might need it later.
4785 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4787 * If we need the NS rdataset, we'll also
4788 * need its signature.
4792 header_prev = header;
4794 header_prev = header;
4797 if (found == NULL) {
4799 * No NS records here.
4801 NODE_UNLOCK(lock, locktype);
4805 if (nodep != NULL) {
4806 new_reference(search.rbtdb, node);
4807 INSIST(!ISC_LINK_LINKED(node, deadlink));
4811 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4812 if (foundsig != NULL)
4813 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4816 if (need_headerupdate(found, search.now) ||
4817 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
4818 if (locktype != isc_rwlocktype_write) {
4819 NODE_UNLOCK(lock, locktype);
4820 NODE_LOCK(lock, isc_rwlocktype_write);
4821 locktype = isc_rwlocktype_write;
4823 if (need_headerupdate(found, search.now))
4824 update_header(search.rbtdb, found, search.now);
4825 if (foundsig != NULL &&
4826 need_headerupdate(foundsig, search.now)) {
4827 update_header(search.rbtdb, foundsig, search.now);
4831 NODE_UNLOCK(lock, locktype);
4834 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4836 INSIST(!search.need_cleanup);
4838 dns_rbtnodechain_reset(&search.chain);
/* Callers of this method see a located delegation as plain success. */
4840 if (result == DNS_R_DELEGATION)
4841 result = ISC_R_SUCCESS;
/*
 * Attach to node 'source': increment its reference count while holding
 * the node's lock via NODE_STRONGLOCK.
 *
 * Requires a valid rbtdb and that 'targetp' is non-NULL and points to a
 * NULL pointer.  NOTE(review): the assignment to *targetp is not visible
 * in this view -- presumably it receives 'source'; confirm.
 */
4847 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4848 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4849 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4852 REQUIRE(VALID_RBTDB(rbtdb));
4853 REQUIRE(targetp != NULL && *targetp == NULL);
4855 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4856 dns_rbtnode_refincrement(node, &refs);
4858 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Detach from the node referenced by *targetp.
 *
 * The node's reference is dropped with decrement_reference() under the
 * node read lock.  If that call returns true and the node lock bucket
 * has no remaining references and is marked 'exiting', the bucket is
 * considered inactive.  The database lock is then taken for writing and,
 * if rbtdb->active is 0, the database is to be freed: the origin name
 * (or "<UNKNOWN>") is logged at debug level 1 and free_rbtdb() is called.
 *
 * NOTE(review): the tests on 'inactive' and 'want_free', the update of
 * rbtdb->active and the clearing of *targetp are not visible in this
 * view -- confirm against the full source.
 */
4864 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4865 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4866 dns_rbtnode_t *node;
4867 isc_boolean_t want_free = ISC_FALSE;
4868 isc_boolean_t inactive = ISC_FALSE;
4869 rbtdb_nodelock_t *nodelock;
4871 REQUIRE(VALID_RBTDB(rbtdb));
4872 REQUIRE(targetp != NULL && *targetp != NULL);
4874 node = (dns_rbtnode_t *)(*targetp);
4875 nodelock = &rbtdb->node_locks[node->locknum];
4877 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4879 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4880 isc_rwlocktype_none, ISC_FALSE)) {
4881 if (isc_refcount_current(&nodelock->references) == 0 &&
4882 nodelock->exiting) {
4883 inactive = ISC_TRUE;
4887 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4892 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4894 if (rbtdb->active == 0)
4895 want_free = ISC_TRUE;
4896 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4898 char buf[DNS_NAME_FORMATSIZE];
4899 if (dns_name_dynamic(&rbtdb->common.origin))
4900 dns_name_format(&rbtdb->common.origin, buf,
4903 strcpy(buf, "<UNKNOWN>");
4904 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4905 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4906 "calling free_rbtdb(%s)", buf);
4907 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Expire rdatasets at 'node'.
 *
 * The caller must hold a tree lock.  'now' is set from
 * isc_stdtime_get().  NOTE(review): presumably only when the caller
 * passed 0; the guarding condition is not visible here.
 *
 * When the memory context is over its limit, a random value decides
 * whether to force expiry: forced only if the node has no 'down' subtree
 * and val % 4 == 0.  Logging of "overmem cache" messages is enabled in
 * that case if the log level would be written.
 *
 * Under the node write lock every header is examined:
 *  - TTL at or before now - RBTDB_VIRTUAL: flagged RDATASET_ATTR_STALE;
 *  - otherwise, if expiry is forced and the header is not RETAIN()ed:
 *    TTL set to 0 and flagged RDATASET_ATTR_STALE;
 *  - otherwise the header is kept (and optionally logged as saved or
 *    reprieved).
 *
 * Always returns ISC_R_SUCCESS.
 */
4913 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4914 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4915 dns_rbtnode_t *rbtnode = node;
4916 rdatasetheader_t *header;
4917 isc_boolean_t force_expire = ISC_FALSE;
4919 * These are the category and module used by the cache cleaner.
4921 isc_boolean_t log = ISC_FALSE;
4922 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4923 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4924 int level = ISC_LOG_DEBUG(2);
4925 char printname[DNS_NAME_FORMATSIZE];
4927 REQUIRE(VALID_RBTDB(rbtdb));
4930 * Caller must hold a tree lock.
4934 isc_stdtime_get(&now);
4936 if (isc_mem_isovermem(rbtdb->common.mctx)) {
4939 isc_random_get(&val);
4941 * XXXDCL Could stand to have a better policy, like LRU.
4943 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4946 * Note that 'log' can be true IFF overmem is also true.
4947 * overmem can currently only be true for cache
4948 * databases -- hence all of the "overmem cache" log strings.
4950 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4952 isc_log_write(dns_lctx, category, module, level,
4953 "overmem cache: %s %s",
4954 force_expire ? "FORCE" : "check",
4955 dns_rbt_formatnodename(rbtnode,
4957 sizeof(printname)));
4961 * We may not need write access, but this code path is not performance
4962 * sensitive, so it should be okay to always lock as a writer.
4964 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4965 isc_rwlocktype_write);
4967 for (header = rbtnode->data; header != NULL; header = header->next)
4968 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4970 * We don't check if refcurrent(rbtnode) == 0 and try
4971 * to free like we do in cache_find(), because
4972 * refcurrent(rbtnode) must be non-zero. This is so
4973 * because 'node' is an argument to the function.
4975 header->attributes |= RDATASET_ATTR_STALE;
4978 isc_log_write(dns_lctx, category, module,
4979 level, "overmem cache: stale %s",
4981 } else if (force_expire) {
4982 if (! RETAIN(header)) {
4983 set_ttl(rbtdb, header, 0);
4984 header->attributes |= RDATASET_ATTR_STALE;
4987 isc_log_write(dns_lctx, category, module,
4988 level, "overmem cache: "
4989 "reprieve by RETAIN() %s",
4992 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
4993 isc_log_write(dns_lctx, category, module, level,
4994 "overmem cache: saved %s", printname);
4996 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4997 isc_rwlocktype_write);
4999 return (ISC_R_SUCCESS);
/*
 * Database "overmem" method, invoked with the new over-memory state.
 * As the comment in the body states, this implementation is an empty
 * callback.
 */
5003 overmem(dns_db_t *db, isc_boolean_t overmem) {
5004 /* This is an empty callback. See adb.c:water() */
/*
 * Debugging aid: write a description of 'node' to 'out'.
 *
 * Under the node read lock this prints the node pointer, its current
 * reference count and lock number.  If the node has data, each top-level
 * rdataset header is visited and its 'down' chain is walked, printing
 * the type and a serial/ttl/trust/attributes line per header.  A node
 * without data is reported as "(empty)".
 */
5013 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5014 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5015 dns_rbtnode_t *rbtnode = node;
5016 isc_boolean_t first;
5018 REQUIRE(VALID_RBTDB(rbtdb));
5020 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5021 isc_rwlocktype_read);
5023 fprintf(out, "node %p, %u references, locknum = %u\n",
5024 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5026 if (rbtnode->data != NULL) {
5027 rdatasetheader_t *current, *top_next;
5029 for (current = rbtnode->data; current != NULL;
5030 current = top_next) {
/* Save the sibling link now: 'current' is reused to walk the down chain. */
5031 top_next = current->next;
5033 fprintf(out, "\ttype %u", current->type);
5039 "\tserial = %lu, ttl = %u, "
5040 "trust = %u, attributes = %u, "
5042 (unsigned long)current->serial,
5045 current->attributes,
5047 current = current->down;
5048 } while (current != NULL);
5051 fprintf(out, "(empty)\n");
5053 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5054 isc_rwlocktype_read);
/*
 * Create a database iterator for 'db' and return it through 'iteratorp'.
 *
 * The iterator is allocated from the database's memory context
 * (ISC_R_NOMEMORY on failure), attached to 'db', and initialized in the
 * paused state with no tree lock held and no current node.  'options'
 * controls:
 *  - DNS_DB_RELATIVENAMES: whether names are reported as relative;
 *  - DNS_DB_NSEC3ONLY / DNS_DB_NONSEC3: which of the two node chains are
 *    iterated; with NSEC3ONLY the iterator starts on the NSEC3 chain,
 *    otherwise on the main chain.
 *
 * Returns ISC_R_SUCCESS once *iteratorp has been set.
 */
5058 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5060 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5061 rbtdb_dbiterator_t *rbtdbiter;
5063 REQUIRE(VALID_RBTDB(rbtdb));
5065 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5066 if (rbtdbiter == NULL)
5067 return (ISC_R_NOMEMORY);
5069 rbtdbiter->common.methods = &dbiterator_methods;
5070 rbtdbiter->common.db = NULL;
5071 dns_db_attach(db, &rbtdbiter->common.db);
5072 rbtdbiter->common.relative_names =
5073 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5074 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5075 rbtdbiter->common.cleaning = ISC_FALSE;
5076 rbtdbiter->paused = ISC_TRUE;
5077 rbtdbiter->tree_locked = isc_rwlocktype_none;
5078 rbtdbiter->result = ISC_R_SUCCESS;
5079 dns_fixedname_init(&rbtdbiter->name);
5080 dns_fixedname_init(&rbtdbiter->origin);
5081 rbtdbiter->node = NULL;
5082 rbtdbiter->delete = 0;
5083 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5084 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5085 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5086 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5087 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5088 if (rbtdbiter->nsec3only)
5089 rbtdbiter->current = &rbtdbiter->nsec3chain;
5091 rbtdbiter->current = &rbtdbiter->chain;
5093 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5095 return (ISC_R_SUCCESS);
/*
 * Zone-database implementation of findrdataset: locate the rdataset of
 * 'type' (covering 'covers') at 'node' as seen by 'version', together
 * with its RRSIG rdataset when one exists.
 *
 * 'type' must not be dns_rdatatype_any.  When 'version' is NULL the
 * current version is opened for the duration of the call and closed
 * before returning.
 *
 * Under the node read lock, each top-level header's 'down' chain is
 * walked to find the entry visible at the version's serial; headers that
 * record non-existence are not treated as data.  Matching headers are
 * remembered and bound to 'rdataset' / 'sigrdataset'.
 *
 * Returns ISC_R_SUCCESS if the rdataset was found, ISC_R_NOTFOUND
 * otherwise.
 */
5099 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5100 dns_rdatatype_t type, dns_rdatatype_t covers,
5101 isc_stdtime_t now, dns_rdataset_t *rdataset,
5102 dns_rdataset_t *sigrdataset)
5104 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5105 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5106 rdatasetheader_t *header, *header_next, *found, *foundsig;
5107 rbtdb_serial_t serial;
5108 rbtdb_version_t *rbtversion = version;
5109 isc_boolean_t close_version = ISC_FALSE;
5110 rbtdb_rdatatype_t matchtype, sigmatchtype;
5112 REQUIRE(VALID_RBTDB(rbtdb));
5113 REQUIRE(type != dns_rdatatype_any);
5115 if (rbtversion == NULL) {
5116 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5117 close_version = ISC_TRUE;
5119 serial = rbtversion->serial;
5122 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5123 isc_rwlocktype_read);
5127 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5129 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5133 for (header = rbtnode->data; header != NULL; header = header_next) {
5134 header_next = header->next;
5136 if (header->serial <= serial &&
5139 * Is this a "this rdataset doesn't
5142 if (NONEXISTENT(header))
5146 header = header->down;
5147 } while (header != NULL);
5148 if (header != NULL) {
5150 * We have an active, extant rdataset. If it's a
5151 * type we're looking for, remember it.
5153 if (header->type == matchtype) {
5155 if (foundsig != NULL)
5157 } else if (header->type == sigmatchtype) {
5164 if (found != NULL) {
5165 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5166 if (foundsig != NULL)
5167 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5171 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5172 isc_rwlocktype_read);
5175 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5179 return (ISC_R_NOTFOUND);
5181 return (ISC_R_SUCCESS);
/*
 * Cache-database implementation of findrdataset: locate the rdataset of
 * 'type' (covering 'covers') at 'node', or a negative cache entry for
 * it, together with the RRSIG rdataset when one exists.
 *
 * 'type' must not be dns_rdatatype_any.  'now' is set from
 * isc_stdtime_get().  NOTE(review): presumably only when the caller
 * passed 0; the guarding condition is not visible here.
 *
 * Under the node lock (initially a read lock) every header is examined.
 * Headers whose TTL has passed are not used; those older than
 * now - RBTDB_VIRTUAL are flagged RDATASET_ATTR_STALE when the write
 * lock is held or can be obtained.  They are never freed here because
 * the caller holds a reference to the node.  Extant headers matching the
 * requested type, a negative entry (NCACHEANY or the negative type), or
 * the RRSIG type are remembered.
 *
 * Returns ISC_R_NOTFOUND when nothing matched; DNS_R_NCACHENXDOMAIN or
 * DNS_R_NCACHENXRRSET for a negative cache entry; otherwise the initial
 * ISC_R_SUCCESS.  The RRSIG is bound only for non-negative answers.
 */
5185 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5186 dns_rdatatype_t type, dns_rdatatype_t covers,
5187 isc_stdtime_t now, dns_rdataset_t *rdataset,
5188 dns_rdataset_t *sigrdataset)
5190 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5191 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5192 rdatasetheader_t *header, *header_next, *found, *foundsig;
5193 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5194 isc_result_t result;
5196 isc_rwlocktype_t locktype;
5198 REQUIRE(VALID_RBTDB(rbtdb));
5199 REQUIRE(type != dns_rdatatype_any);
5203 result = ISC_R_SUCCESS;
5206 isc_stdtime_get(&now);
5208 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5209 locktype = isc_rwlocktype_read;
5210 NODE_LOCK(lock, locktype);
5214 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5215 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5217 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5221 for (header = rbtnode->data; header != NULL; header = header_next) {
5222 header_next = header->next;
5223 if (header->rdh_ttl <= now) {
5224 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5225 (locktype == isc_rwlocktype_write ||
5226 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5228 * We update the node's status only when we
5229 * can get write access.
5231 locktype = isc_rwlocktype_write;
5234 * We don't check if refcurrent(rbtnode) == 0
5235 * and try to free like we do in cache_find(),
5236 * because refcurrent(rbtnode) must be
5237 * non-zero. This is so because 'node' is an
5238 * argument to the function.
5240 header->attributes |= RDATASET_ATTR_STALE;
5243 } else if (EXISTS(header)) {
5244 if (header->type == matchtype)
5246 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5247 header->type == negtype)
5249 else if (header->type == sigmatchtype)
5253 if (found != NULL) {
5254 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5255 if (!NEGATIVE(found) && foundsig != NULL)
5256 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5260 NODE_UNLOCK(lock, locktype);
5263 return (ISC_R_NOTFOUND);
5265 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5267 * We found a negative cache entry.
5269 if (NXDOMAIN(found))
5270 result = DNS_R_NCACHENXDOMAIN;
5272 result = DNS_R_NCACHENXRRSET;
/*
 * allrdatasets: create an rdataset iterator for 'node' and return it
 * through '*iteratorp'.
 *
 * An rbtdb_rdatasetiter_t is allocated from the database's memory context
 * (ISC_R_NOMEMORY on failure).  When the database does not carry
 * DNS_DBATTR_CACHE, a version is involved: a NULL 'version' is filled in
 * by a call that receives &rbtversion, and a reference is taken on
 * rbtversion->references.  The iterator's common fields record the db,
 * node, version and 'now'; the node's reference count is incremented under
 * the strong node lock and the iterator's 'current' pointer starts NULL.
 *
 * Returns ISC_R_SUCCESS once the iterator has been stored in '*iteratorp'.
 *
 * NOTE(review): the branch structure around the version handling and the
 * isc_stdtime_get() call is only partly visible here -- confirm which
 * statements belong to the cache branch and which to the zone branch.
 */
5279 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5280 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5282 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5283 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5284 rbtdb_version_t *rbtversion = version;
5285 rbtdb_rdatasetiter_t *iterator;
5288 REQUIRE(VALID_RBTDB(rbtdb));
5290 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5291 if (iterator == NULL)
5292 return (ISC_R_NOMEMORY);
5294 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5296 if (rbtversion == NULL)
5298 (dns_dbversion_t **) (void *)(&rbtversion));
5302 isc_refcount_increment(&rbtversion->references,
5308 isc_stdtime_get(&now);
/* Fill in the generic iterator header. */
5312 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5313 iterator->common.methods = &rdatasetiter_methods;
5314 iterator->common.db = db;
5315 iterator->common.node = node;
5316 iterator->common.version = (dns_dbversion_t *)rbtversion;
5317 iterator->common.now = now;
/* The node reference is taken while holding the strong node lock. */
5319 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5321 dns_rbtnode_refincrement(rbtnode, &refs);
5324 iterator->current = NULL;
5326 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5328 *iteratorp = (dns_rdatasetiter_t *)iterator;
5330 return (ISC_R_SUCCESS);
/*
 * cname_and_other_data: scan the rdataset headers at 'node' for a CNAME
 * and for "other data" that are both active in the version identified by
 * 'serial'.
 *
 * For each top-level header the ->down chain is followed to the first
 * entry with header->serial <= serial; an entry that is NONEXISTENT is
 * treated specially (the handling line is not visible here).  Types whose
 * base type is KEY, SIG, NSEC or RRSIG are not counted as "other data".
 *
 * As stated in the body, the caller must hold the node lock.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably ISC_TRUE is returned once both 'cname' and 'other_data' are
 * set -- confirm against the full file.
 */
5333 static isc_boolean_t
5334 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5335 rdatasetheader_t *header, *header_next;
5336 isc_boolean_t cname, other_data;
5337 dns_rdatatype_t rdtype;
5340 * The caller must hold the node lock.
5344 * Look for CNAME and "other data" rdatasets active in our version.
5347 other_data = ISC_FALSE;
5348 for (header = node->data; header != NULL; header = header_next) {
5349 header_next = header->next;
5350 if (header->type == dns_rdatatype_cname) {
5352 * Look for an active extant CNAME.
5355 if (header->serial <= serial &&
5358 * Is this a "this rdataset doesn't
5361 if (NONEXISTENT(header))
5365 header = header->down;
5366 } while (header != NULL);
5371 * Look for active extant "other data".
5373 * "Other data" is any rdataset whose type is not
5374 * KEY, NSEC, SIG or RRSIG.
5376 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5377 if (rdtype != dns_rdatatype_key &&
5378 rdtype != dns_rdatatype_sig &&
5379 rdtype != dns_rdatatype_nsec &&
5380 rdtype != dns_rdatatype_rrsig) {
5382 * Is it active and extant?
5385 if (header->serial <= serial &&
5388 * Is this a "this rdataset
5389 * doesn't exist" record?
5391 if (NONEXISTENT(header))
5395 header = header->down;
5396 } while (header != NULL);
5398 other_data = ISC_TRUE;
5403 if (cname && other_data)
/*
 * resign_insert: insert 'newheader' into the heap rbtdb->heaps[idx].
 *
 * Preconditions enforced with INSIST: the database is not a cache, the
 * header is not already in a heap (heap_index == 0) and it is not on a
 * list via its 'link' member.
 *
 * NOTE(review): the return statement is not visible in this view;
 * presumably the isc_heap_insert() result is returned -- confirm.
 */
5410 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5411 isc_result_t result;
5413 INSIST(!IS_CACHE(rbtdb));
5414 INSIST(newheader->heap_index == 0);
5415 INSIST(!ISC_LINK_LINKED(newheader, link));
5417 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * add: link 'newheader' into the rdataset list of 'rbtnode'.
 *
 * As noted in the body, the caller must hold the node lock.
 *
 * Parameters, as used by the visible code:
 *   rbtversion     version being modified.  A NULL version selects the
 *                  cache-specific paths (trust comparison, negative cache
 *                  handling, expiring the header that gets replaced).
 *   newheader      header to add.  On every visible early-return path it
 *                  is released with free_rdataset(); otherwise it is
 *                  linked into the node.
 *   options        DNS_DBADD_MERGE (requires a non-NULL rbtversion),
 *                  DNS_DBADD_FORCE (use dns_trust_ultimate rather than
 *                  newheader->trust), DNS_DBADD_EXACT and
 *                  DNS_DBADD_EXACTTTL (consulted on the merge path).
 *   loading        when true no "changed" record is created, and a
 *                  replaced header is freed immediately.
 *   addedrdataset  if non-NULL, bound to the header that is current when
 *                  the function returns on the paths that bind it.
 *   now            compared against rdh_ttl to decide whether existing
 *                  cache data is still live.
 *
 * Result codes visible in this function: ISC_R_SUCCESS, DNS_R_UNCHANGED,
 * DNS_R_CNAMEANDOTHER and ISC_R_NOMEMORY; the merge path may also set
 * DNS_R_NOTEXACT in 'result'.
 *
 * NOTE(review): many short lines (braces, else keywords, loop conditions,
 * several assignments and returns) are not visible in this view.  The
 * statement order in this function is significant; confirm any reading of
 * the control flow against the full file before changing it.
 */
5422 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5423 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5424 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5426 rbtdb_changed_t *changed = NULL;
5427 rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5428 unsigned char *merged;
5429 isc_result_t result;
5430 isc_boolean_t header_nx;
5431 isc_boolean_t newheader_nx;
5432 isc_boolean_t merge;
5433 dns_rdatatype_t rdtype, covers;
5434 rbtdb_rdatatype_t negtype, sigtype;
5439 * Add an rdatasetheader_t to a node.
5443 * Caller must be holding the node lock.
5446 if ((options & DNS_DBADD_MERGE) != 0) {
5447 REQUIRE(rbtversion != NULL);
5452 if ((options & DNS_DBADD_FORCE) != 0)
5453 trust = dns_trust_ultimate;
5455 trust = newheader->trust;
5457 if (rbtversion != NULL && !loading) {
5459 * We always add a changed record, even if no changes end up
5460 * being made to this node, because it's harmless and
5461 * simplifies the code.
5463 changed = add_changed(rbtdb, rbtversion, rbtnode);
5464 if (changed == NULL) {
5465 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5466 return (ISC_R_NOMEMORY);
5470 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5471 topheader_prev = NULL;
5474 if (rbtversion == NULL && !newheader_nx) {
5475 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5478 * We're adding a negative cache entry.
5480 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5481 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5483 for (topheader = rbtnode->data;
5485 topheader = topheader->next) {
5487 * If we're adding an negative cache entry
5488 * which covers all types (NXDOMAIN,
5489 * NODATA(QTYPE=ANY)).
5491 * We make all other data stale so that the
5492 * only rdataset that can be found at this
5493 * node is the negative cache entry.
5495 * Otherwise look for any RRSIGs of the
5496 * given type so they can be marked stale
5499 if (covers == dns_rdatatype_any) {
5500 set_ttl(rbtdb, topheader, 0);
5501 topheader->attributes |=
5502 RDATASET_ATTR_STALE;
5504 } else if (topheader->type == sigtype)
5505 sigheader = topheader;
5507 if (covers == dns_rdatatype_any)
5509 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5512 * We're adding something that isn't a
5513 * negative cache entry. Look for an extant
5514 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5517 for (topheader = rbtnode->data;
5519 topheader = topheader->next) {
5520 if (topheader->type ==
5521 RBTDB_RDATATYPE_NCACHEANY)
5524 if (topheader != NULL && EXISTS(topheader) &&
5525 topheader->rdh_ttl > now) {
5529 if (trust < topheader->trust) {
5531 * The NXDOMAIN/NODATA(QTYPE=ANY)
5534 free_rdataset(rbtdb,
5537 if (addedrdataset != NULL)
5538 bind_rdataset(rbtdb, rbtnode,
5541 return (DNS_R_UNCHANGED);
5544 * The new rdataset is better. Expire the
5545 * NXDOMAIN/NODATA(QTYPE=ANY).
5547 set_ttl(rbtdb, topheader, 0);
5548 topheader->attributes |= RDATASET_ATTR_STALE;
5553 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5557 for (topheader = rbtnode->data;
5559 topheader = topheader->next) {
5560 if (topheader->type == newheader->type ||
5561 topheader->type == negtype)
5563 topheader_prev = topheader;
5568 * If header isn't NULL, we've found the right type. There may be
5569 * IGNORE rdatasets between the top of the chain and the first real
5570 * data. We skip over them.
5573 while (header != NULL && IGNORE(header))
5574 header = header->down;
5575 if (header != NULL) {
5576 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5579 * Deleting an already non-existent rdataset has no effect.
5581 if (header_nx && newheader_nx) {
5582 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5583 return (DNS_R_UNCHANGED);
5587 * Trying to add an rdataset with lower trust to a cache DB
5588 * has no effect, provided that the cache data isn't stale.
5590 if (rbtversion == NULL && trust < header->trust &&
5591 (header->rdh_ttl > now || header_nx)) {
5592 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5593 if (addedrdataset != NULL)
5594 bind_rdataset(rbtdb, rbtnode, header, now,
5596 return (DNS_R_UNCHANGED);
5600 * Don't merge if a nonexistent rdataset is involved.
5602 if (merge && (header_nx || newheader_nx))
5606 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5607 * that is the union of 'newheader' and 'header'.
5610 unsigned int flags = 0;
5611 INSIST(rbtversion->serial >= header->serial);
5613 result = ISC_R_SUCCESS;
5615 if ((options & DNS_DBADD_EXACT) != 0)
5616 flags |= DNS_RDATASLAB_EXACT;
5617 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5618 newheader->rdh_ttl != header->rdh_ttl)
5619 result = DNS_R_NOTEXACT;
5620 else if (newheader->rdh_ttl != header->rdh_ttl)
5621 flags |= DNS_RDATASLAB_FORCE;
5622 if (result == ISC_R_SUCCESS)
5623 result = dns_rdataslab_merge(
5624 (unsigned char *)header,
5625 (unsigned char *)newheader,
5626 (unsigned int)(sizeof(*newheader)),
5628 rbtdb->common.rdclass,
5629 (dns_rdatatype_t)header->type,
5631 if (result == ISC_R_SUCCESS) {
5633 * If 'header' has the same serial number as
5634 * we do, we could clean it up now if we knew
5635 * that our caller had no references to it.
5636 * We don't know this, however, so we leave it
5637 * alone. It will get cleaned up when
5638 * clean_zone_node() runs.
5640 free_rdataset(rbtdb, rbtdb->common.mctx,
5642 newheader = (rdatasetheader_t *)merged;
5643 if (loading && RESIGN(newheader) &&
5645 header->resign < newheader->resign)
5646 newheader->resign = header->resign;
5648 free_rdataset(rbtdb, rbtdb->common.mctx,
5654 * Don't replace existing NS, A and AAAA RRsets
5655 * in the cache if they are already exist. This
5656 * prevents named being locked to old servers.
5657 * Don't lower trust of existing record if the
5660 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5661 header->type == dns_rdatatype_ns &&
5662 !header_nx && !newheader_nx &&
5663 header->trust >= newheader->trust &&
5664 dns_rdataslab_equalx((unsigned char *)header,
5665 (unsigned char *)newheader,
5666 (unsigned int)(sizeof(*newheader)),
5667 rbtdb->common.rdclass,
5668 (dns_rdatatype_t)header->type)) {
5670 * Honour the new ttl if it is less than the
5673 if (header->rdh_ttl > newheader->rdh_ttl)
5674 set_ttl(rbtdb, header, newheader->rdh_ttl);
5675 if (header->noqname == NULL &&
5676 newheader->noqname != NULL) {
5677 header->noqname = newheader->noqname;
5678 newheader->noqname = NULL;
5680 if (header->closest == NULL &&
5681 newheader->closest != NULL) {
5682 header->closest = newheader->closest;
5683 newheader->closest = NULL;
5685 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5686 if (addedrdataset != NULL)
5687 bind_rdataset(rbtdb, rbtnode, header, now,
5689 return (ISC_R_SUCCESS);
5691 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5692 (header->type == dns_rdatatype_a ||
5693 header->type == dns_rdatatype_aaaa) &&
5694 !header_nx && !newheader_nx &&
5695 header->trust >= newheader->trust &&
5696 dns_rdataslab_equal((unsigned char *)header,
5697 (unsigned char *)newheader,
5698 (unsigned int)(sizeof(*newheader)))) {
5700 * Honour the new ttl if it is less than the
5703 if (header->rdh_ttl > newheader->rdh_ttl)
5704 set_ttl(rbtdb, header, newheader->rdh_ttl);
5705 if (header->noqname == NULL &&
5706 newheader->noqname != NULL) {
5707 header->noqname = newheader->noqname;
5708 newheader->noqname = NULL;
5710 if (header->closest == NULL &&
5711 newheader->closest != NULL) {
5712 header->closest = newheader->closest;
5713 newheader->closest = NULL;
5715 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5716 if (addedrdataset != NULL)
5717 bind_rdataset(rbtdb, rbtnode, header, now,
5719 return (ISC_R_SUCCESS);
5721 INSIST(rbtversion == NULL ||
5722 rbtversion->serial >= topheader->serial);
5723 if (topheader_prev != NULL)
5724 topheader_prev->next = newheader;
5726 rbtnode->data = newheader;
5727 newheader->next = topheader->next;
5730 * There are no other references to 'header' when
5731 * loading, so we MAY clean up 'header' now.
5732 * Since we don't generate changed records when
5733 * loading, we MUST clean up 'header' now.
5735 newheader->down = NULL;
5736 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5738 newheader->down = topheader;
5739 topheader->next = newheader;
5741 if (changed != NULL)
5742 changed->dirty = ISC_TRUE;
5743 if (rbtversion == NULL) {
5744 set_ttl(rbtdb, header, 0);
5745 header->attributes |= RDATASET_ATTR_STALE;
5746 if (sigheader != NULL) {
5747 set_ttl(rbtdb, sigheader, 0);
5748 sigheader->attributes |=
5749 RDATASET_ATTR_STALE;
5752 idx = newheader->node->locknum;
5753 if (IS_CACHE(rbtdb)) {
5754 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5757 * XXXMLG We don't check the return value
5758 * here. If it fails, we will not do TTL
5759 * based expiry on this node. However, we
5760 * will do it on the LRU side, so memory
5761 * will not leak... for long.
5763 isc_heap_insert(rbtdb->heaps[idx], newheader);
5764 } else if (RESIGN(newheader))
5765 resign_insert(rbtdb, idx, newheader);
5769 * No non-IGNORED rdatasets of the given type exist at
5774 * If we're trying to delete the type, don't bother.
5777 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5778 return (DNS_R_UNCHANGED);
5781 if (topheader != NULL) {
5783 * We have an list of rdatasets of the given type,
5784 * but they're all marked IGNORE. We simply insert
5785 * the new rdataset at the head of the list.
5787 * Ignored rdatasets cannot occur during loading, so
5791 INSIST(rbtversion == NULL ||
5792 rbtversion->serial >= topheader->serial);
5793 if (topheader_prev != NULL)
5794 topheader_prev->next = newheader;
5796 rbtnode->data = newheader;
5797 newheader->next = topheader->next;
5798 newheader->down = topheader;
5799 topheader->next = newheader;
5801 if (changed != NULL)
5802 changed->dirty = ISC_TRUE;
5805 * No rdatasets of the given type exist at the node.
5807 newheader->next = rbtnode->data;
5808 newheader->down = NULL;
5809 rbtnode->data = newheader;
5811 idx = newheader->node->locknum;
5812 if (IS_CACHE(rbtdb)) {
5813 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5815 isc_heap_insert(rbtdb->heaps[idx], newheader);
5816 } else if (RESIGN(newheader)) {
5817 resign_insert(rbtdb, idx, newheader);
5822 * Check if the node now contains CNAME and other data.
5824 if (rbtversion != NULL &&
5825 cname_and_other_data(rbtnode, rbtversion->serial))
5826 return (DNS_R_CNAMEANDOTHER);
5828 if (addedrdataset != NULL)
5829 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5831 return (ISC_R_SUCCESS);
/*
 * delegating_type: classify 'type' at 'node' as a delegating type or not.
 *
 * The visible tests are:
 *   - cache database: type == dns_rdatatype_dname;
 *   - otherwise: type == dns_rdatatype_dname, or type == dns_rdatatype_ns
 *     when 'node' is not the database origin node or the database is a
 *     stub (IS_STUB).
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably ISC_TRUE is returned when a test above holds and ISC_FALSE
 * otherwise -- confirm against the full file.
 */
5834 static inline isc_boolean_t
5835 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5836 rbtdb_rdatatype_t type)
5838 if (IS_CACHE(rbtdb)) {
5839 if (type == dns_rdatatype_dname)
5843 } else if (type == dns_rdatatype_dname ||
5844 (type == dns_rdatatype_ns &&
5845 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * addnoqname: copy the "noqname" proof attached to 'rdataset' into a
 * newly allocated struct noqname and attach it to 'newheader'.
 *
 * The name and the two rdatasets are fetched with
 * dns_rdataset_getnoqname(), which must succeed (RUNTIME_CHECK).  The name
 * is duplicated with dns_name_dup() and each rdataset is converted with
 * dns_rdataslab_fromrdataset(); the resulting buffers are stored in
 * noqname->neg and noqname->negsig.  On success both temporary rdatasets
 * are disassociated, newheader->noqname is set and ISC_R_SUCCESS is
 * returned.
 *
 * The trailing statements disassociate the temporaries and call
 * free_noqname(); they look like the failure path.
 *
 * NOTE(review): the jumps into the failure path and its return are not
 * visible in this view.  If the allocation-failure case also reaches
 * free_noqname() with 'noqname' still NULL, confirm that free_noqname()
 * tolerates that.
 */
5850 static inline isc_result_t
5851 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5852 dns_rdataset_t *rdataset)
5854 struct noqname *noqname;
5855 isc_mem_t *mctx = rbtdb->common.mctx;
5857 dns_rdataset_t neg, negsig;
5858 isc_result_t result;
5861 dns_name_init(&name, NULL);
5862 dns_rdataset_init(&neg);
5863 dns_rdataset_init(&negsig);
5865 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5866 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5868 noqname = isc_mem_get(mctx, sizeof(*noqname));
5869 if (noqname == NULL) {
5870 result = ISC_R_NOMEMORY;
/* Start from an empty structure so partial failures can be cleaned up. */
5873 dns_name_init(&noqname->name, NULL);
5874 noqname->neg = NULL;
5875 noqname->negsig = NULL;
5876 noqname->type = neg.type;
5877 result = dns_name_dup(&name, mctx, &noqname->name);
5878 if (result != ISC_R_SUCCESS)
5880 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5881 if (result != ISC_R_SUCCESS)
5883 noqname->neg = r.base;
5884 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5885 if (result != ISC_R_SUCCESS)
5887 noqname->negsig = r.base;
5888 dns_rdataset_disassociate(&neg);
5889 dns_rdataset_disassociate(&negsig);
5890 newheader->noqname = noqname;
5891 return (ISC_R_SUCCESS);
5894 dns_rdataset_disassociate(&neg);
5895 dns_rdataset_disassociate(&negsig);
5896 free_noqname(mctx, &noqname);
/*
 * addclosest: copy the "closest" proof attached to 'rdataset' into a
 * newly allocated struct noqname and attach it to 'newheader'.
 *
 * The name and the two rdatasets are fetched with
 * dns_rdataset_getclosest(), which must succeed (RUNTIME_CHECK).  The name
 * is duplicated with dns_name_dup() and each rdataset is converted with
 * dns_rdataslab_fromrdataset(); the resulting buffers are stored in
 * closest->neg and closest->negsig.  On success both temporary rdatasets
 * are disassociated, newheader->closest is set and ISC_R_SUCCESS is
 * returned.
 *
 * The trailing statements disassociate the temporaries and call
 * free_noqname(); they look like the failure path.
 *
 * NOTE(review): the jumps into the failure path and its return are not
 * visible in this view.  If the allocation-failure case also reaches
 * free_noqname() with 'closest' still NULL, confirm that free_noqname()
 * tolerates that.
 */
5900 static inline isc_result_t
5901 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5902 dns_rdataset_t *rdataset)
5904 struct noqname *closest;
5905 isc_mem_t *mctx = rbtdb->common.mctx;
5907 dns_rdataset_t neg, negsig;
5908 isc_result_t result;
5911 dns_name_init(&name, NULL);
5912 dns_rdataset_init(&neg);
5913 dns_rdataset_init(&negsig);
5915 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5916 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5918 closest = isc_mem_get(mctx, sizeof(*closest));
5919 if (closest == NULL) {
5920 result = ISC_R_NOMEMORY;
/* Start from an empty structure so partial failures can be cleaned up. */
5923 dns_name_init(&closest->name, NULL);
5924 closest->neg = NULL;
5925 closest->negsig = NULL;
5926 closest->type = neg.type;
5927 result = dns_name_dup(&name, mctx, &closest->name);
5928 if (result != ISC_R_SUCCESS)
5930 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5931 if (result != ISC_R_SUCCESS)
5933 closest->neg = r.base;
5934 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5935 if (result != ISC_R_SUCCESS)
5937 closest->negsig = r.base;
5938 dns_rdataset_disassociate(&neg);
5939 dns_rdataset_disassociate(&negsig);
5940 newheader->closest = closest;
5941 return (ISC_R_SUCCESS);
5944 dns_rdataset_disassociate(&neg);
5945 dns_rdataset_disassociate(&negsig);
5946 free_noqname(mctx, &closest);
/*
 * Declaration of the file-scope method table 'zone_methods' (no
 * initializer here).  Code later in this file compares
 * rbtdb->common.methods against its address before applying the NSEC3
 * node/type consistency REQUIREs.
 */
5950 static dns_dbmethods_t zone_methods;
/*
 * addrdataset: add 'rdataset' to 'node'.
 *
 * When the database uses zone_methods, a REQUIRE checks that NSEC3 data
 * (type or covers == nsec3) and the node's 'nsec3' flag agree.
 *
 * 'rdataset' is converted to a slab with room reserved for an
 * rdatasetheader_t, and the header is initialised from the rdataset: the
 * TTL is stored as rdataset->ttl + now, and the trust level and the
 * RESIGN, NXDOMAIN, OPTOUT, NOQNAME and CLOSEST attributes are copied
 * over.  A header added with a version carries that version's serial.
 *
 * Locking, as visible here: the tree write lock is taken when the type is
 * a delegating type or when the cache is over its memory limit; in the
 * latter case overmem_purge() runs first.  The node lock is then taken
 * for writing.  For a cache, dead nodes are cleaned up and the top
 * element of the node-lock bucket's heap is expired if its TTL is older
 * than now - RBTDB_VIRTUAL.  A tree lock held only for cleaning is
 * released before add() is called.
 *
 * After add() succeeds for a delegating type, rbtnode->find_callback is
 * set.  With a NULL 'version' on a non-cache database, iszonesecure() is
 * called after a successful add.
 *
 * NOTE(review): short lines (braces, else keywords, some early returns
 * and the final return) are not visible in this view; confirm the exact
 * branch structure against the full file.
 */
5953 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5954 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5955 dns_rdataset_t *addedrdataset)
5957 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5958 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5959 rbtdb_version_t *rbtversion = version;
5960 isc_region_t region;
5961 rdatasetheader_t *newheader;
5962 rdatasetheader_t *header;
5963 isc_result_t result;
5964 isc_boolean_t delegating;
5965 isc_boolean_t tree_locked = ISC_FALSE;
5966 isc_boolean_t cache_is_overmem = ISC_FALSE;
5968 REQUIRE(VALID_RBTDB(rbtdb));
5970 if (rbtdb->common.methods == &zone_methods)
5971 REQUIRE(((rbtnode->nsec3 &&
5972 (rdataset->type == dns_rdatatype_nsec3 ||
5973 rdataset->covers == dns_rdatatype_nsec3)) ||
5975 rdataset->type != dns_rdatatype_nsec3 &&
5976 rdataset->covers != dns_rdatatype_nsec3)));
5978 if (rbtversion == NULL) {
5980 isc_stdtime_get(&now);
5984 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5986 sizeof(rdatasetheader_t));
5987 if (result != ISC_R_SUCCESS)
/* The header lives at the start of the slab region. */
5990 newheader = (rdatasetheader_t *)region.base;
5991 init_rdataset(rbtdb, newheader);
5992 set_ttl(rbtdb, newheader, rdataset->ttl + now);
5993 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5995 newheader->attributes = 0;
5996 newheader->noqname = NULL;
5997 newheader->closest = NULL;
5998 newheader->count = init_count++;
5999 newheader->trust = rdataset->trust;
6000 newheader->additional_auth = NULL;
6001 newheader->additional_glue = NULL;
6002 newheader->last_used = now;
6003 newheader->node = rbtnode;
6004 if (rbtversion != NULL) {
6005 newheader->serial = rbtversion->serial;
6008 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6009 newheader->attributes |= RDATASET_ATTR_RESIGN;
6010 newheader->resign = rdataset->resign;
6012 newheader->resign = 0;
6014 newheader->serial = 1;
6015 newheader->resign = 0;
6016 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6017 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6018 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6019 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6020 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6021 result = addnoqname(rbtdb, newheader, rdataset);
6022 if (result != ISC_R_SUCCESS) {
6023 free_rdataset(rbtdb, rbtdb->common.mctx,
6028 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6029 result = addclosest(rbtdb, newheader, rdataset);
6030 if (result != ISC_R_SUCCESS) {
6031 free_rdataset(rbtdb, rbtdb->common.mctx,
6039 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6040 * just DNAME for the cache), then we need to set the callback bit
6043 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6044 delegating = ISC_TRUE;
6046 delegating = ISC_FALSE;
6049 * If we're adding a delegation type or the DB is a cache in an overmem
6050 * state, hold an exclusive lock on the tree. In the latter case
6051 * the lock does not necessarily have to be acquired but it will help
6052 * purge stale entries more effectively.
6054 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6055 cache_is_overmem = ISC_TRUE;
6056 if (delegating || cache_is_overmem) {
6057 tree_locked = ISC_TRUE;
6058 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6061 if (cache_is_overmem)
6062 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6064 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6065 isc_rwlocktype_write);
6067 if (rbtdb->rrsetstats != NULL) {
6068 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6069 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6072 if (IS_CACHE(rbtdb)) {
6074 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6076 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6077 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6078 expire_header(rbtdb, header, tree_locked);
6081 * If we've been holding a write lock on the tree just for
6082 * cleaning, we can release it now. However, we still need the
6085 if (tree_locked && !delegating) {
6086 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6087 tree_locked = ISC_FALSE;
6091 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6092 addedrdataset, now);
6093 if (result == ISC_R_SUCCESS && delegating)
6094 rbtnode->find_callback = 1;
6096 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6097 isc_rwlocktype_write);
6100 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6103 * Update the zone's secure status. If version is non-NULL
6104 * this is deferred until closeversion() is called.
6106 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6107 iszonesecure(db, version, rbtdb->origin_node);
/*
 * subtractrdataset: remove the rdata in 'rdataset' from the rdataset of
 * the same type at 'node' in 'version'.
 *
 * 'rdataset' is first converted to a slab with a header.  Under the node
 * write lock a "changed" record is obtained (ISC_R_NOMEMORY if that
 * fails) and the header list is searched for the matching type, skipping
 * IGNOREd entries on the ->down chain.
 *
 * If a current, existing header is found, dns_rdataslab_subtract() is
 * applied:
 *   - ISC_R_SUCCESS: the subtraction result becomes the new header; its
 *     serial is set to the version's serial and its additional_auth /
 *     additional_glue pointers are cleared.
 *   - DNS_R_NXRRSET: nothing would remain, so a fresh header marked
 *     RDATASET_ATTR_NONEXISTENT is created instead (ISC_R_NOMEMORY if
 *     that allocation fails).
 *   - any other result: the temporary header is freed.
 * The new header is then linked in front of the existing top header and
 * the changed record is marked dirty.
 *
 * If no such header exists the request is a no-op: DNS_R_NOTEXACT when
 * DNS_DBSUB_EXACT was given, otherwise DNS_R_UNCHANGED.
 *
 * On success, 'newrdataset' (if non-NULL) is bound to the new header.
 *
 * NOTE(review): short lines (braces, else keywords, jumps to the unlock
 * path, the final return) are not visible in this view; confirm the exact
 * control flow against the full file.
 */
6113 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6114 dns_rdataset_t *rdataset, unsigned int options,
6115 dns_rdataset_t *newrdataset)
6117 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6118 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6119 rbtdb_version_t *rbtversion = version;
6120 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6121 unsigned char *subresult;
6122 isc_region_t region;
6123 isc_result_t result;
6124 rbtdb_changed_t *changed;
6126 REQUIRE(VALID_RBTDB(rbtdb));
6128 if (rbtdb->common.methods == &zone_methods)
6129 REQUIRE(((rbtnode->nsec3 &&
6130 (rdataset->type == dns_rdatatype_nsec3 ||
6131 rdataset->covers == dns_rdatatype_nsec3)) ||
6133 rdataset->type != dns_rdatatype_nsec3 &&
6134 rdataset->covers != dns_rdatatype_nsec3)));
6136 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6138 sizeof(rdatasetheader_t));
6139 if (result != ISC_R_SUCCESS)
/* Build a temporary header describing the rdata to be subtracted. */
6141 newheader = (rdatasetheader_t *)region.base;
6142 init_rdataset(rbtdb, newheader);
6143 set_ttl(rbtdb, newheader, rdataset->ttl);
6144 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6146 newheader->attributes = 0;
6147 newheader->serial = rbtversion->serial;
6148 newheader->trust = 0;
6149 newheader->noqname = NULL;
6150 newheader->closest = NULL;
6151 newheader->count = init_count++;
6152 newheader->additional_auth = NULL;
6153 newheader->additional_glue = NULL;
6154 newheader->last_used = 0;
6155 newheader->node = rbtnode;
6156 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6157 newheader->attributes |= RDATASET_ATTR_RESIGN;
6158 newheader->resign = rdataset->resign;
6160 newheader->resign = 0;
6162 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6163 isc_rwlocktype_write);
6165 changed = add_changed(rbtdb, rbtversion, rbtnode);
6166 if (changed == NULL) {
6167 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6168 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6169 isc_rwlocktype_write);
6170 return (ISC_R_NOMEMORY);
6173 topheader_prev = NULL;
6174 for (topheader = rbtnode->data;
6176 topheader = topheader->next) {
6177 if (topheader->type == newheader->type)
6179 topheader_prev = topheader;
6182 * If header isn't NULL, we've found the right type. There may be
6183 * IGNORE rdatasets between the top of the chain and the first real
6184 * data. We skip over them.
6187 while (header != NULL && IGNORE(header))
6188 header = header->down;
6189 if (header != NULL && EXISTS(header)) {
6190 unsigned int flags = 0;
6192 result = ISC_R_SUCCESS;
6193 if ((options & DNS_DBSUB_EXACT) != 0) {
6194 flags |= DNS_RDATASLAB_EXACT;
6195 if (newheader->rdh_ttl != header->rdh_ttl)
6196 result = DNS_R_NOTEXACT;
6198 if (result == ISC_R_SUCCESS)
6199 result = dns_rdataslab_subtract(
6200 (unsigned char *)header,
6201 (unsigned char *)newheader,
6202 (unsigned int)(sizeof(*newheader)),
6204 rbtdb->common.rdclass,
6205 (dns_rdatatype_t)header->type,
6207 if (result == ISC_R_SUCCESS) {
6208 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6209 newheader = (rdatasetheader_t *)subresult;
6210 init_rdataset(rbtdb, newheader);
6212 * We have to set the serial since the rdataslab
6213 * subtraction routine copies the reserved portion of
6214 * header, not newheader.
6216 newheader->serial = rbtversion->serial;
6218 * XXXJT: dns_rdataslab_subtract() copied the pointers
6219 * to additional info. We need to clear these fields
6220 * to avoid having duplicated references.
6222 newheader->additional_auth = NULL;
6223 newheader->additional_glue = NULL;
6224 } else if (result == DNS_R_NXRRSET) {
6226 * This subtraction would remove all of the rdata;
6227 * add a nonexistent header instead.
6229 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6230 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6231 if (newheader == NULL) {
6232 result = ISC_R_NOMEMORY;
6235 set_ttl(rbtdb, newheader, 0);
6236 newheader->type = topheader->type;
6237 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6238 newheader->trust = 0;
6239 newheader->serial = rbtversion->serial;
6240 newheader->noqname = NULL;
6241 newheader->closest = NULL;
6242 newheader->count = 0;
6243 newheader->additional_auth = NULL;
6244 newheader->additional_glue = NULL;
6245 newheader->node = rbtnode;
6246 newheader->resign = 0;
6247 newheader->last_used = 0;
6249 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6254 * If we're here, we want to link newheader in front of
6257 INSIST(rbtversion->serial >= topheader->serial);
6258 if (topheader_prev != NULL)
6259 topheader_prev->next = newheader;
6261 rbtnode->data = newheader;
6262 newheader->next = topheader->next;
6263 newheader->down = topheader;
6264 topheader->next = newheader;
6266 changed->dirty = ISC_TRUE;
6269 * The rdataset doesn't exist, so we don't need to do anything
6270 * to satisfy the deletion request.
6272 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6273 if ((options & DNS_DBSUB_EXACT) != 0)
6274 result = DNS_R_NOTEXACT;
6276 result = DNS_R_UNCHANGED;
6279 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6280 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6283 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6284 isc_rwlocktype_write);
6287 * Update the zone's secure status. If version is non-NULL
6288 * this is deferred until closeversion() is called.
6290 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6291 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * deleterdataset: delete the rdataset of 'type'/'covers' at 'node'.
 *
 * Deletion is expressed by adding a header flagged
 * RDATASET_ATTR_NONEXISTENT: a fresh header is allocated
 * (ISC_R_NOMEMORY on failure), given TTL 0 and the version's serial when
 * a version is supplied, and handed to add() with DNS_DBADD_FORCE while
 * the node write lock is held.
 *
 * Returns ISC_R_NOTIMPLEMENTED for type ANY, and for RRSIG with
 * covers == 0.  With a NULL 'version' on a non-cache database,
 * iszonesecure() is called after a successful add.
 *
 * NOTE(review): the final return is not visible in this view; presumably
 * the add() result is returned -- confirm against the full file.
 */
6297 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6298 dns_rdatatype_t type, dns_rdatatype_t covers)
6300 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6301 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6302 rbtdb_version_t *rbtversion = version;
6303 isc_result_t result;
6304 rdatasetheader_t *newheader;
6306 REQUIRE(VALID_RBTDB(rbtdb));
6308 if (type == dns_rdatatype_any)
6309 return (ISC_R_NOTIMPLEMENTED);
6310 if (type == dns_rdatatype_rrsig && covers == 0)
6311 return (ISC_R_NOTIMPLEMENTED);
6313 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6314 if (newheader == NULL)
6315 return (ISC_R_NOMEMORY);
/* A header with no data, flagged as "this rdataset does not exist". */
6316 set_ttl(rbtdb, newheader, 0);
6317 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6318 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6319 newheader->trust = 0;
6320 newheader->noqname = NULL;
6321 newheader->closest = NULL;
6322 newheader->additional_auth = NULL;
6323 newheader->additional_glue = NULL;
6324 if (rbtversion != NULL)
6325 newheader->serial = rbtversion->serial;
6327 newheader->serial = 0;
6328 newheader->count = 0;
6329 newheader->last_used = 0;
6330 newheader->node = rbtnode;
6332 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6333 isc_rwlocktype_write);
6335 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6336 ISC_FALSE, NULL, 0);
6338 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6339 isc_rwlocktype_write);
6342 * Update the zone's secure status. If version is non-NULL
6343 * this is deferred until closeversion() is called.
6345 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6346 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * loading_addrdataset: add 'rdataset' at owner 'name' while the database
 * is being loaded.  'arg' is the rbtdb_load_t load context.
 *
 * As the body notes, this routine does no node locking.
 *
 * Checks visible here:
 *   - SOA in a non-cache database must be at the database origin,
 *     otherwise DNS_R_NOTZONETOP;
 *   - a wildcard owner may not hold NS (DNS_R_INVALIDNS) or NSEC3
 *     (DNS_R_INVALIDNSEC3).
 * Empty wildcard handling (add_empty_wildcards) is applied to everything
 * except NSEC3 data, and add_wildcard_magic() is applied to wildcard
 * owners.
 *
 * NSEC3 data (type or covers) goes into rbtdb->nsec3, everything else
 * into rbtdb->tree.  For a node that did not already exist the lock
 * bucket number is computed from the name hash.  A header is built from
 * the rdataset (TTL stored as rdataset->ttl + loadctx->now, serial 1) and
 * passed to add() with DNS_DBADD_MERGE and loading == ISC_TRUE, using the
 * database's current version.
 *
 * After a successful add of a delegating type, node->find_callback is
 * set; DNS_R_UNCHANGED from add() is mapped to ISC_R_SUCCESS.
 *
 * NOTE(review): short lines (braces, some returns, the statements after
 * each dns_rbt_addnode() success test, the final return) are not visible
 * in this view; confirm against the full file.
 */
6352 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6353 rbtdb_load_t *loadctx = arg;
6354 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6355 dns_rbtnode_t *node;
6356 isc_result_t result;
6357 isc_region_t region;
6358 rdatasetheader_t *newheader;
6361 * This routine does no node locking. See comments in
6362 * 'load' below for more information on loading and
6368 * SOA records are only allowed at top of zone.
6370 if (rdataset->type == dns_rdatatype_soa &&
6371 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6372 return (DNS_R_NOTZONETOP);
6374 if (rdataset->type != dns_rdatatype_nsec3 &&
6375 rdataset->covers != dns_rdatatype_nsec3)
6376 add_empty_wildcards(rbtdb, name);
6378 if (dns_name_iswildcard(name)) {
6380 * NS record owners cannot legally be wild cards.
6382 if (rdataset->type == dns_rdatatype_ns)
6383 return (DNS_R_INVALIDNS);
6385 * NSEC3 record owners cannot legally be wild cards.
6387 if (rdataset->type == dns_rdatatype_nsec3)
6388 return (DNS_R_INVALIDNSEC3);
6389 result = add_wildcard_magic(rbtdb, name);
6390 if (result != ISC_R_SUCCESS)
6395 if (rdataset->type == dns_rdatatype_nsec3 ||
6396 rdataset->covers == dns_rdatatype_nsec3) {
6397 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6398 if (result == ISC_R_SUCCESS)
6401 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6402 if (result == ISC_R_SUCCESS)
6405 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6407 if (result != ISC_R_EXISTS) {
6408 dns_name_t foundname;
6409 dns_name_init(&foundname, NULL);
6410 dns_rbt_namefromnode(node, &foundname);
6411 #ifdef DNS_RBT_USEHASH
6412 node->locknum = node->hashval % rbtdb->node_lock_count;
6414 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6415 rbtdb->node_lock_count;
6419 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6421 sizeof(rdatasetheader_t));
6422 if (result != ISC_R_SUCCESS)
6424 newheader = (rdatasetheader_t *)region.base;
6425 init_rdataset(rbtdb, newheader);
6426 set_ttl(rbtdb, newheader,
6427 rdataset->ttl + loadctx->now); /* XXX overflow check */
6428 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6430 newheader->attributes = 0;
6431 newheader->trust = rdataset->trust;
6432 newheader->serial = 1;
6433 newheader->noqname = NULL;
6434 newheader->closest = NULL;
6435 newheader->count = init_count++;
6436 newheader->additional_auth = NULL;
6437 newheader->additional_glue = NULL;
6438 newheader->last_used = 0;
6439 newheader->node = node;
6440 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6441 newheader->attributes |= RDATASET_ATTR_RESIGN;
6442 newheader->resign = rdataset->resign;
6444 newheader->resign = 0;
6446 result = add(rbtdb, node, rbtdb->current_version, newheader,
6447 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6448 if (result == ISC_R_SUCCESS &&
6449 delegating_type(rbtdb, node, rdataset->type))
6450 node->find_callback = 1;
6451 else if (result == DNS_R_UNCHANGED)
6452 result = ISC_R_SUCCESS;
6458 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6459 rbtdb_load_t *loadctx;
6462 rbtdb = (dns_rbtdb_t *)db;
6464 REQUIRE(VALID_RBTDB(rbtdb));
6466 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6467 if (loadctx == NULL)
6468 return (ISC_R_NOMEMORY);
6470 loadctx->rbtdb = rbtdb;
6471 if (IS_CACHE(rbtdb))
6472 isc_stdtime_get(&loadctx->now);
6476 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6478 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6480 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6482 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6484 *addp = loading_addrdataset;
6487 return (ISC_R_SUCCESS);
6491 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6492 rbtdb_load_t *loadctx;
6493 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6495 REQUIRE(VALID_RBTDB(rbtdb));
6496 REQUIRE(dbloadp != NULL);
6498 REQUIRE(loadctx->rbtdb == rbtdb);
6500 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6502 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6503 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6505 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6506 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6508 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6511 * If there's a KEY rdataset at the zone origin containing a
6512 * zone key, we consider the zone secure.
6514 if (! IS_CACHE(rbtdb))
6515 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6519 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6521 return (ISC_R_SUCCESS);
6525 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6526 dns_masterformat_t masterformat) {
6529 rbtdb = (dns_rbtdb_t *)db;
6531 REQUIRE(VALID_RBTDB(rbtdb));
6533 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6534 &dns_master_style_default,
6535 filename, masterformat));
6539 delete_callback(void *data, void *arg) {
6540 dns_rbtdb_t *rbtdb = arg;
6541 rdatasetheader_t *current, *next;
6542 unsigned int locknum;
6545 locknum = current->node->locknum;
6546 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6547 while (current != NULL) {
6548 next = current->next;
6549 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6552 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6555 static isc_boolean_t
6556 issecure(dns_db_t *db) {
6558 isc_boolean_t secure;
6560 rbtdb = (dns_rbtdb_t *)db;
6562 REQUIRE(VALID_RBTDB(rbtdb));
6564 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6565 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6566 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6571 static isc_boolean_t
6572 isdnssec(dns_db_t *db) {
6574 isc_boolean_t dnssec;
6576 rbtdb = (dns_rbtdb_t *)db;
6578 REQUIRE(VALID_RBTDB(rbtdb));
6580 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6581 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6582 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6588 nodecount(dns_db_t *db) {
6592 rbtdb = (dns_rbtdb_t *)db;
6594 REQUIRE(VALID_RBTDB(rbtdb));
6596 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6597 count = dns_rbt_nodecount(rbtdb->tree);
6598 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6604 settask(dns_db_t *db, isc_task_t *task) {
6607 rbtdb = (dns_rbtdb_t *)db;
6609 REQUIRE(VALID_RBTDB(rbtdb));
6611 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6612 if (rbtdb->task != NULL)
6613 isc_task_detach(&rbtdb->task);
6615 isc_task_attach(task, &rbtdb->task);
6616 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6619 static isc_boolean_t
6620 ispersistent(dns_db_t *db) {
6626 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6627 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6628 dns_rbtnode_t *onode;
6629 isc_result_t result = ISC_R_SUCCESS;
6631 REQUIRE(VALID_RBTDB(rbtdb));
6632 REQUIRE(nodep != NULL && *nodep == NULL);
6634 /* Note that the access to origin_node doesn't require a DB lock */
6635 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6636 if (onode != NULL) {
6637 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6638 new_reference(rbtdb, onode);
6639 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6641 *nodep = rbtdb->origin_node;
6643 INSIST(IS_CACHE(rbtdb));
6644 result = ISC_R_NOTFOUND;
6651 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6652 isc_uint8_t *flags, isc_uint16_t *iterations,
6653 unsigned char *salt, size_t *salt_length)
6656 isc_result_t result = ISC_R_NOTFOUND;
6657 rbtdb_version_t *rbtversion = version;
6659 rbtdb = (dns_rbtdb_t *)db;
6661 REQUIRE(VALID_RBTDB(rbtdb));
6663 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6665 if (rbtversion == NULL)
6666 rbtversion = rbtdb->current_version;
6668 if (rbtversion->havensec3) {
6670 *hash = rbtversion->hash;
6671 if (salt != NULL && salt_length != NULL) {
6672 REQUIRE(*salt_length >= rbtversion->salt_length);
6673 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6675 if (salt_length != NULL)
6676 *salt_length = rbtversion->salt_length;
6677 if (iterations != NULL)
6678 *iterations = rbtversion->iterations;
6680 *flags = rbtversion->flags;
6681 result = ISC_R_SUCCESS;
6683 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6689 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6690 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6691 isc_stdtime_t oldresign;
6692 isc_result_t result = ISC_R_SUCCESS;
6693 rdatasetheader_t *header;
6695 REQUIRE(VALID_RBTDB(rbtdb));
6696 REQUIRE(!IS_CACHE(rbtdb));
6697 REQUIRE(rdataset != NULL);
6699 header = rdataset->private3;
6702 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6703 isc_rwlocktype_write);
6705 oldresign = header->resign;
6706 header->resign = resign;
6707 if (header->heap_index != 0) {
6708 INSIST(RESIGN(header));
6710 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6711 header->heap_index);
6712 header->heap_index = 0;
6713 } else if (resign < oldresign)
6714 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6715 header->heap_index);
6717 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6718 header->heap_index);
6719 } else if (resign && header->heap_index == 0) {
6720 header->attributes |= RDATASET_ATTR_RESIGN;
6721 result = resign_insert(rbtdb, header->node->locknum, header);
6723 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6724 isc_rwlocktype_write);
6729 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6730 dns_name_t *foundname)
6732 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6733 rdatasetheader_t *header = NULL, *this;
6735 isc_result_t result = ISC_R_NOTFOUND;
6736 unsigned int locknum;
6738 REQUIRE(VALID_RBTDB(rbtdb));
6740 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6742 for (i = 0; i < rbtdb->node_lock_count; i++) {
6743 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6744 this = isc_heap_element(rbtdb->heaps[i], 1);
6746 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6747 isc_rwlocktype_read);
6752 else if (isc_serial_lt(this->resign, header->resign)) {
6753 locknum = header->node->locknum;
6754 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6755 isc_rwlocktype_read);
6758 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6759 isc_rwlocktype_read);
6765 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6767 if (foundname != NULL)
6768 dns_rbt_fullnamefromnode(header->node, foundname);
6770 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6771 isc_rwlocktype_read);
6773 result = ISC_R_SUCCESS;
6776 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6782 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6784 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6785 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6786 dns_rbtnode_t *node;
6787 rdatasetheader_t *header;
6789 REQUIRE(VALID_RBTDB(rbtdb));
6790 REQUIRE(rdataset != NULL);
6791 REQUIRE(rbtdb->future_version == rbtversion);
6792 REQUIRE(rbtversion->writer);
6794 node = rdataset->private2;
6795 header = rdataset->private3;
6798 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6799 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6800 isc_rwlocktype_write);
6802 * Delete from heap and save to re-signed list so that it can
6803 * be restored if we backout of this change.
6805 new_reference(rbtdb, node);
6806 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6807 header->heap_index = 0;
6808 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6810 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6811 isc_rwlocktype_write);
6812 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6815 static dns_stats_t *
6816 getrrsetstats(dns_db_t *db) {
6817 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6819 REQUIRE(VALID_RBTDB(rbtdb));
6820 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6822 return (rbtdb->rrsetstats);
6825 static dns_dbmethods_t zone_methods = {
6864 static dns_dbmethods_t cache_methods = {
6904 #ifdef DNS_RBTDB_VERSION64
6909 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6910 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6911 void *driverarg, dns_db_t **dbp)
6914 isc_result_t result;
6917 isc_boolean_t (*sooner)(void *, void *);
6919 /* Keep the compiler happy. */
6924 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6926 return (ISC_R_NOMEMORY);
6928 memset(rbtdb, '\0', sizeof(*rbtdb));
6929 dns_name_init(&rbtdb->common.origin, NULL);
6930 rbtdb->common.attributes = 0;
6931 if (type == dns_dbtype_cache) {
6932 rbtdb->common.methods = &cache_methods;
6933 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6934 } else if (type == dns_dbtype_stub) {
6935 rbtdb->common.methods = &zone_methods;
6936 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6938 rbtdb->common.methods = &zone_methods;
6939 rbtdb->common.rdclass = rdclass;
6940 rbtdb->common.mctx = NULL;
6942 result = RBTDB_INITLOCK(&rbtdb->lock);
6943 if (result != ISC_R_SUCCESS)
6946 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6947 if (result != ISC_R_SUCCESS)
6951 * Initialize node_lock_count in a generic way to support future
6952 * extension which allows the user to specify this value on creation.
6953 * Note that when specified for a cache DB it must be larger than 1
6954 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6956 if (rbtdb->node_lock_count == 0) {
6957 if (IS_CACHE(rbtdb))
6958 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6960 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6961 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6962 result = ISC_R_RANGE;
6963 goto cleanup_tree_lock;
6965 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6966 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6967 sizeof(rbtdb_nodelock_t));
6968 if (rbtdb->node_locks == NULL) {
6969 result = ISC_R_NOMEMORY;
6970 goto cleanup_tree_lock;
6973 rbtdb->rrsetstats = NULL;
6974 if (IS_CACHE(rbtdb)) {
6975 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6976 if (result != ISC_R_SUCCESS)
6977 goto cleanup_node_locks;
6978 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6979 sizeof(rdatasetheaderlist_t));
6980 if (rbtdb->rdatasets == NULL) {
6981 result = ISC_R_NOMEMORY;
6982 goto cleanup_rrsetstats;
6984 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6985 ISC_LIST_INIT(rbtdb->rdatasets[i]);
6987 rbtdb->rdatasets = NULL;
6992 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6993 sizeof(isc_heap_t *));
6994 if (rbtdb->heaps == NULL) {
6995 result = ISC_R_NOMEMORY;
6996 goto cleanup_rdatasets;
6998 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6999 rbtdb->heaps[i] = NULL;
7000 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7001 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7002 result = isc_heap_create(mctx, sooner, set_index, 0,
7004 if (result != ISC_R_SUCCESS)
7009 * Create deadnode lists.
7011 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7012 sizeof(rbtnodelist_t));
7013 if (rbtdb->deadnodes == NULL) {
7014 result = ISC_R_NOMEMORY;
7017 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7018 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7020 rbtdb->active = rbtdb->node_lock_count;
7022 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7023 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7024 if (result == ISC_R_SUCCESS) {
7025 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7026 if (result != ISC_R_SUCCESS)
7027 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7029 if (result != ISC_R_SUCCESS) {
7031 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7032 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7033 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7035 goto cleanup_deadnodes;
7037 rbtdb->node_locks[i].exiting = ISC_FALSE;
7041 * Attach to the mctx. The database will persist so long as there
7042 * are references to it, and attaching to the mctx ensures that our
7043 * mctx won't disappear out from under us.
7045 isc_mem_attach(mctx, &rbtdb->common.mctx);
7048 * Must be initialized before free_rbtdb() is called.
7050 isc_ondestroy_init(&rbtdb->common.ondest);
7053 * Make a copy of the origin name.
7055 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7056 if (result != ISC_R_SUCCESS) {
7057 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7062 * Make the Red-Black Trees.
7064 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7065 if (result != ISC_R_SUCCESS) {
7066 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7070 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7071 if (result != ISC_R_SUCCESS) {
7072 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7077 * In order to set the node callback bit correctly in zone databases,
7078 * we need to know if the node has the origin name of the zone.
7079 * In loading_addrdataset() we could simply compare the new name
7080 * to the origin name, but this is expensive. Also, we don't know the
7081 * node name in addrdataset(), so we need another way of knowing the
7084 * We now explicitly create a node for the zone's origin, and then
7085 * we simply remember the node's address. This is safe, because
7086 * the top-of-zone node can never be deleted, nor can its address
7089 if (!IS_CACHE(rbtdb)) {
7090 dns_rbtnode_t *nsec3node;
7092 rbtdb->origin_node = NULL;
7093 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7094 &rbtdb->origin_node);
7095 if (result != ISC_R_SUCCESS) {
7096 INSIST(result != ISC_R_EXISTS);
7097 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7100 rbtdb->origin_node->nsec3 = 0;
7102 * We need to give the origin node the right locknum.
7104 dns_name_init(&name, NULL);
7105 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7106 #ifdef DNS_RBT_USEHASH
7107 rbtdb->origin_node->locknum =
7108 rbtdb->origin_node->hashval %
7109 rbtdb->node_lock_count;
7111 rbtdb->origin_node->locknum =
7112 dns_name_hash(&name, ISC_TRUE) %
7113 rbtdb->node_lock_count;
7116 * Add an apex node to the NSEC3 tree so that NSEC3 searches
7117 * return partial matches when there is only a single NSEC3
7118 * record in the tree.
7121 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7123 if (result != ISC_R_SUCCESS) {
7124 INSIST(result != ISC_R_EXISTS);
7125 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7128 nsec3node->nsec3 = 1;
7130 * We need to give the nsec3 origin node the right locknum.
7132 dns_name_init(&name, NULL);
7133 dns_rbt_namefromnode(nsec3node, &name);
7134 #ifdef DNS_RBT_USEHASH
7135 nsec3node->locknum = nsec3node->hashval %
7136 rbtdb->node_lock_count;
7138 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7139 rbtdb->node_lock_count;
7144 * Misc. Initialization.
7146 result = isc_refcount_init(&rbtdb->references, 1);
7147 if (result != ISC_R_SUCCESS) {
7148 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7151 rbtdb->attributes = 0;
7155 * Version Initialization.
7157 rbtdb->current_serial = 1;
7158 rbtdb->least_serial = 1;
7159 rbtdb->next_serial = 2;
7160 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7161 if (rbtdb->current_version == NULL) {
7162 isc_refcount_decrement(&rbtdb->references, NULL);
7163 isc_refcount_destroy(&rbtdb->references);
7164 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7165 return (ISC_R_NOMEMORY);
7167 rbtdb->current_version->secure = dns_db_insecure;
7168 rbtdb->current_version->havensec3 = ISC_FALSE;
7169 rbtdb->current_version->flags = 0;
7170 rbtdb->current_version->iterations = 0;
7171 rbtdb->current_version->hash = 0;
7172 rbtdb->current_version->salt_length = 0;
7173 memset(rbtdb->current_version->salt, 0,
7174 sizeof(rbtdb->current_version->salt));
7175 rbtdb->future_version = NULL;
7176 ISC_LIST_INIT(rbtdb->open_versions);
7178 * Keep the current version in the open list so that list operation
7179 * won't happen in normal lookup operations.
7181 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7183 rbtdb->common.magic = DNS_DB_MAGIC;
7184 rbtdb->common.impmagic = RBTDB_MAGIC;
7186 *dbp = (dns_db_t *)rbtdb;
7188 return (ISC_R_SUCCESS);
7191 isc_mem_put(mctx, rbtdb->deadnodes,
7192 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7195 if (rbtdb->heaps != NULL) {
7196 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7197 if (rbtdb->heaps[i] != NULL)
7198 isc_heap_destroy(&rbtdb->heaps[i]);
7199 isc_mem_put(mctx, rbtdb->heaps,
7200 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7204 if (rbtdb->rdatasets != NULL)
7205 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7206 sizeof(rdatasetheaderlist_t));
7208 if (rbtdb->rrsetstats != NULL)
7209 dns_stats_detach(&rbtdb->rrsetstats);
7212 isc_mem_put(mctx, rbtdb->node_locks,
7213 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7216 isc_rwlock_destroy(&rbtdb->tree_lock);
7219 RBTDB_DESTROYLOCK(&rbtdb->lock);
7222 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
/*
 * Slabbed Rdataset Methods
 */
7232 rdataset_disassociate(dns_rdataset_t *rdataset) {
7233 dns_db_t *db = rdataset->private1;
7234 dns_dbnode_t *node = rdataset->private2;
7236 detachnode(db, &node);
7240 rdataset_first(dns_rdataset_t *rdataset) {
7241 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7244 count = raw[0] * 256 + raw[1];
7246 rdataset->private5 = NULL;
7247 return (ISC_R_NOMORE);
7250 #if DNS_RDATASET_FIXED
7251 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7252 raw += 2 + (4 * count);
7258 * The privateuint4 field is the number of rdata beyond the
7259 * cursor position, so we decrement the total count by one
7260 * before storing it.
7262 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7263 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7264 * to the first entry in the offset table.
7267 rdataset->privateuint4 = count;
7268 rdataset->private5 = raw;
7270 return (ISC_R_SUCCESS);
7274 rdataset_next(dns_rdataset_t *rdataset) {
7276 unsigned int length;
7277 unsigned char *raw; /* RDATASLAB */
7279 count = rdataset->privateuint4;
7281 return (ISC_R_NOMORE);
7283 rdataset->privateuint4 = count;
7286 * Skip forward one record (length + 4) or one offset (4).
7288 raw = rdataset->private5;
7289 #if DNS_RDATASET_FIXED
7290 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7292 length = raw[0] * 256 + raw[1];
7294 #if DNS_RDATASET_FIXED
7296 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7298 rdataset->private5 = raw + 2; /* length(2) */
7301 return (ISC_R_SUCCESS);
7305 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7306 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7307 #if DNS_RDATASET_FIXED
7308 unsigned int offset;
7310 unsigned int length;
7312 unsigned int flags = 0;
7314 REQUIRE(raw != NULL);
7317 * Find the start of the record if not already in private5
7318 * then skip the length and order fields.
7320 #if DNS_RDATASET_FIXED
7321 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7322 offset = (raw[0] << 24) + (raw[1] << 16) +
7323 (raw[2] << 8) + raw[3];
7324 raw = rdataset->private3;
7328 length = raw[0] * 256 + raw[1];
7329 #if DNS_RDATASET_FIXED
7334 if (rdataset->type == dns_rdatatype_rrsig) {
7335 if (*raw & DNS_RDATASLAB_OFFLINE)
7336 flags |= DNS_RDATA_OFFLINE;
7342 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7343 rdata->flags |= flags;
7347 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7348 dns_db_t *db = source->private1;
7349 dns_dbnode_t *node = source->private2;
7350 dns_dbnode_t *cloned_node = NULL;
7352 attachnode(db, node, &cloned_node);
7356 * Reset iterator state.
7358 target->privateuint4 = 0;
7359 target->private5 = NULL;
7363 rdataset_count(dns_rdataset_t *rdataset) {
7364 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7367 count = raw[0] * 256 + raw[1];
7373 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7374 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7376 dns_db_t *db = rdataset->private1;
7377 dns_dbnode_t *node = rdataset->private2;
7378 dns_dbnode_t *cloned_node;
7379 struct noqname *noqname = rdataset->private6;
7382 attachnode(db, node, &cloned_node);
7383 nsec->methods = &rdataset_methods;
7384 nsec->rdclass = db->rdclass;
7385 nsec->type = noqname->type;
7387 nsec->ttl = rdataset->ttl;
7388 nsec->trust = rdataset->trust;
7389 nsec->private1 = rdataset->private1;
7390 nsec->private2 = rdataset->private2;
7391 nsec->private3 = noqname->neg;
7392 nsec->privateuint4 = 0;
7393 nsec->private5 = NULL;
7394 nsec->private6 = NULL;
7395 nsec->private7 = NULL;
7398 attachnode(db, node, &cloned_node);
7399 nsecsig->methods = &rdataset_methods;
7400 nsecsig->rdclass = db->rdclass;
7401 nsecsig->type = dns_rdatatype_rrsig;
7402 nsecsig->covers = noqname->type;
7403 nsecsig->ttl = rdataset->ttl;
7404 nsecsig->trust = rdataset->trust;
7405 nsecsig->private1 = rdataset->private1;
7406 nsecsig->private2 = rdataset->private2;
7407 nsecsig->private3 = noqname->negsig;
7408 nsecsig->privateuint4 = 0;
7409 nsecsig->private5 = NULL;
7410 nsec->private6 = NULL;
7411 nsec->private7 = NULL;
7413 dns_name_clone(&noqname->name, name);
7415 return (ISC_R_SUCCESS);
7419 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7420 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7422 dns_db_t *db = rdataset->private1;
7423 dns_dbnode_t *node = rdataset->private2;
7424 dns_dbnode_t *cloned_node;
7425 struct noqname *closest = rdataset->private7;
7428 attachnode(db, node, &cloned_node);
7429 nsec->methods = &rdataset_methods;
7430 nsec->rdclass = db->rdclass;
7431 nsec->type = closest->type;
7433 nsec->ttl = rdataset->ttl;
7434 nsec->trust = rdataset->trust;
7435 nsec->private1 = rdataset->private1;
7436 nsec->private2 = rdataset->private2;
7437 nsec->private3 = closest->neg;
7438 nsec->privateuint4 = 0;
7439 nsec->private5 = NULL;
7440 nsec->private6 = NULL;
7441 nsec->private7 = NULL;
7444 attachnode(db, node, &cloned_node);
7445 nsecsig->methods = &rdataset_methods;
7446 nsecsig->rdclass = db->rdclass;
7447 nsecsig->type = dns_rdatatype_rrsig;
7448 nsecsig->covers = closest->type;
7449 nsecsig->ttl = rdataset->ttl;
7450 nsecsig->trust = rdataset->trust;
7451 nsecsig->private1 = rdataset->private1;
7452 nsecsig->private2 = rdataset->private2;
7453 nsecsig->private3 = closest->negsig;
7454 nsecsig->privateuint4 = 0;
7455 nsecsig->private5 = NULL;
7456 nsec->private6 = NULL;
7457 nsec->private7 = NULL;
7459 dns_name_clone(&closest->name, name);
7461 return (ISC_R_SUCCESS);
7465 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7466 dns_rbtdb_t *rbtdb = rdataset->private1;
7467 dns_rbtnode_t *rbtnode = rdataset->private2;
7468 rdatasetheader_t *header = rdataset->private3;
7471 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7472 isc_rwlocktype_write);
7473 header->trust = rdataset->trust = trust;
7474 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7475 isc_rwlocktype_write);
7479 rdataset_expire(dns_rdataset_t *rdataset) {
7480 dns_rbtdb_t *rbtdb = rdataset->private1;
7481 dns_rbtnode_t *rbtnode = rdataset->private2;
7482 rdatasetheader_t *header = rdataset->private3;
7485 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7486 isc_rwlocktype_write);
7487 expire_header(rbtdb, header, ISC_FALSE);
7488 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7489 isc_rwlocktype_write);
/*
 * Rdataset Iterator Methods
 */
7497 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7498 rbtdb_rdatasetiter_t *rbtiterator;
7500 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7502 if (rbtiterator->common.version != NULL)
7503 closeversion(rbtiterator->common.db,
7504 &rbtiterator->common.version, ISC_FALSE);
7505 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7506 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7507 sizeof(*rbtiterator));
7513 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7514 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7515 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7516 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7517 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7518 rdatasetheader_t *header, *top_next;
7519 rbtdb_serial_t serial;
7522 if (IS_CACHE(rbtdb)) {
7524 now = rbtiterator->common.now;
7526 serial = rbtversion->serial;
7530 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7531 isc_rwlocktype_read);
7533 for (header = rbtnode->data; header != NULL; header = top_next) {
7534 top_next = header->next;
7536 if (header->serial <= serial && !IGNORE(header)) {
7538 * Is this a "this rdataset doesn't exist"
7539 * record? Or is it too old in the cache?
7541 * Note: unlike everywhere else, we
7542 * check for now > header->rdh_ttl instead
7543 * of now >= header->rdh_ttl. This allows
7544 * ANY and RRSIG queries for 0 TTL
7545 * rdatasets to work.
7547 if (NONEXISTENT(header) ||
7548 (now != 0 && now > header->rdh_ttl))
7552 header = header->down;
7553 } while (header != NULL);
7558 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7559 isc_rwlocktype_read);
7561 rbtiterator->current = header;
7564 return (ISC_R_NOMORE);
7566 return (ISC_R_SUCCESS);
7570 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7571 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7572 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7573 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7574 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7575 rdatasetheader_t *header, *top_next;
7576 rbtdb_serial_t serial;
7578 rbtdb_rdatatype_t type, negtype;
7579 dns_rdatatype_t rdtype, covers;
7581 header = rbtiterator->current;
7583 return (ISC_R_NOMORE);
7585 if (IS_CACHE(rbtdb)) {
7587 now = rbtiterator->common.now;
7589 serial = rbtversion->serial;
7593 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7594 isc_rwlocktype_read);
7596 type = header->type;
7597 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7599 covers = RBTDB_RDATATYPE_EXT(header->type);
7600 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7602 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7603 for (header = header->next; header != NULL; header = top_next) {
7604 top_next = header->next;
7606 * If not walking back up the down list.
7608 if (header->type != type && header->type != negtype) {
7610 if (header->serial <= serial &&
7613 * Is this a "this rdataset doesn't
7616 * Note: unlike everywhere else, we
7617 * check for now > header->ttl instead
7618 * of now >= header->ttl. This allows
7619 * ANY and RRSIG queries for 0 TTL
7620 * rdatasets to work.
7622 if ((header->attributes &
7623 RDATASET_ATTR_NONEXISTENT) != 0 ||
7624 (now != 0 && now > header->rdh_ttl))
7628 header = header->down;
7629 } while (header != NULL);
7635 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7636 isc_rwlocktype_read);
7638 rbtiterator->current = header;
7641 return (ISC_R_NOMORE);
7643 return (ISC_R_SUCCESS);
7647 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7648 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7649 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7650 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7651 rdatasetheader_t *header;
7653 header = rbtiterator->current;
7654 REQUIRE(header != NULL);
7656 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7657 isc_rwlocktype_read);
7659 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7662 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7663 isc_rwlocktype_read);
/*
 * Database Iterator Methods
 */
7672 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7673 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7674 dns_rbtnode_t *node = rbtdbiter->node;
7679 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7680 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7684 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7685 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7686 dns_rbtnode_t *node = rbtdbiter->node;
7692 lock = &rbtdb->node_locks[node->locknum].lock;
7693 NODE_LOCK(lock, isc_rwlocktype_read);
7694 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7695 rbtdbiter->tree_locked, ISC_FALSE);
7696 NODE_UNLOCK(lock, isc_rwlocktype_read);
7698 rbtdbiter->node = NULL;
7702 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7703 dns_rbtnode_t *node;
7704 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7705 isc_boolean_t was_read_locked = ISC_FALSE;
7709 if (rbtdbiter->delete != 0) {
7711 * Note that "%d node of %d in tree" can report things like
7712 * "flush_deletions: 59 nodes of 41 in tree". This means
7713 * That some nodes appear on the deletions list more than
7714 * once. Only the last occurence will actually be deleted.
7716 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7717 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7718 "flush_deletions: %d nodes of %d in tree",
7720 dns_rbt_nodecount(rbtdb->tree));
7722 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7723 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7724 was_read_locked = ISC_TRUE;
7726 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7727 rbtdbiter->tree_locked = isc_rwlocktype_write;
7729 for (i = 0; i < rbtdbiter->delete; i++) {
7730 node = rbtdbiter->deletions[i];
7731 lock = &rbtdb->node_locks[node->locknum].lock;
7733 NODE_LOCK(lock, isc_rwlocktype_read);
7734 decrement_reference(rbtdb, node, 0,
7735 isc_rwlocktype_read,
7736 rbtdbiter->tree_locked, ISC_FALSE);
7737 NODE_UNLOCK(lock, isc_rwlocktype_read);
7740 rbtdbiter->delete = 0;
7742 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7743 if (was_read_locked) {
7744 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7745 rbtdbiter->tree_locked = isc_rwlocktype_read;
7748 rbtdbiter->tree_locked = isc_rwlocktype_none;
7754 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7755 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7757 REQUIRE(rbtdbiter->paused);
7758 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7760 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7761 rbtdbiter->tree_locked = isc_rwlocktype_read;
7763 rbtdbiter->paused = ISC_FALSE;
7767 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7768 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7769 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7770 dns_db_t *db = NULL;
7772 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7773 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7774 rbtdbiter->tree_locked = isc_rwlocktype_none;
7776 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7778 dereference_iter_node(rbtdbiter);
7780 flush_deletions(rbtdbiter);
7782 dns_db_attach(rbtdbiter->common.db, &db);
7783 dns_db_detach(&rbtdbiter->common.db);
7785 dns_rbtnodechain_reset(&rbtdbiter->chain);
7786 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7787 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7794 dbiterator_first(dns_dbiterator_t *iterator) {
7795 isc_result_t result;
7796 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7797 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7798 dns_name_t *name, *origin;
7800 if (rbtdbiter->result != ISC_R_SUCCESS &&
7801 rbtdbiter->result != ISC_R_NOMORE)
7802 return (rbtdbiter->result);
7804 if (rbtdbiter->paused)
7805 resume_iteration(rbtdbiter);
7807 dereference_iter_node(rbtdbiter);
7809 name = dns_fixedname_name(&rbtdbiter->name);
7810 origin = dns_fixedname_name(&rbtdbiter->origin);
7811 dns_rbtnodechain_reset(&rbtdbiter->chain);
7812 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7814 if (rbtdbiter->nsec3only) {
7815 rbtdbiter->current = &rbtdbiter->nsec3chain;
7816 result = dns_rbtnodechain_first(rbtdbiter->current,
7817 rbtdb->nsec3, name, origin);
7819 rbtdbiter->current = &rbtdbiter->chain;
7820 result = dns_rbtnodechain_first(rbtdbiter->current,
7821 rbtdb->tree, name, origin);
7822 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7823 rbtdbiter->current = &rbtdbiter->nsec3chain;
7824 result = dns_rbtnodechain_first(rbtdbiter->current,
7829 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7830 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7831 NULL, &rbtdbiter->node);
7832 if (result == ISC_R_SUCCESS) {
7833 rbtdbiter->new_origin = ISC_TRUE;
7834 reference_iter_node(rbtdbiter);
7837 INSIST(result == ISC_R_NOTFOUND);
7838 result = ISC_R_NOMORE; /* The tree is empty. */
7841 rbtdbiter->result = result;
7847 dbiterator_last(dns_dbiterator_t *iterator) {
7848 isc_result_t result;
7849 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7850 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7851 dns_name_t *name, *origin;
7853 if (rbtdbiter->result != ISC_R_SUCCESS &&
7854 rbtdbiter->result != ISC_R_NOMORE)
7855 return (rbtdbiter->result);
7857 if (rbtdbiter->paused)
7858 resume_iteration(rbtdbiter);
7860 dereference_iter_node(rbtdbiter);
7862 name = dns_fixedname_name(&rbtdbiter->name);
7863 origin = dns_fixedname_name(&rbtdbiter->origin);
7864 dns_rbtnodechain_reset(&rbtdbiter->chain);
7865 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7867 result = ISC_R_NOTFOUND;
7868 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7869 rbtdbiter->current = &rbtdbiter->nsec3chain;
7870 result = dns_rbtnodechain_last(rbtdbiter->current,
7871 rbtdb->nsec3, name, origin);
7873 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7874 rbtdbiter->current = &rbtdbiter->chain;
7875 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7878 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7879 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7880 NULL, &rbtdbiter->node);
7881 if (result == ISC_R_SUCCESS) {
7882 rbtdbiter->new_origin = ISC_TRUE;
7883 reference_iter_node(rbtdbiter);
7886 INSIST(result == ISC_R_NOTFOUND);
7887 result = ISC_R_NOMORE; /* The tree is empty. */
7890 rbtdbiter->result = result;
/*
 * dbiterator_seek: position the iterator at 'name'.
 *
 * Returns the iterator's stored result immediately when it is anything
 * other than ISC_R_SUCCESS, ISC_R_NOTFOUND or ISC_R_NOMORE.  Otherwise a
 * paused iterator is resumed, the current node is released, both node
 * chains are reset and 'name' is looked up with dns_rbt_findnode():
 *   - in the NSEC3 tree only, when 'nsec3only' is set;
 *   - in the main tree only, when 'nonsec3' is set;
 *   - otherwise in the main tree first, retrying in the NSEC3 tree when
 *     the main tree reports DNS_R_PARTIALMATCH.
 *
 * NOTE(review): several lines of this function are absent from this
 * listing (some lookup arguments, closing braces, the final return).
 * The comments below describe only the statements that are shown.
 */
7896 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7897 isc_result_t result;
7898 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7899 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7900 dns_name_t *iname, *origin;
/* Unlike first/last, a previous ISC_R_NOTFOUND does not block a new seek. */
7902 if (rbtdbiter->result != ISC_R_SUCCESS &&
7903 rbtdbiter->result != ISC_R_NOTFOUND &&
7904 rbtdbiter->result != ISC_R_NOMORE)
7905 return (rbtdbiter->result);
7907 if (rbtdbiter->paused)
7908 resume_iteration(rbtdbiter);
7910 dereference_iter_node(rbtdbiter);
/* 'iname' and 'origin' point at the iterator's own fixed-name storage. */
7912 iname = dns_fixedname_name(&rbtdbiter->name);
7913 origin = dns_fixedname_name(&rbtdbiter->origin);
7914 dns_rbtnodechain_reset(&rbtdbiter->chain);
7915 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7917 if (rbtdbiter->nsec3only) {
7918 rbtdbiter->current = &rbtdbiter->nsec3chain;
7919 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7922 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7923 } else if (rbtdbiter->nonsec3) {
7924 rbtdbiter->current = &rbtdbiter->chain;
7925 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7928 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7931 * Stay on main chain if not found on either chain.
7933 rbtdbiter->current = &rbtdbiter->chain;
7934 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7937 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
/*
 * Only a partial match in the main tree: try the NSEC3 tree, using a
 * local node pointer so the iterator is switched to the NSEC3 chain only
 * if that lookup succeeds.
 */
7938 if (result == DNS_R_PARTIALMATCH) {
7939 dns_rbtnode_t *node = NULL;
7940 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7941 &node, &rbtdbiter->nsec3chain,
7942 DNS_RBTFIND_EMPTYDATA,
7944 if (result == ISC_R_SUCCESS) {
7945 rbtdbiter->node = node;
7946 rbtdbiter->current = &rbtdbiter->nsec3chain;
/*
 * Exact match: fetch the current chain position into 'iname' and, on
 * success, flag a new origin and call reference_iter_node().  A partial
 * match is reported as ISC_R_NOTFOUND with no current node.
 */
7952 if (result == ISC_R_SUCCESS) {
7953 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7955 if (result == ISC_R_SUCCESS) {
7956 rbtdbiter->new_origin = ISC_TRUE;
7957 reference_iter_node(rbtdbiter);
7959 } else if (result == DNS_R_PARTIALMATCH) {
7960 result = ISC_R_NOTFOUND;
7961 rbtdbiter->node = NULL;
7964 rbtdbiter->result = result;
/*
 * NOTE(review): the sequence below handles the same lookup result a
 * second time, this time treating DNS_R_PARTIALMATCH as a usable
 * position (stored result ISC_R_SUCCESS).  It looks like an alternative
 * implementation of the sequence above, presumably selected by a
 * conditional that is not visible in this listing -- confirm against the
 * complete file before changing either one.
 */
7966 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7967 isc_result_t tresult;
7968 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7970 if (tresult == ISC_R_SUCCESS) {
7971 rbtdbiter->new_origin = ISC_TRUE;
7972 reference_iter_node(rbtdbiter);
7975 rbtdbiter->node = NULL;
7978 rbtdbiter->node = NULL;
7980 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7981 ISC_R_SUCCESS : result;
7988 dbiterator_prev(dns_dbiterator_t *iterator) {
7989 isc_result_t result;
7990 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7991 dns_name_t *name, *origin;
7992 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7994 REQUIRE(rbtdbiter->node != NULL);
7996 if (rbtdbiter->result != ISC_R_SUCCESS)
7997 return (rbtdbiter->result);
7999 if (rbtdbiter->paused)
8000 resume_iteration(rbtdbiter);
8002 name = dns_fixedname_name(&rbtdbiter->name);
8003 origin = dns_fixedname_name(&rbtdbiter->origin);
8004 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8005 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8006 !rbtdbiter->nonsec3 &&
8007 &rbtdbiter->nsec3chain == rbtdbiter->current) {
8008 rbtdbiter->current = &rbtdbiter->chain;
8009 dns_rbtnodechain_reset(rbtdbiter->current);
8010 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8012 if (result == ISC_R_NOTFOUND)
8013 result = ISC_R_NOMORE;
8016 dereference_iter_node(rbtdbiter);
8018 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8019 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8020 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8021 NULL, &rbtdbiter->node);
8024 if (result == ISC_R_SUCCESS)
8025 reference_iter_node(rbtdbiter);
8027 rbtdbiter->result = result;
8033 dbiterator_next(dns_dbiterator_t *iterator) {
8034 isc_result_t result;
8035 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8036 dns_name_t *name, *origin;
8037 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8039 REQUIRE(rbtdbiter->node != NULL);
8041 if (rbtdbiter->result != ISC_R_SUCCESS)
8042 return (rbtdbiter->result);
8044 if (rbtdbiter->paused)
8045 resume_iteration(rbtdbiter);
8047 name = dns_fixedname_name(&rbtdbiter->name);
8048 origin = dns_fixedname_name(&rbtdbiter->origin);
8049 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8050 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8051 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8052 rbtdbiter->current = &rbtdbiter->nsec3chain;
8053 dns_rbtnodechain_reset(rbtdbiter->current);
8054 result = dns_rbtnodechain_first(rbtdbiter->current,
8055 rbtdb->nsec3, name, origin);
8056 if (result == ISC_R_NOTFOUND)
8057 result = ISC_R_NOMORE;
8060 dereference_iter_node(rbtdbiter);
8062 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8063 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8064 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8065 NULL, &rbtdbiter->node);
8067 if (result == ISC_R_SUCCESS)
8068 reference_iter_node(rbtdbiter);
8070 rbtdbiter->result = result;
/*
 * dbiterator_current: hand the iterator's current node to the caller.
 *
 * Requires the stored iterator result to be ISC_R_SUCCESS and a current
 * node to exist.  A paused iterator is resumed first.  A new reference is
 * taken on the node (under the node lock) before it is stored in '*nodep'.
 * For a cleaning iterator the node is additionally passed to expirenode()
 * and, if it has no 'down' pointer, queued in the iterator's deletion
 * batch with an extra reference.
 *
 * The result is DNS_R_NEWORIGIN when relative names are in use and a new
 * origin was flagged by the positioning call; a failure from
 * dns_name_concatenate() is checked for as well.
 *
 * NOTE(review): some lines of this function are absent from this listing
 * (the last parameter, a guard around the name handling, an assignment
 * following the relative_names test, local declarations and the final
 * return).  The comments describe only the statements that are shown.
 */
8076 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8079 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8080 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8081 dns_rbtnode_t *node = rbtdbiter->node;
8082 isc_result_t result;
8083 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8084 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8086 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8087 REQUIRE(rbtdbiter->node != NULL);
8089 if (rbtdbiter->paused)
8090 resume_iteration(rbtdbiter);
/* Build the caller's 'name' from the stored node name and origin. */
8093 if (rbtdbiter->common.relative_names)
8095 result = dns_name_concatenate(nodename, origin, name, NULL);
8096 if (result != ISC_R_SUCCESS)
8098 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8099 result = DNS_R_NEWORIGIN;
8101 result = ISC_R_SUCCESS;
/* The caller receives its own reference to the node. */
8103 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8104 new_reference(rbtdb, node);
8105 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8107 *nodep = rbtdbiter->node;
8109 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8110 isc_result_t expire_result;
8113 * If the deletion array is full, flush it before trying
8114 * to expire the current node. The current node can't
8115 * fully deleted while the iteration cursor is still on it.
8117 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8118 flush_deletions(rbtdbiter);
8120 expire_result = expirenode(iterator->db, *nodep, 0);
8123 * expirenode() currently always returns success.
/*
 * Queue the node for a later flush_deletions() pass; the extra reference
 * taken here is the one that pass releases.
 */
8125 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8128 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8129 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8130 dns_rbtnode_refincrement(node, &refs);
8132 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8140 dbiterator_pause(dns_dbiterator_t *iterator) {
8141 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8142 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8144 if (rbtdbiter->result != ISC_R_SUCCESS &&
8145 rbtdbiter->result != ISC_R_NOMORE)
8146 return (rbtdbiter->result);
8148 if (rbtdbiter->paused)
8149 return (ISC_R_SUCCESS);
8151 rbtdbiter->paused = ISC_TRUE;
8153 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8154 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8155 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8156 rbtdbiter->tree_locked = isc_rwlocktype_none;
8159 flush_deletions(rbtdbiter);
8161 return (ISC_R_SUCCESS);
8165 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8166 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8167 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8169 if (rbtdbiter->result != ISC_R_SUCCESS)
8170 return (rbtdbiter->result);
8172 return (dns_name_copy(origin, name, NULL));
8176 * Additional cache routines.
/*
 * rdataset_getadditional: fetch cached additional-section data for the
 * rdata the rdataset is currently positioned on.
 *
 * The rdataset header is located immediately before the raw slab
 * ('raw - sizeof(*header)').  The slot index is derived from the rdata
 * count stored in the first two bytes of the slab (high byte first) and
 * the rdataset's 'privateuint4' counter: total - current - 1.
 *
 * With the node lock held for reading, the per-header array matching
 * 'type' (additional_auth or additional_glue) is consulted.  If there is
 * no array, or the slot has no entry, ISC_R_NOTFOUND is returned after
 * counting a query miss (no miss is counted for a missing array when
 * 'type' is dns_rdatasetadditional_fromcache).  Otherwise the entry is
 * attached while the lock is still held, the lock is dropped, and the
 * entry contents are retrieved with dns_acache_getentry() before the
 * entry is detached again.
 *
 * NOTE(review): parts of this function are absent from this listing (the
 * last parameter, some declarations/initialisations, the switch statement
 * scaffolding and the final return).
 */
8179 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8180 dns_rdatatype_t qtype, dns_acache_t *acache,
8181 dns_zone_t **zonep, dns_db_t **dbp,
8182 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8183 dns_name_t *fname, dns_message_t *msg,
8186 dns_rbtdb_t *rbtdb = rdataset->private1;
8187 dns_rbtnode_t *rbtnode = rdataset->private2;
8188 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8189 unsigned int current_count = rdataset->privateuint4;
8191 rdatasetheader_t *header;
8192 nodelock_t *nodelock;
8193 unsigned int total_count;
8194 acachectl_t *acarray;
8195 dns_acacheentry_t *entry;
8196 isc_result_t result;
8198 UNUSED(qtype); /* we do not use this value at least for now */
8201 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* Slot index of the current rdata within the per-header array. */
8203 total_count = raw[0] * 256 + raw[1];
8204 INSIST(total_count > current_count);
8205 count = total_count - current_count - 1;
8209 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8210 NODE_LOCK(nodelock, isc_rwlocktype_read);
8213 case dns_rdatasetadditional_fromauth:
8214 acarray = header->additional_auth;
8216 case dns_rdatasetadditional_fromcache:
8219 case dns_rdatasetadditional_fromglue:
8220 acarray = header->additional_glue;
8226 if (acarray == NULL) {
8227 if (type != dns_rdatasetadditional_fromcache)
8228 dns_acache_countquerymiss(acache);
8229 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8230 return (ISC_R_NOTFOUND);
8233 if (acarray[count].entry == NULL) {
8234 dns_acache_countquerymiss(acache);
8235 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8236 return (ISC_R_NOTFOUND);
/* Attach before unlocking so the entry stays valid without the lock. */
8240 dns_acache_attachentry(acarray[count].entry, &entry);
8242 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8244 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8245 nodep, fname, msg, now);
8247 dns_acache_detachentry(&entry);
/*
 * acache_callback: callback registered with dns_acache_createentry()
 * (see rdataset_setadditional()); invoked with the entry and a pointer
 * to its callback argument.
 *
 * With the node lock held for writing, the array selected by
 * 'cbarg->type' is looked up in 'cbarg->header'.  If slot 'cbarg->count'
 * still refers to 'entry', the slot's entry and cbarg pointers are
 * cleared.  The callback argument is released with isc_mem_put() in
 * either case, the entry is detached, the lock is dropped, and the node
 * and database references taken from the callback argument are released.
 *
 * NOTE(review): some declarations and the statement that obtains 'cbarg'
 * (presumably from '*arg') are absent from this listing, as is the
 * 'else' that presumably separates the two isc_mem_put() calls --
 * confirm against the complete file.
 */
8253 acache_callback(dns_acacheentry_t *entry, void **arg) {
8255 dns_rbtnode_t *rbtnode;
8256 nodelock_t *nodelock;
8257 acachectl_t *acarray = NULL;
8258 acache_cbarg_t *cbarg;
8261 REQUIRE(arg != NULL);
8265 * The caller must hold the entry lock.
8268 rbtdb = (dns_rbtdb_t *)cbarg->db;
8269 rbtnode = (dns_rbtnode_t *)cbarg->node;
8271 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8272 NODE_LOCK(nodelock, isc_rwlocktype_write);
8274 switch (cbarg->type) {
8275 case dns_rdatasetadditional_fromauth:
8276 acarray = cbarg->header->additional_auth;
8278 case dns_rdatasetadditional_fromglue:
8279 acarray = cbarg->header->additional_glue;
/* Clear the slot only if it still refers to the entry being called back. */
8285 count = cbarg->count;
8286 if (acarray != NULL && acarray[count].entry == entry) {
8287 acarray[count].entry = NULL;
8288 INSIST(acarray[count].cbarg == cbarg);
8289 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8290 acarray[count].cbarg = NULL;
8292 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8294 dns_acache_detachentry(&entry);
8296 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* Release the node and database references (local copies taken above). */
8298 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8299 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * acache_cancelentry: cancel an acache entry and dispose of its callback
 * argument.
 *
 * Calls dns_acache_cancelentry() on 'entry', releases the node and
 * database references stored in the callback argument, and frees the
 * callback argument to 'mctx'.
 *
 * Requires 'mctx', 'entry', 'cbargp' and '*cbargp' to be non-NULL.
 *
 * NOTE(review): the statement that loads 'cbarg' (presumably from
 * '*cbargp') and anything following the isc_mem_put() are absent from
 * this listing.
 */
8305 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8306 acache_cbarg_t **cbargp)
8308 acache_cbarg_t *cbarg;
8310 REQUIRE(mctx != NULL);
8311 REQUIRE(entry != NULL);
8312 REQUIRE(cbargp != NULL && *cbargp != NULL);
8316 dns_acache_cancelentry(entry);
8317 dns_db_detachnode(cbarg->db, &cbarg->node);
8318 dns_db_detach(&cbarg->db);
8320 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * rdataset_setadditional: store additional-section cache data for the
 * rdata the rdataset is currently positioned on.
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache; ISC_R_SUCCESS
 * is returned immediately.  Otherwise:
 *   1. a callback argument is allocated (ISC_R_NOMEMORY on failure) and
 *      filled in with the type, slot index and header, plus new
 *      references to the database and node;
 *   2. an acache entry is created with acache_callback() as its callback
 *      and populated with dns_acache_setentry();
 *   3. with the node lock held for writing, the per-header array for
 *      'type' is allocated on first use (one acachectl_t per rdata, all
 *      slots cleared) and the new entry and callback argument are
 *      installed in the slot;
 *   4. an entry previously occupying the slot is cancelled and detached
 *      after the node lock has been released.
 *
 * The statements after the successful return release the new entry
 * and/or callback argument.
 *
 * NOTE(review): the error jumps and label leading to that cleanup, the
 * switch scaffolding, some declarations and the last parameter are
 * absent from this listing; the cleanup is presumably the shared failure
 * path -- confirm against the complete file.
 */
8326 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8327 dns_rdatatype_t qtype, dns_acache_t *acache,
8328 dns_zone_t *zone, dns_db_t *db,
8329 dns_dbversion_t *version, dns_dbnode_t *node,
8332 dns_rbtdb_t *rbtdb = rdataset->private1;
8333 dns_rbtnode_t *rbtnode = rdataset->private2;
8334 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8335 unsigned int current_count = rdataset->privateuint4;
8336 rdatasetheader_t *header;
8337 unsigned int total_count, count;
8338 nodelock_t *nodelock;
8339 isc_result_t result;
8340 acachectl_t *acarray;
8341 dns_acacheentry_t *newentry, *oldentry = NULL;
8342 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8346 if (type == dns_rdatasetadditional_fromcache)
8347 return (ISC_R_SUCCESS);
/* The header sits immediately before the raw slab. */
8349 header = (struct rdatasetheader *)(raw - sizeof(*header));
8351 total_count = raw[0] * 256 + raw[1];
8352 INSIST(total_count > current_count);
8353 count = total_count - current_count - 1; /* should be private data */
/* Build the callback argument; it holds its own db and node references. */
8355 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8356 if (newcbarg == NULL)
8357 return (ISC_R_NOMEMORY);
8358 newcbarg->type = type;
8359 newcbarg->count = count;
8360 newcbarg->header = header;
8361 newcbarg->db = NULL;
8362 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8363 newcbarg->node = NULL;
8364 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8367 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8368 acache_callback, newcbarg, &newentry);
8369 if (result != ISC_R_SUCCESS)
8371 /* Set cache data in the new entry. */
8372 result = dns_acache_setentry(acache, newentry, zone, db,
8373 version, node, fname);
8374 if (result != ISC_R_SUCCESS)
8377 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8378 NODE_LOCK(nodelock, isc_rwlocktype_write);
8382 case dns_rdatasetadditional_fromauth:
8383 acarray = header->additional_auth;
8385 case dns_rdatasetadditional_fromglue:
8386 acarray = header->additional_glue;
/* First use for this header: allocate one slot per rdata, all empty. */
8392 if (acarray == NULL) {
8395 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8396 sizeof(acachectl_t));
8398 if (acarray == NULL) {
8399 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8403 for (i = 0; i < total_count; i++) {
8404 acarray[i].entry = NULL;
8405 acarray[i].cbarg = NULL;
8409 case dns_rdatasetadditional_fromauth:
8410 header->additional_auth = acarray;
8412 case dns_rdatasetadditional_fromglue:
8413 header->additional_glue = acarray;
8419 if (acarray[count].entry != NULL) {
8421 * Swap the entry. Delay cleaning-up the old entry since
8422 * it would require a node lock.
8424 oldentry = acarray[count].entry;
8425 INSIST(acarray[count].cbarg != NULL);
8426 oldcbarg = acarray[count].cbarg;
8428 acarray[count].entry = newentry;
8429 acarray[count].cbarg = newcbarg;
8431 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* Dispose of the replaced entry now that the node lock is released. */
8433 if (oldentry != NULL) {
8434 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8435 dns_acache_detachentry(&oldentry);
8438 return (ISC_R_SUCCESS);
/* Cleanup: undo whatever was created for the new entry. */
8441 if (newcbarg != NULL) {
8442 if (newentry != NULL) {
8443 acache_cancelentry(rbtdb->common.mctx, newentry,
8445 dns_acache_detachentry(&newentry);
8447 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8448 dns_db_detach(&newcbarg->db);
8449 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * rdataset_putadditional: discard the additional-section cache entry for
 * the rdata the rdataset is currently positioned on.
 *
 * Nothing is done for dns_rdatasetadditional_fromcache; ISC_R_SUCCESS is
 * returned immediately.  Otherwise the slot is located as in the other
 * additional-cache routines (header immediately before the raw slab,
 * index = total - current - 1).  With the node lock held for writing,
 * ISC_R_NOTFOUND is returned if the array for 'type' or the slot's entry
 * is missing; otherwise the slot is cleared.  The removed entry is then
 * cancelled and detached after the lock has been released, and
 * ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the switch scaffolding, some initialisations and one
 * line inside the final 'if' are absent from this listing.
 */
8458 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8459 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8461 dns_rbtdb_t *rbtdb = rdataset->private1;
8462 dns_rbtnode_t *rbtnode = rdataset->private2;
8463 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8464 unsigned int current_count = rdataset->privateuint4;
8465 rdatasetheader_t *header;
8466 nodelock_t *nodelock;
8467 unsigned int total_count, count;
8468 acachectl_t *acarray;
8469 dns_acacheentry_t *entry;
8470 acache_cbarg_t *cbarg;
8472 UNUSED(qtype); /* we do not use this value at least for now */
8475 if (type == dns_rdatasetadditional_fromcache)
8476 return (ISC_R_SUCCESS);
8478 header = (struct rdatasetheader *)(raw - sizeof(*header));
8480 total_count = raw[0] * 256 + raw[1];
8481 INSIST(total_count > current_count);
8482 count = total_count - current_count - 1;
8487 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8488 NODE_LOCK(nodelock, isc_rwlocktype_write);
8491 case dns_rdatasetadditional_fromauth:
8492 acarray = header->additional_auth;
8494 case dns_rdatasetadditional_fromglue:
8495 acarray = header->additional_glue;
8501 if (acarray == NULL) {
8502 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8503 return (ISC_R_NOTFOUND);
8506 entry = acarray[count].entry;
8507 if (entry == NULL) {
8508 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8509 return (ISC_R_NOTFOUND);
/* Detach the slot contents under the lock; clean them up afterwards. */
8512 acarray[count].entry = NULL;
8513 cbarg = acarray[count].cbarg;
8514 acarray[count].cbarg = NULL;
8516 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8518 if (entry != NULL) {
8520 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8521 dns_acache_detachentry(&entry);
8524 return (ISC_R_SUCCESS);
8528 * Routines for LRU-based cache management.
/*
 * See if a given cache entry that is being reused needs to be updated
 * in the LRU-list.  From the LRU management point of view, this function is
 * expected to return true for almost all cases.  When used with threads,
 * however, this may cause a non-negligible performance penalty because a
 * writer lock will have to be acquired before updating the list.
 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
 * function returns true if the entry has not been updated for some period of
 * time.  We differentiate the NS or glue address case and the others since
 * experiments have shown that the former tends to be accessed relatively
 * infrequently and the cost of cache miss is higher (e.g., a missing NS record
 * may cause external queries at a higher level zone, involving more
 * transactions).
 *
 * Caller must hold the node (read or write) lock.
 */
/*
 * need_headerupdate: decide whether 'header' should have its LRU
 * position refreshed at time 'now'.
 *
 * Headers flagged NONEXISTENT or STALE are handled first, as a special
 * case.  When DNS_RBTDB_LIMITLRUUPDATE is non-zero, NS rdatasets and
 * glue-trust A/AAAA rdatasets qualify once 60 seconds have passed since
 * 'last_used'; everything else qualifies after 300 seconds.
 *
 * NOTE(review): the statement executed for NONEXISTENT/STALE headers and
 * the code used when DNS_RBTDB_LIMITLRUUPDATE is zero are absent from
 * this listing.
 */
8547 static inline isc_boolean_t
8548 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8549 if ((header->attributes &
8550 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8553 #if DNS_RBTDB_LIMITLRUUPDATE
8554 if (header->type == dns_rdatatype_ns ||
8555 (header->trust == dns_trust_glue &&
8556 (header->type == dns_rdatatype_a ||
8557 header->type == dns_rdatatype_aaaa))) {
8559 * Glue records are updated if at least 60 seconds have passed
8560 * since the previous update time.
8562 return (header->last_used + 60 <= now);
8565 /* Other records are updated if 5 minutes have passed. */
8566 return (header->last_used + 300 <= now);
8575 * Update the timestamp of a given cache entry and move it to the head
8576 * of the corresponding LRU list.
8578 * Caller must hold the node (write) lock.
8580 * Note that the we do NOT touch the heap here, as the TTL has not changed.
8583 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8586 INSIST(IS_CACHE(rbtdb));
8588 /* To be checked: can we really assume this? XXXMLG */
8589 INSIST(ISC_LINK_LINKED(header, link));
8591 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8592 header->last_used = now;
8593 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8597 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8598 * under an overmem condition. To recover from this condition quickly, up to
8599 * 2 entries will be purged. This process is triggered while adding a new
8600 * entry, and we specifically avoid purging entries in the same LRU bucket as
8601 * the one to which the new entry will belong. Otherwise, we might purge
8602 * entries of the same name of different RR types while adding RRsets from a
8603 * single response (consider the case where we're adding A and AAAA glue records
8604 * of the same NS name).
/*
 * overmem_purge: expire a small number of cache entries from lock
 * buckets other than 'locknum_start'.
 *
 * Buckets are visited in circular order starting at 'locknum_start + 1'
 * and stopping on reaching 'locknum_start' again or once 'purgecount'
 * has dropped to zero.  For each bucket, with its node lock held for
 * writing:
 *   - element 1 of the bucket's heap is expired if its TTL is at or
 *     before 'now - RBTDB_VIRTUAL';
 *   - entries are then taken from the tail of the bucket's LRU list,
 *     unlinked and expired, while 'purgecount' remains positive.
 *
 * 'tree_locked' is passed through unchanged to expire_header().
 *
 * NOTE(review): the declaration, initial value and updates of
 * 'purgecount' are absent from this listing.
 */
8607 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8608 isc_stdtime_t now, isc_boolean_t tree_locked)
8610 rdatasetheader_t *header, *header_prev;
8611 unsigned int locknum;
/* Walk every bucket except the starting one. */
8614 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8615 locknum != locknum_start && purgecount > 0;
8616 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8617 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8618 isc_rwlocktype_write);
/* First candidate: the heap's top element, if it has already expired. */
8620 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8621 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8622 expire_header(rbtdb, header, tree_locked);
/* Then take entries from the tail of the LRU list. */
8626 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8627 header != NULL && purgecount > 0;
8628 header = header_prev) {
/* Remember the predecessor before the entry is unlinked. */
8629 header_prev = ISC_LIST_PREV(header, link);
8631 * Unlink the entry at this point to avoid checking it
8632 * again even if it's currently used someone else and
8633 * cannot be purged at this moment. This entry won't be
8634 * referenced any more (so unlinking is safe) since the
8635 * TTL was reset to 0.
8637 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8639 expire_header(rbtdb, header, tree_locked);
8643 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8644 isc_rwlocktype_write);
8649 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8650 isc_boolean_t tree_locked)
8652 set_ttl(rbtdb, header, 0);
8653 header->attributes |= RDATASET_ATTR_STALE;
8654 header->node->dirty = 1;
8657 * Caller must hold the node (write) lock.
8660 if (dns_rbtnode_refcurrent(header->node) == 0) {
8662 * If no one else is using the node, we can clean it up now.
8663 * We first need to gain a new reference to the node to meet a
8664 * requirement of decrement_reference().
8666 new_reference(rbtdb, header->node);
8667 decrement_reference(rbtdb, header->node, 0,
8668 isc_rwlocktype_write,
8669 tree_locked ? isc_rwlocktype_write :
8670 isc_rwlocktype_none, ISC_FALSE);