2 * Copyright (C) 2004-2011 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.310.8.1.2.1 2011-06-21 20:15:48 each Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
57 #include <dns/rdata.h>
58 #include <dns/rdataset.h>
59 #include <dns/rdatasetiter.h>
60 #include <dns/rdataslab.h>
61 #include <dns/rdatastruct.h>
62 #include <dns/result.h>
63 #include <dns/stats.h>
66 #include <dns/zonekey.h>
68 #ifdef DNS_RBTDB_VERSION64
74 #ifdef DNS_RBTDB_VERSION64
75 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
77 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
81 * Note that "impmagic" is not the first four bytes of the struct, so
82 * ISC_MAGIC_VALID cannot be used.
84 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
85 (rbtdb)->common.impmagic == RBTDB_MAGIC)
87 #ifdef DNS_RBTDB_VERSION64
88 typedef isc_uint64_t rbtdb_serial_t;
90 * Make casting easier in symbolic debuggers by using different names
91 * for the 64 bit version.
93 #define dns_rbtdb_t dns_rbtdb64_t
94 #define rdatasetheader_t rdatasetheader64_t
95 #define rbtdb_version_t rbtdb_version64_t
97 typedef isc_uint32_t rbtdb_serial_t;
100 typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata type codes into one value:
 * the "base" type lives in the low 16 bits and the "extended" type in the
 * high 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)	extracts the low 16 bits (base type).
 * RBTDB_RDATATYPE_EXT(type)	extracts the high 16 bits (extended type).
 * RBTDB_RDATATYPE_VALUE(b, e)	builds the packed value from base 'b' and
 *				extended type 'e'.
 *
 * In RBTDB_RDATATYPE_VALUE both operands are converted to
 * rbtdb_rdatatype_t *before* being shifted/or-ed.  When 'e' has a 16-bit
 * type it would otherwise be promoted to (signed) int, and shifting a
 * value >= 0x8000 left by 16 bits overflows int, which is undefined
 * behavior.  'b' is masked to 16 bits so that it can never spill into the
 * extended half.  For every valid pair of 16-bit inputs the result is the
 * same as before.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
	((rbtdb_rdatatype_t)((((rbtdb_rdatatype_t)(e)) << 16) | \
			     (((rbtdb_rdatatype_t)(b)) & 0xFFFF)))
106 #define RBTDB_RDATATYPE_SIGNSEC \
107 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
108 #define RBTDB_RDATATYPE_SIGNSEC3 \
109 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
110 #define RBTDB_RDATATYPE_SIGNS \
111 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
112 #define RBTDB_RDATATYPE_SIGCNAME \
113 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
114 #define RBTDB_RDATATYPE_SIGDNAME \
115 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
116 #define RBTDB_RDATATYPE_NCACHEANY \
117 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
120 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
121 * Using rwlock is effective with regard to lookup performance only when
122 * it is implemented in an efficient way.
123 * Otherwise, it is generally wise to stick to the simple locking since rwlock
124 * would require more memory or can even make lookups slower due to its own
125 * overhead (when it internally calls mutex locks).
127 #ifdef ISC_RWLOCK_USEATOMIC
128 #define DNS_RBTDB_USERWLOCK 1
130 #define DNS_RBTDB_USERWLOCK 0
133 #if DNS_RBTDB_USERWLOCK
134 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
135 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
136 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
137 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
139 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
140 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
141 #define RBTDB_LOCK(l, t) LOCK(l)
142 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
146 * Since node locking is sensitive to both performance and memory footprint,
147 * we need some trick here. If we have both high-performance rwlock and
148 * high performance and small-memory reference counters, we use rwlock for
149 * node lock and isc_refcount for node references. In this case, we don't have
150 * to protect the access to the counters by locks.
151 * Otherwise, we simply use ordinary mutex lock for node locking, and use
152 * simple integers as reference counters which is protected by the lock.
153 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
154 * NODE_UNLOCK. In some other cases, however, we need to protect reference
155 * counters first and then protect other parts of a node as read-only data.
156 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
157 * provided for these special cases. When we can use the efficient backend
158 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
159 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
160 * section including the access to the reference counter.
161 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
162 * section is also protected by NODE_STRONGLOCK().
164 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
165 typedef isc_rwlock_t nodelock_t;
167 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
168 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
169 #define NODE_LOCK(l, t) RWLOCK((l), (t))
170 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
171 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
173 #define NODE_STRONGLOCK(l) ((void)0)
174 #define NODE_STRONGUNLOCK(l) ((void)0)
175 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
176 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
177 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
179 typedef isc_mutex_t nodelock_t;
181 #define NODE_INITLOCK(l) isc_mutex_init(l)
182 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
183 #define NODE_LOCK(l, t) LOCK(l)
184 #define NODE_UNLOCK(l, t) UNLOCK(l)
185 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
187 #define NODE_STRONGLOCK(l) LOCK(l)
188 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
189 #define NODE_WEAKLOCK(l, t) ((void)0)
190 #define NODE_WEAKUNLOCK(l, t) ((void)0)
191 #define NODE_WEAKDOWNGRADE(l) ((void)0)
195 * Whether to rate-limit updating the LRU to avoid possible thread contention.
196 * Our performance measurement has shown the cost is marginal, so it's defined
197 * to be 0 by default either with or without threads.
199 #ifndef DNS_RBTDB_LIMITLRUUPDATE
200 #define DNS_RBTDB_LIMITLRUUPDATE 0
204 * Allow clients with a virtual time of up to 5 minutes in the past to see
205 * records that would otherwise have expired.
207 #define RBTDB_VIRTUAL 300
213 dns_rdatatype_t type;
216 typedef struct acachectl acachectl_t;
218 typedef struct rdatasetheader {
220 * Locked by the owning node's lock.
222 rbtdb_serial_t serial;
224 rbtdb_rdatatype_t type;
225 isc_uint16_t attributes;
227 struct noqname *noqname;
228 struct noqname *closest;
230 * We don't use the LIST macros, because the LIST structure has
231 * both head and tail pointers, and is doubly linked.
234 struct rdatasetheader *next;
236 * If this is the top header for an rdataset, 'next' points
237 * to the top header for the next rdataset (i.e., the next type).
238 * Otherwise, it points up to the header whose down pointer points
242 struct rdatasetheader *down;
244 * Points to the header for the next older version of
250 * Monotonically increased every time this rdataset is bound so that
251 * it is used as the base of the starting point in DNS responses
252 * when the "cyclic" rrset-order is required. Since the ordering
253 * should not be so crucial, no lock is set for the counter for
254 * performance reasons.
257 acachectl_t *additional_auth;
258 acachectl_t *additional_glue;
261 isc_stdtime_t last_used;
262 ISC_LINK(struct rdatasetheader) link;
264 unsigned int heap_index;
266 * Used for TTL-based cache cleaning.
268 isc_stdtime_t resign;
271 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
272 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
274 #define RDATASET_ATTR_NONEXISTENT 0x0001
275 #define RDATASET_ATTR_STALE 0x0002
276 #define RDATASET_ATTR_IGNORE 0x0004
277 #define RDATASET_ATTR_RETAIN 0x0008
278 #define RDATASET_ATTR_NXDOMAIN 0x0010
279 #define RDATASET_ATTR_RESIGN 0x0020
280 #define RDATASET_ATTR_STATCOUNT 0x0040
281 #define RDATASET_ATTR_OPTOUT 0x0080
282 #define RDATASET_ATTR_NEGATIVE 0x0100
284 typedef struct acache_cbarg {
285 dns_rdatasetadditional_t type;
289 rdatasetheader_t *header;
293 dns_acacheentry_t *entry;
294 acache_cbarg_t *cbarg;
299 * When the cache will pre-expire data (due to memory low or other
300 * situations) before the rdataset's TTL has expired, it MUST
301 * respect the RETAIN bit and not expire the data until its TTL is
305 #undef IGNORE /* WIN32 winbase.h defines this. */
307 #define EXISTS(header) \
308 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
309 #define NONEXISTENT(header) \
310 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
311 #define IGNORE(header) \
312 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
313 #define RETAIN(header) \
314 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
315 #define NXDOMAIN(header) \
316 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
317 #define RESIGN(header) \
318 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
319 #define OPTOUT(header) \
320 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
321 #define NEGATIVE(header) \
322 (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
324 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
327 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
328 * There is a tradeoff issue about configuring this value: if this is too
329 * small, it may cause heavier contention between threads; if this is too large,
330 * the LRU purge algorithm won't work well (entries tend to be purged prematurely).
331 * The default value should work well for most environments, but this can
332 * also be configurable at compilation time via the
333 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
334 * 1 due to the assumption of overmem_purge().
336 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
338 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
340 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
343 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
344 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
348 /* Protected in the refcount routines. */
349 isc_refcount_t references;
350 /* Locked by lock. */
351 isc_boolean_t exiting;
354 typedef struct rbtdb_changed {
355 dns_rbtnode_t * node;
357 ISC_LINK(struct rbtdb_changed) link;
360 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
368 typedef struct rbtdb_version {
370 rbtdb_serial_t serial;
372 * Protected in the refcount routines.
373 * XXXJT: should we change the lock policy based on the refcount
376 isc_refcount_t references;
377 /* Locked by database lock. */
378 isc_boolean_t writer;
379 isc_boolean_t commit_ok;
380 rbtdb_changedlist_t changed_list;
381 rdatasetheaderlist_t resigned_list;
382 ISC_LINK(struct rbtdb_version) link;
383 dns_db_secure_t secure;
384 isc_boolean_t havensec3;
385 /* NSEC3 parameters */
388 isc_uint16_t iterations;
389 isc_uint8_t salt_length;
390 unsigned char salt[DNS_NSEC3_SALTSIZE];
393 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
398 /* Locks the data in this struct */
399 #if DNS_RBTDB_USERWLOCK
404 /* Locks the tree structure (prevents nodes appearing/disappearing) */
405 isc_rwlock_t tree_lock;
406 /* Locks for individual tree nodes */
407 unsigned int node_lock_count;
408 rbtdb_nodelock_t * node_locks;
409 dns_rbtnode_t * origin_node;
410 dns_stats_t * rrsetstats; /* cache DB only */
411 /* Locked by lock. */
413 isc_refcount_t references;
414 unsigned int attributes;
415 rbtdb_serial_t current_serial;
416 rbtdb_serial_t least_serial;
417 rbtdb_serial_t next_serial;
418 rbtdb_version_t * current_version;
419 rbtdb_version_t * future_version;
420 rbtdb_versionlist_t open_versions;
422 dns_dbnode_t *soanode;
423 dns_dbnode_t *nsnode;
426 * This is a linked list used to implement the LRU cache. There will
427 * be node_lock_count linked lists here. Nodes in bucket 1 will be
428 * placed on the linked list rdatasets[1].
430 rdatasetheaderlist_t *rdatasets;
433 * Temporary storage for stale cache nodes and dynamically deleted
434 * nodes that await being cleaned up.
436 rbtnodelist_t *deadnodes;
439 * Heaps. Each of these is used for TTL based expiry.
443 /* Locked by tree_lock. */
447 dns_rpz_cidr_t * rpz_cidr;
450 unsigned int quantum;
453 #define RBTDB_ATTR_LOADED 0x01
454 #define RBTDB_ATTR_LOADING 0x02
461 rbtdb_version_t * rbtversion;
462 rbtdb_serial_t serial;
463 unsigned int options;
464 dns_rbtnodechain_t chain;
465 isc_boolean_t copy_name;
466 isc_boolean_t need_cleanup;
468 dns_rbtnode_t * zonecut;
469 rdatasetheader_t * zonecut_rdataset;
470 rdatasetheader_t * zonecut_sigrdataset;
471 dns_fixedname_t zonecut_name;
483 static void rdataset_disassociate(dns_rdataset_t *rdataset);
484 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
485 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
486 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
487 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
488 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
489 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
492 dns_rdataset_t *negsig);
493 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
496 dns_rdataset_t *negsig);
497 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
498 dns_rdatasetadditional_t type,
499 dns_rdatatype_t qtype,
500 dns_acache_t *acache,
503 dns_dbversion_t **versionp,
504 dns_dbnode_t **nodep,
508 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
509 dns_rdatasetadditional_t type,
510 dns_rdatatype_t qtype,
511 dns_acache_t *acache,
514 dns_dbversion_t *version,
517 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
518 dns_rdataset_t *rdataset,
519 dns_rdatasetadditional_t type,
520 dns_rdatatype_t qtype);
521 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
523 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
525 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
526 isc_boolean_t tree_locked);
527 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
528 isc_stdtime_t now, isc_boolean_t tree_locked);
529 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
530 rdatasetheader_t *newheader);
531 static void prune_tree(isc_task_t *task, isc_event_t *event);
532 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
533 static void rdataset_expire(dns_rdataset_t *rdataset);
535 static dns_rdatasetmethods_t rdataset_methods = {
536 rdataset_disassociate,
546 rdataset_getadditional,
547 rdataset_setadditional,
548 rdataset_putadditional,
553 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
554 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
555 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
556 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
557 dns_rdataset_t *rdataset);
559 static dns_rdatasetitermethods_t rdatasetiter_methods = {
560 rdatasetiter_destroy,
566 typedef struct rbtdb_rdatasetiter {
567 dns_rdatasetiter_t common;
568 rdatasetheader_t * current;
569 } rbtdb_rdatasetiter_t;
571 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
572 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
573 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
574 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
576 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
577 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
578 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
579 dns_dbnode_t **nodep,
581 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
582 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
585 static dns_dbiteratormethods_t dbiterator_methods = {
597 #define DELETION_BATCH_MAX 64
600 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
602 typedef struct rbtdb_dbiterator {
603 dns_dbiterator_t common;
604 isc_boolean_t paused;
605 isc_boolean_t new_origin;
606 isc_rwlocktype_t tree_locked;
608 dns_fixedname_t name;
609 dns_fixedname_t origin;
610 dns_rbtnodechain_t chain;
611 dns_rbtnodechain_t nsec3chain;
612 dns_rbtnodechain_t *current;
614 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
616 isc_boolean_t nsec3only;
617 isc_boolean_t nonsec3;
618 } rbtdb_dbiterator_t;
621 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
622 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
624 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
626 static void overmem(dns_db_t *db, isc_boolean_t overmem);
628 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
632 * 'init_count' is used to initialize 'newheader->count' which in turn
633 * is used to determine where in the cycle rrset-order cyclic starts.
634 * We don't lock this as we don't care about simultaneous updates.
637 * Both init_count and header->count can be ISC_UINT32_MAX.
638 * The count on the returned rdataset however can't be as
639 * that indicates that the database does not implement cyclic
642 static unsigned int init_count;
647 * If a routine is going to lock more than one lock in this module, then
648 * the locking must be done in the following order:
652 * Node Lock (Only one from the set may be locked at one time by
657 * Failure to follow this hierarchy can result in deadlock.
663 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * Take an additional reference on the database 'source': the handle is
 * validated with VALID_RBTDB() and rbtdb->references is incremented.
 * NOTE(review): the statement that stores into '*targetp' is not visible
 * here -- confirm against the full file.
 */
672 attach(dns_db_t *source, dns_db_t **targetp) {
673 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
675 REQUIRE(VALID_RBTDB(rbtdb));
677 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * Task event handler: takes the database from event->ev_arg and calls
 * free_rbtdb() on it with 'log' set to ISC_TRUE, passing the event along.
 */
683 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
684 dns_rbtdb_t *rbtdb = event->ev_arg;
688 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * Adjust the counter in rbtdb->rrsetstats that corresponds to 'header'.
 *
 * The statistics type is built with DNS_RDATASTATSTYPE_VALUE() from a base
 * type and attribute bits.  For NEGATIVE headers the attribute is NXDOMAIN
 * or NXRRSET and the base is RBTDB_RDATATYPE_EXT(header->type); the other
 * visible assignment uses RBTDB_RDATATYPE_BASE(header->type).
 *
 * 'rbtdb' must be a cache database (INSISTed).
 * NOTE(review): the 'else' lines and the test of 'increment' that selects
 * between the increment and decrement calls are not visible here.
 */
692 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
693 isc_boolean_t increment)
695 dns_rdatastatstype_t statattributes = 0;
696 dns_rdatastatstype_t base = 0;
697 dns_rdatastatstype_t type;
699 /* At the moment we count statistics only for cache DB */
700 INSIST(IS_CACHE(rbtdb));
702 if (NEGATIVE(header)) {
703 if (NXDOMAIN(header))
704 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
706 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
707 base = RBTDB_RDATATYPE_EXT(header->type);
710 base = RBTDB_RDATATYPE_BASE(header->type);
712 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
714 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
716 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * Store 'newttl' in header->rdh_ttl and tell the TTL heap of the header's
 * lock bucket (rbtdb->heaps[header->node->locknum]) that the element's key
 * has changed, via isc_heap_increased() or isc_heap_decreased().
 *
 * The visible guards test for: a non-cache database, a header that is not
 * in a heap (heap_index == 0), an unchanged TTL, and a missing heap array
 * or bucket heap.
 * NOTE(review): the bodies of those guards (presumably early returns) and
 * the condition choosing between the two heap calls are not visible here.
 */
720 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
/* Remember the previous TTL so a no-op change can be detected below. */
725 oldttl = header->rdh_ttl;
726 header->rdh_ttl = newttl;
728 if (!IS_CACHE(rbtdb))
732 * It's possible the rbtdb is not a cache. If this is the case,
733 * we will not have a heap, and we move on. If we do, though,
734 * we might need to adjust things.
736 if (header->heap_index == 0 || newttl == oldttl)
/* Heaps are kept per node-lock bucket; pick the one for this node. */
738 idx = header->node->locknum;
739 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
741 heap = rbtdb->heaps[idx];
744 isc_heap_increased(heap, header->heap_index);
746 isc_heap_decreased(heap, header->heap_index);
750 * These functions allow the heap code to rank the priority of each
751 * element. It returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * Heap ordering callback: 'v1' and 'v2' are treated as rdatasetheader_t
 * pointers and their rdh_ttl fields are compared.
 * NOTE(review): the return statements are not visible here.
 */
754 ttl_sooner(void *v1, void *v2) {
755 rdatasetheader_t *h1 = v1;
756 rdatasetheader_t *h2 = v2;
758 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * Heap ordering callback: 'v1' and 'v2' are treated as rdatasetheader_t
 * pointers and their 'resign' times are compared.
 * NOTE(review): the return statements are not visible here.
 */
764 resign_sooner(void *v1, void *v2) {
765 rdatasetheader_t *h1 = v1;
766 rdatasetheader_t *h2 = v2;
768 if (h1->resign < h2->resign)
774 * This function sets the heap index into the header.
/*
 * Heap index callback: 'what' is treated as an rdatasetheader_t and
 * 'index' is recorded in its heap_index field.
 */
777 set_index(void *what, unsigned int index) {
778 rdatasetheader_t *h = what;
780 h->heap_index = index;
784 * Work out how many nodes can be deleted in the time between two
785 * requests to the nameserver. Smooth the resulting number and use it
786 * as an estimate for the number of nodes to be deleted in the next
/*
 * Derive a new deletion quantum from the previous one ('old') and the time
 * elapsed since 'start'.
 *
 * 'interval' is the time per packet in microseconds computed from dns_pps;
 * the candidate value is old * interval / usecs, which is then smoothed as
 * (new + 3 * old) / 4 and logged at debug level 1.
 *
 * NOTE(review): any clamping of 'pps' before the division, the handling of
 * usecs == 0, and the return statement are not visible here -- confirm
 * against the full file.
 */
790 adjust_quantum(unsigned int old, isc_time_t *start) {
791 unsigned int pps = dns_pps; /* packets per second */
792 unsigned int interval;
801 interval = 1000000 / pps; /* interval in usec */
804 usecs = isc_time_microdiff(&end, start);
807 * We were unable to measure the amount of time taken.
808 * Double the nodes deleted next time.
/* Scale the old quantum by (time available) / (time actually used). */
815 new = old * interval;
816 new /= (unsigned int)usecs;
/* Weighted average: three parts old value, one part new estimate. */
823 new = (new + old * 3) / 4;
/*
 * NOTE(review): 'new' is printed with %d; if it is declared unsigned
 * (declaration not shown), %u would be the matching specifier.
 */
825 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
826 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * Tear down the database 'rbtdb' and release its memory.
 *
 * Visible steps, in order:
 *  - for a class IN cache, call overmem() with (isc_boolean_t)-1;
 *  - require that there is no future version, then drop and free the
 *    current version if there is one;
 *  - unlink every node from each deadnodes[] bucket;
 *  - destroy the trees (tree, then nsec, then nsec3) with
 *    dns_rbt_destroy2(), using a quantum of 100 when rbtdb->task is set and
 *    0 otherwise.  If that returns ISC_R_QUOTA the quantum is re-tuned with
 *    adjust_quantum() and a DNS_EVENT_FREESTORAGE event is sent to
 *    rbtdb->task;
 *  - once the trees are gone: free the event, log completion, free the
 *    origin name, destroy the node locks, release the rdatasets[],
 *    deadnodes[] and heaps[] arrays, detach statistics, free the RPZ CIDR
 *    data, free node_locks, destroy the remaining locks and reference
 *    counter, detach the task, clear the magic numbers, return the memory
 *    and finally send the on-destroy notification.
 *
 * 'event' is the event from a previous deferred call, or NULL (see the
 * callers free_rbtdb_callback() and maybe_free_rbtdb()).
 * NOTE(review): several control-flow lines (loop construct, 'if (log)',
 * returns, preprocessor guards) are not visible here; the ordering above is
 * taken from the visible statements only.
 */
832 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
834 isc_ondestroy_t ondest;
836 char buf[DNS_NAME_FORMATSIZE];
840 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
841 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
843 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
844 REQUIRE(rbtdb->future_version == NULL);
/* Release the reference held on the current version and free it. */
846 if (rbtdb->current_version != NULL) {
849 isc_refcount_decrement(&rbtdb->current_version->references,
852 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
853 isc_refcount_destroy(&rbtdb->current_version->references);
854 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
855 sizeof(rbtdb_version_t));
859 * We assume the number of remaining dead nodes is reasonably small;
860 * the overhead of unlinking all nodes here should be negligible.
862 for (i = 0; i < rbtdb->node_lock_count; i++) {
865 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
866 while (node != NULL) {
867 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
868 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* Incremental destruction is only used when a task is available. */
873 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
877 * pick the next tree to (start to) destroy
879 treep = &rbtdb->tree;
880 if (*treep == NULL) {
881 treep = &rbtdb->nsec;
882 if (*treep == NULL) {
883 treep = &rbtdb->nsec3;
885 * we're finished after clear cutting
892 isc_time_now(&start);
893 result = dns_rbt_destroy2(treep, rbtdb->quantum);
/* Quota hit: re-tune the quantum and hand the rest to the task. */
894 if (result == ISC_R_QUOTA) {
895 INSIST(rbtdb->task != NULL);
896 if (rbtdb->quantum != 0)
897 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
900 event = isc_event_allocate(rbtdb->common.mctx,
902 DNS_EVENT_FREESTORAGE,
905 sizeof(isc_event_t));
908 isc_task_send(rbtdb->task, &event);
911 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
915 isc_event_free(&event);
917 if (dns_name_dynamic(&rbtdb->common.origin))
918 dns_name_format(&rbtdb->common.origin, buf,
921 strcpy(buf, "<UNKNOWN>");
922 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
923 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
924 "done free_rbtdb(%s)", buf);
926 if (dns_name_dynamic(&rbtdb->common.origin))
927 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
928 for (i = 0; i < rbtdb->node_lock_count; i++) {
929 isc_refcount_destroy(&rbtdb->node_locks[i].references);
930 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
934 * Clean up LRU / re-signing order lists.
936 if (rbtdb->rdatasets != NULL) {
937 for (i = 0; i < rbtdb->node_lock_count; i++)
938 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
939 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
940 rbtdb->node_lock_count *
941 sizeof(rdatasetheaderlist_t));
944 * Clean up dead node buckets.
946 if (rbtdb->deadnodes != NULL) {
947 for (i = 0; i < rbtdb->node_lock_count; i++)
948 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
949 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
950 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
953 * Clean up heap objects.
955 if (rbtdb->heaps != NULL) {
956 for (i = 0; i < rbtdb->node_lock_count; i++)
957 isc_heap_destroy(&rbtdb->heaps[i]);
958 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
959 rbtdb->node_lock_count *
960 sizeof(isc_heap_t *));
963 if (rbtdb->rrsetstats != NULL)
964 dns_stats_detach(&rbtdb->rrsetstats);
967 if (rbtdb->rpz_cidr != NULL)
968 dns_rpz_cidr_free(&rbtdb->rpz_cidr);
971 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
972 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
973 isc_rwlock_destroy(&rbtdb->tree_lock);
974 isc_refcount_destroy(&rbtdb->references);
975 if (rbtdb->task != NULL)
976 isc_task_detach(&rbtdb->task);
978 RBTDB_DESTROYLOCK(&rbtdb->lock);
/* Invalidate the handle so that VALID_RBTDB() fails from here on. */
979 rbtdb->common.magic = 0;
980 rbtdb->common.impmagic = 0;
/* Copy 'ondest' out first: 'rbtdb' itself is freed on the next line. */
981 ondest = rbtdb->common.ondest;
982 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
983 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * Decide whether 'rbtdb' can be destroyed now, and do so if it can.
 *
 * The cached SOA and NS nodes are detached, every node-lock bucket is
 * marked 'exiting' under its write lock, and the buckets' reference
 * counters are examined to count 'inactive' buckets.  That count is
 * subtracted from rbtdb->active under the database lock; when 'active'
 * reaches zero the database is logged and passed to free_rbtdb().
 *
 * NOTE(review): the comparison applied to isc_refcount_current() and the
 * 'if (want_free)' test are not visible here -- confirm against the full
 * file.
 */
987 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
988 isc_boolean_t want_free = ISC_FALSE;
990 unsigned int inactive = 0;
992 /* XXX check for open versions here */
994 if (rbtdb->soanode != NULL)
995 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
996 if (rbtdb->nsnode != NULL)
997 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1000 * Even though there are no external direct references, there still
1001 * may be nodes in use.
1003 for (i = 0; i < rbtdb->node_lock_count; i++) {
1004 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1005 rbtdb->node_locks[i].exiting = ISC_TRUE;
1006 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1007 if (isc_refcount_current(&rbtdb->node_locks[i].references)
/* 'active' is protected by the database lock. */
1013 if (inactive != 0) {
1014 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1015 rbtdb->active -= inactive;
1016 if (rbtdb->active == 0)
1017 want_free = ISC_TRUE;
1018 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1020 char buf[DNS_NAME_FORMATSIZE];
1021 if (dns_name_dynamic(&rbtdb->common.origin))
1022 dns_name_format(&rbtdb->common.origin, buf,
1025 strcpy(buf, "<UNKNOWN>");
1026 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1027 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1028 "calling free_rbtdb(%s)", buf);
1029 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Drop one reference on the database '*dbp' (validated with VALID_RBTDB())
 * and hand it to maybe_free_rbtdb().
 * NOTE(review): the test on 'refs' guarding that call and any clearing of
 * '*dbp' are not visible here.
 */
1035 detach(dns_db_t **dbp) {
1036 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1039 REQUIRE(VALID_RBTDB(rbtdb));
1041 isc_refcount_decrement(&rbtdb->references, &refs);
1044 maybe_free_rbtdb(rbtdb);
/*
 * Return the database's current version in '*versionp' with an extra
 * reference taken on it.  rbtdb->current_version is read and its reference
 * counter incremented while the database lock is held for reading.
 */
1050 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1051 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1052 rbtdb_version_t *version;
1055 REQUIRE(VALID_RBTDB(rbtdb));
1057 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1058 version = rbtdb->current_version;
1059 isc_refcount_increment(&version->references, &refs);
1060 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1062 *versionp = (dns_dbversion_t *)version;
/*
 * Allocate an rbtdb_version_t from 'mctx' and initialize it: 'serial' and
 * 'writer' are stored, the reference counter starts at 'references',
 * commit_ok is ISC_FALSE, and the changed/resigned lists and the list link
 * are initialized.  If the reference counter cannot be initialized the
 * memory is returned to 'mctx'.
 * NOTE(review): the return statements are not visible here; newversion()
 * treats a NULL result as an allocation failure.
 */
1065 static inline rbtdb_version_t *
1066 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1067 unsigned int references, isc_boolean_t writer)
1069 isc_result_t result;
1070 rbtdb_version_t *version;
1072 version = isc_mem_get(mctx, sizeof(*version));
1073 if (version == NULL)
1075 version->serial = serial;
1076 result = isc_refcount_init(&version->references, references);
1077 if (result != ISC_R_SUCCESS) {
1078 isc_mem_put(mctx, version, sizeof(*version));
1081 version->writer = writer;
1082 version->commit_ok = ISC_FALSE;
1083 ISC_LIST_INIT(version->changed_list);
1084 ISC_LIST_INIT(version->resigned_list);
1085 ISC_LINK_INIT(version, link);
/*
 * Open a new version of the database and return it in '*versionp'.
 *
 * Requires that '*versionp' is NULL and that no future version is already
 * open.  Under the database write lock a version is allocated with serial
 * rbtdb->next_serial and one reference; it is marked commit_ok and
 * inherits 'secure' and 'havensec3' from the current version.  When
 * 'havensec3' is set the NSEC3 parameters (flags, iterations, hash, salt)
 * are copied as well; the other visible assignments zero them.  On success
 * next_serial is incremented and the version becomes
 * rbtdb->future_version.
 *
 * Returns ISC_R_NOMEMORY if the version could not be allocated, otherwise
 * ISC_R_SUCCESS.
 */
1091 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1092 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1093 rbtdb_version_t *version;
1095 REQUIRE(VALID_RBTDB(rbtdb));
1096 REQUIRE(versionp != NULL && *versionp == NULL);
1097 REQUIRE(rbtdb->future_version == NULL);
1099 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1100 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1101 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1103 if (version != NULL) {
1104 version->commit_ok = ISC_TRUE;
1105 version->secure = rbtdb->current_version->secure;
1106 version->havensec3 = rbtdb->current_version->havensec3;
1107 if (version->havensec3) {
1108 version->flags = rbtdb->current_version->flags;
1109 version->iterations =
1110 rbtdb->current_version->iterations;
1111 version->hash = rbtdb->current_version->hash;
1112 version->salt_length =
1113 rbtdb->current_version->salt_length;
1114 memcpy(version->salt, rbtdb->current_version->salt,
1115 version->salt_length);
1118 version->iterations = 0;
1120 version->salt_length = 0;
1121 memset(version->salt, 0, sizeof(version->salt));
1123 rbtdb->next_serial++;
1124 rbtdb->future_version = version;
1126 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/* Report the allocation failure only after the lock has been dropped. */
1128 if (version == NULL)
1129 return (ISC_R_NOMEMORY);
1131 *versionp = version;
1133 return (ISC_R_SUCCESS);
/*
 * Take an additional reference on the version 'source' and return it in
 * '*targetp'.  'db' is only used to validate the database handle.
 */
1137 attachversion(dns_db_t *db, dns_dbversion_t *source,
1138 dns_dbversion_t **targetp)
1140 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1141 rbtdb_version_t *rbtversion = source;
1144 REQUIRE(VALID_RBTDB(rbtdb));
1146 isc_refcount_increment(&rbtversion->references, &refs);
1149 *targetp = rbtversion;
/*
 * Record 'node' as changed in the writer version 'version'.
 *
 * An rbtdb_changed_t is allocated before the database write lock is taken.
 * With the lock held, and after checking that 'version' is a writer, a
 * successful allocation takes a reference on 'node', stores the node with
 * 'dirty' cleared, and appends the record to version->changed_list.
 *
 * NOTE(review): the 'else' line is not visible here; 'commit_ok = ISC_FALSE'
 * presumably belongs to the allocation-failure path.  The return statement
 * is also not shown.
 */
1152 static rbtdb_changed_t *
1153 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1154 dns_rbtnode_t *node)
1156 rbtdb_changed_t *changed;
1160 * Caller must be holding the node lock if its reference must be
1161 * protected by the lock.
1164 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1166 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1168 REQUIRE(version->writer);
1170 if (changed != NULL) {
1171 dns_rbtnode_refincrement(node, &refs);
1173 changed->node = node;
1174 changed->dirty = ISC_FALSE;
1175 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1177 version->commit_ok = ISC_FALSE;
1179 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Free an additional-cache control array that belongs to 'header'.
 *
 * The number of entries is read from the first two bytes that follow the
 * header in memory (high byte first).  Every entry is INSISTed to have no
 * cache entry and no callback argument before the array is returned to
 * 'mctx'.
 *
 * NOTE(review): the declaration of 'array' (presumably the third
 * parameter) and any NULL check on it are not visible here.
 */
1185 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1190 unsigned char *raw; /* RDATASLAB */
1193 * The caller must be holding the corresponding node lock.
/* The slab data starts immediately after the header structure. */
1199 raw = (unsigned char *)header + sizeof(*header);
1200 count = raw[0] * 256 + raw[1];
1203 * Sanity check: since an additional cache entry has a reference to
1204 * the original DB node (in the callback arg), there should be no
1205 * acache entries when the node can be freed.
1207 for (i = 0; i < count; i++)
1208 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1210 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * Free the 'struct noqname' that '*noqname' points to: its name (when
 * dynamically allocated), its 'neg' and 'negsig' slabs (sized with
 * dns_rdataslab_size(..., 0)), and then the structure itself.
 * NOTE(review): whether '*noqname' is reset to NULL afterwards is not
 * visible here.
 */
1214 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1216 if (dns_name_dynamic(&(*noqname)->name))
1217 dns_name_free(&(*noqname)->name, mctx);
1218 if ((*noqname)->neg != NULL)
1219 isc_mem_put(mctx, (*noqname)->neg,
1220 dns_rdataslab_size((*noqname)->neg, 0));
1221 if ((*noqname)->negsig != NULL)
1222 isc_mem_put(mctx, (*noqname)->negsig,
1223 dns_rdataslab_size((*noqname)->negsig, 0));
1224 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * Initialize the list link of the rdataset header 'h'.
 * NOTE(review): the fprintf() trace for class IN caches is presumably
 * compiled only under a debugging guard that is not visible here; other
 * field initializations may also be missing from this view.
 */
1229 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1231 ISC_LINK_INIT(h, link);
1235 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1236 fprintf(stderr, "initialized header: %p\n", h);
/*
 * Allocate an rdatasetheader_t from 'mctx' and initialize it with
 * init_rdataset().
 * NOTE(review): the NULL check on the allocation, the return statement and
 * the guard around the fprintf() trace are not visible here.
 */
1242 static inline rdatasetheader_t *
1243 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1245 rdatasetheader_t *h;
1247 h = isc_mem_get(mctx, sizeof(*h));
1252 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1253 fprintf(stderr, "allocated header: %p\n", h);
1255 init_rdataset(rbtdb, h);
/*
 * Release the rdataset header 'rdataset' and everything hanging off it.
 *
 * In order: the rrset statistics are decremented if the header exists and
 * was counted (STATCOUNT); the header is unlinked from its bucket's
 * rdatasets[] list and removed from its bucket's heap if present there;
 * the noqname/closest proofs and both additional-cache arrays are freed;
 * finally the header memory is returned to 'mctx'.  A NONEXISTENT header
 * is sized as the bare structure; the other visible size computation uses
 * dns_rdataslab_size().
 *
 * NOTE(review): the 'else' line and the second argument of
 * dns_rdataslab_size() are not visible here.
 */
1260 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1265 if (EXISTS(rdataset) &&
1266 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1267 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
/* LRU lists and heaps are indexed by the owning node's lock bucket. */
1270 idx = rdataset->node->locknum;
1271 if (ISC_LINK_LINKED(rdataset, link)) {
1272 INSIST(IS_CACHE(rbtdb));
1273 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1275 if (rdataset->heap_index != 0)
1276 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1277 rdataset->heap_index = 0;
1279 if (rdataset->noqname != NULL)
1280 free_noqname(mctx, &rdataset->noqname);
1281 if (rdataset->closest != NULL)
1282 free_noqname(mctx, &rdataset->closest);
1284 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1285 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1287 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1288 size = sizeof(*rdataset);
1290 size = dns_rdataslab_size((unsigned char *)rdataset,
1292 isc_mem_put(mctx, rdataset, size);
/*
 * rollback_node: mark every rdataset header at this node whose serial
 * equals the given serial with RDATASET_ATTR_IGNORE.
 *
 * Both dimensions of the header structure are walked: the top-level
 * chain linked through next, and for each top-level header the older
 * versions linked through down.  make_dirty records that at least one
 * header was marked.
 *
 * NOTE(review): the statement that consumes make_dirty is not visible
 * in this listing; presumably it flags the node as dirty - confirm.
 */
1296 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1297 rdatasetheader_t *header, *dcurrent;
1298 isc_boolean_t make_dirty = ISC_FALSE;
1301 * Caller must hold the node lock.
1305 * We set the IGNORE attribute on rdatasets with serial number
1306 * 'serial'. When the reference count goes to zero, these rdatasets
1307 * will be cleaned up; until that time, they will be ignored.
1309 for (header = node->data; header != NULL; header = header->next) {
1310 if (header->serial == serial) {
1311 header->attributes |= RDATASET_ATTR_IGNORE;
1312 make_dirty = ISC_TRUE;
1314 for (dcurrent = header->down;
1316 dcurrent = dcurrent->down) {
1317 if (dcurrent->serial == serial) {
1318 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1319 make_dirty = ISC_TRUE;
/*
 * clean_stale_headers: free every header that hangs below top on its
 * down chain.
 *
 * The successor is saved in down_next before each free_rdataset() call
 * because the header being freed can no longer be dereferenced.  The
 * header top itself is not freed here.
 *
 * NOTE(review): resetting top->down after the loop is not visible in
 * this listing - confirm.
 */
1328 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1330 rdatasetheader_t *d, *down_next;
1332 for (d = top->down; d != NULL; d = down_next) {
1333 down_next = d->down;
1334 free_rdataset(rbtdb, mctx, d);
/*
 * clean_cache_node: tidy the rdataset headers of a cache node.
 *
 * For each top-level header (next chain starting at node->data) the
 * whole down chain is discarded with clean_stale_headers().  A
 * top-level header carrying RDATASET_ATTR_NONEXISTENT or
 * RDATASET_ATTR_STALE is then unlinked - through top_prev when there is
 * a predecessor, otherwise by advancing node->data - and freed.
 * top_next is captured first so iteration survives the free.
 *
 * NOTE(review): the initialisation and advancement of top_prev, and
 * any final update of the node dirty flag, are not visible in this
 * listing - confirm.
 */
1340 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1341 rdatasetheader_t *current, *top_prev, *top_next;
1342 isc_mem_t *mctx = rbtdb->common.mctx;
1345 * Caller must be holding the node lock.
1349 for (current = node->data; current != NULL; current = top_next) {
1350 top_next = current->next;
1351 clean_stale_headers(rbtdb, mctx, current);
1353 * If current is nonexistent or stale, we can clean it up.
1355 if ((current->attributes &
1356 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1357 if (top_prev != NULL)
1358 top_prev->next = current->next;
1360 node->data = current->next;
1361 free_rdataset(rbtdb, mctx, current);
/*
 * clean_zone_node: prune obsolete rdataset versions from a zone node.
 *
 * least_serial must be non-zero (REQUIRE).  For every top-level header
 * (next chain from node->data) the visible code works in stages:
 *
 *  1. Walk the down chain and free headers whose serial equals that of
 *     their parent (a further condition on the same test is not
 *     visible here), re-linking dparent->down past the freed header.
 *  2. If the top-level header itself is IGNOREd, either drop it
 *     entirely (no down chain) or promote its first down header into
 *     the top-level chain in its place.
 *  3. Find the first down header with serial < least_serial and free
 *     it together with everything older, terminating the chain at
 *     dparent.
 *  4. A remaining down chain sets still_dirty; a NONEXISTENT top-level
 *     header is unlinked and freed.
 *
 * NOTE(review): several loop conditions, the maintenance of top_prev
 * and dparent, and the final use of still_dirty are not visible in
 * this listing - confirm against the full source before relying on
 * the exact conditions above.
 */
1369 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1370 rbtdb_serial_t least_serial)
1372 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1373 rdatasetheader_t *top_prev, *top_next;
1374 isc_mem_t *mctx = rbtdb->common.mctx;
1375 isc_boolean_t still_dirty = ISC_FALSE;
1378 * Caller must be holding the node lock.
1380 REQUIRE(least_serial != 0);
1383 for (current = node->data; current != NULL; current = top_next) {
1384 top_next = current->next;
1387 * First, we clean up any instances of multiple rdatasets
1388 * with the same serial number, or that have the IGNORE
1392 for (dcurrent = current->down;
1394 dcurrent = down_next) {
1395 down_next = dcurrent->down;
1396 INSIST(dcurrent->serial <= dparent->serial);
1397 if (dcurrent->serial == dparent->serial ||
1399 if (down_next != NULL)
1400 down_next->next = dparent;
1401 dparent->down = down_next;
1402 free_rdataset(rbtdb, mctx, dcurrent);
1408 * We've now eliminated all IGNORE datasets with the possible
1409 * exception of current, which we now check.
1411 if (IGNORE(current)) {
1412 down_next = current->down;
1413 if (down_next == NULL) {
1414 if (top_prev != NULL)
1415 top_prev->next = current->next;
1417 node->data = current->next;
1418 free_rdataset(rbtdb, mctx, current);
1420 * current no longer exists, so we can
1421 * just continue with the loop.
1426 * Pull up current->down, making it the new
1429 if (top_prev != NULL)
1430 top_prev->next = down_next;
1432 node->data = down_next;
1433 down_next->next = top_next;
1434 free_rdataset(rbtdb, mctx, current);
1435 current = down_next;
1440 * We now try to find the first down node less than the
1444 for (dcurrent = current->down;
1446 dcurrent = down_next) {
1447 down_next = dcurrent->down;
1448 if (dcurrent->serial < least_serial)
1454 * If there is a such an rdataset, delete it and any older
1457 if (dcurrent != NULL) {
1459 down_next = dcurrent->down;
1460 INSIST(dcurrent->serial <= least_serial);
1461 free_rdataset(rbtdb, mctx, dcurrent);
1462 dcurrent = down_next;
1463 } while (dcurrent != NULL);
1464 dparent->down = NULL;
1468 * Note. The serial number of 'current' might be less than
1469 * least_serial too, but we cannot delete it because it is
1470 * the most recent version, unless it is a NONEXISTENT
1473 if (current->down != NULL) {
1474 still_dirty = ISC_TRUE;
1478 * If this is a NONEXISTENT rdataset, we can delete it.
1480 if (NONEXISTENT(current)) {
1481 if (top_prev != NULL)
1482 top_prev->next = current->next;
1484 node->data = current->next;
1485 free_rdataset(rbtdb, mctx, current);
/*
 * delete_node: remove a node from whichever red-black tree it lives
 * in, selected by node->nsec.
 *
 *  DNS_RBT_NSEC_NORMAL   - node is in the main tree; when an RPZ CIDR
 *                          structure exists its entry for the full node
 *                          name is removed first.
 *  DNS_RBT_NSEC_HAS_NSEC - node is in the main tree and has a companion
 *                          in the auxiliary NSEC tree; the companion is
 *                          looked up by full name and deleted first,
 *                          with lookup or delete failures logged as
 *                          warnings, then the main-tree node is deleted
 *                          and the RPZ CIDR entry removed.
 *  DNS_RBT_NSEC_NSEC     - node is deleted from the NSEC tree.
 *  DNS_RBT_NSEC_NSEC3    - node is deleted from the NSEC3 tree.
 *
 * The node must not be on a dead-node list (INSIST).  result starts as
 * ISC_R_UNEXPECTED so that an unhandled nsec value is reported by the
 * final failure log.  Failures are only logged; nothing is returned.
 *
 * NOTE(review): the break statements, the else branch around the
 * NSEC-tree delete and any conditional compilation around the RPZ
 * calls are not visible in this listing - confirm.
 */
1495 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1497 dns_rbtnode_t *nsecnode;
1498 dns_fixedname_t fname;
1500 isc_result_t result = ISC_R_UNEXPECTED;
1502 INSIST(!ISC_LINK_LINKED(node, deadlink));
1504 switch (node->nsec) {
1505 case DNS_RBT_NSEC_NORMAL:
1507 if (rbtdb->rpz_cidr != NULL) {
1508 dns_fixedname_init(&fname);
1509 name = dns_fixedname_name(&fname);
1510 dns_rbt_fullnamefromnode(node, name);
1511 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1514 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1516 case DNS_RBT_NSEC_HAS_NSEC:
1517 dns_fixedname_init(&fname);
1518 name = dns_fixedname_name(&fname);
1519 dns_rbt_fullnamefromnode(node, name);
1521 * Delete the corresponding node from the auxiliary NSEC
1522 * tree before deleting from the main tree.
1525 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1526 NULL, DNS_RBTFIND_EMPTYDATA,
1528 if (result != ISC_R_SUCCESS) {
1529 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1530 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1532 "dns_rbt_findnode(nsec): %s",
1533 isc_result_totext(result));
1535 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1537 if (result != ISC_R_SUCCESS) {
1538 isc_log_write(dns_lctx,
1539 DNS_LOGCATEGORY_DATABASE,
1540 DNS_LOGMODULE_CACHE,
1542 "delete_nsecnode(): "
1543 "dns_rbt_deletenode(nsecnode): %s",
1544 isc_result_totext(result));
1547 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1549 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1552 case DNS_RBT_NSEC_NSEC:
1553 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1555 case DNS_RBT_NSEC_NSEC3:
1556 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1559 if (result != ISC_R_SUCCESS) {
1560 isc_log_write(dns_lctx,
1561 DNS_LOGCATEGORY_DATABASE,
1562 DNS_LOGMODULE_CACHE,
1564 "delete_nsecnode(): "
1565 "dns_rbt_deletenode: %s",
1566 isc_result_totext(result));
1571 * Clean up dead nodes. These are nodes which have no references, and
1572 * have no data. They are dead but we could not or chose not to delete
1573 * them when we deleted all the data at that node because we did not want
1574 * to wait for the tree write lock.
1576 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * cleanup_dead_nodes: delete a bounded batch of nodes from the
 * dead-node list of one lock bucket.
 *
 * The loop always works on the current list head: the node is
 * unlinked, asserted to have no references and no data, and passed to
 * delete_node(); the head is then re-read.  The batch is limited by
 * count, which starts at 10, so a single call cannot hold the locks
 * for an unbounded time.
 *
 * NOTE(review): the statement that decrements count is not visible in
 * this listing - confirm.
 */
1579 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1580 dns_rbtnode_t *node;
1581 int count = 10; /* XXXJT: should be adjustable */
1583 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1584 while (node != NULL && count > 0) {
1585 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1588 * Since we're holding a tree write lock, it should be
1589 * impossible for this node to be referenced by others.
1591 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1592 node->data == NULL);
1594 delete_node(rbtdb, node);
1596 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1602 * Caller must be holding the node lock if its reference must be protected
/*
 * new_reference: take one more reference on a node.
 *
 * The node counter is bumped with dns_rbtnode_refincrement0() and the
 * resulting value is returned in noderefs.  When that value is 1 the
 * node has just become referenced, so the reference counter of its
 * lock bucket (rbtdb->node_locks[node->locknum].references) is bumped
 * as well; the bucket counter therefore tracks referenced nodes, not
 * individual references.  Both resulting counts are asserted non-zero.
 */
1606 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1607 unsigned int lockrefs, noderefs;
1608 isc_refcount_t *lockref;
1610 dns_rbtnode_refincrement0(node, &noderefs);
1611 if (noderefs == 1) { /* this is the first reference to the node */
1612 lockref = &rbtdb->node_locks[node->locknum].references;
1613 isc_refcount_increment0(lockref, &lockrefs);
1614 INSIST(lockrefs != 0);
1616 INSIST(noderefs != 0);
1620 * This function is assumed to be called when a node is newly referenced
1621 * and can be in the deadnode list. In that case the node must be retrieved
1622 * from the list because it is going to be used. In addition, if the caller
1623 * happens to hold a write lock on the tree, it's a good chance to purge dead
1625 * Note: while a new reference is gained in multiple places, there are only very
1626 * few cases where the node can be in the deadnode list (only empty nodes can
1627 * have been added to the list).
/*
 * reactivate_node: gain a reference on a node that may currently be
 * queued on a dead-node list.
 *
 * Under the strong node lock a reference is taken first.  A weak read
 * lock is then used to decide cheaply whether more work is needed
 * (need_relock): either the node is on the dead list, or the bucket
 * has other dead nodes and the caller holds the tree write lock given
 * by treelocktype.  The follow-up section takes the weak write lock,
 * unlinks the node from the dead list if it is still there, and - only
 * with a tree write lock - calls cleanup_dead_nodes() for the bucket.
 *
 * NOTE(review): the test of need_relock that guards the write-locked
 * section is not visible in this listing - confirm.
 */
1630 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1631 isc_rwlocktype_t treelocktype)
1633 isc_boolean_t need_relock = ISC_FALSE;
1635 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1636 new_reference(rbtdb, node);
1638 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1639 isc_rwlocktype_read);
1640 if (ISC_LINK_LINKED(node, deadlink))
1641 need_relock = ISC_TRUE;
1642 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1643 treelocktype == isc_rwlocktype_write)
1644 need_relock = ISC_TRUE;
1645 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1646 isc_rwlocktype_read);
1648 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1649 isc_rwlocktype_write);
1650 if (ISC_LINK_LINKED(node, deadlink))
1651 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1653 if (treelocktype == isc_rwlocktype_write)
1654 cleanup_dead_nodes(rbtdb, node->locknum);
1655 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1656 isc_rwlocktype_write);
1659 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1663 * Caller must be holding the node lock; either the "strong", read or write
1664 * lock. Note that the lock must be held even when node references are
1665 * atomically modified; in that case the decrement operation itself does not
1666 * have to be protected, but we must avoid a race condition where multiple
1667 * threads are decreasing the reference to zero simultaneously and at least
1668 * one of them is going to free the node.
1669 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * decrement_reference: drop one reference on a node and, when it was
 * the last one, clean the node and delete or queue it.
 *
 * Parameters:
 *   least_serial - least open serial for zone cleaning; 0 means the
 *                  caller does not know it and it is read from rbtdb
 *                  under the database read lock.
 *   nlock        - node lock type held by the caller; a read lock is
 *                  swapped for a write lock on the slow path and
 *                  downgraded again before returning.
 *   tlock        - tree lock type held by the caller (none, read or
 *                  write); anything but write triggers a non-blocking
 *                  attempt to obtain the tree write lock.
 *   pruning      - ISC_TRUE when called from the pruning event, which
 *                  suppresses dispatching a further pruning event.
 *
 * Flow:
 *   Fast path - a clean node that still has data or a down pointer only
 *   needs its counter decremented; the result is whether the counter
 *   reached zero.
 *   Slow path - after the decrement, a dirty node with no references is
 *   cleaned with clean_cache_node() or clean_zone_node().  If the node
 *   still has data or a down pointer nothing more is done.  Otherwise,
 *   with the tree write lock held and still no references, the node is
 *   either handed to a pruning event (when it is the only node at its
 *   level, has a parent and a task is configured) or deleted at once;
 *   if the event cannot be allocated the node is appended to the
 *   bucket dead-node list.  Without the tree write lock an unreferenced
 *   node is appended to the dead-node list for later removal.
 *   Finally the node and tree locks are restored to what the caller
 *   held.
 *
 * Returns no_reference on the slow path; it is cleared when a pruning
 * event takes over the node and on one further path near the end.
 *
 * NOTE(review): several guards (the condition around the bucket
 * counter decrement on the fast path, else keywords, early returns and
 * the conditions around the final lock restoration) are not visible in
 * this listing - confirm against the full source.
 */
1672 static isc_boolean_t
1673 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1674 rbtdb_serial_t least_serial,
1675 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1676 isc_boolean_t pruning)
1678 isc_result_t result;
1679 isc_boolean_t write_locked;
1680 rbtdb_nodelock_t *nodelock;
1681 unsigned int refs, nrefs;
1682 int bucket = node->locknum;
1683 isc_boolean_t no_reference;
1685 nodelock = &rbtdb->node_locks[bucket];
1687 /* Handle easy and typical case first. */
1688 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1689 dns_rbtnode_refdecrement(node, &nrefs);
1690 INSIST((int)nrefs >= 0);
1692 isc_refcount_decrement(&nodelock->references, &refs);
1693 INSIST((int)refs >= 0);
1695 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1698 /* Upgrade the lock? */
1699 if (nlock == isc_rwlocktype_read) {
1700 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1701 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1703 dns_rbtnode_refdecrement(node, &nrefs);
1704 INSIST((int)nrefs >= 0);
1706 /* Restore the lock? */
1707 if (nlock == isc_rwlocktype_read)
1708 NODE_WEAKDOWNGRADE(&nodelock->lock);
1712 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1713 if (IS_CACHE(rbtdb))
1714 clean_cache_node(rbtdb, node);
1716 if (least_serial == 0) {
1718 * Caller doesn't know the least serial.
1721 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1722 least_serial = rbtdb->least_serial;
1723 RBTDB_UNLOCK(&rbtdb->lock,
1724 isc_rwlocktype_read);
1726 clean_zone_node(rbtdb, node, least_serial);
1730 isc_refcount_decrement(&nodelock->references, &refs);
1731 INSIST((int)refs >= 0);
1734 * XXXDCL should this only be done for cache zones?
1736 if (node->data != NULL || node->down != NULL) {
1737 /* Restore the lock? */
1738 if (nlock == isc_rwlocktype_read)
1739 NODE_WEAKDOWNGRADE(&nodelock->lock);
1744 * Attempt to switch to a write lock on the tree. If this fails,
1745 * we will add this node to a linked list of nodes in this locking
1746 * bucket which we will free later.
1748 if (tlock != isc_rwlocktype_write) {
1750 * Locking hierarchy notwithstanding, we don't need to free
1751 * the node lock before acquiring the tree write lock because
1752 * we only do a trylock.
1754 if (tlock == isc_rwlocktype_read)
1755 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1757 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1758 isc_rwlocktype_write);
1759 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1760 result == ISC_R_LOCKBUSY);
1762 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1764 write_locked = ISC_TRUE;
1766 no_reference = ISC_TRUE;
1767 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1769 * We can now delete the node if the reference counter is
1770 * zero. This should be typically the case, but a different
1771 * thread may still gain a (new) reference just before the
1772 * current thread locks the tree (e.g., in findnode()).
1776 * If this node is the only one in the level it's in, deleting
1777 * this node may recursively make its parent the only node in
1778 * the parent level; if so, and if no one is currently using
1779 * the parent node, this is almost the only opportunity to
1780 * clean it up. But the recursive cleanup is not that trivial
1781 * since the child and parent may be in different lock buckets,
1782 * which would cause a lock order reversal problem. To avoid
1783 * the trouble, we'll dispatch a separate event for batch
1784 * cleaning. We need to check whether we're deleting the node
1785 * as a result of pruning to avoid infinite dispatching.
1786 * Note: pruning happens only when a task has been set for the
1787 * rbtdb. If the user of the rbtdb chooses not to set a task,
1788 * it's their responsibility to purge stale leaves (e.g. by
1789 * periodic walk-through).
1791 if (!pruning && node->parent != NULL &&
1792 node->parent->down == node && node->left == NULL &&
1793 node->right == NULL && rbtdb->task != NULL) {
1797 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1800 sizeof(isc_event_t));
1802 new_reference(rbtdb, node);
1804 attach((dns_db_t *)rbtdb, &db);
1806 isc_task_send(rbtdb->task, &ev);
1807 no_reference = ISC_FALSE;
1810 * XXX: this is a weird situation. We could
1811 * ignore this error case, but then the stale
1812 * node will unlikely be purged except via a
1813 * rare condition such as manual cleanup. So
1814 * we queue it in the deadnodes list, hoping
1815 * the memory shortage is temporary and the node
1816 * will be deleted later.
1818 isc_log_write(dns_lctx,
1819 DNS_LOGCATEGORY_DATABASE,
1820 DNS_LOGMODULE_CACHE,
1822 "decrement_reference: failed to "
1823 "allocate pruning event");
1824 INSIST(!ISC_LINK_LINKED(node, deadlink));
1825 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1829 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1830 char printname[DNS_NAME_FORMATSIZE];
1832 isc_log_write(dns_lctx,
1833 DNS_LOGCATEGORY_DATABASE,
1834 DNS_LOGMODULE_CACHE,
1836 "decrement_reference: "
1837 "delete from rbt: %p %s",
1839 dns_rbt_formatnodename(node,
1841 sizeof(printname)));
1844 delete_node(rbtdb, node);
1846 } else if (dns_rbtnode_refcurrent(node) == 0) {
1847 INSIST(!ISC_LINK_LINKED(node, deadlink));
1848 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1850 no_reference = ISC_FALSE;
1852 /* Restore the lock? */
1853 if (nlock == isc_rwlocktype_read)
1854 NODE_WEAKDOWNGRADE(&nodelock->lock);
1857 * Relock a read lock, or unlock the write lock if no lock was held.
1859 if (tlock == isc_rwlocktype_none)
1861 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1863 if (tlock == isc_rwlocktype_read)
1865 isc_rwlock_downgrade(&rbtdb->tree_lock);
1867 return (no_reference);
1871 * Prune the tree by recursively cleaning-up single leaves. In the worst
1872 * case, the number of iterations is the number of tree levels, which is at
1873 * most the maximum number of domain name labels, i.e., 127. In practice, this
1874 * should be much smaller (only a few times), and even the worst case would be
1875 * acceptable for a single event.
/*
 * prune_tree: task event handler that removes a leaf node and then
 * walks upward removing parents that became empty as a result.
 *
 * The database is taken from event->ev_sender and the starting node
 * from event->ev_arg; the event is freed before any locking.  With the
 * tree write lock and the write lock of the node bucket held, each
 * iteration remembers the parent, then calls decrement_reference()
 * with pruning set to ISC_TRUE so that no further pruning event is
 * dispatched.  If the parent now has no down child, the bucket lock is
 * switched when the parent lives in a different bucket, a reference is
 * taken on the parent and it is unlinked from the dead-node list if it
 * was queued there.  The loop ends when node becomes NULL.  Afterwards
 * both locks are released and the database reference is dropped with
 * detach().
 *
 * NOTE(review): the statements that advance node to the parent (or set
 * it to NULL) are not visible in this listing - confirm.
 */
1878 prune_tree(isc_task_t *task, isc_event_t *event) {
1879 dns_rbtdb_t *rbtdb = event->ev_sender;
1880 dns_rbtnode_t *node = event->ev_arg;
1881 dns_rbtnode_t *parent;
1882 unsigned int locknum;
1886 isc_event_free(&event);
1888 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1889 locknum = node->locknum;
1890 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1892 parent = node->parent;
1893 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1894 isc_rwlocktype_write, ISC_TRUE);
1896 if (parent != NULL && parent->down == NULL) {
1898 * node was the only down child of the parent and has
1899 * just been removed. We'll then need to examine the
1900 * parent. Keep the lock if possible; otherwise,
1901 * release the old lock and acquire one for the parent.
1903 if (parent->locknum != locknum) {
1904 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1905 isc_rwlocktype_write);
1906 locknum = parent->locknum;
1907 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1908 isc_rwlocktype_write);
1912 * We need to gain a reference to the node before
1913 * decrementing it in the next iteration. In addition,
1914 * if the node is in the dead-nodes list, extract it
1915 * from the list beforehand as we do in
1916 * reactivate_node().
1918 new_reference(rbtdb, parent);
1919 if (ISC_LINK_LINKED(parent, deadlink)) {
1920 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1927 } while (node != NULL);
1928 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1929 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1931 detach((dns_db_t **)&rbtdb);
/*
 * make_least_version: record the given version as the least open
 * version of the database.
 *
 * rbtdb->least_serial takes the serial of the version, and ownership
 * of its changed_list moves to *cleanup_list: the list head is copied
 * by value and the list in the version is re-initialised to empty, so
 * the records are reachable from exactly one place afterwards.
 */
1935 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1936 rbtdb_changedlist_t *cleanup_list)
1939 * Caller must be holding the database lock.
1942 rbtdb->least_serial = version->serial;
1943 *cleanup_list = version->changed_list;
1944 ISC_LIST_INIT(version->changed_list);
/*
 * cleanup_nondirty: move every changed record that is not dirty from
 * the changed_list of the version to *cleanup_list.
 *
 * Dirty records stay on the version.  next_changed is captured before
 * a record is unlinked so that the walk continues correctly after the
 * record has moved to the other list.
 */
1948 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1949 rbtdb_changed_t *changed, *next_changed;
1952 * If the changed record is dirty, then
1953 * an update created multiple versions of
1954 * a given rdataset. We keep this list
1955 * until we're the least open version, at
1956 * which point it's safe to get rid of any
1959 * If the changed record isn't dirty, then
1960 * we don't need it anymore since we're
1961 * committing and not rolling back.
1963 * The caller must be holding the database lock.
1965 for (changed = HEAD(version->changed_list);
1967 changed = next_changed) {
1968 next_changed = NEXT(changed, link);
1969 if (!changed->dirty) {
1970 UNLINK(version->changed_list,
1972 APPEND(*cleanup_list,
/*
 * iszonesecure: recompute the cached security status of a zone version
 * from the records at the origin node.
 *
 * Step 1: the DNSKEY rdataset at origin is scanned for a key accepted
 *         by dns_zonekey_iszonekey(); haszonekey records the outcome.
 * Step 2: the version is marked dns_db_insecure with havensec3 cleared
 *         (the condition guarding this is not visible here; presumably
 *         it applies when no zone key was found - confirm).
 * Step 3: the NSEC rdataset at origin is looked up together with its
 *         signature set; an associated signature set is the visible
 *         trigger for the NSEC check.
 * Step 4: setnsec3parameters() refreshes the cached NSEC3 parameters.
 * Step 5: the version is marked dns_db_secure when either an NSEC3
 *         chain (havensec3) or hasnsec was established, otherwise
 *         dns_db_insecure.
 *
 * Every rdataset associated here is disassociated again before the
 * function moves on.
 */
1979 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1987 dns_rdataset_t keyset;
1988 dns_rdataset_t nsecset, signsecset;
1989 isc_boolean_t haszonekey = ISC_FALSE;
1990 isc_boolean_t hasnsec = ISC_FALSE;
1991 isc_result_t result;
1993 dns_rdataset_init(&keyset);
1994 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1995 0, 0, &keyset, NULL);
1996 if (result == ISC_R_SUCCESS) {
1997 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1998 result = dns_rdataset_first(&keyset);
1999 while (result == ISC_R_SUCCESS) {
2000 dns_rdataset_current(&keyset, &keyrdata);
2001 if (dns_zonekey_iszonekey(&keyrdata)) {
2002 haszonekey = ISC_TRUE;
2005 result = dns_rdataset_next(&keyset);
2007 dns_rdataset_disassociate(&keyset);
2010 version->secure = dns_db_insecure;
2011 version->havensec3 = ISC_FALSE;
2015 dns_rdataset_init(&nsecset);
2016 dns_rdataset_init(&signsecset);
2017 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2018 0, 0, &nsecset, &signsecset);
2019 if (result == ISC_R_SUCCESS) {
2020 if (dns_rdataset_isassociated(&signsecset)) {
2022 dns_rdataset_disassociate(&signsecset);
2024 dns_rdataset_disassociate(&nsecset);
2027 setnsec3parameters(db, version);
2030 * Do we have a valid NSEC/NSEC3 chain?
2032 if (version->havensec3 || hasnsec)
2033 version->secure = dns_db_secure;
2035 version->secure = dns_db_insecure;
2040 * Walk the origin node looking for NSEC3PARAM records.
2041 * Cache the nsec3 parameters.
/*
 * setnsec3parameters: scan the origin node for NSEC3PARAM records
 * visible in the given version and cache one usable parameter set in
 * the version structure.
 *
 * Locking: the tree read lock and the read lock of the origin node
 * bucket are held for the whole scan and released at the end.
 *
 * For each top-level header the down chain is searched for the newest
 * header whose serial does not exceed version->serial.  When the
 * selected header is of type NSEC3PARAM its slab is decoded: a
 * big-endian 16-bit record count, then per record a big-endian 16-bit
 * length followed by the rdata (with an extra offset table and per
 * record adjustment when DNS_RDATASET_FIXED is set).  Each record is
 * converted with dns_rdata_tostruct(); records with an unsupported
 * hash or non-zero flags are rejected by the two visible tests.  An
 * accepted record has its salt, hash, salt_length, iterations and
 * flags copied into the version and havensec3 set.  A record using
 * DNS_NSEC3_UNKNOWNALG does not end the search; the final visible test
 * looks for a better algorithm.
 *
 * NOTE(review): the continue, break and goto statements that implement
 * the skipping and early exit are not visible in this listing -
 * confirm the exact control flow.
 */
2045 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
2046 dns_rbtnode_t *node;
2047 dns_rdata_nsec3param_t nsec3param;
2048 dns_rdata_t rdata = DNS_RDATA_INIT;
2049 isc_region_t region;
2050 isc_result_t result;
2051 rdatasetheader_t *header, *header_next;
2052 unsigned char *raw; /* RDATASLAB */
2053 unsigned int count, length;
2054 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2056 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2057 version->havensec3 = ISC_FALSE;
2058 node = rbtdb->origin_node;
2059 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2060 isc_rwlocktype_read);
2061 for (header = node->data;
2063 header = header_next) {
2064 header_next = header->next;
2066 if (header->serial <= version->serial &&
2068 if (NONEXISTENT(header))
2072 header = header->down;
2073 } while (header != NULL);
2075 if (header != NULL &&
2076 (header->type == dns_rdatatype_nsec3param)) {
2078 * Find A NSEC3PARAM with a supported algorithm.
2080 raw = (unsigned char *)header + sizeof(*header);
2081 count = raw[0] * 256 + raw[1]; /* count */
2082 #if DNS_RDATASET_FIXED
2083 raw += count * 4 + 2;
2087 while (count-- > 0U) {
2088 length = raw[0] * 256 + raw[1];
2089 #if DNS_RDATASET_FIXED
2095 region.length = length;
2097 dns_rdata_fromregion(&rdata,
2098 rbtdb->common.rdclass,
2099 dns_rdatatype_nsec3param,
2101 result = dns_rdata_tostruct(&rdata,
2104 INSIST(result == ISC_R_SUCCESS);
2105 dns_rdata_reset(&rdata);
2107 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2108 !dns_nsec3_supportedhash(nsec3param.hash))
2111 if (nsec3param.flags != 0)
2114 memcpy(version->salt, nsec3param.salt,
2115 nsec3param.salt_length);
2116 version->hash = nsec3param.hash;
2117 version->salt_length = nsec3param.salt_length;
2118 version->iterations = nsec3param.iterations;
2119 version->flags = nsec3param.flags;
2120 version->havensec3 = ISC_TRUE;
2122 * Look for a better algorithm than the
2123 * unknown test algorithm.
2125 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2131 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2132 isc_rwlocktype_read);
2133 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * cleanup_dead_nodes_callback: task event handler that purges
 * dead-node lists in the background.
 *
 * The database is taken from event->ev_arg.  Under the tree write lock
 * every lock bucket is visited in turn: its node lock is taken for
 * writing, cleanup_dead_nodes() removes one bounded batch, and the
 * list is checked for leftovers.  After the tree lock is released the
 * same event is either re-sent to the task for another pass or freed;
 * on the freeing path the database reference count is decremented and
 * maybe_free_rbtdb() is called.
 *
 * NOTE(review): the assignment to again, the test that chooses between
 * re-sending and freeing, and the condition in front of
 * maybe_free_rbtdb() are not visible in this listing - confirm.
 */
2138 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2139 dns_rbtdb_t *rbtdb = event->ev_arg;
2140 isc_boolean_t again = ISC_FALSE;
2141 unsigned int locknum;
2144 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2145 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2146 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2147 isc_rwlocktype_write);
2148 cleanup_dead_nodes(rbtdb, locknum);
2149 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2151 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2152 isc_rwlocktype_write);
2154 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2156 isc_task_send(task, &event);
2158 isc_event_free(&event);
2159 isc_refcount_decrement(&rbtdb->references, &refs);
2161 maybe_free_rbtdb(rbtdb);
/*
 * closeversion: release one reference on a database version and, when
 * it was the last, commit or roll back a writer version or retire a
 * reader version.
 *
 * Parameters:
 *   db       - the rbtdb (validated with VALID_RBTDB).
 *   versionp - points to the version being closed.
 *   commit   - for a writer version, selects commit versus rollback.
 *
 * Outline of the visible flow:
 *   1. The version reference count is decremented.  If references
 *      remain, the only visible work is asserting under the database
 *      read lock that the version is not the writer.
 *   2. Otherwise, under the database write lock:
 *      - writer, commit path: commit_ok and identity with
 *        future_version are asserted; the reference the database holds
 *        on the old current version is dropped and, in the visible
 *        branch, that version is unlinked from open_versions.  If no
 *        open versions remain this version becomes the least version
 *        (make_least_version), otherwise only non-dirty changed
 *        records are queued for cleanup (cleanup_nondirty).  The
 *        version then becomes current, gains the reference held by the
 *        database (incremented from zero) and is prepended to
 *        open_versions.  Its resigned_list is taken over for later
 *        processing.
 *      - writer, rollback path: changed_list and resigned_list are
 *        taken over, rollback is set and the version is scheduled to
 *        be freed.
 *      - reader: a version other than the current one is scheduled to
 *        be freed; if it was the least open version the next greater
 *        version becomes least, otherwise its pending changed records
 *        are appended to those of the next greater version.  The
 *        version is unlinked from open_versions.
 *   3. After the database lock is dropped: for a committed writer on a
 *      zone database the secure status is refreshed (iszonesecure);
 *      the retired version, whose changed_list must be empty, is
 *      freed; every header on resigned_list is unlinked and has the
 *      node reference dropped under the node write lock; every record
 *      on cleanup_list has rollback_node() and decrement_reference()
 *      applied to its node and is then freed.
 *   4. Dead-node handling for the cleanup pass: when a task exists an
 *      RBTDEADNODES event is allocated and later sent (with an extra
 *      database reference); when no event could be obtained the tree
 *      write lock is held for the duration of the pass instead.
 *
 * NOTE(review): many guards are not visible in this listing, including
 * the test of commit on the writer path, the conditions around
 * resign_insert, cleanup_dead_nodes and rollback_node inside the two
 * loops, and the final clearing of *versionp - confirm against the
 * full source.
 */
2166 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2167 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2168 rbtdb_version_t *version, *cleanup_version, *least_greater;
2169 isc_boolean_t rollback = ISC_FALSE;
2170 rbtdb_changedlist_t cleanup_list;
2171 rdatasetheaderlist_t resigned_list;
2172 rbtdb_changed_t *changed, *next_changed;
2173 rbtdb_serial_t serial, least_serial;
2174 dns_rbtnode_t *rbtnode;
2176 rdatasetheader_t *header;
2177 isc_boolean_t writer;
2179 REQUIRE(VALID_RBTDB(rbtdb));
2180 version = (rbtdb_version_t *)*versionp;
2182 cleanup_version = NULL;
2183 ISC_LIST_INIT(cleanup_list);
2184 ISC_LIST_INIT(resigned_list);
2186 isc_refcount_decrement(&version->references, &refs);
2187 if (refs > 0) { /* typical and easy case first */
2189 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2190 INSIST(!version->writer);
2191 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2196 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2197 serial = version->serial;
2198 writer = version->writer;
2199 if (version->writer) {
2202 rbtdb_version_t *cur_version;
2204 INSIST(version->commit_ok);
2205 INSIST(version == rbtdb->future_version);
2207 * The current version is going to be replaced.
2208 * Release the (likely last) reference to it from the
2209 * DB itself and unlink it from the open list.
2211 cur_version = rbtdb->current_version;
2212 isc_refcount_decrement(&cur_version->references,
2215 if (cur_version->serial == rbtdb->least_serial)
2216 INSIST(EMPTY(cur_version->changed_list));
2217 UNLINK(rbtdb->open_versions,
2220 if (EMPTY(rbtdb->open_versions)) {
2222 * We're going to become the least open
2225 make_least_version(rbtdb, version,
2229 * Some other open version is the
2230 * least version. We can't cleanup
2231 * records that were changed in this
2232 * version because the older versions
2233 * may still be in use by an open
2236 * We can, however, discard the
2237 * changed records for things that
2238 * we've added that didn't exist in
2241 cleanup_nondirty(version, &cleanup_list);
2244 * If the (soon to be former) current version
2245 * isn't being used by anyone, we can clean
2249 cleanup_version = cur_version;
2250 APPENDLIST(version->changed_list,
2251 cleanup_version->changed_list,
2255 * Become the current version.
2257 version->writer = ISC_FALSE;
2258 rbtdb->current_version = version;
2259 rbtdb->current_serial = version->serial;
2260 rbtdb->future_version = NULL;
2263 * Keep the current version in the open list, and
2264 * gain a reference for the DB itself (see the DB
2265 * creation function below). This must be the only
2266 * case where we need to increment the counter from
2267 * zero and need to use isc_refcount_increment0().
2269 isc_refcount_increment0(&version->references,
2271 INSIST(cur_ref == 1);
2272 PREPEND(rbtdb->open_versions,
2273 rbtdb->current_version, link);
2274 resigned_list = version->resigned_list;
2275 ISC_LIST_INIT(version->resigned_list);
2278 * We're rolling back this transaction.
2280 cleanup_list = version->changed_list;
2281 ISC_LIST_INIT(version->changed_list);
2282 resigned_list = version->resigned_list;
2283 ISC_LIST_INIT(version->resigned_list);
2284 rollback = ISC_TRUE;
2285 cleanup_version = version;
2286 rbtdb->future_version = NULL;
2289 if (version != rbtdb->current_version) {
2291 * There are no external or internal references
2292 * to this version and it can be cleaned up.
2294 cleanup_version = version;
2297 * Find the version with the least serial
2298 * number greater than ours.
2300 least_greater = PREV(version, link);
2301 if (least_greater == NULL)
2302 least_greater = rbtdb->current_version;
2304 INSIST(version->serial < least_greater->serial);
2306 * Is this the least open version?
2308 if (version->serial == rbtdb->least_serial) {
2310 * Yes. Install the new least open
2313 make_least_version(rbtdb,
2318 * Add any unexecuted cleanups to
2319 * those of the least greater version.
2321 APPENDLIST(least_greater->changed_list,
2322 version->changed_list,
2325 } else if (version->serial == rbtdb->least_serial)
2326 INSIST(EMPTY(version->changed_list));
2327 UNLINK(rbtdb->open_versions, version, link);
2329 least_serial = rbtdb->least_serial;
2330 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2333 * Update the zone's secure status.
2335 if (writer && commit && !IS_CACHE(rbtdb))
2336 iszonesecure(db, version, rbtdb->origin_node);
2338 if (cleanup_version != NULL) {
2339 INSIST(EMPTY(cleanup_version->changed_list));
2340 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2341 sizeof(*cleanup_version));
2345 * Commit/rollback re-signed headers.
2347 for (header = HEAD(resigned_list);
2349 header = HEAD(resigned_list)) {
2352 ISC_LIST_UNLINK(resigned_list, header, link);
2354 lock = &rbtdb->node_locks[header->node->locknum].lock;
2355 NODE_LOCK(lock, isc_rwlocktype_write);
2357 resign_insert(rbtdb, header->node->locknum, header);
2358 decrement_reference(rbtdb, header->node, least_serial,
2359 isc_rwlocktype_write, isc_rwlocktype_none,
2361 NODE_UNLOCK(lock, isc_rwlocktype_write);
2364 if (!EMPTY(cleanup_list)) {
2365 isc_event_t *event = NULL;
2366 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2368 if (rbtdb->task != NULL)
2369 event = isc_event_allocate(rbtdb->common.mctx, NULL,
2370 DNS_EVENT_RBTDEADNODES,
2371 cleanup_dead_nodes_callback,
2372 rbtdb, sizeof(isc_event_t));
2373 if (event == NULL) {
2375 * We acquire a tree write lock here in order to make
2376 * sure that stale nodes will be removed in
2377 * decrement_reference(). If we didn't have the lock,
2378 * those nodes could miss the chance to be removed
2379 * until the server stops. The write lock is
2380 * expensive, but this event should be rare enough
2381 * to justify the cost.
2383 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2384 tlock = isc_rwlocktype_write;
2387 for (changed = HEAD(cleanup_list);
2389 changed = next_changed) {
2392 next_changed = NEXT(changed, link);
2393 rbtnode = changed->node;
2394 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2396 NODE_LOCK(lock, isc_rwlocktype_write);
2398 * This is a good opportunity to purge any dead nodes,
2402 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2405 rollback_node(rbtnode, serial);
2406 decrement_reference(rbtdb, rbtnode, least_serial,
2407 isc_rwlocktype_write, tlock,
2410 NODE_UNLOCK(lock, isc_rwlocktype_write);
2412 isc_mem_put(rbtdb->common.mctx, changed,
2415 if (event != NULL) {
2416 isc_refcount_increment(&rbtdb->references, NULL);
2417 isc_task_send(rbtdb->task, &event);
2419 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2427 * Add the necessary magic for the wildcard name 'name'
2428 * to be found in 'rbtdb'.
2430 * In order for wildcard matching to work correctly in
2431 * zone_find(), we must ensure that a node for the wildcarding
2432 * level exists in the database, and has its 'find_callback'
2433 * and 'wild' bits set.
2435 * E.g. if the wildcard name is "*.sub.example." then we
2436 * must ensure that "sub.example." exists and is marked as
/*
 * add_wildcard_magic: make sure the parent of a wildcard name exists
 * in the main tree and is flagged so that lookups stop there.
 *
 * foundname is set to the name with its first label removed (labels
 * starting at index 1), i.e. the level at which the wildcard applies.
 * That name is added to rbtdb->tree; both a fresh insert and
 * ISC_R_EXISTS are acceptable, any other result is an error.  A
 * freshly created node gets nsec = DNS_RBT_NSEC_NORMAL, and in either
 * case find_callback is set on the node.
 *
 * Returns ISC_R_SUCCESS on the visible success path.
 *
 * NOTE(review): the adjustment of n before the label sequence is
 * taken, the error return, and the setting of the wild bit mentioned
 * by the preceding comment are not visible in this listing - confirm.
 */
2440 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2441 isc_result_t result;
2442 dns_name_t foundname;
2443 dns_offsets_t offsets;
2445 dns_rbtnode_t *node = NULL;
2447 dns_name_init(&foundname, offsets);
2448 n = dns_name_countlabels(name);
2451 dns_name_getlabelsequence(name, 1, n, &foundname);
2452 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2453 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2455 if (result == ISC_R_SUCCESS)
2456 node->nsec = DNS_RBT_NSEC_NORMAL;
2457 node->find_callback = 1;
2459 return (ISC_R_SUCCESS);
/*
 * add_empty_wildcards: handle wildcard labels that occur in the
 * interior of a name.
 *
 * n is the label count of name and l the label count of the database
 * origin.  For each suffix of name consisting of its last i labels
 * (taken with dns_name_getlabelsequence(name, n - i, i, ..)), a suffix
 * that is itself a wildcard name gets add_wildcard_magic() applied and
 * is then added to the main tree as a node; a newly created node is
 * marked DNS_RBT_NSEC_NORMAL.  ISC_R_EXISTS from the insert is not an
 * error.
 *
 * Returns ISC_R_SUCCESS on the visible success path.
 *
 * NOTE(review): the bounds of the loop over i and the error returns
 * are not visible in this listing - confirm.
 */
2463 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2464 isc_result_t result;
2465 dns_name_t foundname;
2466 dns_offsets_t offsets;
2467 unsigned int n, l, i;
2469 dns_name_init(&foundname, offsets);
2470 n = dns_name_countlabels(name);
2471 l = dns_name_countlabels(&rbtdb->common.origin);
2474 dns_rbtnode_t *node = NULL; /* dummy */
2475 dns_name_getlabelsequence(name, n - i, i, &foundname);
2476 if (dns_name_iswildcard(&foundname)) {
2477 result = add_wildcard_magic(rbtdb, &foundname);
2478 if (result != ISC_R_SUCCESS)
2480 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2482 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2484 if (result == ISC_R_SUCCESS)
2485 node->nsec = DNS_RBT_NSEC_NORMAL;
2489 return (ISC_R_SUCCESS);
/*
 * findnode: look up the node for a name in the main tree, optionally
 * creating it, and return it with a new reference in *nodep.
 *
 * The lookup runs under the tree read lock and accepts nodes without
 * data (DNS_RBTFIND_EMPTYDATA).  When it does not succeed the read
 * lock is released; DNS_R_PARTIALMATCH is mapped to ISC_R_NOTFOUND on
 * the visible non-creating path.  For creation the tree lock is
 * re-acquired for writing - there is a window with no lock held, which
 * is why ISC_R_EXISTS from dns_rbt_addnode() is tolerated.  A newly
 * added node is registered with the RPZ CIDR structure when one
 * exists, assigned a lock bucket (from the stored hash value when
 * DNS_RBT_USEHASH is defined, otherwise from dns_name_hash()), has
 * interior wildcards handled by add_empty_wildcards(), and, if the
 * name itself is a wildcard, has its parent prepared by
 * add_wildcard_magic().  Finally reactivate_node() takes the reference
 * while the tree lock is still held, the lock is released and the node
 * is stored in *nodep.
 *
 * Returns ISC_R_SUCCESS on the visible success path.
 *
 * NOTE(review): the test of the create argument and the error return
 * statements are not visible in this listing; the result of
 * add_empty_wildcards() appears to be unused here - confirm.
 */
2493 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2494 dns_dbnode_t **nodep)
2496 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2497 dns_rbtnode_t *node = NULL;
2498 dns_name_t nodename;
2499 isc_result_t result;
2500 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2502 REQUIRE(VALID_RBTDB(rbtdb));
2504 dns_name_init(&nodename, NULL);
2505 RWLOCK(&rbtdb->tree_lock, locktype);
2506 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2507 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2508 if (result != ISC_R_SUCCESS) {
2509 RWUNLOCK(&rbtdb->tree_lock, locktype);
2511 if (result == DNS_R_PARTIALMATCH)
2512 result = ISC_R_NOTFOUND;
2516 * It would be nice to try to upgrade the lock instead of
2517 * unlocking then relocking.
2519 locktype = isc_rwlocktype_write;
2520 RWLOCK(&rbtdb->tree_lock, locktype);
2522 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2523 if (result == ISC_R_SUCCESS) {
2525 if (rbtdb->rpz_cidr != NULL) {
2526 dns_fixedname_t fnamef;
2529 dns_fixedname_init(&fnamef);
2530 fname = dns_fixedname_name(&fnamef);
2531 dns_rbt_fullnamefromnode(node, fname);
2532 dns_rpz_cidr_addip(rbtdb->rpz_cidr, fname);
2535 dns_rbt_namefromnode(node, &nodename);
2536 #ifdef DNS_RBT_USEHASH
2537 node->locknum = node->hashval % rbtdb->node_lock_count;
2539 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2540 rbtdb->node_lock_count;
2542 add_empty_wildcards(rbtdb, name);
2544 if (dns_name_iswildcard(name)) {
2545 result = add_wildcard_magic(rbtdb, name);
2546 if (result != ISC_R_SUCCESS) {
2547 RWUNLOCK(&rbtdb->tree_lock, locktype);
2551 } else if (result != ISC_R_EXISTS) {
2552 RWUNLOCK(&rbtdb->tree_lock, locktype);
2556 reactivate_node(rbtdb, node, locktype);
2557 RWUNLOCK(&rbtdb->tree_lock, locktype);
2559 *nodep = (dns_dbnode_t *)node;
2561 return (ISC_R_SUCCESS);
/*
 * findnsec3node: look up name in the NSEC3 tree (rbtdb->nsec3) and hand
 * the node back through nodep, adding the node when it is missing.
 *
 * A read lock on rbtdb->tree_lock covers dns_rbt_findnode()
 * (DNS_RBTFIND_EMPTYDATA).  When the lookup does not succeed the read
 * lock is released, and either DNS_R_PARTIALMATCH is mapped to
 * ISC_R_NOTFOUND or the lock is retaken for writing and
 * dns_rbt_addnode() is called on rbtdb->nsec3.
 *
 * A node created by the add gets node->locknum (from node->hashval
 * under DNS_RBT_USEHASH, otherwise dns_name_hash() of the node name,
 * modulo rbtdb->node_lock_count) and node->nsec = DNS_RBT_NSEC_NSEC3.
 * A result other than ISC_R_SUCCESS or ISC_R_EXISTS releases the tree
 * lock.
 *
 * On the success path the node must be an NSEC3 node (INSIST), a
 * reference is taken with new_reference() while the node lock is held
 * through NODE_STRONGLOCK, the tree lock is released, *nodep is set and
 * ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the condition that selects between the not-found path
 * and the add path (presumably the create argument) and the error-path
 * return statements are not visible in this listing -- confirm against
 * the complete file.
 */
2565 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2566 dns_dbnode_t **nodep)
2568 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2569 dns_rbtnode_t *node = NULL;
2570 dns_name_t nodename;
2571 isc_result_t result;
2572 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2574 REQUIRE(VALID_RBTDB(rbtdb));
2576 dns_name_init(&nodename, NULL);
2577 RWLOCK(&rbtdb->tree_lock, locktype);
2578 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2579 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2580 if (result != ISC_R_SUCCESS) {
2581 RWUNLOCK(&rbtdb->tree_lock, locktype);
2583 if (result == DNS_R_PARTIALMATCH)
2584 result = ISC_R_NOTFOUND;
2588 * It would be nice to try to upgrade the lock instead of
2589 * unlocking then relocking.
2591 locktype = isc_rwlocktype_write;
2592 RWLOCK(&rbtdb->tree_lock, locktype);
2594 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2595 if (result == ISC_R_SUCCESS) {
2596 dns_rbt_namefromnode(node, &nodename);
2597 #ifdef DNS_RBT_USEHASH
2598 node->locknum = node->hashval % rbtdb->node_lock_count;
2600 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2601 rbtdb->node_lock_count;
2603 node->nsec = DNS_RBT_NSEC_NSEC3;
2604 } else if (result != ISC_R_EXISTS) {
2605 RWUNLOCK(&rbtdb->tree_lock, locktype);
2609 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2611 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2612 new_reference(rbtdb, node);
2613 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2614 RWUNLOCK(&rbtdb->tree_lock, locktype);
2616 *nodep = (dns_dbnode_t *)node;
2618 return (ISC_R_SUCCESS);
/*
 * zone_zonecut_callback: callback that records a zone cut found at node
 * in the search state passed through arg (an rbtdb_search_t).
 *
 * If search->zonecut is already set the function returns
 * DNS_R_CONTINUE at once, so only the first cut seen is remembered.
 * Otherwise, with the node lock held for reading, the rdataset headers
 * at node->data are scanned for NS, DNAME and RBTDB_RDATATYPE_SIGDNAME
 * types, following header->down and testing header->serial against
 * search->serial.  An NS header is only considered when node is not the
 * database origin node or the database is a stub (IS_STUB).  A DNAME
 * header takes precedence over NS.
 *
 * When a cut is found: a reference is taken on node with
 * new_reference(), search->zonecut, zonecut_rdataset and
 * zonecut_sigrdataset are filled in, need_cleanup is set and
 * search->wild is cleared.  If DNS_DBFIND_GLUEOK is not in
 * search->options the result becomes DNS_R_PARTIALMATCH; otherwise name
 * is copied into search->zonecut_name and copy_name is set.
 *
 * When no cut is found and node->wild is set while DNS_DBFIND_NOWILD is
 * not requested, search->wild is set to ISC_TRUE.
 *
 * NOTE(review): several lines are missing from this listing (the
 * embedded numbering has gaps), including the initialisation of
 * ns_header and found, the assignment for the NS case, and the final
 * return.  Presumably result is returned -- confirm against the
 * complete file.
 */
2622 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2623 rbtdb_search_t *search = arg;
2624 rdatasetheader_t *header, *header_next;
2625 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2626 rdatasetheader_t *found;
2627 isc_result_t result;
2628 dns_rbtnode_t *onode;
2631 * We only want to remember the topmost zone cut, since it's the one
2632 * that counts, so we'll just continue if we've already found a
2635 if (search->zonecut != NULL)
2636 return (DNS_R_CONTINUE);
2639 result = DNS_R_CONTINUE;
2640 onode = search->rbtdb->origin_node;
2642 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2643 isc_rwlocktype_read);
2646 * Look for an NS or DNAME rdataset active in our version.
2649 dname_header = NULL;
2650 sigdname_header = NULL;
2651 for (header = node->data; header != NULL; header = header_next) {
2652 header_next = header->next;
2653 if (header->type == dns_rdatatype_ns ||
2654 header->type == dns_rdatatype_dname ||
2655 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2657 if (header->serial <= search->serial &&
2660 * Is this a "this rdataset doesn't
2663 if (NONEXISTENT(header))
2667 header = header->down;
2668 } while (header != NULL);
2669 if (header != NULL) {
2670 if (header->type == dns_rdatatype_dname)
2671 dname_header = header;
2672 else if (header->type ==
2673 RBTDB_RDATATYPE_SIGDNAME)
2674 sigdname_header = header;
2675 else if (node != onode ||
2676 IS_STUB(search->rbtdb)) {
2678 * We've found an NS rdataset that
2679 * isn't at the origin node. We check
2680 * that they're not at the origin node,
2681 * because otherwise we'd erroneously
2682 * treat the zone top as if it were
2692 * Did we find anything?
2694 if (dname_header != NULL) {
2696 * Note that DNAME has precedence over NS if both exist.
2698 found = dname_header;
2699 search->zonecut_sigrdataset = sigdname_header;
2700 } else if (ns_header != NULL) {
2702 search->zonecut_sigrdataset = NULL;
2705 if (found != NULL) {
2707 * We increment the reference count on node to ensure that
2708 * search->zonecut_rdataset will still be valid later.
2710 new_reference(search->rbtdb, node);
2711 search->zonecut = node;
2712 search->zonecut_rdataset = found;
2713 search->need_cleanup = ISC_TRUE;
2715 * Since we've found a zonecut, anything beneath it is
2716 * glue and is not subject to wildcard matching, so we
2717 * may clear search->wild.
2719 search->wild = ISC_FALSE;
2720 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2722 * If the caller does not want to find glue, then
2723 * this is the best answer and the search should
2726 result = DNS_R_PARTIALMATCH;
2731 * The search will continue beneath the zone cut.
2732 * This may or may not be the best match. In case it
2733 * is, we need to remember the node name.
2735 zcname = dns_fixedname_name(&search->zonecut_name);
2736 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2738 search->copy_name = ISC_TRUE;
2742 * There is no zonecut at this node which is active in this
2745 * If this is a "wild" node and the caller hasn't disabled
2746 * wildcard matching, remember that we've seen a wild node
2747 * in case we need to go searching for wildcard matches
2750 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2751 search->wild = ISC_TRUE;
2754 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2755 isc_rwlocktype_read);
/*
 * bind_rdataset: fill in rdataset so that it refers to the rdataset
 * header stored at node.
 *
 * A NULL rdataset is tested for first.  Otherwise a reference is taken
 * on node with new_reference(), and rdataset must be disassociated on
 * entry (INSIST that rdataset->methods is NULL).  The function then
 * sets:
 *   - methods, rdclass (from rbtdb->common.rdclass), type and covers
 *     (RBTDB_RDATATYPE_BASE and RBTDB_RDATATYPE_EXT of header->type);
 *   - ttl as header->rdh_ttl - now, and trust from header->trust;
 *   - the NEGATIVE and NXDOMAIN attribute bits according to NEGATIVE()
 *     and NXDOMAIN() on header, plus the OPTOUT, NOQNAME, CLOSEST and
 *     RESIGN attribute bits;
 *   - private1 = rbtdb, private2 = node, private3 = the raw data that
 *     starts right after the header structure;
 *   - count from header->count, which is post-incremented; a value of
 *     ISC_UINT32_MAX is replaced by 0;
 *   - iterator state (privateuint4, private5) reset to 0 and NULL;
 *   - private6 and private7 from header->noqname and header->closest;
 *   - resign from header->resign when RESIGN(header), else 0.
 *
 * As the existing comment in the body states, callers hold the node
 * reader lock even though header->count is modified here.
 *
 * NOTE(review): the action taken for a NULL rdataset and the condition
 * guarding the OPTOUT attribute are not visible in this listing
 * (presumably an early return and OPTOUT(header)) -- confirm against
 * the complete file.
 */
2761 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2762 rdatasetheader_t *header, isc_stdtime_t now,
2763 dns_rdataset_t *rdataset)
2765 unsigned char *raw; /* RDATASLAB */
2768 * Caller must be holding the node reader lock.
2769 * XXXJT: technically, we need a writer lock, since we'll increment
2770 * the header count below. However, since the actual counter value
2771 * doesn't matter, we prioritize performance here. (We may want to
2772 * use atomic increment when available).
2775 if (rdataset == NULL)
2778 new_reference(rbtdb, node);
2780 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2782 rdataset->methods = &rdataset_methods;
2783 rdataset->rdclass = rbtdb->common.rdclass;
2784 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2785 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2786 rdataset->ttl = header->rdh_ttl - now;
2787 rdataset->trust = header->trust;
2788 if (NEGATIVE(header))
2789 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2790 if (NXDOMAIN(header))
2791 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2793 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2794 rdataset->private1 = rbtdb;
2795 rdataset->private2 = node;
2796 raw = (unsigned char *)header + sizeof(*header);
2797 rdataset->private3 = raw;
2798 rdataset->count = header->count++;
2799 if (rdataset->count == ISC_UINT32_MAX)
2800 rdataset->count = 0;
2803 * Reset iterator state.
2805 rdataset->privateuint4 = 0;
2806 rdataset->private5 = NULL;
2809 * Add noqname proof.
2811 rdataset->private6 = header->noqname;
2812 if (rdataset->private6 != NULL)
2813 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2814 rdataset->private7 = header->closest;
2815 if (rdataset->private7 != NULL)
2816 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2819 * Copy out re-signing information.
2821 if (RESIGN(header)) {
2822 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2823 rdataset->resign = header->resign;
2825 rdataset->resign = 0;
/*
 * setup_delegation: produce the answer for a search that ended at the
 * zone cut remembered in search (search->zonecut and
 * search->zonecut_rdataset).
 *
 * Order of work in the visible body:
 *   1. If foundname is non-NULL and search->copy_name is set, the saved
 *      cut name (search->zonecut_name) is copied into foundname with
 *      dns_name_copy().  This is done first so that nothing has to be
 *      undone if the copy fails.
 *   2. If nodep is non-NULL, search->need_cleanup is cleared because
 *      the reference already held in the search block is reused.
 *   3. If rdataset is non-NULL, the node lock is taken for reading and
 *      bind_rdataset() binds the cut rdataset and, when both
 *      sigrdataset and search->zonecut_sigrdataset are non-NULL, the
 *      signature rdataset.
 * Returns DNS_R_DNAME when the cut rdataset type is DNAME, otherwise
 * DNS_R_DELEGATION.
 *
 * Callers must not hold any node lock (stated by the existing comment
 * in the body).
 *
 * NOTE(review): the store into *nodep and the failure return after
 * dns_name_copy() are not visible in this listing (the embedded
 * numbering has gaps) -- confirm against the complete file.
 */
2828 static inline isc_result_t
2829 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2830 dns_name_t *foundname, dns_rdataset_t *rdataset,
2831 dns_rdataset_t *sigrdataset)
2833 isc_result_t result;
2835 rbtdb_rdatatype_t type;
2836 dns_rbtnode_t *node;
2839 * The caller MUST NOT be holding any node locks.
2842 node = search->zonecut;
2843 type = search->zonecut_rdataset->type;
2846 * If we have to set foundname, we do it before anything else.
2847 * If we were to set foundname after we had set nodep or bound the
2848 * rdataset, then we'd have to undo that work if dns_name_copy()
2849 * failed. By setting foundname first, there's nothing to undo if
2852 if (foundname != NULL && search->copy_name) {
2853 zcname = dns_fixedname_name(&search->zonecut_name);
2854 result = dns_name_copy(zcname, foundname, NULL);
2855 if (result != ISC_R_SUCCESS)
2858 if (nodep != NULL) {
2860 * Note that we don't have to increment the node's reference
2861 * count here because we're going to use the reference we
2862 * already have in the search block.
2865 search->need_cleanup = ISC_FALSE;
2867 if (rdataset != NULL) {
2868 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2869 isc_rwlocktype_read);
2870 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2871 search->now, rdataset);
2872 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2873 bind_rdataset(search->rbtdb, node,
2874 search->zonecut_sigrdataset,
2875 search->now, sigrdataset);
2876 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2877 isc_rwlocktype_read);
2880 if (type == dns_rdatatype_dname)
2881 return (DNS_R_DNAME);
2882 return (DNS_R_DELEGATION);
/*
 * valid_glue: decide whether an rdataset of the given type at node may
 * be treated as glue for the zone cut recorded in search.
 *
 * Type filter: for NS the node is compared with search->zonecut; any
 * other type is tested against A, AAAA and A6.  The raw data following
 * search->zonecut_rdataset is then read: a 16-bit big-endian count
 * (raw[0] * 256 + raw[1]); with DNS_RDATASET_FIXED the pointer skips
 * that count plus 4 * count further bytes; then a 16-bit big-endian
 * size per record.  The record region is turned into a name with
 * dns_name_fromregion() and compared with name using
 * dns_name_compare(); a result of 0 is the match condition.
 *
 * valid starts as ISC_FALSE.
 *
 * NOTE(review): the statements executed when the type filter rejects,
 * the loop header over the records, and the statements run on a name
 * match are not visible in this listing.  Presumably the function
 * returns ISC_FALSE for a rejected type and sets valid to ISC_TRUE on
 * a match -- confirm against the complete file.
 */
2885 static inline isc_boolean_t
2886 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2887 dns_rbtnode_t *node)
2889 unsigned char *raw; /* RDATASLAB */
2890 unsigned int count, size;
2892 isc_boolean_t valid = ISC_FALSE;
2893 dns_offsets_t offsets;
2894 isc_region_t region;
2895 rdatasetheader_t *header;
2898 * No additional locking is required.
2902 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2903 * if it occurs at a zone cut, but is not valid below it.
2905 if (type == dns_rdatatype_ns) {
2906 if (node != search->zonecut) {
2909 } else if (type != dns_rdatatype_a &&
2910 type != dns_rdatatype_aaaa &&
2911 type != dns_rdatatype_a6) {
2915 header = search->zonecut_rdataset;
2916 raw = (unsigned char *)header + sizeof(*header);
2917 count = raw[0] * 256 + raw[1];
2918 #if DNS_RDATASET_FIXED
2919 raw += 2 + (4 * count);
2926 size = raw[0] * 256 + raw[1];
2927 #if DNS_RDATASET_FIXED
2933 region.length = size;
2936 * XXX Until we have rdata structures, we have no choice but
2937 * to directly access the rdata format.
2939 dns_name_init(&ns_name, offsets);
2940 dns_name_fromregion(&ns_name, ®ion);
2941 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * activeempty: walk forward along chain looking for a node with active
 * data, then test whether the name reached is a subdomain of name.
 *
 * Starting with dns_rbtnodechain_next(), each node is fetched with
 * dns_rbtnodechain_current() and, under its node read lock, its headers
 * are scanned for one with header->serial <= search->serial that passes
 * the !IGNORE(header) && EXISTS(header) test.  After the loop, when
 * result is ISC_R_SUCCESS, prefix and origin are concatenated into next,
 * and next is tested against name with dns_name_issubdomain().
 *
 * answer starts as ISC_FALSE.  The chain passed in is advanced by this
 * function.
 *
 * NOTE(review): the loop exits and the statement that sets answer are
 * not visible in this listing (the embedded numbering has gaps).
 * Presumably answer becomes ISC_TRUE when the subdomain test succeeds
 * and is then returned -- confirm against the complete file.
 */
2950 static inline isc_boolean_t
2951 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2954 dns_fixedname_t fnext;
2955 dns_fixedname_t forigin;
2960 dns_rbtnode_t *node;
2961 isc_result_t result;
2962 isc_boolean_t answer = ISC_FALSE;
2963 rdatasetheader_t *header;
2965 rbtdb = search->rbtdb;
2967 dns_name_init(&prefix, NULL);
2968 dns_fixedname_init(&fnext);
2969 next = dns_fixedname_name(&fnext);
2970 dns_fixedname_init(&forigin);
2971 origin = dns_fixedname_name(&forigin);
2973 result = dns_rbtnodechain_next(chain, NULL, NULL);
2974 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2976 result = dns_rbtnodechain_current(chain, &prefix,
2978 if (result != ISC_R_SUCCESS)
2980 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2981 isc_rwlocktype_read);
2982 for (header = node->data;
2984 header = header->next) {
2985 if (header->serial <= search->serial &&
2986 !IGNORE(header) && EXISTS(header))
2989 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2990 isc_rwlocktype_read);
2993 result = dns_rbtnodechain_next(chain, NULL, NULL);
2995 if (result == ISC_R_SUCCESS)
2996 result = dns_name_concatenate(&prefix, origin, next, NULL);
2997 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode: test whether qname, or one of its ancestors below
 * the wildcard parent, has an active neighbouring name beneath it.
 *
 * Works on a private copy of search->chain.
 *   1. Walks backwards (dns_rbtnodechain_prev), reading each node with
 *      dns_rbtnodechain_current() and scanning its headers, under the
 *      node read lock, for one active in the search version.  The name
 *      reached is concatenated with origin into prev; if that does not
 *      succeed check_prev is cleared.
 *   2. Walks forwards (dns_rbtnodechain_next) in the same way to build
 *      next; if that does not succeed check_next is cleared.
 *   3. rname starts as a clone of qname and tname is wname without its
 *      first (wildcard) label.  Repeatedly, prev and next are tested
 *      against rname with dns_name_issubdomain() and the leftmost label
 *      of rname is removed, until rname equals tname.
 *
 * answer starts as ISC_FALSE.  The identifier spelling is left
 * untouched because find_wildcard() calls the function by this name.
 *
 * NOTE(review): the loop exits and the statements that set answer are
 * not visible in this listing (the embedded numbering has gaps).
 * Presumably answer becomes ISC_TRUE when a subdomain test succeeds
 * and is then returned -- confirm against the complete file.
 */
3002 static inline isc_boolean_t
3003 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
3004 dns_fixedname_t fnext;
3005 dns_fixedname_t forigin;
3006 dns_fixedname_t fprev;
3014 dns_rbtnode_t *node;
3015 dns_rbtnodechain_t chain;
3016 isc_boolean_t check_next = ISC_TRUE;
3017 isc_boolean_t check_prev = ISC_TRUE;
3018 isc_boolean_t answer = ISC_FALSE;
3019 isc_result_t result;
3020 rdatasetheader_t *header;
3023 rbtdb = search->rbtdb;
3025 dns_name_init(&name, NULL);
3026 dns_name_init(&tname, NULL);
3027 dns_name_init(&rname, NULL);
3028 dns_fixedname_init(&fnext);
3029 next = dns_fixedname_name(&fnext);
3030 dns_fixedname_init(&fprev);
3031 prev = dns_fixedname_name(&fprev);
3032 dns_fixedname_init(&forigin);
3033 origin = dns_fixedname_name(&forigin);
3036 * Find if qname is at or below a empty node.
3037 * Use our own copy of the chain.
3040 chain = search->chain;
3043 result = dns_rbtnodechain_current(&chain, &name,
3045 if (result != ISC_R_SUCCESS)
3047 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3048 isc_rwlocktype_read);
3049 for (header = node->data;
3051 header = header->next) {
3052 if (header->serial <= search->serial &&
3053 !IGNORE(header) && EXISTS(header))
3056 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3057 isc_rwlocktype_read);
3060 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3061 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3062 if (result == ISC_R_SUCCESS)
3063 result = dns_name_concatenate(&name, origin, prev, NULL);
3064 if (result != ISC_R_SUCCESS)
3065 check_prev = ISC_FALSE;
3067 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3068 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3070 result = dns_rbtnodechain_current(&chain, &name,
3072 if (result != ISC_R_SUCCESS)
3074 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3075 isc_rwlocktype_read);
3076 for (header = node->data;
3078 header = header->next) {
3079 if (header->serial <= search->serial &&
3080 !IGNORE(header) && EXISTS(header))
3083 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3084 isc_rwlocktype_read);
3087 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3089 if (result == ISC_R_SUCCESS)
3090 result = dns_name_concatenate(&name, origin, next, NULL);
3091 if (result != ISC_R_SUCCESS)
3092 check_next = ISC_FALSE;
3094 dns_name_clone(qname, &rname);
3097 * Remove the wildcard label to find the terminal name.
3099 n = dns_name_countlabels(wname);
3100 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3103 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3104 (check_next && dns_name_issubdomain(next, &rname))) {
3109 * Remove the left hand label.
3111 n = dns_name_countlabels(&rname);
3112 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3113 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard: search the ancestor levels recorded in search->chain
 * for a wildcard that matches qname.
 *
 * Per the existing comments in the body, the caller holds the tree
 * lock and no node lock.  Starting from
 * i = search->chain.level_matches, for each level node the function:
 *   - under the node read lock, scans the headers for one active in
 *     the search version (serial <= search->serial, !IGNORE, EXISTS);
 *   - builds the wildcard name by concatenating dns_wildcardname with
 *     the node name, and then with the names of further levels taken
 *     from search->chain.levels;
 *   - looks that name up in rbtdb->tree with dns_rbt_findnode()
 *     (DNS_RBTFIND_EMPTYDATA) using a private chain, wchain;
 *   - if the wildcard node is found and either has an active header or
 *     activeempty() reports true, calls activeemtpynode() and returns
 *     ISC_R_NOTFOUND when that check is true; otherwise the wildcard
 *     node is the match and result stays ISC_R_SUCCESS;
 *   - treats lookup results other than ISC_R_NOTFOUND and
 *     DNS_R_PARTIALMATCH as errors;
 *   - sets result to ISC_R_NOTFOUND when the level node itself is
 *     active, since wildcards at higher levels then have no effect.
 * The next level node is taken from search->chain.levels[i].
 * result is initialised to ISC_R_NOTFOUND.
 *
 * NOTE(review): the loop header, the conditions on the wild and active
 * flags, the store into *nodep and the final return are not visible in
 * this listing (the embedded numbering has gaps) -- confirm against the
 * complete file.
 */
3117 static inline isc_result_t
3118 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3122 dns_rbtnode_t *node, *level_node, *wnode;
3123 rdatasetheader_t *header;
3124 isc_result_t result = ISC_R_NOTFOUND;
3127 dns_fixedname_t fwname;
3129 isc_boolean_t done, wild, active;
3130 dns_rbtnodechain_t wchain;
3133 * Caller must be holding the tree lock and MUST NOT be holding
3138 * Examine each ancestor level. If the level's wild bit
3139 * is set, then construct the corresponding wildcard name and
3140 * search for it. If the wildcard node exists, and is active in
3141 * this version, we're done. If not, then we next check to see
3142 * if the ancestor is active in this version. If so, then there
3143 * can be no possible wildcard match and again we're done. If not,
3144 * continue the search.
3147 rbtdb = search->rbtdb;
3148 i = search->chain.level_matches;
3152 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3153 isc_rwlocktype_read);
3156 * First we try to figure out if this node is active in
3157 * the search's version. We do this now, even though we
3158 * may not need the information, because it simplifies the
3159 * locking and code flow.
3161 for (header = node->data;
3163 header = header->next) {
3164 if (header->serial <= search->serial &&
3165 !IGNORE(header) && EXISTS(header))
3178 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3179 isc_rwlocktype_read);
3183 * Construct the wildcard name for this level.
3185 dns_name_init(&name, NULL);
3186 dns_rbt_namefromnode(node, &name);
3187 dns_fixedname_init(&fwname);
3188 wname = dns_fixedname_name(&fwname);
3189 result = dns_name_concatenate(dns_wildcardname, &name,
3192 while (result == ISC_R_SUCCESS && j != 0) {
3194 level_node = search->chain.levels[j];
3195 dns_name_init(&name, NULL);
3196 dns_rbt_namefromnode(level_node, &name);
3197 result = dns_name_concatenate(wname,
3202 if (result != ISC_R_SUCCESS)
3206 dns_rbtnodechain_init(&wchain, NULL);
3207 result = dns_rbt_findnode(rbtdb->tree, wname,
3208 NULL, &wnode, &wchain,
3209 DNS_RBTFIND_EMPTYDATA,
3211 if (result == ISC_R_SUCCESS) {
3215 * We have found the wildcard node. If it
3216 * is active in the search's version, we're
3219 lock = &rbtdb->node_locks[wnode->locknum].lock;
3220 NODE_LOCK(lock, isc_rwlocktype_read);
3221 for (header = wnode->data;
3223 header = header->next) {
3224 if (header->serial <= search->serial &&
3225 !IGNORE(header) && EXISTS(header))
3228 NODE_UNLOCK(lock, isc_rwlocktype_read);
3229 if (header != NULL ||
3230 activeempty(search, &wchain, wname)) {
3231 if (activeemtpynode(search, qname,
3233 return (ISC_R_NOTFOUND);
3236 * The wildcard node is active!
3238 * Note: result is still ISC_R_SUCCESS
3239 * so we don't have to set it.
3244 } else if (result != ISC_R_NOTFOUND &&
3245 result != DNS_R_PARTIALMATCH) {
3247 * An error has occurred. Bail out.
3255 * The level node is active. Any wildcarding
3256 * present at higher levels has no
3257 * effect and we're done.
3259 result = ISC_R_NOTFOUND;
3265 node = search->chain.levels[i];
/*
 * matchparams: compare the NSEC3 records stored in header with the
 * NSEC3 parameters of the version being searched.
 *
 * header must be of type NSEC3 (REQUIRE).  The raw data after the
 * header is walked: a 16-bit big-endian record count (with
 * DNS_RDATASET_FIXED the pointer then skips count * 4 + 2 bytes),
 * followed per record by a 16-bit big-endian length.  Each record is
 * converted with dns_rdata_fromregion() and dns_rdata_tostruct() (the
 * conversion must succeed, INSIST) and compared with search->rbtversion
 * on hash, iterations, salt_length and the salt bytes (memcmp over
 * nsec3.salt_length).  rdata is reset after each record that is
 * examined.
 *
 * NOTE(review): the return statements are not visible in this listing
 * (the embedded numbering has gaps).  Presumably ISC_TRUE is returned
 * on the first matching record and ISC_FALSE when none match -- confirm
 * against the complete file.
 */
3273 static isc_boolean_t
3274 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3276 dns_rdata_t rdata = DNS_RDATA_INIT;
3277 dns_rdata_nsec3_t nsec3;
3278 unsigned char *raw; /* RDATASLAB */
3279 unsigned int rdlen, count;
3280 isc_region_t region;
3281 isc_result_t result;
3283 REQUIRE(header->type == dns_rdatatype_nsec3);
3285 raw = (unsigned char *)header + sizeof(*header);
3286 count = raw[0] * 256 + raw[1]; /* count */
3287 #if DNS_RDATASET_FIXED
3288 raw += count * 4 + 2;
3292 while (count-- > 0) {
3293 rdlen = raw[0] * 256 + raw[1];
3294 #if DNS_RDATASET_FIXED
3300 region.length = rdlen;
3301 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3302 dns_rdatatype_nsec3, ®ion);
3304 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3305 INSIST(result == ISC_R_SUCCESS);
3306 if (nsec3.hash == search->rbtversion->hash &&
3307 nsec3.iterations == search->rbtversion->iterations &&
3308 nsec3.salt_length == search->rbtversion->salt_length &&
3309 memcmp(nsec3.salt, search->rbtversion->salt,
3310 nsec3.salt_length) == 0)
3312 dns_rdata_reset(&rdata);
3318 * Find node of the NSEC/NSEC3 record that is 'name'.
/*
 * previous_closest_nsec: step the search back to the previous candidate
 * node for an NSEC or NSEC3 lookup.
 *
 * nodep must be non-NULL and point at NULL on entry (REQUIRE).
 *
 * For type NSEC3 the main search chain is moved back with
 * dns_rbtnodechain_prev() and the current position is read with
 * dns_rbtnodechain_current().
 *
 * Otherwise the auxiliary tree search->rbtdb->nsec is used:
 *   - On the first pass *firstp is cleared, nsecchain is initialised,
 *     name and origin are concatenated and the result is looked up in
 *     the NSEC tree.  An exact hit moves nsecchain back one node;
 *     ISC_R_NOTFOUND or DNS_R_PARTIALMATCH reads the current chain
 *     position instead, and ISC_R_NOTFOUND from that read is mapped to
 *     ISC_R_NOMORE.
 *   - On later passes nsecchain is moved back with
 *     dns_rbtnodechain_prev().
 *   - DNS_R_NEWORIGIN from these prev calls is treated as
 *     ISC_R_SUCCESS.
 *   - name and origin are then concatenated into target, which is
 *     looked up in search->rbtdb->tree with the node stored through
 *     nodep and search->chain passed as the chain.
 *   - A lookup result other than ISC_R_SUCCESS, DNS_R_PARTIALMATCH or
 *     ISC_R_NOTFOUND is logged at ISC_LOG_ERROR and DNS_R_BADDB is
 *     returned.
 *
 * NOTE(review): the loop around the NSEC-tree steps and most return
 * statements are not visible in this listing (the embedded numbering
 * has gaps) -- confirm against the complete file.
 */
3320 static inline isc_result_t
3321 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3322 dns_name_t *name, dns_name_t *origin,
3323 dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3324 isc_boolean_t *firstp)
3326 dns_fixedname_t ftarget;
3328 dns_rbtnode_t *nsecnode;
3329 isc_result_t result;
3331 REQUIRE(nodep != NULL && *nodep == NULL);
3333 if (type == dns_rdatatype_nsec3) {
3334 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3335 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3337 result = dns_rbtnodechain_current(&search->chain, name, origin,
3342 dns_fixedname_init(&ftarget);
3343 target = dns_fixedname_name(&ftarget);
3348 * Construct the name of the second node to check.
3349 * It is the first node sought in the NSEC tree.
3351 *firstp = ISC_FALSE;
3352 dns_rbtnodechain_init(nsecchain, NULL);
3353 result = dns_name_concatenate(name, origin,
3355 if (result != ISC_R_SUCCESS)
3358 result = dns_rbt_findnode(search->rbtdb->nsec,
3360 &nsecnode, nsecchain,
3361 DNS_RBTFIND_NOOPTIONS,
3363 if (result == ISC_R_SUCCESS) {
3365 * Since this was the first loop, finding the
3366 * name in the NSEC tree implies that the first
3367 * node checked in the main tree had an
3368 * unacceptable NSEC record.
3369 * Try the previous node in the NSEC tree.
3371 result = dns_rbtnodechain_prev(nsecchain,
3373 if (result == DNS_R_NEWORIGIN)
3374 result = ISC_R_SUCCESS;
3375 } else if (result == ISC_R_NOTFOUND ||
3376 result == DNS_R_PARTIALMATCH) {
3377 result = dns_rbtnodechain_current(nsecchain,
3378 name, origin, NULL);
3379 if (result == ISC_R_NOTFOUND)
3380 result = ISC_R_NOMORE;
3384 * This is a second or later trip through the auxiliary
3385 * tree for the name of a third or earlier NSEC node in
3386 * the main tree. Previous trips through the NSEC tree
3387 * must have found nodes in the main tree with NSEC
3388 * records. Perhaps they lacked signature records.
3390 result = dns_rbtnodechain_prev(nsecchain, name, origin);
3391 if (result == DNS_R_NEWORIGIN)
3392 result = ISC_R_SUCCESS;
3394 if (result != ISC_R_SUCCESS)
3398 * Construct the name to seek in the main tree.
3400 result = dns_name_concatenate(name, origin, target, NULL);
3401 if (result != ISC_R_SUCCESS)
3405 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3406 nodep, &search->chain,
3407 DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3408 if (result == ISC_R_SUCCESS)
3412 * There should always be a node in the main tree with the
3413 * same name as the node in the auxiliary NSEC tree, except for
3414 * nodes in the auxiliary tree that are awaiting deletion.
3416 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3417 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3418 DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3419 "previous_closest_nsec(): %s",
3420 isc_result_totext(result));
3421 return (DNS_R_BADDB);
3427 * Find the NSEC/NSEC3 which is or before the current point on the
3428 * search chain. For NSEC3 records only NSEC3 records that match the
3429 * current NSEC3PARAM record are considered.
/*
 * find_closest_nsec: find the NSEC or NSEC3 rdataset at, or before, the
 * current position of search->chain.
 *
 * tree selects the record type: when it is search->rbtdb->nsec3 the
 * function looks for NSEC3 and RBTDB_RDATATYPE_SIGNSEC3, otherwise for
 * NSEC and RBTDB_RDATATYPE_SIGNSEC.  need_sig is true when secure
 * equals dns_db_secure.
 *
 * Loop (repeats while the node counts as empty and result is
 * ISC_R_SUCCESS):
 *   - read the current chain node and lock it for reading;
 *   - scan its headers (following header->down and comparing
 *     header->serial with search->serial) for the wanted type and its
 *     signature type;
 *   - an NSEC3 header that fails matchparams() while the version has
 *     havensec3 set is treated as an empty node and the search moves
 *     on through previous_closest_nsec();
 *   - when the record is found, and its signature too if need_sig is
 *     set, name and origin are concatenated, a reference is taken with
 *     new_reference() when nodep is non-NULL, and bind_rdataset() binds
 *     the record and, if present, the signature;
 *   - an active node with neither record nor signature is treated as
 *     empty and the search moves on;
 *   - an active node where one of the two is missing yields
 *     DNS_R_BADDB;
 *   - a node with no active rdataset moves on through
 *     previous_closest_nsec().
 * Afterwards nsecchain is invalidated.  When result is ISC_R_NOMORE and
 * wraps is set, dns_rbtnodechain_last() is called on tree; a remaining
 * ISC_R_NOMORE is turned into DNS_R_BADDB.
 *
 * NOTE(review): the initialisation of wraps, found and foundsig, the
 * loop header and the final return are not visible in this listing (the
 * embedded numbering has gaps) -- confirm against the complete file.
 */
3431 static inline isc_result_t
3432 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3433 dns_name_t *foundname, dns_rdataset_t *rdataset,
3434 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3435 dns_db_secure_t secure)
3437 dns_rbtnode_t *node, *prevnode;
3438 rdatasetheader_t *header, *header_next, *found, *foundsig;
3439 dns_rbtnodechain_t nsecchain;
3440 isc_boolean_t empty_node;
3441 isc_result_t result;
3442 dns_fixedname_t fname, forigin;
3443 dns_name_t *name, *origin;
3444 dns_rdatatype_t type;
3445 rbtdb_rdatatype_t sigtype;
3446 isc_boolean_t wraps;
3447 isc_boolean_t first = ISC_TRUE;
3448 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3450 if (tree == search->rbtdb->nsec3) {
3451 type = dns_rdatatype_nsec3;
3452 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3455 type = dns_rdatatype_nsec;
3456 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3461 * Use the auxiliary tree only starting with the second node in the
3462 * hope that the original node will be right much of the time.
3464 dns_fixedname_init(&fname);
3465 name = dns_fixedname_name(&fname);
3466 dns_fixedname_init(&forigin);
3467 origin = dns_fixedname_name(&forigin);
3471 result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3472 if (result != ISC_R_SUCCESS)
3475 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3476 isc_rwlocktype_read);
3479 empty_node = ISC_TRUE;
3480 for (header = node->data;
3482 header = header_next) {
3483 header_next = header->next;
3485 * Look for an active, extant NSEC or RRSIG NSEC.
3488 if (header->serial <= search->serial &&
3491 * Is this a "this rdataset doesn't
3494 if (NONEXISTENT(header))
3498 header = header->down;
3499 } while (header != NULL);
3500 if (header != NULL) {
3502 * We now know that there is at least one
3503 * active rdataset at this node.
3505 empty_node = ISC_FALSE;
3506 if (header->type == type) {
3508 if (foundsig != NULL)
3510 } else if (header->type == sigtype) {
3518 if (found != NULL && search->rbtversion->havensec3 &&
3519 found->type == dns_rdatatype_nsec3 &&
3520 !matchparams(found, search)) {
3521 empty_node = ISC_TRUE;
3524 result = previous_closest_nsec(type, search,
3528 } else if (found != NULL &&
3529 (foundsig != NULL || !need_sig)) {
3531 * We've found the right NSEC/NSEC3 record.
3533 * Note: for this to really be the right
3534 * NSEC record, it's essential that the NSEC
3535 * records of any nodes obscured by a zone
3536 * cut have been removed; we assume this is
3539 result = dns_name_concatenate(name, origin,
3541 if (result == ISC_R_SUCCESS) {
3542 if (nodep != NULL) {
3543 new_reference(search->rbtdb,
3547 bind_rdataset(search->rbtdb, node,
3550 if (foundsig != NULL)
3551 bind_rdataset(search->rbtdb,
3557 } else if (found == NULL && foundsig == NULL) {
3559 * This node is active, but has no NSEC or
3560 * RRSIG NSEC. That means it's glue or
3561 * other obscured zone data that isn't
3562 * relevant for our search. Treat the
3563 * node as if it were empty and keep looking.
3565 empty_node = ISC_TRUE;
3566 result = previous_closest_nsec(type, search,
3573 * We found an active node, but either the
3574 * NSEC or the RRSIG NSEC is missing. This
3577 result = DNS_R_BADDB;
3581 * This node isn't active. We've got to keep
3584 result = previous_closest_nsec(type, search,
3585 name, origin, &prevnode,
3586 &nsecchain, &first);
3588 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3589 isc_rwlocktype_read);
3592 } while (empty_node && result == ISC_R_SUCCESS);
3595 dns_rbtnodechain_invalidate(&nsecchain);
3597 if (result == ISC_R_NOMORE && wraps) {
3598 result = dns_rbtnodechain_last(&search->chain, tree,
3600 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3607 * If the result is ISC_R_NOMORE, then we got to the beginning of
3608 * the database and didn't find a NSEC record. This shouldn't
3611 if (result == ISC_R_NOMORE)
3612 result = DNS_R_BADDB;
3618 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3619 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3620 dns_dbnode_t **nodep, dns_name_t *foundname,
3621 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3623 dns_rbtnode_t *node = NULL;
3624 isc_result_t result;
3625 rbtdb_search_t search;
3626 isc_boolean_t cname_ok = ISC_TRUE;
3627 isc_boolean_t close_version = ISC_FALSE;
3628 isc_boolean_t maybe_zonecut = ISC_FALSE;
3629 isc_boolean_t at_zonecut = ISC_FALSE;
3631 isc_boolean_t empty_node;
3632 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3633 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3634 rbtdb_rdatatype_t sigtype;
3635 isc_boolean_t active;
3636 dns_rbtnodechain_t chain;
3640 search.rbtdb = (dns_rbtdb_t *)db;
3642 REQUIRE(VALID_RBTDB(search.rbtdb));
3645 * We don't care about 'now'.
3650 * If the caller didn't supply a version, attach to the current
3653 if (version == NULL) {
3654 currentversion(db, &version);
3655 close_version = ISC_TRUE;
3658 search.rbtversion = version;
3659 search.serial = search.rbtversion->serial;
3660 search.options = options;
3661 search.copy_name = ISC_FALSE;
3662 search.need_cleanup = ISC_FALSE;
3663 search.wild = ISC_FALSE;
3664 search.zonecut = NULL;
3665 dns_fixedname_init(&search.zonecut_name);
3666 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3670 * 'wild' will be true iff. we've matched a wildcard.
3674 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3677 * Search down from the root of the tree. If, while going down, we
3678 * encounter a callback node, zone_zonecut_callback() will search the
3679 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3681 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3683 result = dns_rbt_findnode(tree, name, foundname, &node,
3684 &search.chain, DNS_RBTFIND_EMPTYDATA,
3685 zone_zonecut_callback, &search);
3687 if (result == DNS_R_PARTIALMATCH) {
3689 if (search.zonecut != NULL) {
3690 result = setup_delegation(&search, nodep, foundname,
3691 rdataset, sigrdataset);
3697 * At least one of the levels in the search chain
3698 * potentially has a wildcard. For each such level,
3699 * we must see if there's a matching wildcard active
3700 * in the current version.
3702 result = find_wildcard(&search, &node, name);
3703 if (result == ISC_R_SUCCESS) {
3704 result = dns_name_copy(name, foundname, NULL);
3705 if (result != ISC_R_SUCCESS)
3710 else if (result != ISC_R_NOTFOUND)
3714 chain = search.chain;
3715 active = activeempty(&search, &chain, name);
3718 * If we're here, then the name does not exist, is not
3719 * beneath a zonecut, and there's no matching wildcard.
3721 if ((search.rbtversion->secure == dns_db_secure &&
3722 !search.rbtversion->havensec3) ||
3723 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3724 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3726 result = find_closest_nsec(&search, nodep, foundname,
3727 rdataset, sigrdataset, tree,
3728 search.rbtversion->secure);
3729 if (result == ISC_R_SUCCESS)
3730 result = active ? DNS_R_EMPTYNAME :
3733 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3735 } else if (result != ISC_R_SUCCESS)
3740 * We have found a node whose name is the desired name, or we
3741 * have matched a wildcard.
3744 if (search.zonecut != NULL) {
3746 * If we're beneath a zone cut, we don't want to look for
3747 * CNAMEs because they're not legitimate zone glue.
3749 cname_ok = ISC_FALSE;
3752 * The node may be a zone cut itself. If it might be one,
3753 * make sure we check for it later.
3755 * DS records live above the zone cut in ordinary zone so
3756 * we want to ignore any referral.
3758 * Stub zones don't have anything "above" the delegation so
3759 * we always return a referral.
3761 if (node->find_callback &&
3762 ((node != search.rbtdb->origin_node &&
3763 !dns_rdatatype_atparent(type)) ||
3764 IS_STUB(search.rbtdb)))
3765 maybe_zonecut = ISC_TRUE;
3769 * Certain DNSSEC types are not subject to CNAME matching
3770 * (RFC4035, section 2.5 and RFC3007).
3772 * We don't check for RRSIG, because we don't store RRSIG records
3775 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3776 cname_ok = ISC_FALSE;
3779 * We now go looking for rdata...
3782 lock = &search.rbtdb->node_locks[node->locknum].lock;
3783 NODE_LOCK(lock, isc_rwlocktype_read);
3787 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3791 empty_node = ISC_TRUE;
3792 for (header = node->data; header != NULL; header = header_next) {
3793 header_next = header->next;
3795 * Look for an active, extant rdataset.
3798 if (header->serial <= search.serial &&
3801 * Is this a "this rdataset doesn't
3804 if (NONEXISTENT(header))
3808 header = header->down;
3809 } while (header != NULL);
3810 if (header != NULL) {
3812 * We now know that there is at least one active
3813 * rdataset at this node.
3815 empty_node = ISC_FALSE;
3818 * Do special zone cut handling, if requested.
3820 if (maybe_zonecut &&
3821 header->type == dns_rdatatype_ns) {
3823 * We increment the reference count on node to
3824 * ensure that search->zonecut_rdataset will
3825 * still be valid later.
3827 new_reference(search.rbtdb, node);
3828 search.zonecut = node;
3829 search.zonecut_rdataset = header;
3830 search.zonecut_sigrdataset = NULL;
3831 search.need_cleanup = ISC_TRUE;
3832 maybe_zonecut = ISC_FALSE;
3833 at_zonecut = ISC_TRUE;
3835 * It is not clear if KEY should still be
3836 * allowed at the parent side of the zone
3837 * cut or not. It is needed for RFC3007
3838 * validated updates.
3840 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3841 && type != dns_rdatatype_nsec
3842 && type != dns_rdatatype_key) {
3844 * Glue is not OK, but any answer we
3845 * could return would be glue. Return
3851 if (found != NULL && foundsig != NULL)
3857 * If the NSEC3 record doesn't match the chain
3858 * we are using behave as if it isn't here.
3860 if (header->type == dns_rdatatype_nsec3 &&
3861 !matchparams(header, &search)) {
3862 NODE_UNLOCK(lock, isc_rwlocktype_read);
3866 * If we found a type we were looking for,
3869 if (header->type == type ||
3870 type == dns_rdatatype_any ||
3871 (header->type == dns_rdatatype_cname &&
3874 * We've found the answer!
3877 if (header->type == dns_rdatatype_cname &&
3880 * We may be finding a CNAME instead
3881 * of the desired type.
3883 * If we've already got the CNAME RRSIG,
3884 * use it, otherwise change sigtype
3885 * so that we find it.
3887 if (cnamesig != NULL)
3888 foundsig = cnamesig;
3891 RBTDB_RDATATYPE_SIGCNAME;
3894 * If we've got all we need, end the search.
3896 if (!maybe_zonecut && foundsig != NULL)
3898 } else if (header->type == sigtype) {
3900 * We've found the RRSIG rdataset for our
3901 * target type. Remember it.
3905 * If we've got all we need, end the search.
3907 if (!maybe_zonecut && found != NULL)
3909 } else if (header->type == dns_rdatatype_nsec &&
3910 !search.rbtversion->havensec3) {
3912 * Remember a NSEC rdataset even if we're
3913 * not specifically looking for it, because
3914 * we might need it later.
3916 nsecheader = header;
3917 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3918 !search.rbtversion->havensec3) {
3920 * If we need the NSEC rdataset, we'll also
3921 * need its signature.
3924 } else if (cname_ok &&
3925 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3927 * If we get a CNAME match, we'll also need
3937 * We have an exact match for the name, but there are no
3938 * active rdatasets in the desired version. That means that
3939 * this node doesn't exist in the desired version, and that
3940 * we really have a partial match.
3943 NODE_UNLOCK(lock, isc_rwlocktype_read);
3949 * If we didn't find what we were looking for...
3951 if (found == NULL) {
3952 if (search.zonecut != NULL) {
3954 * We were trying to find glue at a node beneath a
3955 * zone cut, but didn't.
3957 * Return the delegation.
3959 NODE_UNLOCK(lock, isc_rwlocktype_read);
3960 result = setup_delegation(&search, nodep, foundname,
3961 rdataset, sigrdataset);
3965 * The desired type doesn't exist.
3967 result = DNS_R_NXRRSET;
3968 if (search.rbtversion->secure == dns_db_secure &&
3969 !search.rbtversion->havensec3 &&
3970 (nsecheader == NULL || nsecsig == NULL)) {
3972 * The zone is secure but there's no NSEC,
3973 * or the NSEC has no signature!
3976 result = DNS_R_BADDB;
3980 NODE_UNLOCK(lock, isc_rwlocktype_read);
3981 result = find_closest_nsec(&search, nodep, foundname,
3982 rdataset, sigrdataset,
3984 search.rbtversion->secure);
3985 if (result == ISC_R_SUCCESS)
3986 result = DNS_R_EMPTYWILD;
3989 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3993 * There's no NSEC record, and we were told
3996 result = DNS_R_BADDB;
3999 if (nodep != NULL) {
4000 new_reference(search.rbtdb, node);
4003 if ((search.rbtversion->secure == dns_db_secure &&
4004 !search.rbtversion->havensec3) ||
4005 (search.options & DNS_DBFIND_FORCENSEC) != 0)
4007 bind_rdataset(search.rbtdb, node, nsecheader,
4009 if (nsecsig != NULL)
4010 bind_rdataset(search.rbtdb, node,
4011 nsecsig, 0, sigrdataset);
4014 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4019 * We found what we were looking for, or we found a CNAME.
4022 if (type != found->type &&
4023 type != dns_rdatatype_any &&
4024 found->type == dns_rdatatype_cname) {
4026 * We weren't doing an ANY query and we found a CNAME instead
4027 * of the type we were looking for, so we need to indicate
4028 * that result to the caller.
4030 result = DNS_R_CNAME;
4031 } else if (search.zonecut != NULL) {
4033 * If we're beneath a zone cut, we must indicate that the
4034 * result is glue, unless we're actually at the zone cut
4035 * and the type is NSEC or KEY.
4037 if (search.zonecut == node) {
4039 * It is not clear if KEY should still be
4040 * allowed at the parent side of the zone
4041 * cut or not. It is needed for RFC3007
4042 * validated updates.
4044 if (type == dns_rdatatype_nsec ||
4045 type == dns_rdatatype_nsec3 ||
4046 type == dns_rdatatype_key)
4047 result = ISC_R_SUCCESS;
4048 else if (type == dns_rdatatype_any)
4049 result = DNS_R_ZONECUT;
4051 result = DNS_R_GLUE;
4053 result = DNS_R_GLUE;
4055 * We might have found data that isn't glue, but was occluded
4056 * by a dynamic update. If the caller cares about this, they
4057 * will have told us to validate glue.
4059 * XXX We should cache the glue validity state!
4061 if (result == DNS_R_GLUE &&
4062 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4063 !valid_glue(&search, foundname, type, node)) {
4064 NODE_UNLOCK(lock, isc_rwlocktype_read);
4065 result = setup_delegation(&search, nodep, foundname,
4066 rdataset, sigrdataset);
4071 * An ordinary successful query!
4073 result = ISC_R_SUCCESS;
4076 if (nodep != NULL) {
4078 new_reference(search.rbtdb, node);
4080 search.need_cleanup = ISC_FALSE;
4084 if (type != dns_rdatatype_any) {
4085 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4086 if (foundsig != NULL)
4087 bind_rdataset(search.rbtdb, node, foundsig, 0,
4092 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4095 NODE_UNLOCK(lock, isc_rwlocktype_read);
4098 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4101 * If we found a zonecut but aren't going to use it, we have to
4104 if (search.need_cleanup) {
4105 node = search.zonecut;
4106 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4108 NODE_LOCK(lock, isc_rwlocktype_read);
4109 decrement_reference(search.rbtdb, node, 0,
4110 isc_rwlocktype_read, isc_rwlocktype_none,
4112 NODE_UNLOCK(lock, isc_rwlocktype_read);
4116 closeversion(db, &version, ISC_FALSE);
4118 dns_rbtnodechain_reset(&search.chain);
4124 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4125 isc_stdtime_t now, dns_dbnode_t **nodep,
4126 dns_name_t *foundname,
4127 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4136 UNUSED(sigrdataset);
4138 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4141 return (ISC_R_NOTIMPLEMENTED);
4145 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4146 rbtdb_search_t *search = arg;
4147 rdatasetheader_t *header, *header_prev, *header_next;
4148 rdatasetheader_t *dname_header, *sigdname_header;
4149 isc_result_t result;
4151 isc_rwlocktype_t locktype;
4155 REQUIRE(search->zonecut == NULL);
4158 * Keep compiler silent.
4162 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4163 locktype = isc_rwlocktype_read;
4164 NODE_LOCK(lock, locktype);
4167 * Look for a DNAME or RRSIG DNAME rdataset.
4169 dname_header = NULL;
4170 sigdname_header = NULL;
4172 for (header = node->data; header != NULL; header = header_next) {
4173 header_next = header->next;
4174 if (header->rdh_ttl <= search->now) {
4176 * This rdataset is stale. If no one else is
4177 * using the node, we can clean it up right
4178 * now, otherwise we mark it as stale, and
4179 * the node as dirty, so it will get cleaned
4182 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4183 (locktype == isc_rwlocktype_write ||
4184 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4186 * We update the node's status only when we
4187 * can get write access; otherwise, we leave
4188 * this work to others. Periodic cleaning
4189 * will eventually take the job as the last
4191 * We won't downgrade the lock, since other
4192 * rdatasets are probably stale, too.
4194 locktype = isc_rwlocktype_write;
4196 if (dns_rbtnode_refcurrent(node) == 0) {
4200 * header->down can be non-NULL if the
4201 * refcount has just decremented to 0
4202 * but decrement_reference() has not
4203 * performed clean_cache_node(), in
4204 * which case we need to purge the
4205 * stale headers first.
4207 mctx = search->rbtdb->common.mctx;
4208 clean_stale_headers(search->rbtdb,
4211 if (header_prev != NULL)
4215 node->data = header->next;
4216 free_rdataset(search->rbtdb, mctx,
4219 header->attributes |=
4220 RDATASET_ATTR_STALE;
4222 header_prev = header;
4225 header_prev = header;
4226 } else if (header->type == dns_rdatatype_dname &&
4228 dname_header = header;
4229 header_prev = header;
4230 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4232 sigdname_header = header;
4233 header_prev = header;
4235 header_prev = header;
4238 if (dname_header != NULL &&
4239 (!DNS_TRUST_PENDING(dname_header->trust) ||
4240 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4242 * We increment the reference count on node to ensure that
4243 * search->zonecut_rdataset will still be valid later.
4245 new_reference(search->rbtdb, node);
4246 INSIST(!ISC_LINK_LINKED(node, deadlink));
4247 search->zonecut = node;
4248 search->zonecut_rdataset = dname_header;
4249 search->zonecut_sigrdataset = sigdname_header;
4250 search->need_cleanup = ISC_TRUE;
4251 result = DNS_R_PARTIALMATCH;
4253 result = DNS_R_CONTINUE;
4255 NODE_UNLOCK(lock, locktype);
4260 static inline isc_result_t
4261 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4262 dns_dbnode_t **nodep, dns_name_t *foundname,
4263 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4266 dns_rbtnode_t *level_node;
4267 rdatasetheader_t *header, *header_prev, *header_next;
4268 rdatasetheader_t *found, *foundsig;
4269 isc_result_t result = ISC_R_NOTFOUND;
4274 isc_rwlocktype_t locktype;
4277 * Caller must be holding the tree lock.
4280 rbtdb = search->rbtdb;
4281 i = search->chain.level_matches;
4284 locktype = isc_rwlocktype_read;
4285 lock = &rbtdb->node_locks[node->locknum].lock;
4286 NODE_LOCK(lock, locktype);
4289 * Look for NS and RRSIG NS rdatasets.
4294 for (header = node->data;
4296 header = header_next) {
4297 header_next = header->next;
4298 if (header->rdh_ttl <= search->now) {
4300 * This rdataset is stale. If no one else is
4301 * using the node, we can clean it up right
4302 * now, otherwise we mark it as stale, and
4303 * the node as dirty, so it will get cleaned
4306 if ((header->rdh_ttl <= search->now -
4308 (locktype == isc_rwlocktype_write ||
4309 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4311 * We update the node's status only
4312 * when we can get write access.
4314 locktype = isc_rwlocktype_write;
4316 if (dns_rbtnode_refcurrent(node)
4320 m = search->rbtdb->common.mctx;
4321 clean_stale_headers(
4324 if (header_prev != NULL)
4330 free_rdataset(rbtdb, m,
4333 header->attributes |=
4334 RDATASET_ATTR_STALE;
4336 header_prev = header;
4339 header_prev = header;
4340 } else if (EXISTS(header)) {
4342 * We've found an extant rdataset. See if
4343 * we're interested in it.
4345 if (header->type == dns_rdatatype_ns) {
4347 if (foundsig != NULL)
4349 } else if (header->type ==
4350 RBTDB_RDATATYPE_SIGNS) {
4355 header_prev = header;
4357 header_prev = header;
4360 if (found != NULL) {
4362 * If we have to set foundname, we do it before
4363 * anything else. If we were to set foundname after
4364 * we had set nodep or bound the rdataset, then we'd
4365 * have to undo that work if dns_name_concatenate()
4366 * failed. By setting foundname first, there's
4367 * nothing to undo if we have trouble.
4369 if (foundname != NULL) {
4370 dns_name_init(&name, NULL);
4371 dns_rbt_namefromnode(node, &name);
4372 result = dns_name_copy(&name, foundname, NULL);
4373 while (result == ISC_R_SUCCESS && i > 0) {
4375 level_node = search->chain.levels[i];
4376 dns_name_init(&name, NULL);
4377 dns_rbt_namefromnode(level_node,
4380 dns_name_concatenate(foundname,
4385 if (result != ISC_R_SUCCESS) {
4390 result = DNS_R_DELEGATION;
4391 if (nodep != NULL) {
4392 new_reference(search->rbtdb, node);
4395 bind_rdataset(search->rbtdb, node, found, search->now,
4397 if (foundsig != NULL)
4398 bind_rdataset(search->rbtdb, node, foundsig,
4399 search->now, sigrdataset);
4400 if (need_headerupdate(found, search->now) ||
4401 (foundsig != NULL &&
4402 need_headerupdate(foundsig, search->now))) {
4403 if (locktype != isc_rwlocktype_write) {
4404 NODE_UNLOCK(lock, locktype);
4405 NODE_LOCK(lock, isc_rwlocktype_write);
4406 locktype = isc_rwlocktype_write;
4408 if (need_headerupdate(found, search->now))
4409 update_header(search->rbtdb, found,
4411 if (foundsig != NULL &&
4412 need_headerupdate(foundsig, search->now)) {
4413 update_header(search->rbtdb, foundsig,
4420 NODE_UNLOCK(lock, locktype);
4422 if (found == NULL && i > 0) {
4424 node = search->chain.levels[i];
4434 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4435 isc_stdtime_t now, dns_name_t *foundname,
4436 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4438 dns_rbtnode_t *node;
4439 rdatasetheader_t *header, *header_next, *header_prev;
4440 rdatasetheader_t *found, *foundsig;
4441 isc_boolean_t empty_node;
4442 isc_result_t result;
4443 dns_fixedname_t fname, forigin;
4444 dns_name_t *name, *origin;
4445 rbtdb_rdatatype_t matchtype, sigmatchtype;
4447 isc_rwlocktype_t locktype;
4449 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4450 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4451 dns_rdatatype_nsec);
4455 dns_fixedname_init(&fname);
4456 name = dns_fixedname_name(&fname);
4457 dns_fixedname_init(&forigin);
4458 origin = dns_fixedname_name(&forigin);
4459 result = dns_rbtnodechain_current(&search->chain, name,
4461 if (result != ISC_R_SUCCESS)
4463 locktype = isc_rwlocktype_read;
4464 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4465 NODE_LOCK(lock, locktype);
4468 empty_node = ISC_TRUE;
4470 for (header = node->data;
4472 header = header_next) {
4473 header_next = header->next;
4474 if (header->rdh_ttl <= now) {
4476 * This rdataset is stale. If no one else is
4477 * using the node, we can clean it up right
4478 * now, otherwise we mark it as stale, and the
4479 * node as dirty, so it will get cleaned up
4482 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4483 (locktype == isc_rwlocktype_write ||
4484 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4486 * We update the node's status only
4487 * when we can get write access.
4489 locktype = isc_rwlocktype_write;
4491 if (dns_rbtnode_refcurrent(node)
4495 m = search->rbtdb->common.mctx;
4496 clean_stale_headers(
4499 if (header_prev != NULL)
4503 node->data = header->next;
4504 free_rdataset(search->rbtdb, m,
4507 header->attributes |=
4508 RDATASET_ATTR_STALE;
4510 header_prev = header;
4513 header_prev = header;
4516 if (NONEXISTENT(header) ||
4517 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4518 header_prev = header;
4521 empty_node = ISC_FALSE;
4522 if (header->type == matchtype)
4524 else if (header->type == sigmatchtype)
4526 header_prev = header;
4528 if (found != NULL) {
4529 result = dns_name_concatenate(name, origin,
4531 if (result != ISC_R_SUCCESS)
4533 bind_rdataset(search->rbtdb, node, found,
4535 if (foundsig != NULL)
4536 bind_rdataset(search->rbtdb, node, foundsig,
4538 new_reference(search->rbtdb, node);
4540 result = DNS_R_COVERINGNSEC;
4541 } else if (!empty_node) {
4542 result = ISC_R_NOTFOUND;
4544 result = dns_rbtnodechain_prev(&search->chain, NULL,
4547 NODE_UNLOCK(lock, locktype);
4548 } while (empty_node && result == ISC_R_SUCCESS);
4553 * Mark a database for response policy rewriting.
4557 get_rpz_enabled(dns_db_t *db, dns_rpz_st_t *st)
4561 rbtdb = (dns_rbtdb_t *)db;
4562 REQUIRE(VALID_RBTDB(rbtdb));
4563 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4564 dns_rpz_enabled(rbtdb->rpz_cidr, st);
4565 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4569 * Search the CIDR block tree of a response policy tree of trees for all of
4570 * the IP addresses in an A or AAAA rdataset.
4571 * Among the policies for all IPv4 and IPv6 addresses for a name, choose
4572 * the longest prefix. Among those with the longest prefix, the first
4573 * configured policy. Among answers with the longest prefixes for
4574 * two or more IP addresses in the A and AAAA rdatasets the lexically
4578 rpz_findips(dns_rpz_zone_t *rpz, dns_rpz_type_t rpz_type,
4579 dns_zone_t *zone, dns_db_t *db, dns_dbversion_t *version,
4580 dns_rdataset_t *ardataset, dns_rpz_st_t *st)
4584 struct in6_addr in6a;
4585 isc_netaddr_t netaddr;
4586 dns_fixedname_t selfnamef, qnamef;
4587 dns_name_t *selfname, *qname;
4588 dns_rbtnode_t *node;
4589 dns_rdataset_t zrdataset;
4590 dns_rpz_cidr_bits_t prefix;
4591 isc_result_t result;
4592 dns_rpz_policy_t rpz_policy;
4595 rbtdb = (dns_rbtdb_t *)db;
4596 REQUIRE(VALID_RBTDB(rbtdb));
4597 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4599 if (rbtdb->rpz_cidr == NULL) {
4600 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4602 dns_zone_detach(&zone);
4603 return (ISC_R_UNEXPECTED);
4606 dns_fixedname_init(&selfnamef);
4607 dns_fixedname_init(&qnamef);
4608 selfname = dns_fixedname_name(&selfnamef);
4609 qname = dns_fixedname_name(&qnamef);
4611 for (result = dns_rdataset_first(ardataset);
4612 result == ISC_R_SUCCESS;
4613 result = dns_rdataset_next(ardataset)) {
4614 dns_rdata_t rdata = DNS_RDATA_INIT;
4615 dns_rdataset_current(ardataset, &rdata);
4616 switch (rdata.type) {
4617 case dns_rdatatype_a:
4618 INSIST(rdata.length == 4);
4619 memcpy(&ina.s_addr, rdata.data, 4);
4620 isc_netaddr_fromin(&netaddr, &ina);
4622 case dns_rdatatype_aaaa:
4623 INSIST(rdata.length == 16);
4624 memcpy(in6a.s6_addr, rdata.data, 16);
4625 isc_netaddr_fromin6(&netaddr, &in6a);
4631 result = dns_rpz_cidr_find(rbtdb->rpz_cidr, &netaddr, rpz_type,
4632 selfname, qname, &prefix);
4633 if (result != ISC_R_SUCCESS)
4637 * Choose the policy with the longest matching prefix.
4638 * Between policies with the same prefix, choose the first
4641 if (st->m.policy != DNS_RPZ_POLICY_MISS) {
4642 if (prefix < st->m.prefix)
4644 if (prefix == st->m.prefix &&
4645 rpz->num > st->m.rpz->num)
4650 * We have an entry with a prefix at least as long as the prefix
4651 * of the entry previously saved in st (the rpz_st). Find the node
4652 * corresponding to the CIDR tree entry.
4655 result = dns_rbt_findnode(rbtdb->tree, qname, NULL,
4656 &node, NULL, 0, NULL, NULL);
4657 if (result != ISC_R_SUCCESS) {
4658 char namebuf[DNS_NAME_FORMATSIZE];
4660 dns_name_format(qname, namebuf, sizeof(namebuf));
4661 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4662 DNS_LOGMODULE_CACHE, DNS_RPZ_ERROR_LEVEL,
4663 "rpz_findips findnode(%s): %s",
4664 namebuf, isc_result_totext(result));
4668 * First look for a simple rewrite of the IP address.
4669 * If that fails, look for a CNAME. If we cannot find
4670 * a CNAME or the CNAME is neither of the special forms
4671 * "*" or ".", treat it like a real CNAME.
4673 dns_rdataset_init(&zrdataset);
4674 result = dns_db_findrdataset(db, node, version, ardataset->type,
4675 0, 0, &zrdataset, NULL);
4676 if (result != ISC_R_SUCCESS)
4677 result = dns_db_findrdataset(db, node, version,
4678 dns_rdatatype_cname,
4679 0, 0, &zrdataset, NULL);
4680 if (result == ISC_R_SUCCESS) {
4681 if (zrdataset.type != dns_rdatatype_cname) {
4682 rpz_policy = DNS_RPZ_POLICY_RECORD;
4684 rpz_policy = dns_rpz_decode_cname(&zrdataset,
4686 if (rpz_policy == DNS_RPZ_POLICY_RECORD)
4687 result = DNS_R_CNAME;
4689 ttl = zrdataset.ttl;
4691 rpz_policy = DNS_RPZ_POLICY_RECORD;
4692 result = DNS_R_NXRRSET;
4693 ttl = DNS_RPZ_TTL_DEFAULT;
4697 * Use an overriding action specified in the configuration file
4699 if (rpz->policy != DNS_RPZ_POLICY_GIVEN &&
4700 rpz_policy != DNS_RPZ_POLICY_NO_OP)
4701 rpz_policy = rpz->policy;
4704 * We know the new prefix is at least as long as the current.
4705 * Prefer the new answer if the new prefix is longer.
4706 * Prefer the zone configured first if the prefixes are equal.
4707 * With two actions from the same zone, prefer the action
4708 * on the "smallest" name.
4710 if (st->m.policy == DNS_RPZ_POLICY_MISS ||
4711 prefix > st->m.prefix ||
4712 rpz->num <= st->m.rpz->num ||
4713 0 > dns_name_compare(qname, st->qname)) {
4714 if (dns_rdataset_isassociated(st->m.rdataset))
4715 dns_rdataset_disassociate(st->m.rdataset);
4716 if (st->m.node != NULL)
4717 dns_db_detachnode(st->m.db, &st->m.node);
4718 if (st->m.db != NULL)
4719 dns_db_detach(&st->m.db);
4720 if (st->m.zone != NULL)
4721 dns_zone_detach(&st->m.zone);
4723 st->m.type = rpz_type;
4724 st->m.prefix = prefix;
4725 st->m.policy = rpz_policy;
4727 st->m.result = result;
4728 dns_name_copy(qname, st->qname, NULL);
4729 if (rpz_policy == DNS_RPZ_POLICY_RECORD &&
4730 result != DNS_R_NXRRSET) {
4731 dns_rdataset_clone(&zrdataset,st->m.rdataset);
4732 dns_db_attachnode(db, node, &st->m.node);
4734 dns_db_attach(db, &st->m.db);
4735 dns_zone_attach(zone, &st->m.zone);
4737 if (dns_rdataset_isassociated(&zrdataset))
4738 dns_rdataset_disassociate(&zrdataset);
4741 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4742 return (ISC_R_SUCCESS);
4747 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4748 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4749 dns_dbnode_t **nodep, dns_name_t *foundname,
4750 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4752 dns_rbtnode_t *node = NULL;
4753 isc_result_t result;
4754 rbtdb_search_t search;
4755 isc_boolean_t cname_ok = ISC_TRUE;
4756 isc_boolean_t empty_node;
4758 isc_rwlocktype_t locktype;
4759 rdatasetheader_t *header, *header_prev, *header_next;
4760 rdatasetheader_t *found, *nsheader;
4761 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4762 rdatasetheader_t *update, *updatesig;
4763 rbtdb_rdatatype_t sigtype, negtype;
4767 search.rbtdb = (dns_rbtdb_t *)db;
4769 REQUIRE(VALID_RBTDB(search.rbtdb));
4770 REQUIRE(version == NULL);
4773 isc_stdtime_get(&now);
4775 search.rbtversion = NULL;
4777 search.options = options;
4778 search.copy_name = ISC_FALSE;
4779 search.need_cleanup = ISC_FALSE;
4780 search.wild = ISC_FALSE;
4781 search.zonecut = NULL;
4782 dns_fixedname_init(&search.zonecut_name);
4783 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4788 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4791 * Search down from the root of the tree. If, while going down, we
4792 * encounter a callback node, cache_zonecut_callback() will search the
4793 * rdatasets at the zone cut for a DNAME rdataset.
4795 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4796 &search.chain, DNS_RBTFIND_EMPTYDATA,
4797 cache_zonecut_callback, &search);
4799 if (result == DNS_R_PARTIALMATCH) {
4800 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4801 result = find_coveringnsec(&search, nodep, now,
4802 foundname, rdataset,
4804 if (result == DNS_R_COVERINGNSEC)
4807 if (search.zonecut != NULL) {
4808 result = setup_delegation(&search, nodep, foundname,
4809 rdataset, sigrdataset);
4813 result = find_deepest_zonecut(&search, node, nodep,
4814 foundname, rdataset,
4818 } else if (result != ISC_R_SUCCESS)
4822 * Certain DNSSEC types are not subject to CNAME matching
4823 * (RFC4035, section 2.5 and RFC3007).
4825 * We don't check for RRSIG, because we don't store RRSIG records
4828 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4829 cname_ok = ISC_FALSE;
4832 * We now go looking for rdata...
4835 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4836 locktype = isc_rwlocktype_read;
4837 NODE_LOCK(lock, locktype);
4841 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4842 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4846 empty_node = ISC_TRUE;
4848 for (header = node->data; header != NULL; header = header_next) {
4849 header_next = header->next;
4850 if (header->rdh_ttl <= now) {
4852 * This rdataset is stale. If no one else is using the
4853 * node, we can clean it up right now, otherwise we
4854 * mark it as stale, and the node as dirty, so it will
4855 * get cleaned up later.
4857 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4858 (locktype == isc_rwlocktype_write ||
4859 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4861 * We update the node's status only when we
4862 * can get write access.
4864 locktype = isc_rwlocktype_write;
4866 if (dns_rbtnode_refcurrent(node) == 0) {
4869 mctx = search.rbtdb->common.mctx;
4870 clean_stale_headers(search.rbtdb, mctx,
4872 if (header_prev != NULL)
4876 node->data = header->next;
4877 free_rdataset(search.rbtdb, mctx,
4880 header->attributes |=
4881 RDATASET_ATTR_STALE;
4883 header_prev = header;
4886 header_prev = header;
4887 } else if (EXISTS(header)) {
4889 * We now know that there is at least one active
4890 * non-stale rdataset at this node.
4892 empty_node = ISC_FALSE;
4895 * If we found a type we were looking for, remember
4898 if (header->type == type ||
4899 (type == dns_rdatatype_any &&
4900 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4901 (cname_ok && header->type ==
4902 dns_rdatatype_cname)) {
4904 * We've found the answer.
4907 if (header->type == dns_rdatatype_cname &&
4911 * If we've already got the CNAME RRSIG,
4912 * use it, otherwise change sigtype
4913 * so that we find it.
4915 if (cnamesig != NULL)
4916 foundsig = cnamesig;
4919 RBTDB_RDATATYPE_SIGCNAME;
4920 foundsig = cnamesig;
4922 } else if (header->type == sigtype) {
4924 * We've found the RRSIG rdataset for our
4925 * target type. Remember it.
4928 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4929 header->type == negtype) {
4931 * We've found a negative cache entry.
4934 } else if (header->type == dns_rdatatype_ns) {
4936 * Remember a NS rdataset even if we're
4937 * not specifically looking for it, because
4938 * we might need it later.
4941 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4943 * If we need the NS rdataset, we'll also
4944 * need its signature.
4947 } else if (cname_ok &&
4948 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4950 * If we get a CNAME match, we'll also need
4955 header_prev = header;
4957 header_prev = header;
4962 * We have an exact match for the name, but there are no
4963 * extant rdatasets. That means that this node doesn't
4964 * meaningfully exist, and that we really have a partial match.
4966 NODE_UNLOCK(lock, locktype);
4971 * If we didn't find what we were looking for...
4973 if (found == NULL ||
4974 (DNS_TRUST_ADDITIONAL(found->trust) &&
4975 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4976 (found->trust == dns_trust_glue &&
4977 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4978 (DNS_TRUST_PENDING(found->trust) &&
4979 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4981 * If there is an NS rdataset at this node, then this is the
4984 if (nsheader != NULL) {
4985 if (nodep != NULL) {
4986 new_reference(search.rbtdb, node);
4987 INSIST(!ISC_LINK_LINKED(node, deadlink));
4990 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4992 if (need_headerupdate(nsheader, search.now))
4994 if (nssig != NULL) {
4995 bind_rdataset(search.rbtdb, node, nssig,
4996 search.now, sigrdataset);
4997 if (need_headerupdate(nssig, search.now))
5000 result = DNS_R_DELEGATION;
5005 * Go find the deepest zone cut.
5007 NODE_UNLOCK(lock, locktype);
5012 * We found what we were looking for, or we found a CNAME.
5015 if (nodep != NULL) {
5016 new_reference(search.rbtdb, node);
5017 INSIST(!ISC_LINK_LINKED(node, deadlink));
5021 if (NEGATIVE(found)) {
5023 * We found a negative cache entry.
5025 if (NXDOMAIN(found))
5026 result = DNS_R_NCACHENXDOMAIN;
5028 result = DNS_R_NCACHENXRRSET;
5029 } else if (type != found->type &&
5030 type != dns_rdatatype_any &&
5031 found->type == dns_rdatatype_cname) {
5033 * We weren't doing an ANY query and we found a CNAME instead
5034 * of the type we were looking for, so we need to indicate
5035 * that result to the caller.
5037 result = DNS_R_CNAME;
5040 * An ordinary successful query!
5042 result = ISC_R_SUCCESS;
5045 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5046 result == DNS_R_NCACHENXRRSET) {
5047 bind_rdataset(search.rbtdb, node, found, search.now,
5049 if (need_headerupdate(found, search.now))
5051 if (foundsig != NULL) {
5052 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5054 if (need_headerupdate(foundsig, search.now))
5055 updatesig = foundsig;
5060 if ((update != NULL || updatesig != NULL) &&
5061 locktype != isc_rwlocktype_write) {
5062 NODE_UNLOCK(lock, locktype);
5063 NODE_LOCK(lock, isc_rwlocktype_write);
5064 locktype = isc_rwlocktype_write;
5066 if (update != NULL && need_headerupdate(update, search.now))
5067 update_header(search.rbtdb, update, search.now);
5068 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5069 update_header(search.rbtdb, updatesig, search.now);
5071 NODE_UNLOCK(lock, locktype);
5074 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5077 * If we found a zonecut but aren't going to use it, we have to
5080 if (search.need_cleanup) {
5081 node = search.zonecut;
5082 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5084 NODE_LOCK(lock, isc_rwlocktype_read);
5085 decrement_reference(search.rbtdb, node, 0,
5086 isc_rwlocktype_read, isc_rwlocktype_none,
5088 NODE_UNLOCK(lock, isc_rwlocktype_read);
5091 dns_rbtnodechain_reset(&search.chain);
5097 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
5098 isc_stdtime_t now, dns_dbnode_t **nodep,
5099 dns_name_t *foundname,
5100 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
5102 dns_rbtnode_t *node = NULL;
5104 isc_result_t result;
5105 rbtdb_search_t search;
5106 rdatasetheader_t *header, *header_prev, *header_next;
5107 rdatasetheader_t *found, *foundsig;
5108 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
5109 isc_rwlocktype_t locktype;
5111 search.rbtdb = (dns_rbtdb_t *)db;
5113 REQUIRE(VALID_RBTDB(search.rbtdb));
5116 isc_stdtime_get(&now);
5118 search.rbtversion = NULL;
5120 search.options = options;
5121 search.copy_name = ISC_FALSE;
5122 search.need_cleanup = ISC_FALSE;
5123 search.wild = ISC_FALSE;
5124 search.zonecut = NULL;
5125 dns_fixedname_init(&search.zonecut_name);
5126 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
5129 if ((options & DNS_DBFIND_NOEXACT) != 0)
5130 rbtoptions |= DNS_RBTFIND_NOEXACT;
5132 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5135 * Search down from the root of the tree.
5137 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5138 &search.chain, rbtoptions, NULL, &search);
5140 if (result == DNS_R_PARTIALMATCH) {
5142 result = find_deepest_zonecut(&search, node, nodep, foundname,
5143 rdataset, sigrdataset);
5145 } else if (result != ISC_R_SUCCESS)
5149 * We now go looking for an NS rdataset at the node.
5152 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5153 locktype = isc_rwlocktype_read;
5154 NODE_LOCK(lock, locktype);
5159 for (header = node->data; header != NULL; header = header_next) {
5160 header_next = header->next;
5161 if (header->rdh_ttl <= now) {
5163 * This rdataset is stale. If no one else is using the
5164 * node, we can clean it up right now, otherwise we
5165 * mark it as stale, and the node as dirty, so it will
5166 * get cleaned up later.
5168 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5169 (locktype == isc_rwlocktype_write ||
5170 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5172 * We update the node's status only when we
5173 * can get write access.
5175 locktype = isc_rwlocktype_write;
5177 if (dns_rbtnode_refcurrent(node) == 0) {
5180 mctx = search.rbtdb->common.mctx;
5181 clean_stale_headers(search.rbtdb, mctx,
5183 if (header_prev != NULL)
5187 node->data = header->next;
5188 free_rdataset(search.rbtdb, mctx,
5191 header->attributes |=
5192 RDATASET_ATTR_STALE;
5194 header_prev = header;
5197 header_prev = header;
5198 } else if (EXISTS(header)) {
5200 * If we found a type we were looking for, remember
5203 if (header->type == dns_rdatatype_ns) {
5205 * Remember a NS rdataset even if we're
5206 * not specifically looking for it, because
5207 * we might need it later.
5210 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5212 * If we need the NS rdataset, we'll also
5213 * need its signature.
5217 header_prev = header;
5219 header_prev = header;
5222 if (found == NULL) {
5224 * No NS records here.
5226 NODE_UNLOCK(lock, locktype);
5230 if (nodep != NULL) {
5231 new_reference(search.rbtdb, node);
5232 INSIST(!ISC_LINK_LINKED(node, deadlink));
5236 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
5237 if (foundsig != NULL)
5238 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5241 if (need_headerupdate(found, search.now) ||
5242 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
5243 if (locktype != isc_rwlocktype_write) {
5244 NODE_UNLOCK(lock, locktype);
5245 NODE_LOCK(lock, isc_rwlocktype_write);
5246 locktype = isc_rwlocktype_write;
5248 if (need_headerupdate(found, search.now))
5249 update_header(search.rbtdb, found, search.now);
5250 if (foundsig != NULL &&
5251 need_headerupdate(foundsig, search.now)) {
5252 update_header(search.rbtdb, foundsig, search.now);
5256 NODE_UNLOCK(lock, locktype);
5259 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5261 INSIST(!search.need_cleanup);
5263 dns_rbtnodechain_reset(&search.chain);
5265 if (result == DNS_R_DELEGATION)
5266 result = ISC_R_SUCCESS;
/*
 * attachnode -- take one more reference on the node 'source'.
 *
 * 'db' must be a valid rbtdb; 'targetp' must be non-NULL and must point at
 * a NULL pointer.  The node reference count is bumped with
 * dns_rbtnode_refincrement() while the lock selected by node->locknum is
 * held through NODE_STRONGLOCK / NODE_STRONGUNLOCK.
 *
 * NOTE(review): the declaration of 'refs' and the store into '*targetp'
 * do not appear in this listing -- presumably '*targetp' ends up equal to
 * 'source'; confirm against the full file.
 */
5272 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5273 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5274 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5277 REQUIRE(VALID_RBTDB(rbtdb));
5278 REQUIRE(targetp != NULL && *targetp == NULL);
5280 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5281 dns_rbtnode_refincrement(node, &refs);
5283 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * detachnode -- release the node reference held in '*targetp'.
 *
 * 'db' must be a valid rbtdb; 'targetp' and '*targetp' must be non-NULL.
 * decrement_reference() is called with the node lock held in read mode.
 * If it returns true, the lock bucket has no references left and the
 * bucket is marked 'exiting', the bucket is considered inactive.
 *
 * After the node lock is released, the visible code takes the database
 * lock in write mode, sets 'want_free' when rbtdb->active is zero, and
 * then logs and calls free_rbtdb() on the database.
 *
 * NOTE(review): the guards around the database-lock section and the
 * free_rbtdb() section (presumably 'inactive' and 'want_free'), any
 * update of rbtdb->active, and the clearing of '*targetp' are not visible
 * in this listing -- confirm against the full file.
 */
5289 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5290 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5291 dns_rbtnode_t *node;
5292 isc_boolean_t want_free = ISC_FALSE;
5293 isc_boolean_t inactive = ISC_FALSE;
5294 rbtdb_nodelock_t *nodelock;
5296 REQUIRE(VALID_RBTDB(rbtdb));
5297 REQUIRE(targetp != NULL && *targetp != NULL);
5299 node = (dns_rbtnode_t *)(*targetp);
5300 nodelock = &rbtdb->node_locks[node->locknum];
/* decrement_reference() runs with the node lock held in read mode. */
5302 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5304 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5305 isc_rwlocktype_none, ISC_FALSE)) {
5306 if (isc_refcount_current(&nodelock->references) == 0 &&
5307 nodelock->exiting) {
5308 inactive = ISC_TRUE;
5312 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5317 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5319 if (rbtdb->active == 0)
5320 want_free = ISC_TRUE;
5321 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/* Build a printable origin name (or a placeholder) for the debug log. */
5323 char buf[DNS_NAME_FORMATSIZE];
5324 if (dns_name_dynamic(&rbtdb->common.origin))
5325 dns_name_format(&rbtdb->common.origin, buf,
5328 strcpy(buf, "<UNKNOWN>");
5329 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5330 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5331 "calling free_rbtdb(%s)", buf);
5332 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * expirenode -- flag expired rdatasets at 'node' as stale and, when the
 * memory context is over its limit, possibly force-expire the others.
 *
 * The caller must hold a tree lock (see the note in the body).  The node
 * lock is always taken in write mode for the scan.  For each header on
 * the node's 'next' list:
 *   - if its TTL is at or before 'now - RBTDB_VIRTUAL', it is flagged
 *     RDATASET_ATTR_STALE;
 *   - otherwise, if 'force_expire' is set, a header that is not RETAIN()
 *     has its TTL set to 0 and is flagged stale, while a RETAIN() header
 *     is left alone;
 *   - otherwise the header is left alone.
 *
 * 'force_expire' is only computed when isc_mem_isovermem() is true: it is
 * set for a node whose 'down' pointer is NULL when a random value is
 * divisible by 4.  The log messages use level ISC_LOG_DEBUG(2) with the
 * database category and cache module.
 *
 * The only return visible here yields ISC_R_SUCCESS.
 *
 * NOTE(review): the condition guarding isc_stdtime_get(&now) (presumably
 * 'now == 0'), the declaration of 'val' and the 'if (log)' guards around
 * the log calls are not visible in this listing -- confirm.
 */
5338 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5339 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5340 dns_rbtnode_t *rbtnode = node;
5341 rdatasetheader_t *header;
5342 isc_boolean_t force_expire = ISC_FALSE;
5344 * These are the category and module used by the cache cleaner.
5346 isc_boolean_t log = ISC_FALSE;
5347 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5348 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5349 int level = ISC_LOG_DEBUG(2);
5350 char printname[DNS_NAME_FORMATSIZE];
5352 REQUIRE(VALID_RBTDB(rbtdb));
5355 * Caller must hold a tree lock.
5359 isc_stdtime_get(&now);
5361 if (isc_mem_isovermem(rbtdb->common.mctx)) {
5364 isc_random_get(&val);
5366 * XXXDCL Could stand to have a better policy, like LRU.
5368 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5371 * Note that 'log' can be true IFF overmem is also true.
5372 * overmem can currently only be true for cache
5373 * databases -- hence all of the "overmem cache" log strings.
5375 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5377 isc_log_write(dns_lctx, category, module, level,
5378 "overmem cache: %s %s",
5379 force_expire ? "FORCE" : "check",
5380 dns_rbt_formatnodename(rbtnode,
5382 sizeof(printname)));
5386 * We may not need write access, but this code path is not performance
5387 * sensitive, so it should be okay to always lock as a writer.
5389 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5390 isc_rwlocktype_write);
5392 for (header = rbtnode->data; header != NULL; header = header->next)
5393 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5395 * We don't check if refcurrent(rbtnode) == 0 and try
5396 * to free like we do in cache_find(), because
5397 * refcurrent(rbtnode) must be non-zero. This is so
5398 * because 'node' is an argument to the function.
5400 header->attributes |= RDATASET_ATTR_STALE;
5403 isc_log_write(dns_lctx, category, module,
5404 level, "overmem cache: stale %s",
5406 } else if (force_expire) {
5407 if (! RETAIN(header)) {
5408 set_ttl(rbtdb, header, 0);
5409 header->attributes |= RDATASET_ATTR_STALE;
5412 isc_log_write(dns_lctx, category, module,
5413 level, "overmem cache: "
5414 "reprieve by RETAIN() %s",
5417 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5418 isc_log_write(dns_lctx, category, module, level,
5419 "overmem cache: saved %s", printname);
5421 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5422 isc_rwlocktype_write);
5424 return (ISC_R_SUCCESS);
/*
 * overmem -- callback taking the database and an over-memory flag.  Per
 * the comment in its body it is intentionally empty; the body comment
 * points at adb.c:water() for background.
 */
5428 overmem(dns_db_t *db, isc_boolean_t overmem) {
5429 /* This is an empty callback. See adb.c:water() */
/*
 * printnode -- write a human-readable description of 'node' to 'out'.
 *
 * 'db' must be a valid rbtdb.  With the node lock held in read mode, the
 * node address, current reference count and lock number are printed.
 * When the node has data, the type of each header on the 'next' list is
 * printed, followed by serial / ttl / trust / attributes details for the
 * headers reached by following 'down' until it is NULL.  A node without
 * data prints "(empty)".
 *
 * NOTE(review): some fprintf arguments and the opening of the do/while
 * loop are not visible in this listing.
 */
5438 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5439 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5440 dns_rbtnode_t *rbtnode = node;
5441 isc_boolean_t first;
5443 REQUIRE(VALID_RBTDB(rbtdb));
5445 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5446 isc_rwlocktype_read);
5448 fprintf(out, "node %p, %u references, locknum = %u\n",
5449 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5451 if (rbtnode->data != NULL) {
5452 rdatasetheader_t *current, *top_next;
5454 for (current = rbtnode->data; current != NULL;
5455 current = top_next) {
5456 top_next = current->next;
5458 fprintf(out, "\ttype %u", current->type);
5464 "\tserial = %lu, ttl = %u, "
5465 "trust = %u, attributes = %u, "
5467 (unsigned long)current->serial,
5470 current->attributes,
5472 current = current->down;
5473 } while (current != NULL);
5476 fprintf(out, "(empty)\n");
5478 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5479 isc_rwlocktype_read);
/*
 * createiterator -- allocate and initialise a database iterator for 'db'.
 *
 * 'db' must be a valid rbtdb.  Bits of 'options' consulted here:
 *   DNS_DB_RELATIVENAMES  sets common.relative_names;
 *   DNS_DB_NSEC3ONLY      sets 'nsec3only' and makes 'current' point at
 *                         the iterator's nsec3chain;
 *   DNS_DB_NONSEC3        sets 'nonsec3'.
 *
 * The new iterator holds a reference on 'db' taken with dns_db_attach(),
 * starts out paused with no tree lock recorded and no current node, and
 * has both of its node chains initialised.
 *
 * Returns ISC_R_NOMEMORY when the allocation fails; otherwise stores the
 * iterator in '*iteratorp' and returns ISC_R_SUCCESS.
 */
5483 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5485 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5486 rbtdb_dbiterator_t *rbtdbiter;
5488 REQUIRE(VALID_RBTDB(rbtdb));
5490 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5491 if (rbtdbiter == NULL)
5492 return (ISC_R_NOMEMORY);
5494 rbtdbiter->common.methods = &dbiterator_methods;
5495 rbtdbiter->common.db = NULL;
5496 dns_db_attach(db, &rbtdbiter->common.db);
5497 rbtdbiter->common.relative_names =
5498 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5499 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5500 rbtdbiter->common.cleaning = ISC_FALSE;
5501 rbtdbiter->paused = ISC_TRUE;
5502 rbtdbiter->tree_locked = isc_rwlocktype_none;
5503 rbtdbiter->result = ISC_R_SUCCESS;
5504 dns_fixedname_init(&rbtdbiter->name);
5505 dns_fixedname_init(&rbtdbiter->origin);
5506 rbtdbiter->node = NULL;
5507 rbtdbiter->delete = 0;
5508 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5509 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5510 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5511 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5512 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
/* Point 'current' at the chain selected by the nsec3only flag. */
5513 if (rbtdbiter->nsec3only)
5514 rbtdbiter->current = &rbtdbiter->nsec3chain;
5516 rbtdbiter->current = &rbtdbiter->chain;
5518 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5520 return (ISC_R_SUCCESS);
/*
 * zone_findrdataset -- look up the rdataset of 'type' / 'covers' at 'node'
 * as seen by 'version'.
 *
 * 'db' must be a valid rbtdb and 'type' must not be dns_rdatatype_any.
 * When 'version' is NULL the current version is obtained through
 * currentversion() and 'close_version' is set; closeversion() is called
 * on it near the end of the function.
 *
 * With the node lock held in read mode, each entry on the node's 'next'
 * list is walked along its 'down' chain, comparing header serials with the
 * version serial and testing NONEXISTENT().  A surviving header whose type
 * equals RBTDB_RDATATYPE_VALUE(type, covers) is the match; one whose type
 * is RRSIG covering 'type' is the signature match.  A match is bound to
 * 'rdataset' with bind_rdataset(), and a signature match is bound as well
 * (presumably to 'sigrdataset' -- that argument line is not visible).
 *
 * Returns ISC_R_NOTFOUND or ISC_R_SUCCESS.
 *
 * NOTE(review): the conditions selecting between the two returns, the
 * assignments to 'found' / 'foundsig', and parts of the serial test are
 * not visible in this listing -- confirm against the full file.
 */
5524 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5525 dns_rdatatype_t type, dns_rdatatype_t covers,
5526 isc_stdtime_t now, dns_rdataset_t *rdataset,
5527 dns_rdataset_t *sigrdataset)
5529 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5530 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5531 rdatasetheader_t *header, *header_next, *found, *foundsig;
5532 rbtdb_serial_t serial;
5533 rbtdb_version_t *rbtversion = version;
5534 isc_boolean_t close_version = ISC_FALSE;
5535 rbtdb_rdatatype_t matchtype, sigmatchtype;
5537 REQUIRE(VALID_RBTDB(rbtdb));
5538 REQUIRE(type != dns_rdatatype_any);
5540 if (rbtversion == NULL) {
5541 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5542 close_version = ISC_TRUE;
5544 serial = rbtversion->serial;
5547 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5548 isc_rwlocktype_read);
5552 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5554 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5558 for (header = rbtnode->data; header != NULL; header = header_next) {
5559 header_next = header->next;
5561 if (header->serial <= serial &&
5564 * Is this a "this rdataset doesn't
5567 if (NONEXISTENT(header))
5571 header = header->down;
5572 } while (header != NULL);
5573 if (header != NULL) {
5575 * We have an active, extant rdataset. If it's a
5576 * type we're looking for, remember it.
5578 if (header->type == matchtype) {
5580 if (foundsig != NULL)
5582 } else if (header->type == sigmatchtype) {
5589 if (found != NULL) {
5590 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5591 if (foundsig != NULL)
5592 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5596 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5597 isc_rwlocktype_read);
5600 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5604 return (ISC_R_NOTFOUND);
5606 return (ISC_R_SUCCESS);
/*
 * cache_findrdataset -- look up the rdataset of 'type' / 'covers' at
 * 'node' in a cache database.
 *
 * 'db' must be a valid rbtdb and 'type' must not be dns_rdatatype_any.
 * The node lock is taken in read mode.  While scanning the node's 'next'
 * list:
 *   - a header whose TTL is at or before 'now' is not considered for
 *     matching; if the TTL is also at or before 'now - RBTDB_VIRTUAL' and
 *     the write lock is held or NODE_TRYUPGRADE() succeeds, the header is
 *     flagged RDATASET_ATTR_STALE;
 *   - otherwise, a header that EXISTS() is compared against the requested
 *     type, against the negative-cache types (RBTDB_RDATATYPE_NCACHEANY
 *     or the negative type built from 'type'), and against the RRSIG
 *     type covering 'type'.
 *
 * A match is bound to 'rdataset' with bind_rdataset(); a signature match
 * is bound too.  'result' starts as ISC_R_SUCCESS and becomes
 * DNS_R_NCACHENXDOMAIN or DNS_R_NCACHENXRRSET when the match is
 * NEGATIVE() (NXDOMAIN() selects the former).  ISC_R_NOTFOUND is returned
 * on the not-found path.
 *
 * NOTE(review): the assignments to 'found' / 'foundsig', the guard on
 * isc_stdtime_get(&now), the condition for the NOTFOUND return and the
 * final return of 'result' are not visible in this listing -- confirm.
 */
5610 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5611 dns_rdatatype_t type, dns_rdatatype_t covers,
5612 isc_stdtime_t now, dns_rdataset_t *rdataset,
5613 dns_rdataset_t *sigrdataset)
5615 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5616 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5617 rdatasetheader_t *header, *header_next, *found, *foundsig;
5618 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5619 isc_result_t result;
5621 isc_rwlocktype_t locktype;
5623 REQUIRE(VALID_RBTDB(rbtdb));
5624 REQUIRE(type != dns_rdatatype_any);
5628 result = ISC_R_SUCCESS;
5631 isc_stdtime_get(&now);
5633 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5634 locktype = isc_rwlocktype_read;
5635 NODE_LOCK(lock, locktype);
5639 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5640 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5642 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5646 for (header = rbtnode->data; header != NULL; header = header_next) {
5647 header_next = header->next;
5648 if (header->rdh_ttl <= now) {
5649 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5650 (locktype == isc_rwlocktype_write ||
5651 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5653 * We update the node's status only when we
5654 * can get write access.
5656 locktype = isc_rwlocktype_write;
5659 * We don't check if refcurrent(rbtnode) == 0
5660 * and try to free like we do in cache_find(),
5661 * because refcurrent(rbtnode) must be
5662 * non-zero. This is so because 'node' is an
5663 * argument to the function.
5665 header->attributes |= RDATASET_ATTR_STALE;
5668 } else if (EXISTS(header)) {
5669 if (header->type == matchtype)
5671 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5672 header->type == negtype)
5674 else if (header->type == sigmatchtype)
5678 if (found != NULL) {
5679 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5680 if (foundsig != NULL)
5681 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5685 NODE_UNLOCK(lock, locktype);
5688 return (ISC_R_NOTFOUND);
5690 if (NEGATIVE(found)) {
5692 * We found a negative cache entry.
5694 if (NXDOMAIN(found))
5695 result = DNS_R_NCACHENXDOMAIN;
5697 result = DNS_R_NCACHENXRRSET;
/*
 * allrdatasets -- create an rdataset iterator for 'node'.
 *
 * 'db' must be a valid rbtdb.  The iterator is allocated from the
 * database memory context and records 'db', 'node', the version and
 * 'now' in its common part.
 *
 * For a database without DNS_DBATTR_CACHE, a NULL 'version' is replaced
 * (the call itself is not visible here, presumably currentversion()),
 * and there is also a call to isc_refcount_increment() on the version's
 * reference counter.  isc_stdtime_get(&now) is called on a path whose
 * guard is not visible (presumably the cache case with 'now == 0').
 *
 * The node reference count is incremented under NODE_STRONGLOCK and
 * 'current' is set to NULL before the lock is released.
 *
 * Returns ISC_R_NOMEMORY when the allocation fails; otherwise stores the
 * iterator in '*iteratorp' and returns ISC_R_SUCCESS.
 */
5704 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5705 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5707 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5708 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5709 rbtdb_version_t *rbtversion = version;
5710 rbtdb_rdatasetiter_t *iterator;
5713 REQUIRE(VALID_RBTDB(rbtdb));
5715 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5716 if (iterator == NULL)
5717 return (ISC_R_NOMEMORY);
5719 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5721 if (rbtversion == NULL)
5723 (dns_dbversion_t **) (void *)(&rbtversion));
5727 isc_refcount_increment(&rbtversion->references,
5733 isc_stdtime_get(&now);
5737 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5738 iterator->common.methods = &rdatasetiter_methods;
5739 iterator->common.db = db;
5740 iterator->common.node = node;
5741 iterator->common.version = (dns_dbversion_t *)rbtversion;
5742 iterator->common.now = now;
5744 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5746 dns_rbtnode_refincrement(rbtnode, &refs);
5749 iterator->current = NULL;
5751 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5753 *iteratorp = (dns_rdatasetiter_t *)iterator;
5755 return (ISC_R_SUCCESS);
/*
 * cname_and_other_data -- scan the rdatasets at 'node' for a CNAME that
 * coexists with "other data", as seen at 'serial'.
 *
 * The caller must hold the node lock.  Each entry on the node's 'next'
 * list is classified by type: dns_rdatatype_cname entries are checked for
 * an active, extant CNAME; all other entries whose base type is not KEY,
 * SIG, NSEC or RRSIG are checked for active, extant other data.  In both
 * cases the 'down' chain is walked comparing header serials with 'serial'
 * and testing NONEXISTENT().
 *
 * NOTE(review): the initialisation of 'cname', the place where it is set,
 * and the return statements are not visible in this listing; presumably
 * ISC_TRUE is returned when both 'cname' and 'other_data' are set and
 * ISC_FALSE otherwise -- confirm against the full file.
 */
5758 static isc_boolean_t
5759 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5760 rdatasetheader_t *header, *header_next;
5761 isc_boolean_t cname, other_data;
5762 dns_rdatatype_t rdtype;
5765 * The caller must hold the node lock.
5769 * Look for CNAME and "other data" rdatasets active in our version.
5772 other_data = ISC_FALSE;
5773 for (header = node->data; header != NULL; header = header_next) {
5774 header_next = header->next;
5775 if (header->type == dns_rdatatype_cname) {
5777 * Look for an active extant CNAME.
5780 if (header->serial <= serial &&
5783 * Is this a "this rdataset doesn't
5786 if (NONEXISTENT(header))
5790 header = header->down;
5791 } while (header != NULL);
5796 * Look for active extant "other data".
5798 * "Other data" is any rdataset whose type is not
5799 * KEY, NSEC, SIG or RRSIG.
5801 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5802 if (rdtype != dns_rdatatype_key &&
5803 rdtype != dns_rdatatype_sig &&
5804 rdtype != dns_rdatatype_nsec &&
5805 rdtype != dns_rdatatype_rrsig) {
5807 * Is it active and extant?
5810 if (header->serial <= serial &&
5813 * Is this a "this rdataset
5814 * doesn't exist" record?
5816 if (NONEXISTENT(header))
5820 header = header->down;
5821 } while (header != NULL);
5823 other_data = ISC_TRUE;
5828 if (cname && other_data)
/*
 * resign_insert -- insert 'newheader' into the heap rbtdb->heaps[idx].
 *
 * Insists that the database is not a cache, that newheader->heap_index
 * is 0, and that the header is not linked through its 'link' member.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the isc_heap_insert() result is returned -- confirm.
 */
5835 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5836 isc_result_t result;
5838 INSIST(!IS_CACHE(rbtdb));
5839 INSIST(newheader->heap_index == 0);
5840 INSIST(!ISC_LINK_LINKED(newheader, link));
5842 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * add -- add the rdataset header 'newheader' to 'rbtnode'.
 *
 * The caller must be holding the node lock (stated in the body).
 *
 * Parameters as used by the visible code:
 *   rbtversion     NULL selects the cache-style handling (negative-cache
 *                  bookkeeping, trust comparison, expiring replaced data);
 *                  non-NULL selects versioned handling (changed records,
 *                  serial checks, CNAME-and-other-data check).
 *   options        DNS_DBADD_MERGE requires a non-NULL 'rbtversion';
 *                  DNS_DBADD_FORCE makes the comparison trust
 *                  dns_trust_ultimate instead of newheader->trust;
 *                  DNS_DBADD_EXACT / DNS_DBADD_EXACTTTL affect merging.
 *   loading        when false and a version is given, a changed record
 *                  is created with add_changed().
 *   addedrdataset  when non-NULL, bound with bind_rdataset() to the
 *                  header that ends up representing the data.
 *   now            compared against header TTLs.
 *
 * Results visible in this function:
 *   ISC_R_NOMEMORY       add_changed() failed; 'newheader' was freed.
 *   DNS_R_UNCHANGED      'newheader' was freed without being linked in
 *                        (lower trust than existing data, or a delete of
 *                        something that does not exist).
 *   DNS_R_CNAMEANDOTHER  versioned case only: cname_and_other_data()
 *                        reported a conflict after the insertion.
 *   ISC_R_SUCCESS        'newheader' was linked in, or an equal NS / A /
 *                        AAAA rdataset already in the cache was kept
 *                        (its TTL lowered if the new one is smaller, and
 *                        noqname / closest data moved over if missing).
 *
 * After linking, a cache header is prepended to rbtdb->rdatasets[idx] and
 * inserted into rbtdb->heaps[idx]; a non-cache header with RESIGN() set
 * goes through resign_insert().
 *
 * NOTE(review): many lines of this function (including the setup of
 * 'merge', 'trust', 'idx', 'header' and several else / return lines) are
 * not visible in this listing; the summary above covers only what is
 * shown and should be confirmed against the full file.
 */
5847 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5848 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5849 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5851 rbtdb_changed_t *changed = NULL;
5852 rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5853 unsigned char *merged;
5854 isc_result_t result;
5855 isc_boolean_t header_nx;
5856 isc_boolean_t newheader_nx;
5857 isc_boolean_t merge;
5858 dns_rdatatype_t rdtype, covers;
5859 rbtdb_rdatatype_t negtype, sigtype;
5864 * Add an rdatasetheader_t to a node.
5868 * Caller must be holding the node lock.
5871 if ((options & DNS_DBADD_MERGE) != 0) {
5872 REQUIRE(rbtversion != NULL);
5877 if ((options & DNS_DBADD_FORCE) != 0)
5878 trust = dns_trust_ultimate;
5880 trust = newheader->trust;
5882 if (rbtversion != NULL && !loading) {
5884 * We always add a changed record, even if no changes end up
5885 * being made to this node, because it's harmless and
5886 * simplifies the code.
5888 changed = add_changed(rbtdb, rbtversion, rbtnode);
5889 if (changed == NULL) {
5890 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5891 return (ISC_R_NOMEMORY);
5895 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5896 topheader_prev = NULL;
5899 if (rbtversion == NULL && !newheader_nx) {
5900 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5901 if (NEGATIVE(newheader)) {
5903 * We're adding a negative cache entry.
5905 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5906 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5908 for (topheader = rbtnode->data;
5910 topheader = topheader->next) {
5912 * If we're adding an negative cache entry
5913 * which covers all types (NXDOMAIN,
5914 * NODATA(QTYPE=ANY)).
5916 * We make all other data stale so that the
5917 * only rdataset that can be found at this
5918 * node is the negative cache entry.
5920 * Otherwise look for any RRSIGs of the
5921 * given type so they can be marked stale
5924 if (covers == dns_rdatatype_any) {
5925 set_ttl(rbtdb, topheader, 0);
5926 topheader->attributes |=
5927 RDATASET_ATTR_STALE;
5929 } else if (topheader->type == sigtype)
5930 sigheader = topheader;
5932 if (covers == dns_rdatatype_any)
5934 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5937 * We're adding something that isn't a
5938 * negative cache entry. Look for an extant
5939 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5942 for (topheader = rbtnode->data;
5944 topheader = topheader->next) {
5945 if (topheader->type ==
5946 RBTDB_RDATATYPE_NCACHEANY)
5949 if (topheader != NULL && EXISTS(topheader) &&
5950 topheader->rdh_ttl > now) {
5954 if (trust < topheader->trust) {
5956 * The NXDOMAIN/NODATA(QTYPE=ANY)
5959 free_rdataset(rbtdb,
5962 if (addedrdataset != NULL)
5963 bind_rdataset(rbtdb, rbtnode,
5966 return (DNS_R_UNCHANGED);
5969 * The new rdataset is better. Expire the
5970 * NXDOMAIN/NODATA(QTYPE=ANY).
5972 set_ttl(rbtdb, topheader, 0);
5973 topheader->attributes |= RDATASET_ATTR_STALE;
5978 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5982 for (topheader = rbtnode->data;
5984 topheader = topheader->next) {
5985 if (topheader->type == newheader->type ||
5986 topheader->type == negtype)
5988 topheader_prev = topheader;
5993 * If header isn't NULL, we've found the right type. There may be
5994 * IGNORE rdatasets between the top of the chain and the first real
5995 * data. We skip over them.
5998 while (header != NULL && IGNORE(header))
5999 header = header->down;
6000 if (header != NULL) {
6001 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6004 * Deleting an already non-existent rdataset has no effect.
6006 if (header_nx && newheader_nx) {
6007 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6008 return (DNS_R_UNCHANGED);
6012 * Trying to add an rdataset with lower trust to a cache DB
6013 * has no effect, provided that the cache data isn't stale.
6015 if (rbtversion == NULL && trust < header->trust &&
6016 (header->rdh_ttl > now || header_nx)) {
6017 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6018 if (addedrdataset != NULL)
6019 bind_rdataset(rbtdb, rbtnode, header, now,
6021 return (DNS_R_UNCHANGED);
6025 * Don't merge if a nonexistent rdataset is involved.
6027 if (merge && (header_nx || newheader_nx))
6031 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6032 * that is the union of 'newheader' and 'header'.
6035 unsigned int flags = 0;
6036 INSIST(rbtversion->serial >= header->serial);
6038 result = ISC_R_SUCCESS;
6040 if ((options & DNS_DBADD_EXACT) != 0)
6041 flags |= DNS_RDATASLAB_EXACT;
6042 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6043 newheader->rdh_ttl != header->rdh_ttl)
6044 result = DNS_R_NOTEXACT;
6045 else if (newheader->rdh_ttl != header->rdh_ttl)
6046 flags |= DNS_RDATASLAB_FORCE;
6047 if (result == ISC_R_SUCCESS)
6048 result = dns_rdataslab_merge(
6049 (unsigned char *)header,
6050 (unsigned char *)newheader,
6051 (unsigned int)(sizeof(*newheader)),
6053 rbtdb->common.rdclass,
6054 (dns_rdatatype_t)header->type,
6056 if (result == ISC_R_SUCCESS) {
6058 * If 'header' has the same serial number as
6059 * we do, we could clean it up now if we knew
6060 * that our caller had no references to it.
6061 * We don't know this, however, so we leave it
6062 * alone. It will get cleaned up when
6063 * clean_zone_node() runs.
6065 free_rdataset(rbtdb, rbtdb->common.mctx,
6067 newheader = (rdatasetheader_t *)merged;
6068 init_rdataset(rbtdb, newheader);
6069 if (loading && RESIGN(newheader) &&
6071 header->resign < newheader->resign)
6072 newheader->resign = header->resign;
6074 free_rdataset(rbtdb, rbtdb->common.mctx,
6080 * Don't replace existing NS, A and AAAA RRsets
6081 * in the cache if they are already exist. This
6082 * prevents named being locked to old servers.
6083 * Don't lower trust of existing record if the
6086 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6087 header->type == dns_rdatatype_ns &&
6088 !header_nx && !newheader_nx &&
6089 header->trust >= newheader->trust &&
6090 dns_rdataslab_equalx((unsigned char *)header,
6091 (unsigned char *)newheader,
6092 (unsigned int)(sizeof(*newheader)),
6093 rbtdb->common.rdclass,
6094 (dns_rdatatype_t)header->type)) {
6096 * Honour the new ttl if it is less than the
6099 if (header->rdh_ttl > newheader->rdh_ttl)
6100 set_ttl(rbtdb, header, newheader->rdh_ttl);
6101 if (header->noqname == NULL &&
6102 newheader->noqname != NULL) {
6103 header->noqname = newheader->noqname;
6104 newheader->noqname = NULL;
6106 if (header->closest == NULL &&
6107 newheader->closest != NULL) {
6108 header->closest = newheader->closest;
6109 newheader->closest = NULL;
6111 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6112 if (addedrdataset != NULL)
6113 bind_rdataset(rbtdb, rbtnode, header, now,
6115 return (ISC_R_SUCCESS);
6117 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6118 (header->type == dns_rdatatype_a ||
6119 header->type == dns_rdatatype_aaaa) &&
6120 !header_nx && !newheader_nx &&
6121 header->trust >= newheader->trust &&
6122 dns_rdataslab_equal((unsigned char *)header,
6123 (unsigned char *)newheader,
6124 (unsigned int)(sizeof(*newheader)))) {
6126 * Honour the new ttl if it is less than the
6129 if (header->rdh_ttl > newheader->rdh_ttl)
6130 set_ttl(rbtdb, header, newheader->rdh_ttl);
6131 if (header->noqname == NULL &&
6132 newheader->noqname != NULL) {
6133 header->noqname = newheader->noqname;
6134 newheader->noqname = NULL;
6136 if (header->closest == NULL &&
6137 newheader->closest != NULL) {
6138 header->closest = newheader->closest;
6139 newheader->closest = NULL;
6141 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6142 if (addedrdataset != NULL)
6143 bind_rdataset(rbtdb, rbtnode, header, now,
6145 return (ISC_R_SUCCESS);
6147 INSIST(rbtversion == NULL ||
6148 rbtversion->serial >= topheader->serial);
6149 if (topheader_prev != NULL)
6150 topheader_prev->next = newheader;
6152 rbtnode->data = newheader;
6153 newheader->next = topheader->next;
6156 * There are no other references to 'header' when
6157 * loading, so we MAY clean up 'header' now.
6158 * Since we don't generate changed records when
6159 * loading, we MUST clean up 'header' now.
6161 newheader->down = NULL;
6162 free_rdataset(rbtdb, rbtdb->common.mctx, header);
6164 newheader->down = topheader;
6165 topheader->next = newheader;
6167 if (changed != NULL)
6168 changed->dirty = ISC_TRUE;
6169 if (rbtversion == NULL) {
6170 set_ttl(rbtdb, header, 0);
6171 header->attributes |= RDATASET_ATTR_STALE;
6172 if (sigheader != NULL) {
6173 set_ttl(rbtdb, sigheader, 0);
6174 sigheader->attributes |=
6175 RDATASET_ATTR_STALE;
6178 idx = newheader->node->locknum;
6179 if (IS_CACHE(rbtdb)) {
6180 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6183 * XXXMLG We don't check the return value
6184 * here. If it fails, we will not do TTL
6185 * based expiry on this node. However, we
6186 * will do it on the LRU side, so memory
6187 * will not leak... for long.
6189 isc_heap_insert(rbtdb->heaps[idx], newheader);
6190 } else if (RESIGN(newheader))
6191 resign_insert(rbtdb, idx, newheader);
6195 * No non-IGNORED rdatasets of the given type exist at
6200 * If we're trying to delete the type, don't bother.
6203 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6204 return (DNS_R_UNCHANGED);
6207 if (topheader != NULL) {
6209 * We have an list of rdatasets of the given type,
6210 * but they're all marked IGNORE. We simply insert
6211 * the new rdataset at the head of the list.
6213 * Ignored rdatasets cannot occur during loading, so
6217 INSIST(rbtversion == NULL ||
6218 rbtversion->serial >= topheader->serial);
6219 if (topheader_prev != NULL)
6220 topheader_prev->next = newheader;
6222 rbtnode->data = newheader;
6223 newheader->next = topheader->next;
6224 newheader->down = topheader;
6225 topheader->next = newheader;
6227 if (changed != NULL)
6228 changed->dirty = ISC_TRUE;
6231 * No rdatasets of the given type exist at the node.
6233 newheader->next = rbtnode->data;
6234 newheader->down = NULL;
6235 rbtnode->data = newheader;
6237 idx = newheader->node->locknum;
6238 if (IS_CACHE(rbtdb)) {
6239 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6241 isc_heap_insert(rbtdb->heaps[idx], newheader);
6242 } else if (RESIGN(newheader)) {
6243 resign_insert(rbtdb, idx, newheader);
6248 * Check if the node now contains CNAME and other data.
6250 if (rbtversion != NULL &&
6251 cname_and_other_data(rbtnode, rbtversion->serial))
6252 return (DNS_R_CNAMEANDOTHER);
6254 if (addedrdataset != NULL)
6255 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6257 return (ISC_R_SUCCESS);
/*
 * delegating_type -- classify 'type' at 'node' as a delegation type.
 *
 * For a cache database only dns_rdatatype_dname is tested.  For any other
 * database the tested condition is: DNAME, or NS when 'node' is not the
 * database origin node or the database is a stub.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably ISC_TRUE is returned when the tested condition holds and
 * ISC_FALSE otherwise -- confirm against the full file.
 */
6260 static inline isc_boolean_t
6261 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6262 rbtdb_rdatatype_t type)
6264 if (IS_CACHE(rbtdb)) {
6265 if (type == dns_rdatatype_dname)
6269 } else if (type == dns_rdatatype_dname ||
6270 (type == dns_rdatatype_ns &&
6271 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * addnoqname -- copy the "noqname" data attached to 'rdataset' into a
 * newly allocated struct noqname and store it in newheader->noqname.
 *
 * dns_rdataset_getnoqname() supplies a name and two rdatasets ('neg' and
 * 'negsig') and is required to succeed (RUNTIME_CHECK).  The name is
 * duplicated with dns_name_dup() and each rdataset is converted with
 * dns_rdataslab_fromrdataset(); all allocations use the database memory
 * context.  The type recorded in the structure is neg.type.
 *
 * Returns ISC_R_SUCCESS once newheader->noqname is set.  The failure path
 * at the end disassociates both temporary rdatasets and releases the
 * partially built structure with free_noqname().
 *
 * NOTE(review): the jumps to the failure path and its final return are
 * not visible in this listing; presumably the failing 'result'
 * (ISC_R_NOMEMORY for the structure allocation) is returned -- confirm.
 */
6276 static inline isc_result_t
6277 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6278 dns_rdataset_t *rdataset)
6280 struct noqname *noqname;
6281 isc_mem_t *mctx = rbtdb->common.mctx;
6283 dns_rdataset_t neg, negsig;
6284 isc_result_t result;
6287 dns_name_init(&name, NULL);
6288 dns_rdataset_init(&neg);
6289 dns_rdataset_init(&negsig);
6291 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6292 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6294 noqname = isc_mem_get(mctx, sizeof(*noqname));
6295 if (noqname == NULL) {
6296 result = ISC_R_NOMEMORY;
6299 dns_name_init(&noqname->name, NULL);
6300 noqname->neg = NULL;
6301 noqname->negsig = NULL;
6302 noqname->type = neg.type;
6303 result = dns_name_dup(&name, mctx, &noqname->name);
6304 if (result != ISC_R_SUCCESS)
6306 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6307 if (result != ISC_R_SUCCESS)
6309 noqname->neg = r.base;
6310 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6311 if (result != ISC_R_SUCCESS)
6313 noqname->negsig = r.base;
6314 dns_rdataset_disassociate(&neg);
6315 dns_rdataset_disassociate(&negsig);
6316 newheader->noqname = noqname;
6317 return (ISC_R_SUCCESS);
6320 dns_rdataset_disassociate(&neg);
6321 dns_rdataset_disassociate(&negsig);
6322 free_noqname(mctx, &noqname);
/*
 * addclosest -- copy the "closest" data attached to 'rdataset' into a
 * newly allocated struct noqname and store it in newheader->closest.
 *
 * dns_rdataset_getclosest() supplies a name and two rdatasets ('neg' and
 * 'negsig') and is required to succeed (RUNTIME_CHECK).  The name is
 * duplicated with dns_name_dup() and each rdataset is converted with
 * dns_rdataslab_fromrdataset(); all allocations use the database memory
 * context.  The type recorded in the structure is neg.type.
 *
 * Returns ISC_R_SUCCESS once newheader->closest is set.  The failure path
 * at the end disassociates both temporary rdatasets and releases the
 * partially built structure with free_noqname().
 *
 * NOTE(review): the jumps to the failure path and its final return are
 * not visible in this listing; presumably the failing 'result'
 * (ISC_R_NOMEMORY for the structure allocation) is returned -- confirm.
 */
6326 static inline isc_result_t
6327 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6328 dns_rdataset_t *rdataset)
6330 struct noqname *closest;
6331 isc_mem_t *mctx = rbtdb->common.mctx;
6333 dns_rdataset_t neg, negsig;
6334 isc_result_t result;
6337 dns_name_init(&name, NULL);
6338 dns_rdataset_init(&neg);
6339 dns_rdataset_init(&negsig);
6341 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6342 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6344 closest = isc_mem_get(mctx, sizeof(*closest));
6345 if (closest == NULL) {
6346 result = ISC_R_NOMEMORY;
6349 dns_name_init(&closest->name, NULL);
6350 closest->neg = NULL;
6351 closest->negsig = NULL;
6352 closest->type = neg.type;
6353 result = dns_name_dup(&name, mctx, &closest->name);
6354 if (result != ISC_R_SUCCESS)
6356 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6357 if (result != ISC_R_SUCCESS)
6359 closest->neg = r.base;
6360 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6361 if (result != ISC_R_SUCCESS)
6363 closest->negsig = r.base;
6364 dns_rdataset_disassociate(&neg);
6365 dns_rdataset_disassociate(&negsig);
6366 newheader->closest = closest;
6367 return (ISC_R_SUCCESS);
6370 dns_rdataset_disassociate(&neg);
6371 dns_rdataset_disassociate(&negsig);
6372 free_noqname(mctx, &closest);
/*
 * Declared ahead of addrdataset(), which compares rbtdb->common.methods
 * against &zone_methods to tell whether the database is a zone database.
 */
6376 static dns_dbmethods_t zone_methods;
6379 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6380 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6381 dns_rdataset_t *addedrdataset)
6383 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6384 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6385 rbtdb_version_t *rbtversion = version;
6386 isc_region_t region;
6387 rdatasetheader_t *newheader;
6388 rdatasetheader_t *header;
6389 isc_result_t result;
6390 isc_boolean_t delegating;
6391 isc_boolean_t newnsec;
6392 isc_boolean_t tree_locked = ISC_FALSE;
6393 isc_boolean_t cache_is_overmem = ISC_FALSE;
6395 REQUIRE(VALID_RBTDB(rbtdb));
6397 if (rbtdb->common.methods == &zone_methods)
6398 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6399 (rdataset->type == dns_rdatatype_nsec3 ||
6400 rdataset->covers == dns_rdatatype_nsec3)) ||
6401 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6402 rdataset->type != dns_rdatatype_nsec3 &&
6403 rdataset->covers != dns_rdatatype_nsec3)));
6405 if (rbtversion == NULL) {
6407 isc_stdtime_get(&now);
6411 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6413 sizeof(rdatasetheader_t));
6414 if (result != ISC_R_SUCCESS)
6417 newheader = (rdatasetheader_t *)region.base;
6418 init_rdataset(rbtdb, newheader);
6419 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6420 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6422 newheader->attributes = 0;
6423 newheader->noqname = NULL;
6424 newheader->closest = NULL;
6425 newheader->count = init_count++;
6426 newheader->trust = rdataset->trust;
6427 newheader->additional_auth = NULL;
6428 newheader->additional_glue = NULL;
6429 newheader->last_used = now;
6430 newheader->node = rbtnode;
6431 if (rbtversion != NULL) {
6432 newheader->serial = rbtversion->serial;
6435 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6436 newheader->attributes |= RDATASET_ATTR_RESIGN;
6437 newheader->resign = rdataset->resign;
6439 newheader->resign = 0;
6441 newheader->serial = 1;
6442 newheader->resign = 0;
6443 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6444 newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6445 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6446 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6447 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6448 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6449 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6450 result = addnoqname(rbtdb, newheader, rdataset);
6451 if (result != ISC_R_SUCCESS) {
6452 free_rdataset(rbtdb, rbtdb->common.mctx,
6457 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6458 result = addclosest(rbtdb, newheader, rdataset);
6459 if (result != ISC_R_SUCCESS) {
6460 free_rdataset(rbtdb, rbtdb->common.mctx,
6468 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6469 * just DNAME for the cache), then we need to set the callback bit
6472 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6473 delegating = ISC_TRUE;
6475 delegating = ISC_FALSE;
6478 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6480 if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6481 rdataset->type == dns_rdatatype_nsec)
6484 newnsec = ISC_FALSE;
6487 * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6488 * or the DB is a cache in an overmem state, hold an exclusive lock on
6489 * the tree. In the latter case the lock does not necessarily have to
6490 * be acquired but it will help purge stale entries more effectively.
6492 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6493 cache_is_overmem = ISC_TRUE;
6494 if (delegating || newnsec || cache_is_overmem) {
6495 tree_locked = ISC_TRUE;
6496 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6499 if (cache_is_overmem)
6500 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6502 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6503 isc_rwlocktype_write);
6505 if (rbtdb->rrsetstats != NULL) {
6506 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6507 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6510 if (IS_CACHE(rbtdb)) {
6512 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6514 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6515 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6516 expire_header(rbtdb, header, tree_locked);
6519 * If we've been holding a write lock on the tree just for
6520 * cleaning, we can release it now. However, we still need the
6523 if (tree_locked && !delegating && !newnsec) {
6524 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6525 tree_locked = ISC_FALSE;
6529 result = ISC_R_SUCCESS;
6531 dns_fixedname_t fname;
6533 dns_rbtnode_t *nsecnode;
6535 dns_fixedname_init(&fname);
6536 name = dns_fixedname_name(&fname);
6537 dns_rbt_fullnamefromnode(rbtnode, name);
6539 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6540 if (result == ISC_R_SUCCESS) {
6541 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6542 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6543 } else if (result == ISC_R_EXISTS) {
6544 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6545 result = ISC_R_SUCCESS;
6549 if (result == ISC_R_SUCCESS)
6550 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6551 ISC_FALSE, addedrdataset, now);
6552 if (result == ISC_R_SUCCESS && delegating)
6553 rbtnode->find_callback = 1;
6555 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6556 isc_rwlocktype_write);
6559 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6562 * Update the zone's secure status. If version is non-NULL
6563 * this is deferred until closeversion() is called.
6565 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6566 iszonesecure(db, version, rbtdb->origin_node);
6572 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6573 dns_rdataset_t *rdataset, unsigned int options,
6574 dns_rdataset_t *newrdataset)
6576 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6577 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6578 rbtdb_version_t *rbtversion = version;
6579 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6580 unsigned char *subresult;
6581 isc_region_t region;
6582 isc_result_t result;
6583 rbtdb_changed_t *changed;
6585 REQUIRE(VALID_RBTDB(rbtdb));
6587 if (rbtdb->common.methods == &zone_methods)
6588 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6589 (rdataset->type == dns_rdatatype_nsec3 ||
6590 rdataset->covers == dns_rdatatype_nsec3)) ||
6591 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6592 rdataset->type != dns_rdatatype_nsec3 &&
6593 rdataset->covers != dns_rdatatype_nsec3)));
6595 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6597 sizeof(rdatasetheader_t));
6598 if (result != ISC_R_SUCCESS)
6600 newheader = (rdatasetheader_t *)region.base;
6601 init_rdataset(rbtdb, newheader);
6602 set_ttl(rbtdb, newheader, rdataset->ttl);
6603 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6605 newheader->attributes = 0;
6606 newheader->serial = rbtversion->serial;
6607 newheader->trust = 0;
6608 newheader->noqname = NULL;
6609 newheader->closest = NULL;
6610 newheader->count = init_count++;
6611 newheader->additional_auth = NULL;
6612 newheader->additional_glue = NULL;
6613 newheader->last_used = 0;
6614 newheader->node = rbtnode;
6615 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6616 newheader->attributes |= RDATASET_ATTR_RESIGN;
6617 newheader->resign = rdataset->resign;
6619 newheader->resign = 0;
6621 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6622 isc_rwlocktype_write);
6624 changed = add_changed(rbtdb, rbtversion, rbtnode);
6625 if (changed == NULL) {
6626 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6627 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6628 isc_rwlocktype_write);
6629 return (ISC_R_NOMEMORY);
6632 topheader_prev = NULL;
6633 for (topheader = rbtnode->data;
6635 topheader = topheader->next) {
6636 if (topheader->type == newheader->type)
6638 topheader_prev = topheader;
6641 * If header isn't NULL, we've found the right type. There may be
6642 * IGNORE rdatasets between the top of the chain and the first real
6643 * data. We skip over them.
6646 while (header != NULL && IGNORE(header))
6647 header = header->down;
6648 if (header != NULL && EXISTS(header)) {
6649 unsigned int flags = 0;
6651 result = ISC_R_SUCCESS;
6652 if ((options & DNS_DBSUB_EXACT) != 0) {
6653 flags |= DNS_RDATASLAB_EXACT;
6654 if (newheader->rdh_ttl != header->rdh_ttl)
6655 result = DNS_R_NOTEXACT;
6657 if (result == ISC_R_SUCCESS)
6658 result = dns_rdataslab_subtract(
6659 (unsigned char *)header,
6660 (unsigned char *)newheader,
6661 (unsigned int)(sizeof(*newheader)),
6663 rbtdb->common.rdclass,
6664 (dns_rdatatype_t)header->type,
6666 if (result == ISC_R_SUCCESS) {
6667 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6668 newheader = (rdatasetheader_t *)subresult;
6669 init_rdataset(rbtdb, newheader);
6671 * We have to set the serial since the rdataslab
6672 * subtraction routine copies the reserved portion of
6673 * header, not newheader.
6675 newheader->serial = rbtversion->serial;
6677 * XXXJT: dns_rdataslab_subtract() copied the pointers
6678 * to additional info. We need to clear these fields
6679 * to avoid having duplicated references.
6681 newheader->additional_auth = NULL;
6682 newheader->additional_glue = NULL;
6683 } else if (result == DNS_R_NXRRSET) {
6685 * This subtraction would remove all of the rdata;
6686 * add a nonexistent header instead.
6688 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6689 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6690 if (newheader == NULL) {
6691 result = ISC_R_NOMEMORY;
6694 set_ttl(rbtdb, newheader, 0);
6695 newheader->type = topheader->type;
6696 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6697 newheader->trust = 0;
6698 newheader->serial = rbtversion->serial;
6699 newheader->noqname = NULL;
6700 newheader->closest = NULL;
6701 newheader->count = 0;
6702 newheader->additional_auth = NULL;
6703 newheader->additional_glue = NULL;
6704 newheader->node = rbtnode;
6705 newheader->resign = 0;
6706 newheader->last_used = 0;
6708 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6713 * If we're here, we want to link newheader in front of
6716 INSIST(rbtversion->serial >= topheader->serial);
6717 if (topheader_prev != NULL)
6718 topheader_prev->next = newheader;
6720 rbtnode->data = newheader;
6721 newheader->next = topheader->next;
6722 newheader->down = topheader;
6723 topheader->next = newheader;
6725 changed->dirty = ISC_TRUE;
6728 * The rdataset doesn't exist, so we don't need to do anything
6729 * to satisfy the deletion request.
6731 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6732 if ((options & DNS_DBSUB_EXACT) != 0)
6733 result = DNS_R_NOTEXACT;
6735 result = DNS_R_UNCHANGED;
6738 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6739 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6742 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6743 isc_rwlocktype_write);
6746 * Update the zone's secure status. If version is non-NULL
6747 * this is deferred until closeversion() is called.
6749 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6750 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6756 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6757 dns_rdatatype_t type, dns_rdatatype_t covers)
6759 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6760 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6761 rbtdb_version_t *rbtversion = version;
6762 isc_result_t result;
6763 rdatasetheader_t *newheader;
6765 REQUIRE(VALID_RBTDB(rbtdb));
6767 if (type == dns_rdatatype_any)
6768 return (ISC_R_NOTIMPLEMENTED);
6769 if (type == dns_rdatatype_rrsig && covers == 0)
6770 return (ISC_R_NOTIMPLEMENTED);
6772 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6773 if (newheader == NULL)
6774 return (ISC_R_NOMEMORY);
6775 set_ttl(rbtdb, newheader, 0);
6776 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6777 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6778 newheader->trust = 0;
6779 newheader->noqname = NULL;
6780 newheader->closest = NULL;
6781 newheader->additional_auth = NULL;
6782 newheader->additional_glue = NULL;
6783 if (rbtversion != NULL)
6784 newheader->serial = rbtversion->serial;
6786 newheader->serial = 0;
6787 newheader->count = 0;
6788 newheader->last_used = 0;
6789 newheader->node = rbtnode;
6791 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6792 isc_rwlocktype_write);
6794 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6795 ISC_FALSE, NULL, 0);
6797 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6798 isc_rwlocktype_write);
6801 * Update the zone's secure status. If version is non-NULL
6802 * this is deferred until closeversion() is called.
6804 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6805 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6811 * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
6814 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6815 isc_boolean_t hasnsec)
6817 isc_result_t noderesult, nsecresult;
6818 dns_rbtnode_t *nsecnode;
6820 noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
6823 if (noderesult == ISC_R_SUCCESS)
6824 dns_rpz_cidr_addip(rbtdb->rpz_cidr, name);
6828 return (noderesult);
6829 if (noderesult == ISC_R_EXISTS) {
6831 * Add a node to the auxiliary NSEC tree for an old node
6832 * just now getting an NSEC record.
6834 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6835 return (noderesult);
6836 } else if (noderesult != ISC_R_SUCCESS) {
6837 return (noderesult);
6841 * Build the auxiliary tree for NSECs as we go.
6842 * This tree speeds searches for closest NSECs that would otherwise
6843 * need to examine many irrelevant nodes in large TLDs.
6845 * Add nodes to the auxiliary tree after corresponding nodes have
6846 * been added to the main tree.
6849 nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6850 if (nsecresult == ISC_R_SUCCESS) {
6851 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6852 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6853 return (noderesult);
6856 if (nsecresult == ISC_R_EXISTS) {
6858 isc_log_write(dns_lctx,
6859 DNS_LOGCATEGORY_DATABASE,
6860 DNS_LOGMODULE_CACHE,
6862 "addnode: NSEC node already exists");
6864 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6865 return (noderesult);
6868 nsecresult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6869 if (nsecresult != ISC_R_SUCCESS)
6870 isc_log_write(dns_lctx,
6871 DNS_LOGCATEGORY_DATABASE,
6872 DNS_LOGMODULE_CACHE,
6874 "loading_addrdataset: "
6875 "dns_rbt_deletenode: %s after "
6876 "dns_rbt_addnode(NSEC): %s",
6877 isc_result_totext(nsecresult),
6878 isc_result_totext(noderesult));
6879 return (noderesult);
6883 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6884 rbtdb_load_t *loadctx = arg;
6885 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6886 dns_rbtnode_t *node;
6887 isc_result_t result;
6888 isc_region_t region;
6889 rdatasetheader_t *newheader;
6892 * This routine does no node locking. See comments in
6893 * 'load' below for more information on loading and
6899 * SOA records are only allowed at top of zone.
6901 if (rdataset->type == dns_rdatatype_soa &&
6902 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6903 return (DNS_R_NOTZONETOP);
6905 if (rdataset->type != dns_rdatatype_nsec3 &&
6906 rdataset->covers != dns_rdatatype_nsec3)
6907 add_empty_wildcards(rbtdb, name);
6909 if (dns_name_iswildcard(name)) {
6911 * NS record owners cannot legally be wild cards.
6913 if (rdataset->type == dns_rdatatype_ns)
6914 return (DNS_R_INVALIDNS);
6916 * NSEC3 record owners cannot legally be wild cards.
6918 if (rdataset->type == dns_rdatatype_nsec3)
6919 return (DNS_R_INVALIDNSEC3);
6920 result = add_wildcard_magic(rbtdb, name);
6921 if (result != ISC_R_SUCCESS)
6926 if (rdataset->type == dns_rdatatype_nsec3 ||
6927 rdataset->covers == dns_rdatatype_nsec3) {
6928 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6929 if (result == ISC_R_SUCCESS)
6930 node->nsec = DNS_RBT_NSEC_NSEC3;
6931 } else if (rdataset->type == dns_rdatatype_nsec) {
6932 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6934 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6936 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6938 if (result == ISC_R_SUCCESS) {
6939 dns_name_t foundname;
6940 dns_name_init(&foundname, NULL);
6941 dns_rbt_namefromnode(node, &foundname);
6942 #ifdef DNS_RBT_USEHASH
6943 node->locknum = node->hashval % rbtdb->node_lock_count;
6945 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6946 rbtdb->node_lock_count;
6950 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6952 sizeof(rdatasetheader_t));
6953 if (result != ISC_R_SUCCESS)
6955 newheader = (rdatasetheader_t *)region.base;
6956 init_rdataset(rbtdb, newheader);
6957 set_ttl(rbtdb, newheader,
6958 rdataset->ttl + loadctx->now); /* XXX overflow check */
6959 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6961 newheader->attributes = 0;
6962 newheader->trust = rdataset->trust;
6963 newheader->serial = 1;
6964 newheader->noqname = NULL;
6965 newheader->closest = NULL;
6966 newheader->count = init_count++;
6967 newheader->additional_auth = NULL;
6968 newheader->additional_glue = NULL;
6969 newheader->last_used = 0;
6970 newheader->node = node;
6971 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6972 newheader->attributes |= RDATASET_ATTR_RESIGN;
6973 newheader->resign = rdataset->resign;
6975 newheader->resign = 0;
6977 result = add(rbtdb, node, rbtdb->current_version, newheader,
6978 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6979 if (result == ISC_R_SUCCESS &&
6980 delegating_type(rbtdb, node, rdataset->type))
6981 node->find_callback = 1;
6982 else if (result == DNS_R_UNCHANGED)
6983 result = ISC_R_SUCCESS;
6989 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6990 rbtdb_load_t *loadctx;
6993 rbtdb = (dns_rbtdb_t *)db;
6995 REQUIRE(VALID_RBTDB(rbtdb));
6997 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6998 if (loadctx == NULL)
6999 return (ISC_R_NOMEMORY);
7001 loadctx->rbtdb = rbtdb;
7002 if (IS_CACHE(rbtdb))
7003 isc_stdtime_get(&loadctx->now);
7007 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7009 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7011 rbtdb->attributes |= RBTDB_ATTR_LOADING;
7013 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7015 *addp = loading_addrdataset;
7018 return (ISC_R_SUCCESS);
7022 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
7023 rbtdb_load_t *loadctx;
7024 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7026 REQUIRE(VALID_RBTDB(rbtdb));
7027 REQUIRE(dbloadp != NULL);
7029 REQUIRE(loadctx->rbtdb == rbtdb);
7031 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7033 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7034 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7036 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7037 rbtdb->attributes |= RBTDB_ATTR_LOADED;
7039 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7042 * If there's a KEY rdataset at the zone origin containing a
7043 * zone key, we consider the zone secure.
7045 if (! IS_CACHE(rbtdb))
7046 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7050 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7052 return (ISC_R_SUCCESS);
7056 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7057 dns_masterformat_t masterformat) {
7060 rbtdb = (dns_rbtdb_t *)db;
7062 REQUIRE(VALID_RBTDB(rbtdb));
7065 return (dns_master_dump2(rbtdb->common.mctx, db, version,
7066 &dns_master_style_default,
7067 filename, masterformat));
7071 UNUSED(masterformat);
7073 return (ISC_R_NOTIMPLEMENTED);
7078 delete_callback(void *data, void *arg) {
7079 dns_rbtdb_t *rbtdb = arg;
7080 rdatasetheader_t *current, *next;
7081 unsigned int locknum;
7084 locknum = current->node->locknum;
7085 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7086 while (current != NULL) {
7087 next = current->next;
7088 free_rdataset(rbtdb, rbtdb->common.mctx, current);
7091 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7094 static isc_boolean_t
7095 issecure(dns_db_t *db) {
7097 isc_boolean_t secure;
7099 rbtdb = (dns_rbtdb_t *)db;
7101 REQUIRE(VALID_RBTDB(rbtdb));
7103 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7104 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7105 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7110 static isc_boolean_t
7111 isdnssec(dns_db_t *db) {
7113 isc_boolean_t dnssec;
7115 rbtdb = (dns_rbtdb_t *)db;
7117 REQUIRE(VALID_RBTDB(rbtdb));
7119 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7120 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7121 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7127 nodecount(dns_db_t *db) {
7131 rbtdb = (dns_rbtdb_t *)db;
7133 REQUIRE(VALID_RBTDB(rbtdb));
7135 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7136 count = dns_rbt_nodecount(rbtdb->tree);
7137 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7143 settask(dns_db_t *db, isc_task_t *task) {
7146 rbtdb = (dns_rbtdb_t *)db;
7148 REQUIRE(VALID_RBTDB(rbtdb));
7150 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7151 if (rbtdb->task != NULL)
7152 isc_task_detach(&rbtdb->task);
7154 isc_task_attach(task, &rbtdb->task);
7155 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7158 static isc_boolean_t
7159 ispersistent(dns_db_t *db) {
7165 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7166 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7167 dns_rbtnode_t *onode;
7168 isc_result_t result = ISC_R_SUCCESS;
7170 REQUIRE(VALID_RBTDB(rbtdb));
7171 REQUIRE(nodep != NULL && *nodep == NULL);
7173 /* Note that the access to origin_node doesn't require a DB lock */
7174 onode = (dns_rbtnode_t *)rbtdb->origin_node;
7175 if (onode != NULL) {
7176 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7177 new_reference(rbtdb, onode);
7178 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7180 *nodep = rbtdb->origin_node;
7182 INSIST(IS_CACHE(rbtdb));
7183 result = ISC_R_NOTFOUND;
7190 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7191 isc_uint8_t *flags, isc_uint16_t *iterations,
7192 unsigned char *salt, size_t *salt_length)
7195 isc_result_t result = ISC_R_NOTFOUND;
7196 rbtdb_version_t *rbtversion = version;
7198 rbtdb = (dns_rbtdb_t *)db;
7200 REQUIRE(VALID_RBTDB(rbtdb));
7202 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7204 if (rbtversion == NULL)
7205 rbtversion = rbtdb->current_version;
7207 if (rbtversion->havensec3) {
7209 *hash = rbtversion->hash;
7210 if (salt != NULL && salt_length != NULL) {
7211 REQUIRE(*salt_length >= rbtversion->salt_length);
7212 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
7214 if (salt_length != NULL)
7215 *salt_length = rbtversion->salt_length;
7216 if (iterations != NULL)
7217 *iterations = rbtversion->iterations;
7219 *flags = rbtversion->flags;
7220 result = ISC_R_SUCCESS;
7222 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7228 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7229 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7230 isc_stdtime_t oldresign;
7231 isc_result_t result = ISC_R_SUCCESS;
7232 rdatasetheader_t *header;
7234 REQUIRE(VALID_RBTDB(rbtdb));
7235 REQUIRE(!IS_CACHE(rbtdb));
7236 REQUIRE(rdataset != NULL);
7238 header = rdataset->private3;
7241 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7242 isc_rwlocktype_write);
7244 oldresign = header->resign;
7245 header->resign = resign;
7246 if (header->heap_index != 0) {
7247 INSIST(RESIGN(header));
7249 isc_heap_delete(rbtdb->heaps[header->node->locknum],
7250 header->heap_index);
7251 header->heap_index = 0;
7252 } else if (resign < oldresign)
7253 isc_heap_increased(rbtdb->heaps[header->node->locknum],
7254 header->heap_index);
7255 else if (resign > oldresign)
7256 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7257 header->heap_index);
7258 } else if (resign && header->heap_index == 0) {
7259 header->attributes |= RDATASET_ATTR_RESIGN;
7260 result = resign_insert(rbtdb, header->node->locknum, header);
7262 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7263 isc_rwlocktype_write);
7268 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7269 dns_name_t *foundname)
7271 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7272 rdatasetheader_t *header = NULL, *this;
7274 isc_result_t result = ISC_R_NOTFOUND;
7275 unsigned int locknum;
7277 REQUIRE(VALID_RBTDB(rbtdb));
7279 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7281 for (i = 0; i < rbtdb->node_lock_count; i++) {
7282 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7283 this = isc_heap_element(rbtdb->heaps[i], 1);
7285 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7286 isc_rwlocktype_read);
7291 else if (isc_serial_lt(this->resign, header->resign)) {
7292 locknum = header->node->locknum;
7293 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7294 isc_rwlocktype_read);
7297 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7298 isc_rwlocktype_read);
7304 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7306 if (foundname != NULL)
7307 dns_rbt_fullnamefromnode(header->node, foundname);
7309 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7310 isc_rwlocktype_read);
7312 result = ISC_R_SUCCESS;
7315 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7321 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7323 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7324 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7325 dns_rbtnode_t *node;
7326 rdatasetheader_t *header;
7328 REQUIRE(VALID_RBTDB(rbtdb));
7329 REQUIRE(rdataset != NULL);
7330 REQUIRE(rbtdb->future_version == rbtversion);
7331 REQUIRE(rbtversion->writer);
7333 node = rdataset->private2;
7334 header = rdataset->private3;
7337 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7338 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7339 isc_rwlocktype_write);
7341 * Delete from heap and save to re-signed list so that it can
7342 * be restored if we backout of this change.
7344 new_reference(rbtdb, node);
7345 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7346 header->heap_index = 0;
7347 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7349 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7350 isc_rwlocktype_write);
7351 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7354 static dns_stats_t *
7355 getrrsetstats(dns_db_t *db) {
7356 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7358 REQUIRE(VALID_RBTDB(rbtdb));
7359 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7361 return (rbtdb->rrsetstats);
7364 static dns_dbmethods_t zone_methods = {
7410 static dns_dbmethods_t cache_methods = {
7452 #ifdef DNS_RBTDB_VERSION64
7457 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7458 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7459 void *driverarg, dns_db_t **dbp)
7462 isc_result_t result;
7465 isc_boolean_t (*sooner)(void *, void *);
7467 /* Keep the compiler happy. */
7472 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7474 return (ISC_R_NOMEMORY);
7476 memset(rbtdb, '\0', sizeof(*rbtdb));
7477 dns_name_init(&rbtdb->common.origin, NULL);
7478 rbtdb->common.attributes = 0;
7479 if (type == dns_dbtype_cache) {
7480 rbtdb->common.methods = &cache_methods;
7481 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7482 } else if (type == dns_dbtype_stub) {
7483 rbtdb->common.methods = &zone_methods;
7484 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7486 rbtdb->common.methods = &zone_methods;
7487 rbtdb->common.rdclass = rdclass;
7488 rbtdb->common.mctx = NULL;
7490 result = RBTDB_INITLOCK(&rbtdb->lock);
7491 if (result != ISC_R_SUCCESS)
7494 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7495 if (result != ISC_R_SUCCESS)
7499 * Initialize node_lock_count in a generic way to support future
7500 * extension which allows the user to specify this value on creation.
7501 * Note that when specified for a cache DB it must be larger than 1
7502 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7504 if (rbtdb->node_lock_count == 0) {
7505 if (IS_CACHE(rbtdb))
7506 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7508 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7509 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7510 result = ISC_R_RANGE;
7511 goto cleanup_tree_lock;
7513 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7514 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7515 sizeof(rbtdb_nodelock_t));
7516 if (rbtdb->node_locks == NULL) {
7517 result = ISC_R_NOMEMORY;
7518 goto cleanup_tree_lock;
7521 rbtdb->rrsetstats = NULL;
7522 if (IS_CACHE(rbtdb)) {
7523 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7524 if (result != ISC_R_SUCCESS)
7525 goto cleanup_node_locks;
7526 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7527 sizeof(rdatasetheaderlist_t));
7528 if (rbtdb->rdatasets == NULL) {
7529 result = ISC_R_NOMEMORY;
7530 goto cleanup_rrsetstats;
7532 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7533 ISC_LIST_INIT(rbtdb->rdatasets[i]);
7535 rbtdb->rdatasets = NULL;
7540 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
7541 sizeof(isc_heap_t *));
7542 if (rbtdb->heaps == NULL) {
7543 result = ISC_R_NOMEMORY;
7544 goto cleanup_rdatasets;
7546 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7547 rbtdb->heaps[i] = NULL;
7548 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7549 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7550 result = isc_heap_create(mctx, sooner, set_index, 0,
7552 if (result != ISC_R_SUCCESS)
7557 * Create deadnode lists.
7559 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7560 sizeof(rbtnodelist_t));
7561 if (rbtdb->deadnodes == NULL) {
7562 result = ISC_R_NOMEMORY;
7565 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7566 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7568 rbtdb->active = rbtdb->node_lock_count;
7570 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7571 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7572 if (result == ISC_R_SUCCESS) {
7573 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7574 if (result != ISC_R_SUCCESS)
7575 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7577 if (result != ISC_R_SUCCESS) {
7579 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7580 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7581 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7583 goto cleanup_deadnodes;
7585 rbtdb->node_locks[i].exiting = ISC_FALSE;
7589 * Attach to the mctx. The database will persist so long as there
7590 * are references to it, and attaching to the mctx ensures that our
7591 * mctx won't disappear out from under us.
7593 isc_mem_attach(mctx, &rbtdb->common.mctx);
7596 * Must be initialized before free_rbtdb() is called.
7598 isc_ondestroy_init(&rbtdb->common.ondest);
7601 * Make a copy of the origin name.
7603 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7604 if (result != ISC_R_SUCCESS) {
7605 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7610 * Make the Red-Black Trees.
7612 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7613 if (result != ISC_R_SUCCESS) {
7614 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7618 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7619 if (result != ISC_R_SUCCESS) {
7620 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7624 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7625 if (result != ISC_R_SUCCESS) {
7626 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7632 * Get ready for response policy IP address searching if at least one
7633 * zone has been configured as a response policy zone and this
7634 * is not a cache zone.
7635 * It would be better to know that this database is for a policy
7636 * zone named for a view, but that would require knowledge from
7637 * above such as an argv[] set from data in the zone.
7639 if (type == dns_dbtype_zone && !dns_name_equal(origin, dns_rootname)) {
7640 result = dns_rpz_new_cidr(mctx, origin, &rbtdb->rpz_cidr);
7641 if (result != ISC_R_SUCCESS) {
7642 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7649 * In order to set the node callback bit correctly in zone databases,
7650 * we need to know if the node has the origin name of the zone.
7651 * In loading_addrdataset() we could simply compare the new name
7652 * to the origin name, but this is expensive. Also, we don't know the
7653 * node name in addrdataset(), so we need another way of knowing the
7656 * We now explicitly create a node for the zone's origin, and then
7657 * we simply remember the node's address. This is safe, because
7658 * the top-of-zone node can never be deleted, nor can its address
7661 if (!IS_CACHE(rbtdb)) {
7662 dns_rbtnode_t *nsec3node;
7664 rbtdb->origin_node = NULL;
7665 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7666 &rbtdb->origin_node);
7667 if (result != ISC_R_SUCCESS) {
7668 INSIST(result != ISC_R_EXISTS);
7669 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7672 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7674 * We need to give the origin node the right locknum.
7676 dns_name_init(&name, NULL);
7677 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7678 #ifdef DNS_RBT_USEHASH
7679 rbtdb->origin_node->locknum =
7680 rbtdb->origin_node->hashval %
7681 rbtdb->node_lock_count;
7683 rbtdb->origin_node->locknum =
7684 dns_name_hash(&name, ISC_TRUE) %
7685 rbtdb->node_lock_count;
7688 * Add an apex node to the NSEC3 tree so that NSEC3 searches
7689 * return partial matches when there is only a single NSEC3
7690 * record in the tree.
7693 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7695 if (result != ISC_R_SUCCESS) {
7696 INSIST(result != ISC_R_EXISTS);
7697 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7700 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
7702 * We need to give the nsec3 origin node the right locknum.
7704 dns_name_init(&name, NULL);
7705 dns_rbt_namefromnode(nsec3node, &name);
7706 #ifdef DNS_RBT_USEHASH
7707 nsec3node->locknum = nsec3node->hashval %
7708 rbtdb->node_lock_count;
7710 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7711 rbtdb->node_lock_count;
7716 * Misc. Initialization.
7718 result = isc_refcount_init(&rbtdb->references, 1);
7719 if (result != ISC_R_SUCCESS) {
7720 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7723 rbtdb->attributes = 0;
7727 * Version Initialization.
7729 rbtdb->current_serial = 1;
7730 rbtdb->least_serial = 1;
7731 rbtdb->next_serial = 2;
7732 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7733 if (rbtdb->current_version == NULL) {
7734 isc_refcount_decrement(&rbtdb->references, NULL);
7735 isc_refcount_destroy(&rbtdb->references);
7736 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7737 return (ISC_R_NOMEMORY);
7739 rbtdb->current_version->secure = dns_db_insecure;
7740 rbtdb->current_version->havensec3 = ISC_FALSE;
7741 rbtdb->current_version->flags = 0;
7742 rbtdb->current_version->iterations = 0;
7743 rbtdb->current_version->hash = 0;
7744 rbtdb->current_version->salt_length = 0;
7745 memset(rbtdb->current_version->salt, 0,
7746 sizeof(rbtdb->current_version->salt));
7747 rbtdb->future_version = NULL;
7748 ISC_LIST_INIT(rbtdb->open_versions);
7750 * Keep the current version in the open list so that list operation
7751 * won't happen in normal lookup operations.
7753 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7755 rbtdb->common.magic = DNS_DB_MAGIC;
7756 rbtdb->common.impmagic = RBTDB_MAGIC;
7758 *dbp = (dns_db_t *)rbtdb;
7760 return (ISC_R_SUCCESS);
7763 isc_mem_put(mctx, rbtdb->deadnodes,
7764 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7767 if (rbtdb->heaps != NULL) {
7768 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7769 if (rbtdb->heaps[i] != NULL)
7770 isc_heap_destroy(&rbtdb->heaps[i]);
7771 isc_mem_put(mctx, rbtdb->heaps,
7772 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7776 if (rbtdb->rdatasets != NULL)
7777 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7778 sizeof(rdatasetheaderlist_t));
7780 if (rbtdb->rrsetstats != NULL)
7781 dns_stats_detach(&rbtdb->rrsetstats);
7784 isc_mem_put(mctx, rbtdb->node_locks,
7785 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7788 isc_rwlock_destroy(&rbtdb->tree_lock);
7791 RBTDB_DESTROYLOCK(&rbtdb->lock);
7794 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7800 * Slabbed Rdataset Methods
/*
 * Drop this rdataset's hold on its database node.  private1 is used as
 * the database and private2 as the node handed to detachnode().
 */
7804 rdataset_disassociate(dns_rdataset_t *rdataset) {
7805 dns_db_t *db = rdataset->private1;
7806 dns_dbnode_t *node = rdataset->private2;
/* A local copy of the node pointer is passed, so private2 is not modified. */
7808 detachnode(db, &node);
/*
 * Move the rdataset cursor to the start of the slab held in private3.
 * Returns ISC_R_NOMORE (with the cursor cleared) or ISC_R_SUCCESS.
 * NOTE(review): the condition guarding the ISC_R_NOMORE return is not
 * visible in this excerpt; presumably it is the empty-slab case.
 */
7812 rdataset_first(dns_rdataset_t *rdataset) {
7813 unsigned char *raw = rdataset->private3; /* RDATASLAB */
/* The slab begins with a two-byte count, most significant byte first. */
7816 count = raw[0] * 256 + raw[1];
7818 rdataset->private5 = NULL;
7819 return (ISC_R_NOMORE);
7822 #if DNS_RDATASET_FIXED
7823 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
/* Step over the 2-byte count and 4 bytes per counted record. */
7824 raw += 2 + (4 * count);
7830 * The privateuint4 field is the number of rdata beyond the
7831 * cursor position, so we decrement the total count by one
7832 * before storing it.
7834 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7835 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7836 * to the first entry in the offset table.
7839 rdataset->privateuint4 = count;
7840 rdataset->private5 = raw;
7842 return (ISC_R_SUCCESS);
/*
 * Advance the rdataset cursor (private5) by one entry, using
 * privateuint4 as the count of entries remaining beyond the cursor.
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS.
 * NOTE(review): the test and decrement of 'count' are not visible in
 * this excerpt; only the load and store of privateuint4 are.
 */
7846 rdataset_next(dns_rdataset_t *rdataset) {
7848 unsigned int length;
7849 unsigned char *raw; /* RDATASLAB */
7851 count = rdataset->privateuint4;
7853 return (ISC_R_NOMORE);
7855 rdataset->privateuint4 = count;
7858 * Skip forward one record (length + 4) or one offset (4).
7860 raw = rdataset->private5;
7861 #if DNS_RDATASET_FIXED
7862 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
/* Record length is a two-byte value, most significant byte first. */
7864 length = raw[0] * 256 + raw[1];
7866 #if DNS_RDATASET_FIXED
7868 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7870 rdataset->private5 = raw + 2; /* length(2) */
7873 return (ISC_R_SUCCESS);
/*
 * Fill 'rdata' from the entry under the cursor (private5), using the
 * rdataset's class and type.  The cursor must be non-NULL.  For RRSIG
 * rdatasets, DNS_RDATASLAB_OFFLINE in the byte at 'raw' is reported to
 * the caller as DNS_RDATA_OFFLINE in rdata->flags.
 */
7877 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7878 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7879 #if DNS_RDATASET_FIXED
7880 unsigned int offset;
7882 unsigned int length;
7884 unsigned int flags = 0;
7886 REQUIRE(raw != NULL);
7889 * Find the start of the record if not already in private5
7890 * then skip the length and order fields.
7892 #if DNS_RDATASET_FIXED
7893 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
/* In load-order mode the cursor holds a 4-byte big-endian offset. */
7894 offset = (raw[0] << 24) + (raw[1] << 16) +
7895 (raw[2] << 8) + raw[3];
7896 raw = rdataset->private3;
7900 length = raw[0] * 256 + raw[1];
7901 #if DNS_RDATASET_FIXED
7906 if (rdataset->type == dns_rdatatype_rrsig) {
7907 if (*raw & DNS_RDATASLAB_OFFLINE)
7908 flags |= DNS_RDATA_OFFLINE;
7914 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
/* OR the flag in after dns_rdata_fromregion() has filled in 'rdata'. */
7915 rdata->flags |= flags;
/*
 * Make 'target' a clone of 'source'.  A further node reference is taken
 * through attachnode() and the target's cursor state is cleared.
 * NOTE(review): the statement that copies 'source' into 'target' is not
 * visible in this excerpt.
 */
7919 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7920 dns_db_t *db = source->private1;
7921 dns_dbnode_t *node = source->private2;
7922 dns_dbnode_t *cloned_node = NULL;
7924 attachnode(db, node, &cloned_node);
7928 * Reset iterator state.
7930 target->privateuint4 = 0;
7931 target->private5 = NULL;
/*
 * Read the entry count stored in the first two bytes (most significant
 * byte first) of the slab that private3 points to.
 */
7935 rdataset_count(dns_rdataset_t *rdataset) {
7936 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7939 count = raw[0] * 256 + raw[1];
/*
 * Bind 'nsec' and 'nsecsig' to the data recorded in the noqname
 * structure that rdataset->private6 points to, and clone the recorded
 * owner name into 'name'.
 *
 * 'nsec' takes its type from noqname->type and its slab from
 * noqname->neg.  'nsecsig' is an RRSIG rdataset covering that type,
 * with its slab taken from noqname->negsig.  Both inherit the ttl,
 * trust, database and node of 'rdataset'.  attachnode() is called once
 * per bound rdataset, so each holds its own node reference.
 *
 * Returns ISC_R_SUCCESS.
 */
7945 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7946 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7948 dns_db_t *db = rdataset->private1;
7949 dns_dbnode_t *node = rdataset->private2;
7950 dns_dbnode_t *cloned_node;
7951 struct noqname *noqname = rdataset->private6;
7954 attachnode(db, node, &cloned_node);
7955 nsec->methods = &rdataset_methods;
7956 nsec->rdclass = db->rdclass;
7957 nsec->type = noqname->type;
7959 nsec->ttl = rdataset->ttl;
7960 nsec->trust = rdataset->trust;
7961 nsec->private1 = rdataset->private1;
7962 nsec->private2 = rdataset->private2;
7963 nsec->private3 = noqname->neg;
7964 nsec->privateuint4 = 0;
7965 nsec->private5 = NULL;
7966 nsec->private6 = NULL;
7967 nsec->private7 = NULL;
7970 attachnode(db, node, &cloned_node);
7971 nsecsig->methods = &rdataset_methods;
7972 nsecsig->rdclass = db->rdclass;
7973 nsecsig->type = dns_rdatatype_rrsig;
7974 nsecsig->covers = noqname->type;
7975 nsecsig->ttl = rdataset->ttl;
7976 nsecsig->trust = rdataset->trust;
7977 nsecsig->private1 = rdataset->private1;
7978 nsecsig->private2 = rdataset->private2;
7979 nsecsig->private3 = noqname->negsig;
7980 nsecsig->privateuint4 = 0;
7981 nsecsig->private5 = NULL;
/*
 * Clear the noqname/closest pointers on 'nsecsig' itself.  These two
 * assignments previously targeted 'nsec' a second time, leaving the
 * fields of 'nsecsig' untouched.
 */
7982 nsecsig->private6 = NULL;
7983 nsecsig->private7 = NULL;
7985 dns_name_clone(&noqname->name, name);
7987 return (ISC_R_SUCCESS);
/*
 * Bind 'nsec' and 'nsecsig' to the data recorded in the structure that
 * rdataset->private7 points to, and clone the recorded owner name into
 * 'name'.
 *
 * 'nsec' takes its type from closest->type and its slab from
 * closest->neg.  'nsecsig' is an RRSIG rdataset covering that type,
 * with its slab taken from closest->negsig.  Both inherit the ttl,
 * trust, database and node of 'rdataset'.  attachnode() is called once
 * per bound rdataset, so each holds its own node reference.
 *
 * Returns ISC_R_SUCCESS.
 */
7991 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7992 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7994 dns_db_t *db = rdataset->private1;
7995 dns_dbnode_t *node = rdataset->private2;
7996 dns_dbnode_t *cloned_node;
7997 struct noqname *closest = rdataset->private7;
8000 attachnode(db, node, &cloned_node);
8001 nsec->methods = &rdataset_methods;
8002 nsec->rdclass = db->rdclass;
8003 nsec->type = closest->type;
8005 nsec->ttl = rdataset->ttl;
8006 nsec->trust = rdataset->trust;
8007 nsec->private1 = rdataset->private1;
8008 nsec->private2 = rdataset->private2;
8009 nsec->private3 = closest->neg;
8010 nsec->privateuint4 = 0;
8011 nsec->private5 = NULL;
8012 nsec->private6 = NULL;
8013 nsec->private7 = NULL;
8016 attachnode(db, node, &cloned_node);
8017 nsecsig->methods = &rdataset_methods;
8018 nsecsig->rdclass = db->rdclass;
8019 nsecsig->type = dns_rdatatype_rrsig;
8020 nsecsig->covers = closest->type;
8021 nsecsig->ttl = rdataset->ttl;
8022 nsecsig->trust = rdataset->trust;
8023 nsecsig->private1 = rdataset->private1;
8024 nsecsig->private2 = rdataset->private2;
8025 nsecsig->private3 = closest->negsig;
8026 nsecsig->privateuint4 = 0;
8027 nsecsig->private5 = NULL;
/*
 * Clear the noqname/closest pointers on 'nsecsig' itself.  These two
 * assignments previously targeted 'nsec' a second time, leaving the
 * fields of 'nsecsig' untouched.
 */
8028 nsecsig->private6 = NULL;
8029 nsecsig->private7 = NULL;
8031 dns_name_clone(&closest->name, name);
8033 return (ISC_R_SUCCESS);
/*
 * Set the trust level on both the caller's rdataset and the stored
 * header, under the write lock of the node's lock bucket.
 * NOTE(review): private3 is treated elsewhere in this file as the raw
 * slab that follows the header; a short statement adjusting 'header'
 * may have been dropped from this excerpt - confirm against the full
 * source.
 */
8037 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8038 dns_rbtdb_t *rbtdb = rdataset->private1;
8039 dns_rbtnode_t *rbtnode = rdataset->private2;
8040 rdatasetheader_t *header = rdataset->private3;
8043 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8044 isc_rwlocktype_write);
8045 header->trust = rdataset->trust = trust;
8046 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8047 isc_rwlocktype_write);
/*
 * Expire the stored header behind this rdataset by calling
 * expire_header() under the write lock of the node's lock bucket.
 * NOTE(review): as in rdataset_settrust(), a short statement adjusting
 * 'header' may have been dropped from this excerpt - confirm.
 */
8051 rdataset_expire(dns_rdataset_t *rdataset) {
8052 dns_rbtdb_t *rbtdb = rdataset->private1;
8053 dns_rbtnode_t *rbtnode = rdataset->private2;
8054 rdatasetheader_t *header = rdataset->private3;
8057 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8058 isc_rwlocktype_write);
8059 expire_header(rbtdb, header, ISC_FALSE);
8060 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8061 isc_rwlocktype_write);
8065 * Rdataset Iterator Methods
/*
 * Tear down an rdataset iterator: close its version if one is held,
 * detach its node, and return the iterator's memory to the database's
 * memory context.
 */
8069 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8070 rbtdb_rdatasetiter_t *rbtiterator;
8072 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
/* The version is only closed when the iterator actually holds one. */
8074 if (rbtiterator->common.version != NULL)
8075 closeversion(rbtiterator->common.db,
8076 &rbtiterator->common.version, ISC_FALSE);
8077 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
8078 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8079 sizeof(*rbtiterator));
/*
 * Position the iterator on the first usable rdataset header of its
 * node.  The node's header list is walked via 'next'; within each entry
 * the 'down' chain is followed looking for a header whose serial does
 * not exceed the iterator's serial and which is not marked IGNORE.
 * The result is stored in rbtiterator->current.
 *
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS.
 * NOTE(review): the branch lines that choose between the two returns,
 * and the non-cache assignments of 'now' / cache assignment of
 * 'serial', are not visible in this excerpt.
 */
8085 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8086 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8087 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8088 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8089 rbtdb_version_t *rbtversion = rbtiterator->common.version;
8090 rdatasetheader_t *header, *top_next;
8091 rbtdb_serial_t serial;
8094 if (IS_CACHE(rbtdb)) {
8096 now = rbtiterator->common.now;
8098 serial = rbtversion->serial;
/* The header list is only read, so a read lock on the bucket is taken. */
8102 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8103 isc_rwlocktype_read);
8105 for (header = rbtnode->data; header != NULL; header = top_next) {
8106 top_next = header->next;
8108 if (header->serial <= serial && !IGNORE(header)) {
8110 * Is this a "this rdataset doesn't exist"
8111 * record? Or is it too old in the cache?
8113 * Note: unlike everywhere else, we
8114 * check for now > header->rdh_ttl instead
8115 * of now >= header->rdh_ttl. This allows
8116 * ANY and RRSIG queries for 0 TTL
8117 * rdatasets to work.
8119 if (NONEXISTENT(header) ||
8120 (now != 0 && now > header->rdh_ttl))
8124 header = header->down;
8125 } while (header != NULL);
8130 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8131 isc_rwlocktype_read);
8133 rbtiterator->current = header;
8136 return (ISC_R_NOMORE);
8138 return (ISC_R_SUCCESS);
/*
 * Advance the iterator from rbtiterator->current to the next usable
 * rdataset header on the node.  Entries whose type equals the current
 * header's type, or the computed 'negtype' counterpart, are passed
 * over.  For other entries the 'down' chain is searched using the same
 * serial / nonexistent / TTL tests shown below.  The result is stored
 * in rbtiterator->current.
 *
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS.
 * NOTE(review): several short lines (the NULL test on 'header', else
 * branches, loop exits) are not visible in this excerpt.
 */
8142 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8143 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8144 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8145 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8146 rbtdb_version_t *rbtversion = rbtiterator->common.version;
8147 rdatasetheader_t *header, *top_next;
8148 rbtdb_serial_t serial;
8150 rbtdb_rdatatype_t type, negtype;
8151 dns_rdatatype_t rdtype, covers;
8153 header = rbtiterator->current;
8155 return (ISC_R_NOMORE);
8157 if (IS_CACHE(rbtdb)) {
8159 now = rbtiterator->common.now;
8161 serial = rbtversion->serial;
8165 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8166 isc_rwlocktype_read);
/*
 * Work out the type value that represents the positive/negative
 * counterpart of the current header, so that it is skipped as well.
 */
8168 type = header->type;
8169 rdtype = RBTDB_RDATATYPE_BASE(header->type);
8170 if (NEGATIVE(header)) {
8171 covers = RBTDB_RDATATYPE_EXT(header->type);
8172 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8174 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
8175 for (header = header->next; header != NULL; header = top_next) {
8176 top_next = header->next;
8178 * If not walking back up the down list.
8180 if (header->type != type && header->type != negtype) {
8182 if (header->serial <= serial &&
8185 * Is this a "this rdataset doesn't
8188 * Note: unlike everywhere else, we
8189 * check for now > header->ttl instead
8190 * of now >= header->ttl. This allows
8191 * ANY and RRSIG queries for 0 TTL
8192 * rdatasets to work.
8194 if ((header->attributes &
8195 RDATASET_ATTR_NONEXISTENT) != 0 ||
8196 (now != 0 && now > header->rdh_ttl))
8200 header = header->down;
8201 } while (header != NULL);
8207 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8208 isc_rwlocktype_read);
8210 rbtiterator->current = header;
8213 return (ISC_R_NOMORE);
8215 return (ISC_R_SUCCESS);
/*
 * Bind 'rdataset' to the header the iterator is currently positioned
 * on.  The iterator must have a current header.  bind_rdataset() is
 * called under the read lock of the node's lock bucket.
 */
8219 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8220 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8221 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8222 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8223 rdatasetheader_t *header;
8225 header = rbtiterator->current;
8226 REQUIRE(header != NULL);
8228 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8229 isc_rwlocktype_read);
8231 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8234 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8235 isc_rwlocktype_read);
8240 * Database Iterator Methods
/*
 * Take a reference on the iterator's current node by calling
 * reactivate_node().  The iterator must hold the tree lock in some
 * mode when this runs.
 * NOTE(review): an early-return test on 'node' appears to have been
 * dropped from this excerpt - confirm.
 */
8244 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8245 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8246 dns_rbtnode_t *node = rbtdbiter->node;
8251 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8252 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
/*
 * Release the iterator's reference on its current node and clear
 * rbtdbiter->node.  decrement_reference() is called under the read
 * lock of the node's lock bucket and is told the iterator's current
 * tree lock mode.
 * NOTE(review): an early-return test on 'node' appears to have been
 * dropped from this excerpt - confirm.
 */
8256 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8257 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8258 dns_rbtnode_t *node = rbtdbiter->node;
8264 lock = &rbtdb->node_locks[node->locknum].lock;
8265 NODE_LOCK(lock, isc_rwlocktype_read);
8266 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8267 rbtdbiter->tree_locked, ISC_FALSE);
8268 NODE_UNLOCK(lock, isc_rwlocktype_read);
8270 rbtdbiter->node = NULL;
/*
 * Release the references held on every node queued in
 * rbtdbiter->deletions[] and reset the queue length to zero.  Does
 * nothing when the queue is empty.
 *
 * The tree lock is held in write mode while the queue is processed.  A
 * read lock held on entry is dropped first and re-acquired afterwards;
 * the iterator's tree_locked field is kept in step throughout.
 */
8274 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8275 dns_rbtnode_t *node;
8276 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8277 isc_boolean_t was_read_locked = ISC_FALSE;
8281 if (rbtdbiter->delete != 0) {
8283 * Note that "%d node of %d in tree" can report things like
8284 * "flush_deletions: 59 nodes of 41 in tree". This means
8285 * That some nodes appear on the deletions list more than
8286 * once. Only the last occurence will actually be deleted.
8288 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8289 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8290 "flush_deletions: %d nodes of %d in tree",
8292 dns_rbt_nodecount(rbtdb->tree));
/* The read lock is released before the write lock is requested. */
8294 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8295 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8296 was_read_locked = ISC_TRUE;
8298 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8299 rbtdbiter->tree_locked = isc_rwlocktype_write;
8301 for (i = 0; i < rbtdbiter->delete; i++) {
8302 node = rbtdbiter->deletions[i];
8303 lock = &rbtdb->node_locks[node->locknum].lock;
8305 NODE_LOCK(lock, isc_rwlocktype_read);
8306 decrement_reference(rbtdb, node, 0,
8307 isc_rwlocktype_read,
8308 rbtdbiter->tree_locked, ISC_FALSE);
8309 NODE_UNLOCK(lock, isc_rwlocktype_read);
8312 rbtdbiter->delete = 0;
/* Put the tree lock back into the state the caller had on entry. */
8314 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8315 if (was_read_locked) {
8316 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8317 rbtdbiter->tree_locked = isc_rwlocktype_read;
8320 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * Resume a paused iterator: take the tree lock in read mode, record
 * that in tree_locked, and clear the paused flag.  The iterator must be
 * paused and must not currently hold the tree lock.
 */
8326 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8327 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8329 REQUIRE(rbtdbiter->paused);
8330 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8332 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8333 rbtdbiter->tree_locked = isc_rwlocktype_read;
8335 rbtdbiter->paused = ISC_FALSE;
/*
 * Destroy a database iterator: release a held read lock on the tree,
 * drop the current node reference, flush queued deletions, detach from
 * the database, reset both node chains and free the iterator.
 */
8339 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8340 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8341 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8342 dns_db_t *db = NULL;
8344 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8345 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8346 rbtdbiter->tree_locked = isc_rwlocktype_none;
8348 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8350 dereference_iter_node(rbtdbiter);
8352 flush_deletions(rbtdbiter);
/*
 * A temporary extra attachment ('db') is taken before the iterator's
 * own attachment is released, so db->mctx is still reachable for the
 * isc_mem_put() below.
 * NOTE(review): the release of 'db' itself is not visible in this
 * excerpt.
 */
8354 dns_db_attach(rbtdbiter->common.db, &db);
8355 dns_db_detach(&rbtdbiter->common.db);
8357 dns_rbtnodechain_reset(&rbtdbiter->chain);
8358 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8359 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * Position the iterator on the first node.  With nsec3only set, the
 * NSEC3 tree is used.  Otherwise the main tree is tried first, falling
 * back to the NSEC3 tree when the main tree reports ISC_R_NOTFOUND and
 * nonsec3 is not set.
 *
 * A previously stored result other than ISC_R_SUCCESS or ISC_R_NOMORE
 * is returned unchanged.  An ISC_R_NOTFOUND outcome is converted to
 * ISC_R_NOMORE.  The outcome is stored in rbtdbiter->result.
 */
8366 dbiterator_first(dns_dbiterator_t *iterator) {
8367 isc_result_t result;
8368 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8369 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8370 dns_name_t *name, *origin;
8372 if (rbtdbiter->result != ISC_R_SUCCESS &&
8373 rbtdbiter->result != ISC_R_NOMORE)
8374 return (rbtdbiter->result);
8376 if (rbtdbiter->paused)
8377 resume_iteration(rbtdbiter);
/* Drop the reference on whatever node the iterator was positioned on. */
8379 dereference_iter_node(rbtdbiter);
8381 name = dns_fixedname_name(&rbtdbiter->name);
8382 origin = dns_fixedname_name(&rbtdbiter->origin);
8383 dns_rbtnodechain_reset(&rbtdbiter->chain);
8384 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8386 if (rbtdbiter->nsec3only) {
8387 rbtdbiter->current = &rbtdbiter->nsec3chain;
8388 result = dns_rbtnodechain_first(rbtdbiter->current,
8389 rbtdb->nsec3, name, origin);
8391 rbtdbiter->current = &rbtdbiter->chain;
8392 result = dns_rbtnodechain_first(rbtdbiter->current,
8393 rbtdb->tree, name, origin);
8394 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8395 rbtdbiter->current = &rbtdbiter->nsec3chain;
8396 result = dns_rbtnodechain_first(rbtdbiter->current,
8401 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8402 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8403 NULL, &rbtdbiter->node);
8404 if (result == ISC_R_SUCCESS) {
8405 rbtdbiter->new_origin = ISC_TRUE;
8406 reference_iter_node(rbtdbiter);
8409 INSIST(result == ISC_R_NOTFOUND);
8410 result = ISC_R_NOMORE; /* The tree is empty. */
8413 rbtdbiter->result = result;
/*
 * Position the iterator on the last node.  The NSEC3 tree is consulted
 * only when nsec3only is set and nonsec3 is not.  The main tree is
 * consulted when nsec3only is not set and no node has been found yet.
 *
 * A previously stored result other than ISC_R_SUCCESS or ISC_R_NOMORE
 * is returned unchanged.  An ISC_R_NOTFOUND outcome is converted to
 * ISC_R_NOMORE.  The outcome is stored in rbtdbiter->result.
 */
8419 dbiterator_last(dns_dbiterator_t *iterator) {
8420 isc_result_t result;
8421 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8422 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8423 dns_name_t *name, *origin;
8425 if (rbtdbiter->result != ISC_R_SUCCESS &&
8426 rbtdbiter->result != ISC_R_NOMORE)
8427 return (rbtdbiter->result);
8429 if (rbtdbiter->paused)
8430 resume_iteration(rbtdbiter);
8432 dereference_iter_node(rbtdbiter);
8434 name = dns_fixedname_name(&rbtdbiter->name);
8435 origin = dns_fixedname_name(&rbtdbiter->origin);
8436 dns_rbtnodechain_reset(&rbtdbiter->chain);
8437 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/* Start from "not found" so the main-tree lookup below can act as a fallback. */
8439 result = ISC_R_NOTFOUND;
8440 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8441 rbtdbiter->current = &rbtdbiter->nsec3chain;
8442 result = dns_rbtnodechain_last(rbtdbiter->current,
8443 rbtdb->nsec3, name, origin);
8445 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8446 rbtdbiter->current = &rbtdbiter->chain;
8447 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8450 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8451 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8452 NULL, &rbtdbiter->node);
8453 if (result == ISC_R_SUCCESS) {
8454 rbtdbiter->new_origin = ISC_TRUE;
8455 reference_iter_node(rbtdbiter);
8458 INSIST(result == ISC_R_NOTFOUND);
8459 result = ISC_R_NOMORE; /* The tree is empty. */
8462 rbtdbiter->result = result;
/*
 * Position the iterator on the node named 'name'.
 *
 * With nsec3only set only the NSEC3 tree is searched; with nonsec3 set
 * only the main tree.  Otherwise the main tree is searched first and,
 * on DNS_R_PARTIALMATCH, the NSEC3 tree is searched as well; the
 * iterator switches to the NSEC3 chain only if that second search
 * succeeds.
 *
 * A previously stored result other than ISC_R_SUCCESS, ISC_R_NOTFOUND
 * or ISC_R_NOMORE is returned unchanged.
 *
 * NOTE(review): two result-handling sequences follow the lookups.  They
 * look like compile-time alternatives, but the preprocessor lines that
 * select between them are not visible in this excerpt - confirm against
 * the full source.
 */
8468 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8469 isc_result_t result;
8470 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8471 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8472 dns_name_t *iname, *origin;
8474 if (rbtdbiter->result != ISC_R_SUCCESS &&
8475 rbtdbiter->result != ISC_R_NOTFOUND &&
8476 rbtdbiter->result != ISC_R_NOMORE)
8477 return (rbtdbiter->result);
8479 if (rbtdbiter->paused)
8480 resume_iteration(rbtdbiter);
8482 dereference_iter_node(rbtdbiter);
8484 iname = dns_fixedname_name(&rbtdbiter->name);
8485 origin = dns_fixedname_name(&rbtdbiter->origin);
8486 dns_rbtnodechain_reset(&rbtdbiter->chain);
8487 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8489 if (rbtdbiter->nsec3only) {
8490 rbtdbiter->current = &rbtdbiter->nsec3chain;
8491 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8494 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8495 } else if (rbtdbiter->nonsec3) {
8496 rbtdbiter->current = &rbtdbiter->chain;
8497 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8500 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8503 * Stay on main chain if not found on either chain.
8505 rbtdbiter->current = &rbtdbiter->chain;
8506 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8509 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8510 if (result == DNS_R_PARTIALMATCH) {
8511 dns_rbtnode_t *node = NULL;
8512 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8513 &node, &rbtdbiter->nsec3chain,
8514 DNS_RBTFIND_EMPTYDATA,
8516 if (result == ISC_R_SUCCESS) {
8517 rbtdbiter->node = node;
8518 rbtdbiter->current = &rbtdbiter->nsec3chain;
/* First result-handling sequence: a partial match is reported as ISC_R_NOTFOUND. */
8524 if (result == ISC_R_SUCCESS) {
8525 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8527 if (result == ISC_R_SUCCESS) {
8528 rbtdbiter->new_origin = ISC_TRUE;
8529 reference_iter_node(rbtdbiter);
8531 } else if (result == DNS_R_PARTIALMATCH) {
8532 result = ISC_R_NOTFOUND;
8533 rbtdbiter->node = NULL;
8536 rbtdbiter->result = result;
/* Second result-handling sequence: a partial match is stored as ISC_R_SUCCESS. */
8538 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8539 isc_result_t tresult;
8540 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8542 if (tresult == ISC_R_SUCCESS) {
8543 rbtdbiter->new_origin = ISC_TRUE;
8544 reference_iter_node(rbtdbiter);
8547 rbtdbiter->node = NULL;
8550 rbtdbiter->node = NULL;
8552 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8553 ISC_R_SUCCESS : result;
/*
 * Step the iterator back to the previous node.  The iterator must have
 * a current node.  A stored result other than ISC_R_SUCCESS is returned
 * unchanged.
 *
 * When the NSEC3 chain is exhausted and the iterator is not restricted
 * by nsec3only or nonsec3, iteration continues from the last node of
 * the main tree; ISC_R_NOTFOUND there becomes ISC_R_NOMORE.
 * The outcome is stored in rbtdbiter->result.
 */
8560 dbiterator_prev(dns_dbiterator_t *iterator) {
8561 isc_result_t result;
8562 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8563 dns_name_t *name, *origin;
8564 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8566 REQUIRE(rbtdbiter->node != NULL);
8568 if (rbtdbiter->result != ISC_R_SUCCESS)
8569 return (rbtdbiter->result);
8571 if (rbtdbiter->paused)
8572 resume_iteration(rbtdbiter);
8574 name = dns_fixedname_name(&rbtdbiter->name);
8575 origin = dns_fixedname_name(&rbtdbiter->origin);
8576 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8577 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8578 !rbtdbiter->nonsec3 &&
8579 &rbtdbiter->nsec3chain == rbtdbiter->current) {
8580 rbtdbiter->current = &rbtdbiter->chain;
8581 dns_rbtnodechain_reset(rbtdbiter->current);
8582 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8584 if (result == ISC_R_NOTFOUND)
8585 result = ISC_R_NOMORE;
/* The old node is released before the new position is looked up. */
8588 dereference_iter_node(rbtdbiter);
8590 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8591 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8592 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8593 NULL, &rbtdbiter->node);
8596 if (result == ISC_R_SUCCESS)
8597 reference_iter_node(rbtdbiter);
8599 rbtdbiter->result = result;
/*
 * Step the iterator forward to the next node.  The iterator must have a
 * current node.  A stored result other than ISC_R_SUCCESS is returned
 * unchanged.
 *
 * When the main chain is exhausted and the iterator is not restricted
 * by nsec3only or nonsec3, iteration continues from the first node of
 * the NSEC3 tree; ISC_R_NOTFOUND there becomes ISC_R_NOMORE.
 * The outcome is stored in rbtdbiter->result.
 */
8605 dbiterator_next(dns_dbiterator_t *iterator) {
8606 isc_result_t result;
8607 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8608 dns_name_t *name, *origin;
8609 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8611 REQUIRE(rbtdbiter->node != NULL);
8613 if (rbtdbiter->result != ISC_R_SUCCESS)
8614 return (rbtdbiter->result);
8616 if (rbtdbiter->paused)
8617 resume_iteration(rbtdbiter);
8619 name = dns_fixedname_name(&rbtdbiter->name);
8620 origin = dns_fixedname_name(&rbtdbiter->origin);
8621 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8622 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8623 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8624 rbtdbiter->current = &rbtdbiter->nsec3chain;
8625 dns_rbtnodechain_reset(rbtdbiter->current);
8626 result = dns_rbtnodechain_first(rbtdbiter->current,
8627 rbtdb->nsec3, name, origin);
8628 if (result == ISC_R_NOTFOUND)
8629 result = ISC_R_NOMORE;
/* The old node is released before the new position is looked up. */
8632 dereference_iter_node(rbtdbiter);
8634 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8635 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8636 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8637 NULL, &rbtdbiter->node);
8639 if (result == ISC_R_SUCCESS)
8640 reference_iter_node(rbtdbiter);
8642 rbtdbiter->result = result;
/*
 * Hand the caller the node the iterator is positioned on (with a new
 * reference) via *nodep.  The iterator's stored result must be
 * ISC_R_SUCCESS and it must have a current node.
 *
 * With relative names in effect and a new origin pending, the result is
 * DNS_R_NEWORIGIN; otherwise ISC_R_SUCCESS (or a name-handling error).
 *
 * When iterator->cleaning is set and the result is ISC_R_SUCCESS, the
 * node is passed to expirenode() and, if it has no 'down' subtree,
 * queued in rbtdbiter->deletions[] with an extra reference for later
 * processing by flush_deletions().
 *
 * NOTE(review): the second line of the parameter list and several
 * short branch lines are not visible in this excerpt.
 */
8648 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8651 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8652 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8653 dns_rbtnode_t *node = rbtdbiter->node;
8654 isc_result_t result;
8655 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8656 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8658 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8659 REQUIRE(rbtdbiter->node != NULL);
8661 if (rbtdbiter->paused)
8662 resume_iteration(rbtdbiter);
8665 if (rbtdbiter->common.relative_names)
8667 result = dns_name_concatenate(nodename, origin, name, NULL);
8668 if (result != ISC_R_SUCCESS)
8670 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8671 result = DNS_R_NEWORIGIN;
8673 result = ISC_R_SUCCESS;
/* Take the reference that is handed to the caller through *nodep. */
8675 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8676 new_reference(rbtdb, node);
8677 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8679 *nodep = rbtdbiter->node;
8681 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8682 isc_result_t expire_result;
8685 * If the deletion array is full, flush it before trying
8686 * to expire the current node. The current node can't
8687 * fully deleted while the iteration cursor is still on it.
8689 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8690 flush_deletions(rbtdbiter);
8692 expire_result = expirenode(iterator->db, *nodep, 0);
8695 * expirenode() currently always returns success.
8697 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8700 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8701 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8702 dns_rbtnode_refincrement(node, &refs);
8704 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the iterator: mark it paused, release the tree read lock if one
 * is held, and flush queued deletions.
 *
 * A stored result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Pausing an already-paused iterator is a no-op that
 * returns ISC_R_SUCCESS.
 */
8712 dbiterator_pause(dns_dbiterator_t *iterator) {
8713 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8714 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8716 if (rbtdbiter->result != ISC_R_SUCCESS &&
8717 rbtdbiter->result != ISC_R_NOMORE)
8718 return (rbtdbiter->result);
8720 if (rbtdbiter->paused)
8721 return (ISC_R_SUCCESS);
8723 rbtdbiter->paused = ISC_TRUE;
/* Only a read lock is expected to be held at this point. */
8725 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8726 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8727 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8728 rbtdbiter->tree_locked = isc_rwlocktype_none;
8731 flush_deletions(rbtdbiter);
8733 return (ISC_R_SUCCESS);
/*
 * Copy the iterator's stored origin name into 'name'.  A stored result
 * other than ISC_R_SUCCESS is returned unchanged; otherwise the result
 * of dns_name_copy() is returned.
 */
8737 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8738 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8739 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8741 if (rbtdbiter->result != ISC_R_SUCCESS)
8742 return (rbtdbiter->result);
8744 return (dns_name_copy(origin, name, NULL));
8748 * Additional cache routines.
/*
 * Look up the additional-data cache entry associated with the rdata the
 * rdataset cursor is currently on, and fetch its contents through
 * dns_acache_getentry().
 *
 * The per-header array is chosen by 'type' (additional_auth or
 * additional_glue) and indexed by the position derived from the slab's
 * total count and privateuint4.  Returns ISC_R_NOTFOUND when there is
 * no array or no entry in that slot.
 *
 * NOTE(review): the early "return (ISC_R_NOTIMPLEMENTED)" looks like an
 * alternative build configuration; the preprocessor lines around it,
 * the tail of the parameter list, and the switch header are not visible
 * in this excerpt.
 */
8751 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8752 dns_rdatatype_t qtype, dns_acache_t *acache,
8753 dns_zone_t **zonep, dns_db_t **dbp,
8754 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8755 dns_name_t *fname, dns_message_t *msg,
8771 return (ISC_R_NOTIMPLEMENTED);
8773 dns_rbtdb_t *rbtdb = rdataset->private1;
8774 dns_rbtnode_t *rbtnode = rdataset->private2;
8775 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8776 unsigned int current_count = rdataset->privateuint4;
8778 rdatasetheader_t *header;
8779 nodelock_t *nodelock;
8780 unsigned int total_count;
8781 acachectl_t *acarray;
8782 dns_acacheentry_t *entry;
8783 isc_result_t result;
8785 UNUSED(qtype); /* we do not use this value at least for now */
/* The header is located immediately before the raw slab. */
8788 header = (struct rdatasetheader *)(raw - sizeof(*header));
8790 total_count = raw[0] * 256 + raw[1];
8791 INSIST(total_count > current_count);
/* privateuint4 counts entries beyond the cursor, so this is the cursor's index. */
8792 count = total_count - current_count - 1;
8796 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8797 NODE_LOCK(nodelock, isc_rwlocktype_read);
8800 case dns_rdatasetadditional_fromauth:
8801 acarray = header->additional_auth;
8803 case dns_rdatasetadditional_fromcache:
8806 case dns_rdatasetadditional_fromglue:
8807 acarray = header->additional_glue;
8813 if (acarray == NULL) {
8814 if (type != dns_rdatasetadditional_fromcache)
8815 dns_acache_countquerymiss(acache);
8816 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8817 return (ISC_R_NOTFOUND);
8820 if (acarray[count].entry == NULL) {
8821 dns_acache_countquerymiss(acache);
8822 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8823 return (ISC_R_NOTFOUND);
/* Attach to the entry under the node lock so it can be used after unlocking. */
8827 dns_acache_attachentry(acarray[count].entry, &entry);
8829 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8831 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8832 nodep, fname, msg, now);
8834 dns_acache_detachentry(&entry);
/*
 * Callback registered with the additional cache through
 * dns_acache_createentry().  Under the node's write lock it clears the
 * array slot that refers to 'entry' (if the slot still does), frees the
 * callback argument, detaches the entry, and then releases the node and
 * database references taken from the callback argument.
 *
 * NOTE(review): the assignment of 'cbarg' from '*arg' is not visible in
 * this excerpt.
 */
8840 acache_callback(dns_acacheentry_t *entry, void **arg) {
8842 dns_rbtnode_t *rbtnode;
8843 nodelock_t *nodelock;
8844 acachectl_t *acarray = NULL;
8845 acache_cbarg_t *cbarg;
8848 REQUIRE(arg != NULL);
8852 * The caller must hold the entry lock.
8855 rbtdb = (dns_rbtdb_t *)cbarg->db;
8856 rbtnode = (dns_rbtnode_t *)cbarg->node;
8858 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8859 NODE_LOCK(nodelock, isc_rwlocktype_write);
8861 switch (cbarg->type) {
8862 case dns_rdatasetadditional_fromauth:
8863 acarray = cbarg->header->additional_auth;
8865 case dns_rdatasetadditional_fromglue:
8866 acarray = cbarg->header->additional_glue;
8872 count = cbarg->count;
/* The slot is cleared only if it still refers to this entry. */
8873 if (acarray != NULL && acarray[count].entry == entry) {
8874 acarray[count].entry = NULL;
8875 INSIST(acarray[count].cbarg == cbarg);
8876 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8877 acarray[count].cbarg = NULL;
8879 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8881 dns_acache_detachentry(&entry);
8883 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* rbtnode and rbtdb were copied from cbarg before it was freed above. */
8885 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8886 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an additional-cache entry and dispose of its callback
 * argument: the argument's node and database references are released
 * and its memory is returned to 'mctx'.  All three parameters, and
 * '*cbargp', must be non-NULL.
 *
 * NOTE(review): the load of 'cbarg' from '*cbargp' (and any clearing of
 * '*cbargp') is not visible in this excerpt.
 */
8894 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8895 acache_cbarg_t **cbargp)
8897 acache_cbarg_t *cbarg;
8899 REQUIRE(mctx != NULL);
8900 REQUIRE(entry != NULL);
8901 REQUIRE(cbargp != NULL && *cbargp != NULL);
8905 dns_acache_cancelentry(entry);
8906 dns_db_detachnode(cbarg->db, &cbarg->node);
8907 dns_db_detach(&cbarg->db);
8909 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Create an additional-cache entry for the rdata the rdataset cursor is
 * currently on and install it in the per-header array selected by
 * 'type' (additional_auth or additional_glue).
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache; that case
 * returns ISC_R_SUCCESS immediately.  The array is allocated, with one
 * slot per rdata in the slab, the first time it is needed.  An entry
 * already present in the slot is replaced and cancelled after the node
 * lock has been released.
 *
 * Returns ISC_R_SUCCESS, ISC_R_NOMEMORY when the callback argument
 * cannot be allocated, or takes the failure path when entry
 * creation/setup fails.
 *
 * NOTE(review): the early "return (ISC_R_NOTIMPLEMENTED)" looks like an
 * alternative build configuration; the preprocessor lines around it,
 * the goto/label lines of the failure path, and the tail of the
 * parameter list are not visible in this excerpt.
 */
8916 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8917 dns_rdatatype_t qtype, dns_acache_t *acache,
8918 dns_zone_t *zone, dns_db_t *db,
8919 dns_dbversion_t *version, dns_dbnode_t *node,
8933 return (ISC_R_NOTIMPLEMENTED);
8935 dns_rbtdb_t *rbtdb = rdataset->private1;
8936 dns_rbtnode_t *rbtnode = rdataset->private2;
8937 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8938 unsigned int current_count = rdataset->privateuint4;
8939 rdatasetheader_t *header;
8940 unsigned int total_count, count;
8941 nodelock_t *nodelock;
8942 isc_result_t result;
8943 acachectl_t *acarray;
8944 dns_acacheentry_t *newentry, *oldentry = NULL;
8945 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8949 if (type == dns_rdatasetadditional_fromcache)
8950 return (ISC_R_SUCCESS);
/* The header is located immediately before the raw slab. */
8952 header = (struct rdatasetheader *)(raw - sizeof(*header));
8954 total_count = raw[0] * 256 + raw[1];
8955 INSIST(total_count > current_count);
8956 count = total_count - current_count - 1; /* should be private data */
/*
 * Build the callback argument.  It holds its own database and node
 * references, which acache_callback() / acache_cancelentry() release.
 */
8958 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8959 if (newcbarg == NULL)
8960 return (ISC_R_NOMEMORY);
8961 newcbarg->type = type;
8962 newcbarg->count = count;
8963 newcbarg->header = header;
8964 newcbarg->db = NULL;
8965 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8966 newcbarg->node = NULL;
8967 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8970 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8971 acache_callback, newcbarg, &newentry);
8972 if (result != ISC_R_SUCCESS)
8974 /* Set cache data in the new entry. */
8975 result = dns_acache_setentry(acache, newentry, zone, db,
8976 version, node, fname);
8977 if (result != ISC_R_SUCCESS)
8980 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8981 NODE_LOCK(nodelock, isc_rwlocktype_write);
8985 case dns_rdatasetadditional_fromauth:
8986 acarray = header->additional_auth;
8988 case dns_rdatasetadditional_fromglue:
8989 acarray = header->additional_glue;
8995 if (acarray == NULL) {
8998 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8999 sizeof(acachectl_t));
9001 if (acarray == NULL) {
9002 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9006 for (i = 0; i < total_count; i++) {
9007 acarray[i].entry = NULL;
9008 acarray[i].cbarg = NULL;
9012 case dns_rdatasetadditional_fromauth:
9013 header->additional_auth = acarray;
9015 case dns_rdatasetadditional_fromglue:
9016 header->additional_glue = acarray;
9022 if (acarray[count].entry != NULL) {
9024 * Swap the entry. Delay cleaning-up the old entry since
9025 * it would require a node lock.
9027 oldentry = acarray[count].entry;
9028 INSIST(acarray[count].cbarg != NULL);
9029 oldcbarg = acarray[count].cbarg;
9031 acarray[count].entry = newentry;
9032 acarray[count].cbarg = newcbarg;
9034 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* The displaced entry is cancelled only after the node lock is dropped. */
9036 if (oldentry != NULL) {
9037 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9038 dns_acache_detachentry(&oldentry);
9041 return (ISC_R_SUCCESS);
9044 if (newcbarg != NULL) {
9045 if (newentry != NULL) {
9046 acache_cancelentry(rbtdb->common.mctx, newentry,
9048 dns_acache_detachentry(&newentry);
9050 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9051 dns_db_detach(&newcbarg->db);
9052 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Drop the additional-data cache entry stored for one rdata of 'rdataset'.
 *
 * The slot lives in the rdatasetheader that precedes the raw slab
 * (rdataset->private3): 'type' picks header->additional_auth or
 * header->additional_glue, and the slot index is
 * total_count - current_count - 1.
 *
 * Visible return paths: ISC_R_SUCCESS right away for
 * dns_rdatasetadditional_fromcache, ISC_R_NOTFOUND when the selected array
 * or the slot's entry is NULL, and ISC_R_SUCCESS after a slot was cleared.
 * 'qtype' is not used.
 *
 * NOTE(review): the condition guarding the ISC_R_NOTIMPLEMENTED return
 * below is not apparent here (presumably a build-time switch) -- confirm.
 */
9062 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9063 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9071 return (ISC_R_NOTIMPLEMENTED);
9073 dns_rbtdb_t *rbtdb = rdataset->private1;
9074 dns_rbtnode_t *rbtnode = rdataset->private2;
9075 unsigned char *raw = rdataset->private3; /* RDATASLAB */
9076 unsigned int current_count = rdataset->privateuint4;
9077 rdatasetheader_t *header;
9078 nodelock_t *nodelock;
9079 unsigned int total_count, count;
9080 acachectl_t *acarray;
9081 dns_acacheentry_t *entry;
9082 acache_cbarg_t *cbarg;
9084 UNUSED(qtype); /* we do not use this value at least for now */
/* The fromcache type is accepted but requires no work here. */
9087 if (type == dns_rdatasetadditional_fromcache)
9088 return (ISC_R_SUCCESS);
/* Step back from the raw slab data to the header placed just before it. */
9090 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* The slab starts with a 16-bit big-endian count. */
9092 total_count = raw[0] * 256 + raw[1];
9093 INSIST(total_count > current_count);
9094 count = total_count - current_count - 1;
/* The acache arrays are read and modified under the node write lock. */
9099 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9100 NODE_LOCK(nodelock, isc_rwlocktype_write);
9103 case dns_rdatasetadditional_fromauth:
9104 acarray = header->additional_auth;
9106 case dns_rdatasetadditional_fromglue:
9107 acarray = header->additional_glue;
9113 if (acarray == NULL) {
9114 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9115 return (ISC_R_NOTFOUND);
9118 entry = acarray[count].entry;
9119 if (entry == NULL) {
9120 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9121 return (ISC_R_NOTFOUND);
/* Take over the slot's entry and callback argument, clearing the slot while locked. */
9124 acarray[count].entry = NULL;
9125 cbarg = acarray[count].cbarg;
9126 acarray[count].cbarg = NULL;
9128 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* Release the removed entry only after the node lock has been dropped. */
9130 if (entry != NULL) {
9132 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9133 dns_acache_detachentry(&entry);
9136 return (ISC_R_SUCCESS);
9141 * Routines for LRU-based cache management.
9145 * See if a given cache entry that is being reused needs to be updated
9146 * in the LRU-list. From the LRU management point of view, this function is
9147 * expected to return true for almost all cases. When used with threads,
9148 * however, this may cause a non-negligible performance penalty because a
9149 * writer lock will have to be acquired before updating the list.
9150 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9151 * function returns true if the entry has not been updated for some period of
9152 * time. We differentiate the NS or glue address case and the others since
9153 * experiments have shown that the former tends to be accessed relatively
9154 * infrequently and the cost of a cache miss is higher (e.g., a missing NS record
9155 * may cause external queries at a higher level zone, involving more
9158 * Caller must hold the node (read or write) lock.
/*
 * Report whether 'header' is due for an LRU refresh at time 'now', based on
 * its attributes, type, trust level and last_used timestamp.
 */
9160 static inline isc_boolean_t
9161 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
/* Test first for headers flagged NONEXISTENT or STALE. */
9162 if ((header->attributes &
9163 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9166 #if DNS_RBTDB_LIMITLRUUPDATE
/*
 * NS rdatasets, and A/AAAA rdatasets whose trust is dns_trust_glue, use the
 * 60-second threshold; note that the condition covers NS as well as glue.
 */
9167 if (header->type == dns_rdatatype_ns ||
9168 (header->trust == dns_trust_glue &&
9169 (header->type == dns_rdatatype_a ||
9170 header->type == dns_rdatatype_aaaa))) {
9172 * Glue records are updated if at least 60 seconds have passed
9173 * since the previous update time.
9175 return (header->last_used + 60 <= now);
9178 /* Other records are updated if 5 minutes have passed. */
9179 return (header->last_used + 300 <= now);
9188 * Update the timestamp of a given cache entry and move it to the head
9189 * of the corresponding LRU list.
9191 * Caller must hold the node (write) lock.
9193 * Note that we do NOT touch the heap here, as the TTL has not changed.
/*
 * Refresh 'header' in the list kept for its node's lock bucket
 * (rbtdb->rdatasets[header->node->locknum]): unlink it, stamp last_used
 * with 'now', and put it back at the head of that list.
 *
 * Asserts that 'rbtdb' is a cache and that 'header' is currently linked.
 */
9196 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9199 INSIST(IS_CACHE(rbtdb));
9201 /* To be checked: can we really assume this? XXXMLG */
9202 INSIST(ISC_LINK_LINKED(header, link));
/* Unlink and prepend so that the header ends up at the list head. */
9204 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9205 header->last_used = now;
9206 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9210 * Purge some expired and/or stale (i.e. unused for some period) cache entries
9211 * under an overmem condition. To recover from this condition quickly, up to
9212 * 2 entries will be purged. This process is triggered while adding a new
9213 * entry, and we specifically avoid purging entries in the same LRU bucket as
9214 * the one to which the new entry will belong. Otherwise, we might purge
9215 * entries of the same name of different RR types while adding RRsets from a
9216 * single response (consider the case where we're adding A and AAAA glue records
9217 * of the same NS name).
/*
 * Expire cache headers found in lock buckets other than 'locknum_start'.
 *
 * Each visited bucket is processed under its node write lock. 'now' is
 * compared against the TTL of the bucket's heap element, and 'tree_locked'
 * is passed through unchanged to expire_header().
 */
9220 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9221 isc_stdtime_t now, isc_boolean_t tree_locked)
9223 rdatasetheader_t *header, *header_prev;
9224 unsigned int locknum;
/*
 * Visit the buckets in order starting just after 'locknum_start', wrapping
 * modulo node_lock_count; stop on returning to 'locknum_start' or once
 * purgecount is no longer positive.
 */
9227 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9228 locknum != locknum_start && purgecount > 0;
9229 locknum = (locknum + 1) % rbtdb->node_lock_count) {
9230 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9231 isc_rwlocktype_write);
/*
 * Look at element 1 of this bucket's heap and expire it when its TTL is not
 * later than now - RBTDB_VIRTUAL.
 */
9233 header = isc_heap_element(rbtdb->heaps[locknum], 1);
9234 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
9235 expire_header(rbtdb, header, tree_locked);
/*
 * Then walk this bucket's rdatasets list from the tail towards the head.
 * The predecessor is saved first because the current header gets unlinked.
 */
9239 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9240 header != NULL && purgecount > 0;
9241 header = header_prev) {
9242 header_prev = ISC_LIST_PREV(header, link);
9244 * Unlink the entry at this point to avoid checking it
9245 * again even if it's currently used someone else and
9246 * cannot be purged at this moment. This entry won't be
9247 * referenced any more (so unlinking is safe) since the
9248 * TTL was reset to 0.
9250 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9252 expire_header(rbtdb, header, tree_locked);
9256 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9257 isc_rwlocktype_write);
9262 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9263 isc_boolean_t tree_locked)
9265 set_ttl(rbtdb, header, 0);
9266 header->attributes |= RDATASET_ATTR_STALE;
9267 header->node->dirty = 1;
9270 * Caller must hold the node (write) lock.
9273 if (dns_rbtnode_refcurrent(header->node) == 0) {
9275 * If no one else is using the node, we can clean it up now.
9276 * We first need to gain a new reference to the node to meet a
9277 * requirement of decrement_reference().
9279 new_reference(rbtdb, header->node);
9280 decrement_reference(rbtdb, header->node, 0,
9281 isc_rwlocktype_write,
9282 tree_locked ? isc_rwlocktype_write :
9283 isc_rwlocktype_none, ISC_FALSE);