2 * Copyright (C) 2004-2011 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.310.8.5.4.1 2011-11-16 09:32:08 marka Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
57 #include <dns/rdata.h>
58 #include <dns/rdataset.h>
59 #include <dns/rdatasetiter.h>
60 #include <dns/rdataslab.h>
61 #include <dns/rdatastruct.h>
62 #include <dns/result.h>
63 #include <dns/stats.h>
66 #include <dns/zonekey.h>
68 #ifdef DNS_RBTDB_VERSION64
74 #ifdef DNS_RBTDB_VERSION64
75 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
77 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
81 * Note that "impmagic" is not the first four bytes of the struct, so
82 * ISC_MAGIC_VALID cannot be used.
84 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
85 (rbtdb)->common.impmagic == RBTDB_MAGIC)
87 #ifdef DNS_RBTDB_VERSION64
88 typedef isc_uint64_t rbtdb_serial_t;
90 * Make casting easier in symbolic debuggers by using different names
91 * for the 64 bit version.
93 #define dns_rbtdb_t dns_rbtdb64_t
94 #define rdatasetheader_t rdatasetheader64_t
95 #define rbtdb_version_t rbtdb_version64_t
97 typedef isc_uint32_t rbtdb_serial_t;
/*
 * rbtdb_rdatatype_t packs two rdata type values into one 32-bit word:
 * the base type occupies the low 16 bits and the ext type the bits
 * above them.
 */
100 typedef isc_uint32_t rbtdb_rdatatype_t;
/* Extract the base part (low 16 bits) or the ext part (value shifted right by 16). */
102 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
103 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
/* Build a packed value from base b and ext e. */
104 #define RBTDB_RDATATYPE_VALUE(b, e) ((rbtdb_rdatatype_t)((e) << 16) | (b))
/*
 * Predefined packed values. The SIG* constants use rrsig as the base
 * with the named type (nsec, nsec3, ns, cname, dname) as the ext part;
 * NCACHEANY uses base 0 with ext any.
 */
106 #define RBTDB_RDATATYPE_SIGNSEC \
107 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
108 #define RBTDB_RDATATYPE_SIGNSEC3 \
109 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
110 #define RBTDB_RDATATYPE_SIGNS \
111 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
112 #define RBTDB_RDATATYPE_SIGCNAME \
113 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
114 #define RBTDB_RDATATYPE_SIGDNAME \
115 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
116 #define RBTDB_RDATATYPE_NCACHEANY \
117 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
120 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
121 * Using rwlock is effective with regard to lookup performance only when
122 * it is implemented in an efficient way.
123 * Otherwise, it is generally wise to stick to the simple locking since rwlock
124 * would require more memory or can even make lookups slower due to its own
125 * overhead (when it internally calls mutex locks).
127 #ifdef ISC_RWLOCK_USEATOMIC
128 #define DNS_RBTDB_USERWLOCK 1
130 #define DNS_RBTDB_USERWLOCK 0
133 #if DNS_RBTDB_USERWLOCK
134 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
135 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
136 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
137 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
139 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
140 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
141 #define RBTDB_LOCK(l, t) LOCK(l)
142 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
146 * Since node locking is sensitive to both performance and memory footprint,
147 * we need some trick here. If we have both high-performance rwlock and
148 * high performance and small-memory reference counters, we use rwlock for
149 * node lock and isc_refcount for node references. In this case, we don't have
150 * to protect the access to the counters by locks.
151 * Otherwise, we simply use ordinary mutex lock for node locking, and use
152 * simple integers as reference counters which is protected by the lock.
153 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
154 * NODE_UNLOCK. In some other cases, however, we need to protect reference
155 * counters first and then protect other parts of a node as read-only data.
156 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
157 * provided for these special cases. When we can use the efficient backend
158 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
159 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
160 * section including the access to the reference counter.
161 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
162 * section is also protected by NODE_STRONGLOCK().
164 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
165 typedef isc_rwlock_t nodelock_t;
167 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
168 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
169 #define NODE_LOCK(l, t) RWLOCK((l), (t))
170 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
171 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
173 #define NODE_STRONGLOCK(l) ((void)0)
174 #define NODE_STRONGUNLOCK(l) ((void)0)
175 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
176 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
177 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
179 typedef isc_mutex_t nodelock_t;
181 #define NODE_INITLOCK(l) isc_mutex_init(l)
182 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
183 #define NODE_LOCK(l, t) LOCK(l)
184 #define NODE_UNLOCK(l, t) UNLOCK(l)
185 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
187 #define NODE_STRONGLOCK(l) LOCK(l)
188 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
189 #define NODE_WEAKLOCK(l, t) ((void)0)
190 #define NODE_WEAKUNLOCK(l, t) ((void)0)
191 #define NODE_WEAKDOWNGRADE(l) ((void)0)
195 * Whether to rate-limit updating the LRU to avoid possible thread contention.
196 * Our performance measurement has shown the cost is marginal, so it's defined
197 * to be 0 by default either with or without threads.
199 #ifndef DNS_RBTDB_LIMITLRUUPDATE
200 #define DNS_RBTDB_LIMITLRUUPDATE 0
204 * Allow clients with a virtual time of up to 5 minutes in the past to see
205 * records that would otherwise have expired.
207 #define RBTDB_VIRTUAL 300
213 dns_rdatatype_t type;
216 typedef struct acachectl acachectl_t;
218 typedef struct rdatasetheader {
220 * Locked by the owning node's lock.
222 rbtdb_serial_t serial;
224 rbtdb_rdatatype_t type;
225 isc_uint16_t attributes;
227 struct noqname *noqname;
228 struct noqname *closest;
230 * We don't use the LIST macros, because the LIST structure has
231 * both head and tail pointers, and is doubly linked.
234 struct rdatasetheader *next;
236 * If this is the top header for an rdataset, 'next' points
237 * to the top header for the next rdataset (i.e., the next type).
238 * Otherwise, it points up to the header whose down pointer points
242 struct rdatasetheader *down;
244 * Points to the header for the next older version of
250 * Monotonically increased every time this rdataset is bound so that
251 * it is used as the base of the starting point in DNS responses
252 * when the "cyclic" rrset-order is required. Since the ordering
253 * should not be so crucial, no lock is set for the counter for
254 * performance reasons.
257 acachectl_t *additional_auth;
258 acachectl_t *additional_glue;
261 isc_stdtime_t last_used;
262 ISC_LINK(struct rdatasetheader) link;
264 unsigned int heap_index;
266 * Used for TTL-based cache cleaning.
268 isc_stdtime_t resign;
271 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
272 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
274 #define RDATASET_ATTR_NONEXISTENT 0x0001
275 #define RDATASET_ATTR_STALE 0x0002
276 #define RDATASET_ATTR_IGNORE 0x0004
277 #define RDATASET_ATTR_RETAIN 0x0008
278 #define RDATASET_ATTR_NXDOMAIN 0x0010
279 #define RDATASET_ATTR_RESIGN 0x0020
280 #define RDATASET_ATTR_STATCOUNT 0x0040
281 #define RDATASET_ATTR_OPTOUT 0x0080
282 #define RDATASET_ATTR_NEGATIVE 0x0100
284 typedef struct acache_cbarg {
285 dns_rdatasetadditional_t type;
289 rdatasetheader_t *header;
293 dns_acacheentry_t *entry;
294 acache_cbarg_t *cbarg;
299 * When the cache will pre-expire data (due to memory low or other
300 * situations) before the rdataset's TTL has expired, it MUST
301 * respect the RETAIN bit and not expire the data until its TTL is
305 #undef IGNORE /* WIN32 winbase.h defines this. */
307 #define EXISTS(header) \
308 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
309 #define NONEXISTENT(header) \
310 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
311 #define IGNORE(header) \
312 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
313 #define RETAIN(header) \
314 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
315 #define NXDOMAIN(header) \
316 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
317 #define RESIGN(header) \
318 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
319 #define OPTOUT(header) \
320 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
321 #define NEGATIVE(header) \
322 (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
324 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
327 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
328 * There is a tradeoff issue about configuring this value: if this is too
329 * small, it may cause heavier contention between threads; if this is too large,
330 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
331 * The default value should work well for most environments, but this can
332 * also be configured at compilation time via the
333 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT macro. This value must be larger than
334 * 1 due to the assumption of overmem_purge().
336 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
338 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
340 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
343 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
344 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
348 /* Protected in the refcount routines. */
349 isc_refcount_t references;
350 /* Locked by lock. */
351 isc_boolean_t exiting;
354 typedef struct rbtdb_changed {
355 dns_rbtnode_t * node;
357 ISC_LINK(struct rbtdb_changed) link;
360 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
368 typedef struct rbtdb_version {
370 rbtdb_serial_t serial;
372 * Protected in the refcount routines.
373 * XXXJT: should we change the lock policy based on the refcount
376 isc_refcount_t references;
377 /* Locked by database lock. */
378 isc_boolean_t writer;
379 isc_boolean_t commit_ok;
380 rbtdb_changedlist_t changed_list;
381 rdatasetheaderlist_t resigned_list;
382 ISC_LINK(struct rbtdb_version) link;
383 dns_db_secure_t secure;
384 isc_boolean_t havensec3;
385 /* NSEC3 parameters */
388 isc_uint16_t iterations;
389 isc_uint8_t salt_length;
390 unsigned char salt[DNS_NSEC3_SALTSIZE];
393 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
398 /* Locks the data in this struct */
399 #if DNS_RBTDB_USERWLOCK
404 /* Locks the tree structure (prevents nodes appearing/disappearing) */
405 isc_rwlock_t tree_lock;
406 /* Locks for individual tree nodes */
407 unsigned int node_lock_count;
408 rbtdb_nodelock_t * node_locks;
409 dns_rbtnode_t * origin_node;
410 dns_stats_t * rrsetstats; /* cache DB only */
411 /* Locked by lock. */
413 isc_refcount_t references;
414 unsigned int attributes;
415 rbtdb_serial_t current_serial;
416 rbtdb_serial_t least_serial;
417 rbtdb_serial_t next_serial;
418 rbtdb_version_t * current_version;
419 rbtdb_version_t * future_version;
420 rbtdb_versionlist_t open_versions;
422 dns_dbnode_t *soanode;
423 dns_dbnode_t *nsnode;
426 * This is a linked list used to implement the LRU cache. There will
427 * be node_lock_count linked lists here. Nodes in bucket 1 will be
428 * placed on the linked list rdatasets[1].
430 rdatasetheaderlist_t *rdatasets;
433 * Temporary storage for stale cache nodes and dynamically deleted
434 * nodes that await being cleaned up.
436 rbtnodelist_t *deadnodes;
439 * Heaps. These are used for TTL based expiry in a cache,
440 * or for zone resigning in a zone DB. hmctx is the memory
441 * context to use for the heap (which differs from the main
442 * database memory context in the case of a cache).
447 /* Locked by tree_lock. */
451 dns_rpz_cidr_t * rpz_cidr;
454 unsigned int quantum;
457 #define RBTDB_ATTR_LOADED 0x01
458 #define RBTDB_ATTR_LOADING 0x02
465 rbtdb_version_t * rbtversion;
466 rbtdb_serial_t serial;
467 unsigned int options;
468 dns_rbtnodechain_t chain;
469 isc_boolean_t copy_name;
470 isc_boolean_t need_cleanup;
472 dns_rbtnode_t * zonecut;
473 rdatasetheader_t * zonecut_rdataset;
474 rdatasetheader_t * zonecut_sigrdataset;
475 dns_fixedname_t zonecut_name;
487 static void rdataset_disassociate(dns_rdataset_t *rdataset);
488 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
489 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
490 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
491 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
492 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
493 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
496 dns_rdataset_t *negsig);
497 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
500 dns_rdataset_t *negsig);
501 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
502 dns_rdatasetadditional_t type,
503 dns_rdatatype_t qtype,
504 dns_acache_t *acache,
507 dns_dbversion_t **versionp,
508 dns_dbnode_t **nodep,
512 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
513 dns_rdatasetadditional_t type,
514 dns_rdatatype_t qtype,
515 dns_acache_t *acache,
518 dns_dbversion_t *version,
521 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
522 dns_rdataset_t *rdataset,
523 dns_rdatasetadditional_t type,
524 dns_rdatatype_t qtype);
525 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
527 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
529 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
530 isc_boolean_t tree_locked);
531 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
532 isc_stdtime_t now, isc_boolean_t tree_locked);
533 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
534 rdatasetheader_t *newheader);
535 static void prune_tree(isc_task_t *task, isc_event_t *event);
536 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
537 static void rdataset_expire(dns_rdataset_t *rdataset);
539 static dns_rdatasetmethods_t rdataset_methods = {
540 rdataset_disassociate,
550 rdataset_getadditional,
551 rdataset_setadditional,
552 rdataset_putadditional,
557 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
558 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
559 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
560 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
561 dns_rdataset_t *rdataset);
563 static dns_rdatasetitermethods_t rdatasetiter_methods = {
564 rdatasetiter_destroy,
/*
 * rbtdb implementation of an rdataset iterator: the generic iterator
 * object followed by a pointer to an rdataset header.
 * NOTE(review): current is presumably the header the iterator is
 * positioned on -- confirm against the rdatasetiter_* functions.
 */
570 typedef struct rbtdb_rdatasetiter {
571 dns_rdatasetiter_t common;
572 rdatasetheader_t * current;
573 } rbtdb_rdatasetiter_t;
575 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
576 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
577 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
578 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
580 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
581 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
582 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
583 dns_dbnode_t **nodep,
585 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
586 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
589 static dns_dbiteratormethods_t dbiterator_methods = {
601 #define DELETION_BATCH_MAX 64
604 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
606 typedef struct rbtdb_dbiterator {
607 dns_dbiterator_t common;
608 isc_boolean_t paused;
609 isc_boolean_t new_origin;
610 isc_rwlocktype_t tree_locked;
612 dns_fixedname_t name;
613 dns_fixedname_t origin;
614 dns_rbtnodechain_t chain;
615 dns_rbtnodechain_t nsec3chain;
616 dns_rbtnodechain_t *current;
618 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
620 isc_boolean_t nsec3only;
621 isc_boolean_t nonsec3;
622 } rbtdb_dbiterator_t;
625 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
626 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
628 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
630 static void overmem(dns_db_t *db, isc_boolean_t overmem);
632 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
636 * 'init_count' is used to initialize 'newheader->count' which in turn
637 * is used to determine where in the cycle rrset-order cyclic starts.
638 * We don't lock this as we don't care about simultaneous updates.
641 * Both init_count and header->count can be ISC_UINT32_MAX.
642 * The count on the returned rdataset however can't be as
643 * that indicates that the database does not implement cyclic
646 static unsigned int init_count;
651 * If a routine is going to lock more than one lock in this module, then
652 * the locking must be done in the following order:
656 * Node Lock (Only one from the set may be locked at one time by
661 * Failure to follow this hierarchy can result in deadlock.
667 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * attach(): validate that source is an rbtdb and add one reference to it
 * by incrementing rbtdb->references.
 * NOTE(review): the store into *targetp is not visible in this excerpt;
 * confirm against the full file.
 */
676 attach(dns_db_t *source, dns_db_t **targetp) {
677 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
679 REQUIRE(VALID_RBTDB(rbtdb));
681 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * free_rbtdb_callback(): task event handler that continues tearing down a
 * database. The rbtdb is taken from event->ev_arg and handed to
 * free_rbtdb() with logging enabled; the event itself is passed along so
 * that free_rbtdb() can reuse or free it.
 */
687 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
688 dns_rbtdb_t *rbtdb = event->ev_arg;
692 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * update_rrsetstats(): adjust the per-type rdataset statistics counter
 * that corresponds to header.
 *
 * The statistics type is built from a base rdata type plus attributes:
 *  - negative headers use the ext part of header->type as the base and
 *    carry either the NXDOMAIN or the NXRRSET attribute;
 *  - all other headers use the base part of header->type with no
 *    attribute.
 * The counter in rbtdb->rrsetstats is then incremented or decremented.
 * NOTE(review): the test on the increment argument that selects between
 * the two calls is not visible in this excerpt.
 *
 * Must only be called for a cache database (enforced by INSIST).
 */
696 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
697 isc_boolean_t increment)
699 dns_rdatastatstype_t statattributes = 0;
700 dns_rdatastatstype_t base = 0;
701 dns_rdatastatstype_t type;
703 /* At the moment we count statistics only for cache DB */
704 INSIST(IS_CACHE(rbtdb));
706 if (NEGATIVE(header)) {
707 if (NXDOMAIN(header))
708 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
710 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
711 base = RBTDB_RDATATYPE_EXT(header->type);
714 base = RBTDB_RDATATYPE_BASE(header->type);
716 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
718 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
720 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * set_ttl(): store newttl in header->rdh_ttl and, for a cache database,
 * tell the TTL heap that the key of this element changed.
 *
 * The previous TTL is remembered first. Nothing further is attempted
 * when the header is not on a heap (heap_index == 0), when the TTL did
 * not actually change, or when the heap for the bucket of the node
 * (header->node->locknum) does not exist. Otherwise the heap is notified
 * through isc_heap_increased() or isc_heap_decreased().
 * NOTE(review): the early-return statements and the comparison that
 * chooses between increased and decreased are not visible in this
 * excerpt; confirm the direction against the full file.
 */
724 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
729 oldttl = header->rdh_ttl;
730 header->rdh_ttl = newttl;
732 if (!IS_CACHE(rbtdb))
736 * It's possible the rbtdb is not a cache. If this is the case,
737 * we will not have a heap, and we move on. If we do, though,
738 * we might need to adjust things.
740 if (header->heap_index == 0 || newttl == oldttl)
742 idx = header->node->locknum;
743 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
745 heap = rbtdb->heaps[idx];
748 isc_heap_increased(heap, header->heap_index);
750 isc_heap_decreased(heap, header->heap_index);
754 * These functions allow the heap code to rank the priority of each
755 * element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * ttl_sooner(): heap ordering callback for the TTL heap. Both arguments
 * are rdatasetheader_t pointers; the comparison is on rdh_ttl, with a
 * smaller rdh_ttl ranking first.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
758 ttl_sooner(void *v1, void *v2) {
759 rdatasetheader_t *h1 = v1;
760 rdatasetheader_t *h2 = v2;
762 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * resign_sooner(): heap ordering callback for the re-signing heap. Both
 * arguments are rdatasetheader_t pointers; the comparison is on the
 * resign field, with a smaller value ranking first.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
768 resign_sooner(void *v1, void *v2) {
769 rdatasetheader_t *h1 = v1;
770 rdatasetheader_t *h2 = v2;
772 if (h1->resign < h2->resign)
778 * This function sets the heap index into the header.
/*
 * set_index(): heap index callback. Records in the header (what) the
 * position it currently occupies in its heap. Other code in this file
 * treats heap_index == 0 as meaning the header is not on a heap.
 */
781 set_index(void *what, unsigned int index) {
782 rdatasetheader_t *h = what;
784 h->heap_index = index;
788 * Work out how many nodes can be deleted in the time between two
789 * requests to the nameserver. Smooth the resulting number and use it
790 * as an estimate for the number of nodes to be deleted in the next
/*
 * adjust_quantum(): compute a new deletion quantum from the previous one.
 *
 *   old    the quantum used for the batch that just finished
 *   start  the time at which that batch started
 *
 * NOTE(review): several lines (declarations, guards on pps and usecs,
 * the return statement) are not visible in this excerpt.
 */
794 adjust_quantum(unsigned int old, isc_time_t *start) {
795 unsigned int pps = dns_pps; /* packets per second */
796 unsigned int interval;
805 interval = 1000000 / pps; /* interval in usec */
/* Elapsed time for the previous batch, in microseconds. */
808 usecs = isc_time_microdiff(&end, start);
811 * We were unable to measure the amount of time taken.
812 * Double the nodes deleted next time.
/*
 * Deleting old nodes took usecs microseconds, so old * interval / usecs
 * is the number of nodes that fits into one inter-packet interval.
 */
819 new = old * interval;
820 new /= (unsigned int)usecs;
/* Smooth the estimate: one part new value, three parts old value. */
827 new = (new + old * 3) / 4;
829 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
830 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * free_rbtdb(): destroy a database and release everything it owns.
 *
 *   rbtdb  the database being destroyed
 *   log    NOTE(review): presumably gates the debug log message below;
 *          the test itself is not visible in this excerpt
 *   event  event from a previous, interrupted pass (or NULL); it is
 *          freed once the trees are gone
 *
 * Visible sequence of work:
 *  1. Preconditions: there is no future version, and if there is no
 *     current version then the open version list is empty.
 *  2. The current version, if any, is released: its reference is
 *     dropped, it is unlinked from open_versions and its memory freed.
 *  3. Every node still on a deadnodes list is unlinked.
 *  4. The trees (tree, then nsec, then nsec3) are destroyed with
 *     dns_rbt_destroy2() using rbtdb->quantum as the work limit. The
 *     quantum is 100 when a task is available and 0 otherwise. When the
 *     call returns ISC_R_QUOTA the quantum is re-estimated with
 *     adjust_quantum(), a DNS_EVENT_FREESTORAGE event is allocated and
 *     sent to rbtdb->task.
 *     NOTE(review): presumably that event re-enters this function via
 *     free_rbtdb_callback and this pass returns; the relevant lines are
 *     not visible here.
 *  5. Once all trees are gone: the event is freed, a debug message with
 *     the origin name is logged, the origin name is freed, node locks
 *     and their refcounts are destroyed, the per-bucket LRU lists, dead
 *     node lists and heaps are checked or destroyed and their arrays
 *     freed, statistics and the rpz_cidr object are released, and the
 *     node lock array, tree lock, reference counter, task and database
 *     lock are destroyed.
 *  6. Both magic numbers are cleared, the ondestroy list is copied out,
 *     the heap memory context is detached, the rbtdb structure is freed
 *     and ondestroy listeners are notified.
 */
836 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
838 isc_ondestroy_t ondest;
840 char buf[DNS_NAME_FORMATSIZE];
844 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
845 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
847 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
848 REQUIRE(rbtdb->future_version == NULL);
/* Release the current version object. */
850 if (rbtdb->current_version != NULL) {
853 isc_refcount_decrement(&rbtdb->current_version->references,
856 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
857 isc_refcount_destroy(&rbtdb->current_version->references);
858 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
859 sizeof(rbtdb_version_t));
863 * We assume the number of remaining dead nodes is reasonably small;
864 * the overhead of unlinking all nodes here should be negligible.
866 for (i = 0; i < rbtdb->node_lock_count; i++) {
869 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
870 while (node != NULL) {
871 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
872 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* Incremental destruction is only used when a task is available. */
877 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
881 * pick the next tree to (start to) destroy
883 treep = &rbtdb->tree;
884 if (*treep == NULL) {
885 treep = &rbtdb->nsec;
886 if (*treep == NULL) {
887 treep = &rbtdb->nsec3;
889 * we're finished after clear cutting
896 isc_time_now(&start);
897 result = dns_rbt_destroy2(treep, rbtdb->quantum);
898 if (result == ISC_R_QUOTA) {
899 INSIST(rbtdb->task != NULL);
900 if (rbtdb->quantum != 0)
901 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
904 event = isc_event_allocate(rbtdb->common.mctx,
906 DNS_EVENT_FREESTORAGE,
909 sizeof(isc_event_t));
912 isc_task_send(rbtdb->task, &event);
915 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
919 isc_event_free(&event);
921 if (dns_name_dynamic(&rbtdb->common.origin))
922 dns_name_format(&rbtdb->common.origin, buf,
925 strcpy(buf, "<UNKNOWN>");
926 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
927 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
928 "done free_rbtdb(%s)", buf);
930 if (dns_name_dynamic(&rbtdb->common.origin))
931 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
932 for (i = 0; i < rbtdb->node_lock_count; i++) {
933 isc_refcount_destroy(&rbtdb->node_locks[i].references);
934 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
938 * Clean up LRU / re-signing order lists.
940 if (rbtdb->rdatasets != NULL) {
941 for (i = 0; i < rbtdb->node_lock_count; i++)
942 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
943 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
944 rbtdb->node_lock_count *
945 sizeof(rdatasetheaderlist_t));
948 * Clean up dead node buckets.
950 if (rbtdb->deadnodes != NULL) {
951 for (i = 0; i < rbtdb->node_lock_count; i++)
952 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
953 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
954 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
957 * Clean up heap objects.
959 if (rbtdb->heaps != NULL) {
960 for (i = 0; i < rbtdb->node_lock_count; i++)
961 isc_heap_destroy(&rbtdb->heaps[i]);
962 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
963 rbtdb->node_lock_count * sizeof(isc_heap_t *));
966 if (rbtdb->rrsetstats != NULL)
967 dns_stats_detach(&rbtdb->rrsetstats);
970 if (rbtdb->rpz_cidr != NULL)
971 dns_rpz_cidr_free(&rbtdb->rpz_cidr);
974 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
975 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
976 isc_rwlock_destroy(&rbtdb->tree_lock);
977 isc_refcount_destroy(&rbtdb->references);
978 if (rbtdb->task != NULL)
979 isc_task_detach(&rbtdb->task);
981 RBTDB_DESTROYLOCK(&rbtdb->lock);
982 rbtdb->common.magic = 0;
983 rbtdb->common.impmagic = 0;
984 ondest = rbtdb->common.ondest;
985 isc_mem_detach(&rbtdb->hmctx);
986 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
987 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * maybe_free_rbtdb(): called when the last external reference to the
 * database has gone; frees the database if no node lock bucket is still
 * in use.
 *
 * The cached soanode and nsnode references are detached first. Each
 * node lock bucket is then flagged as exiting under its own lock and
 * its reference counter is inspected; buckets found idle are counted in
 * inactive. That count is subtracted from rbtdb->active under the
 * database lock, and when active reaches zero the database is logged
 * and handed to free_rbtdb().
 * NOTE(review): the comparison applied to the bucket refcount, the
 * increment of inactive and the test of want_free are not visible in
 * this excerpt.
 */
991 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
992 isc_boolean_t want_free = ISC_FALSE;
994 unsigned int inactive = 0;
996 /* XXX check for open versions here */
998 if (rbtdb->soanode != NULL)
999 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1000 if (rbtdb->nsnode != NULL)
1001 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1004 * Even though there are no external direct references, there still
1005 * may be nodes in use.
1007 for (i = 0; i < rbtdb->node_lock_count; i++) {
/* The exiting flag is documented as locked by the bucket lock. */
1008 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1009 rbtdb->node_locks[i].exiting = ISC_TRUE;
1010 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1011 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1017 if (inactive != 0) {
1018 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1019 rbtdb->active -= inactive;
1020 if (rbtdb->active == 0)
1021 want_free = ISC_TRUE;
1022 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1024 char buf[DNS_NAME_FORMATSIZE];
1025 if (dns_name_dynamic(&rbtdb->common.origin))
1026 dns_name_format(&rbtdb->common.origin, buf,
1029 strcpy(buf, "<UNKNOWN>");
1030 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1031 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1032 "calling free_rbtdb(%s)", buf);
1033 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * detach(): drop one reference on the database pointed to by *dbp. The
 * remaining count is captured in refs and maybe_free_rbtdb() performs
 * the teardown check.
 * NOTE(review): the test on refs that guards the call, and the clearing
 * of *dbp, are not visible in this excerpt.
 */
1039 detach(dns_db_t **dbp) {
1040 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1043 REQUIRE(VALID_RBTDB(rbtdb));
1045 isc_refcount_decrement(&rbtdb->references, &refs);
1048 maybe_free_rbtdb(rbtdb);
/*
 * currentversion(): return a referenced handle to the current version of
 * the database in *versionp.
 *
 * The database lock is held in read mode while current_version is read
 * and its reference count incremented, so the version cannot be swapped
 * out between the two steps.
 */
1054 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1055 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1056 rbtdb_version_t *version;
1059 REQUIRE(VALID_RBTDB(rbtdb));
1061 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1062 version = rbtdb->current_version;
1063 isc_refcount_increment(&version->references, &refs);
1064 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1066 *versionp = (dns_dbversion_t *)version;
/*
 * allocate_version(): allocate and initialize a version object.
 *
 *   mctx        memory context to allocate from
 *   serial      serial number stored in the new version
 *   references  initial value of the reference counter
 *   writer      whether this is a writable version
 *
 * The new version starts with commit_ok false, empty changed and
 * resigned lists and an unlinked list link. If the reference counter
 * cannot be initialized the memory is returned to mctx.
 * NOTE(review): the return statements are not visible in this excerpt;
 * newversion() treats a NULL result as out of memory.
 */
1069 static inline rbtdb_version_t *
1070 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1071 unsigned int references, isc_boolean_t writer)
1073 isc_result_t result;
1074 rbtdb_version_t *version;
1076 version = isc_mem_get(mctx, sizeof(*version));
1077 if (version == NULL)
1079 version->serial = serial;
1080 result = isc_refcount_init(&version->references, references);
1081 if (result != ISC_R_SUCCESS) {
1082 isc_mem_put(mctx, version, sizeof(*version));
1085 version->writer = writer;
1086 version->commit_ok = ISC_FALSE;
1087 ISC_LIST_INIT(version->changed_list);
1088 ISC_LIST_INIT(version->resigned_list);
1089 ISC_LINK_INIT(version, link);
/*
 * newversion(): open a new version of the database for writing.
 *
 * Requires that *versionp is NULL on entry and that no future version
 * is already open. Under the database write lock a version object is
 * allocated with serial next_serial and one reference. On success it is
 * marked commit_ok, inherits the secure and havensec3 state of the
 * current version (including the NSEC3 flags, iterations, hash and salt
 * when havensec3 is set; otherwise those fields are zeroed), next_serial
 * is advanced and the object is recorded as rbtdb->future_version.
 *
 * Returns ISC_R_NOMEMORY when the version could not be allocated,
 * otherwise stores the version in *versionp and returns ISC_R_SUCCESS.
 */
1095 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1096 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1097 rbtdb_version_t *version;
1099 REQUIRE(VALID_RBTDB(rbtdb));
1100 REQUIRE(versionp != NULL && *versionp == NULL);
1101 REQUIRE(rbtdb->future_version == NULL);
1103 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1104 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1105 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1107 if (version != NULL) {
1108 version->commit_ok = ISC_TRUE;
1109 version->secure = rbtdb->current_version->secure;
1110 version->havensec3 = rbtdb->current_version->havensec3;
1111 if (version->havensec3) {
1112 version->flags = rbtdb->current_version->flags;
1113 version->iterations =
1114 rbtdb->current_version->iterations;
1115 version->hash = rbtdb->current_version->hash;
1116 version->salt_length =
1117 rbtdb->current_version->salt_length;
/* Only salt_length bytes of the salt are meaningful and copied. */
1118 memcpy(version->salt, rbtdb->current_version->salt,
1119 version->salt_length);
1122 version->iterations = 0;
1124 version->salt_length = 0;
1125 memset(version->salt, 0, sizeof(version->salt));
1127 rbtdb->next_serial++;
1128 rbtdb->future_version = version;
1130 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1132 if (version == NULL)
1133 return (ISC_R_NOMEMORY);
1135 *versionp = version;
1137 return (ISC_R_SUCCESS);
/*
 * attachversion(): add a reference to the version source and return it
 * through *targetp. The db argument is only validated.
 */
1141 attachversion(dns_db_t *db, dns_dbversion_t *source,
1142 dns_dbversion_t **targetp)
1144 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1145 rbtdb_version_t *rbtversion = source;
1148 REQUIRE(VALID_RBTDB(rbtdb));
1150 isc_refcount_increment(&rbtversion->references, &refs);
1153 *targetp = rbtversion;
/*
 * add_changed(): record that node was modified in the writable version.
 *
 * A change record is allocated before the database write lock is taken.
 * With the lock held, and the version required to be a writer, a
 * successfully allocated record takes a reference on the node, is
 * marked not dirty and is appended to version->changed_list.
 * NOTE(review): commit_ok is cleared on what is presumably the
 * allocation-failure branch; the else line and the return statement are
 * not visible in this excerpt.
 */
1156 static rbtdb_changed_t *
1157 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1158 dns_rbtnode_t *node)
1160 rbtdb_changed_t *changed;
1164 * Caller must be holding the node lock if its reference must be
1165 * protected by the lock.
1168 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1170 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1172 REQUIRE(version->writer);
1174 if (changed != NULL) {
1175 dns_rbtnode_refincrement(node, &refs);
1177 changed->node = node;
1178 changed->dirty = ISC_FALSE;
1179 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1181 version->commit_ok = ISC_FALSE;
1183 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * free_acachearray(): free an additional-cache control array belonging
 * to header.
 *
 * The number of array elements is read from the first two bytes of the
 * data that follows the header, taken as a 16-bit big-endian value. All
 * entries are asserted to be empty before the array is returned to mctx.
 * NOTE(review): the remaining parameter (the array) and any early
 * return for a NULL array are not visible in this excerpt.
 */
1189 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1194 unsigned char *raw; /* RDATASLAB */
1197 * The caller must be holding the corresponding node lock.
1203 raw = (unsigned char *)header + sizeof(*header);
1204 count = raw[0] * 256 + raw[1];
1207 * Sanity check: since an additional cache entry has a reference to
1208 * the original DB node (in the callback arg), there should be no
1209 * acache entries when the node can be freed.
1211 for (i = 0; i < count; i++)
1212 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1214 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * free_noqname(): release a noqname structure and what it owns: the
 * name (when dynamically allocated), the neg and negsig slabs (each
 * sized with dns_rdataslab_size()) and finally the structure itself.
 * NOTE(review): resetting *noqname to NULL is not visible in this
 * excerpt.
 */
1218 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1220 if (dns_name_dynamic(&(*noqname)->name))
1221 dns_name_free(&(*noqname)->name, mctx);
1222 if ((*noqname)->neg != NULL)
1223 isc_mem_put(mctx, (*noqname)->neg,
1224 dns_rdataslab_size((*noqname)->neg, 0));
1225 if ((*noqname)->negsig != NULL)
1226 isc_mem_put(mctx, (*noqname)->negsig,
1227 dns_rdataslab_size((*noqname)->negsig, 0));
1228 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * init_rdataset(): initialize the bookkeeping fields of a header; the
 * visible part resets the LRU list link.
 * NOTE(review): the trace output to stderr for class IN caches is
 * presumably wrapped in a preprocessor guard, and further field
 * initialization may exist; neither is visible in this excerpt.
 */
1233 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1235 ISC_LINK_INIT(h, link);
1239 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1240 fprintf(stderr, "initialized header: %p\n", h);
/*
 * new_rdataset(): allocate a bare rdatasetheader_t from mctx and run
 * init_rdataset() on it.
 * NOTE(review): the allocation-failure check, the return statement and
 * any preprocessor guard around the trace output are not visible in
 * this excerpt.
 */
1246 static inline rdatasetheader_t *
1247 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1249 rdatasetheader_t *h;
1251 h = isc_mem_get(mctx, sizeof(*h));
1256 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1257 fprintf(stderr, "allocated header: %p\n", h);
1259 init_rdataset(rbtdb, h);
/*
 * free_rdataset(): detach a header from all bookkeeping structures and
 * free it.
 *
 * Steps, in order:
 *  - if the header exists and was counted in statistics, decrement the
 *    matching counter;
 *  - unlink it from the per-bucket list in rbtdb->rdatasets when linked
 *    (only expected for caches);
 *  - remove it from the per-bucket heap when heap_index is non-zero;
 *  - free the noqname and closest proofs and both acache arrays;
 *  - return the memory to mctx. A NONEXISTENT header occupies only the
 *    header structure; otherwise the size comes from
 *    dns_rdataslab_size().
 */
1264 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1269 if (EXISTS(rdataset) &&
1270 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1271 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
/* Lists and heaps are indexed by the lock bucket of the owning node. */
1274 idx = rdataset->node->locknum;
1275 if (ISC_LINK_LINKED(rdataset, link)) {
1276 INSIST(IS_CACHE(rbtdb));
1277 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1279 if (rdataset->heap_index != 0)
1280 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1281 rdataset->heap_index = 0;
1283 if (rdataset->noqname != NULL)
1284 free_noqname(mctx, &rdataset->noqname);
1285 if (rdataset->closest != NULL)
1286 free_noqname(mctx, &rdataset->closest);
1288 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1289 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1291 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1292 size = sizeof(*rdataset);
1294 size = dns_rdataslab_size((unsigned char *)rdataset,
1296 isc_mem_put(mctx, rdataset, size);
/*
 * Mark every rdataset version of 'node' that was created with serial
 * number 'serial' as ignored.  Both the top-level headers (linked by
 * 'next') and the older versions below each of them (linked by 'down')
 * are examined.
 * NOTE(review): 'make_dirty' records whether anything was marked; the
 * statement that consumes it is not visible in this listing --
 * presumably it flags the node as dirty; confirm.
 */
1300 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1301 rdatasetheader_t *header, *dcurrent;
1302 isc_boolean_t make_dirty = ISC_FALSE;
1305 * Caller must hold the node lock.
1309 * We set the IGNORE attribute on rdatasets with serial number
1310 * 'serial'. When the reference count goes to zero, these rdatasets
1311 * will be cleaned up; until that time, they will be ignored.
1313 for (header = node->data; header != NULL; header = header->next) {
1314 if (header->serial == serial) {
1315 header->attributes |= RDATASET_ATTR_IGNORE;
1316 make_dirty = ISC_TRUE;
/* Apply the same test to each older version under this header. */
1318 for (dcurrent = header->down;
1320 dcurrent = dcurrent->down) {
1321 if (dcurrent->serial == serial) {
1322 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1323 make_dirty = ISC_TRUE;
/*
 * Free every header that hangs below 'top' on its 'down' chain.  The
 * successor is saved before each free_rdataset() call because the
 * header being freed must not be read afterwards.  'top' itself is not
 * freed here.
 * NOTE(review): no reset of top->down is visible in this listing;
 * confirm the chain is detached once its members are gone.
 */
1332 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1334 rdatasetheader_t *d, *down_next;
1336 for (d = top->down; d != NULL; d = down_next) {
1337 down_next = d->down;
1338 free_rdataset(rbtdb, mctx, d);
/*
 * Cache-database cleanup of 'node': for every top-level header, discard
 * all older versions below it, then remove and free the header itself
 * when it is marked NONEXISTENT or STALE.
 *
 * 'top_prev' is the predecessor in the top-level list; NULL means the
 * header being removed is the list head, so node->data is updated
 * instead.
 * NOTE(review): the statements that initialise and advance 'top_prev'
 * are not visible in this listing.
 */
1344 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1345 rdatasetheader_t *current, *top_prev, *top_next;
1346 isc_mem_t *mctx = rbtdb->common.mctx;
1349 * Caller must be holding the node lock.
/* 'top_next' is captured first because 'current' may be freed below. */
1353 for (current = node->data; current != NULL; current = top_next) {
1354 top_next = current->next;
1355 clean_stale_headers(rbtdb, mctx, current);
1357 * If current is nonexistent or stale, we can clean it up.
1359 if ((current->attributes &
1360 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1361 if (top_prev != NULL)
1362 top_prev->next = current->next;
1364 node->data = current->next;
1365 free_rdataset(rbtdb, mctx, current);
/*
 * Zone-database cleanup of 'node'.  'least_serial' must be non-zero;
 * versions older than it are candidates for removal.
 *
 * For each top-level header ('next' list) the function works through
 * the version chain below it ('down' list) in four steps:
 *  1. drop versions that duplicate their parent's serial (a second
 *     condition on the same test is not visible in this listing; the
 *     original comment mentions the IGNORE attribute);
 *  2. if the top header itself is ignored, either remove it (no older
 *     version) or promote the next older version into its place;
 *  3. find the first version older than 'least_serial' and free it and
 *     everything below it;
 *  4. free the top header when it is NONEXISTENT.
 * 'still_dirty' is set when a top header keeps older versions.
 * NOTE(review): the statements consuming 'still_dirty' and maintaining
 * 'top_prev'/'dparent' are not visible in this listing.
 */
1373 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1374 rbtdb_serial_t least_serial)
1376 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1377 rdatasetheader_t *top_prev, *top_next;
1378 isc_mem_t *mctx = rbtdb->common.mctx;
1379 isc_boolean_t still_dirty = ISC_FALSE;
1382 * Caller must be holding the node lock.
1384 REQUIRE(least_serial != 0);
1387 for (current = node->data; current != NULL; current = top_next) {
1388 top_next = current->next;
1391 * First, we clean up any instances of multiple rdatasets
1392 * with the same serial number, or that have the IGNORE
1396 for (dcurrent = current->down;
1398 dcurrent = down_next) {
1399 down_next = dcurrent->down;
1400 INSIST(dcurrent->serial <= dparent->serial);
1401 if (dcurrent->serial == dparent->serial ||
/* Splice 'dcurrent' out of the down chain, then free it. */
1403 if (down_next != NULL)
1404 down_next->next = dparent;
1405 dparent->down = down_next;
1406 free_rdataset(rbtdb, mctx, dcurrent);
1412 * We've now eliminated all IGNORE datasets with the possible
1413 * exception of current, which we now check.
1415 if (IGNORE(current)) {
1416 down_next = current->down;
1417 if (down_next == NULL) {
1418 if (top_prev != NULL)
1419 top_prev->next = current->next;
1421 node->data = current->next;
1422 free_rdataset(rbtdb, mctx, current);
1424 * current no longer exists, so we can
1425 * just continue with the loop.
1430 * Pull up current->down, making it the new
1433 if (top_prev != NULL)
1434 top_prev->next = down_next;
1436 node->data = down_next;
1437 down_next->next = top_next;
1438 free_rdataset(rbtdb, mctx, current);
1439 current = down_next;
1444 * We now try to find the first down node less than the
1448 for (dcurrent = current->down;
1450 dcurrent = down_next) {
1451 down_next = dcurrent->down;
1452 if (dcurrent->serial < least_serial)
1458 * If there is a such an rdataset, delete it and any older
1461 if (dcurrent != NULL) {
1463 down_next = dcurrent->down;
1464 INSIST(dcurrent->serial <= least_serial);
1465 free_rdataset(rbtdb, mctx, dcurrent);
1466 dcurrent = down_next;
1467 } while (dcurrent != NULL);
/* Everything below 'dparent' has been freed; terminate the chain. */
1468 dparent->down = NULL;
1472 * Note. The serial number of 'current' might be less than
1473 * least_serial too, but we cannot delete it because it is
1474 * the most recent version, unless it is a NONEXISTENT
1477 if (current->down != NULL) {
1478 still_dirty = ISC_TRUE;
1482 * If this is a NONEXISTENT rdataset, we can delete it.
1484 if (NONEXISTENT(current)) {
1485 if (top_prev != NULL)
1486 top_prev->next = current->next;
1488 node->data = current->next;
1489 free_rdataset(rbtdb, mctx, current);
/*
 * Remove 'node' from whichever tree of 'rbtdb' it lives in, selected by
 * node->nsec:
 *  - NORMAL:   delete from the main tree; when an RPZ CIDR table is
 *              present the node's full name is removed from it first;
 *  - HAS_NSEC: look up and delete the matching node in the auxiliary
 *              NSEC tree, then delete from the main tree and drop the
 *              name from the RPZ CIDR table;
 *  - NSEC:     delete from the auxiliary NSEC tree;
 *  - NSEC3:    delete from the NSEC3 tree.
 * The node must not be on a dead-node list.  Failures are only logged;
 * nothing is returned to the caller.
 *
 * 'result' starts as ISC_R_UNEXPECTED, so a node->nsec value matching
 * none of the cases also reaches the final log call.
 * NOTE(review): in the HAS_NSEC case the visible call to
 * dns_rpz_cidr_deleteip() is not preceded by the rpz_cidr != NULL test
 * used in the NORMAL case -- confirm the callee tolerates NULL.
 */
1499 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1501 dns_rbtnode_t *nsecnode;
1502 dns_fixedname_t fname;
1504 isc_result_t result = ISC_R_UNEXPECTED;
1506 INSIST(!ISC_LINK_LINKED(node, deadlink));
1508 switch (node->nsec) {
1509 case DNS_RBT_NSEC_NORMAL:
1511 if (rbtdb->rpz_cidr != NULL) {
1512 dns_fixedname_init(&fname);
1513 name = dns_fixedname_name(&fname);
1514 dns_rbt_fullnamefromnode(node, name);
1515 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1518 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1520 case DNS_RBT_NSEC_HAS_NSEC:
/* The full name is built while the node still exists in the tree. */
1521 dns_fixedname_init(&fname);
1522 name = dns_fixedname_name(&fname);
1523 dns_rbt_fullnamefromnode(node, name);
1525 * Delete the corresponding node from the auxiliary NSEC
1526 * tree before deleting from the main tree.
1529 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1530 NULL, DNS_RBTFIND_EMPTYDATA,
1532 if (result != ISC_R_SUCCESS) {
1533 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1534 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1536 "dns_rbt_findnode(nsec): %s",
1537 isc_result_totext(result));
1539 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1541 if (result != ISC_R_SUCCESS) {
1542 isc_log_write(dns_lctx,
1543 DNS_LOGCATEGORY_DATABASE,
1544 DNS_LOGMODULE_CACHE,
1546 "delete_nsecnode(): "
1547 "dns_rbt_deletenode(nsecnode): %s",
1548 isc_result_totext(result));
1551 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1553 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1556 case DNS_RBT_NSEC_NSEC:
1557 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1559 case DNS_RBT_NSEC_NSEC3:
1560 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
/* Common failure report for the final deletion of any case. */
1563 if (result != ISC_R_SUCCESS) {
1564 isc_log_write(dns_lctx,
1565 DNS_LOGCATEGORY_DATABASE,
1566 DNS_LOGMODULE_CACHE,
1568 "delete_nsecnode(): "
1569 "dns_rbt_deletenode: %s",
1570 isc_result_totext(result));
1575 * Clean up dead nodes. These are nodes which have no references, and
1576 * have no data. They are dead but we could not or chose not to delete
1577 * them when we deleted all the data at that node because we did not want
1578 * to wait for the tree write lock.
1580 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * Take nodes off the head of the dead-node list of lock bucket
 * 'bucketnum' and delete them from the tree.  Work per call is bounded
 * by 'count', which starts at 10.
 * NOTE(review): the statement that decrements 'count' is not visible in
 * this listing.
 */
1583 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1584 dns_rbtnode_t *node;
1585 int count = 10; /* XXXJT: should be adjustable */
1587 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1588 while (node != NULL && count > 0) {
1589 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1592 * Since we're holding a tree write lock, it should be
1593 * impossible for this node to be referenced by others.
1595 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1596 node->data == NULL);
1598 delete_node(rbtdb, node);
/* Re-read the head: the node just handled is no longer on the list. */
1600 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1606 * Caller must be holding the node lock if its reference must be protected by the lock.
/*
 * Add one reference to 'node'.  When this is the node's first reference
 * (count goes from 0 to 1), the reference counter of the node's lock
 * bucket is incremented as well, so the bucket counter tracks how many
 * of its nodes are referenced rather than the total reference count.
 * The "increment0" variants are used because both counters may
 * legitimately start at zero.
 */
1610 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1611 unsigned int lockrefs, noderefs;
1612 isc_refcount_t *lockref;
1614 dns_rbtnode_refincrement0(node, &noderefs);
1615 if (noderefs == 1) { /* this is the first reference to the node */
1616 lockref = &rbtdb->node_locks[node->locknum].references;
1617 isc_refcount_increment0(lockref, &lockrefs);
1618 INSIST(lockrefs != 0);
1620 INSIST(noderefs != 0);
1624 * This function is assumed to be called when a node is newly referenced
1625 * and can be in the deadnode list. In that case the node must be retrieved
1626 * from the list because it is going to be used. In addition, if the caller
1627 * happens to hold a write lock on the tree, it's a good chance to purge dead nodes.
1629 * Note: while a new reference is gained in multiple places, there are only very
1630 * few cases where the node can be in the deadnode list (only empty nodes can
1631 * have been added to the list).
/*
 * Take a new reference on 'node' and make sure it is no longer queued
 * on its bucket's dead-node list.  'treelocktype' is the kind of tree
 * lock the caller holds; with a tree write lock the call also purges
 * other dead nodes of the same bucket.
 *
 * The node lock is first taken in read mode just to decide whether any
 * list manipulation is needed ('need_relock'); only then is it taken
 * again in write mode to do the work.
 * NOTE(review): the test of 'need_relock' guarding the write-locked
 * section is not visible in this listing.
 */
1634 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1635 isc_rwlocktype_t treelocktype)
1637 isc_boolean_t need_relock = ISC_FALSE;
1639 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1640 new_reference(rbtdb, node);
/* Cheap check under the read lock. */
1642 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1643 isc_rwlocktype_read);
1644 if (ISC_LINK_LINKED(node, deadlink))
1645 need_relock = ISC_TRUE;
1646 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1647 treelocktype == isc_rwlocktype_write)
1648 need_relock = ISC_TRUE;
1649 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1650 isc_rwlocktype_read);
/*
 * List changes under the write lock.  The deadlink test is repeated
 * because the lock was released in between.
 */
1652 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1653 isc_rwlocktype_write);
1654 if (ISC_LINK_LINKED(node, deadlink))
1655 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1657 if (treelocktype == isc_rwlocktype_write)
1658 cleanup_dead_nodes(rbtdb, node->locknum);
1659 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1660 isc_rwlocktype_write);
1663 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1667 * Caller must be holding the node lock; either the "strong", read or write
1668 * lock. Note that the lock must be held even when node references are
1669 * atomically modified; in that case the decrement operation itself does not
1670 * have to be protected, but we must avoid a race condition where multiple
1671 * threads are decreasing the reference to zero simultaneously and at least
1672 * one of them is going to free the node.
1673 * This function returns ISC_TRUE if and only if the node reference decreases to zero.
/*
 * Drop one reference on 'node' and, when it becomes unreferenced, clean
 * it up and possibly remove it from the tree.
 *
 * 'least_serial' is passed on to clean_zone_node(); 0 means the caller
 *   does not know it, and it is then read from rbtdb->least_serial
 *   under the database lock.
 * 'nlock' is the kind of node lock the caller holds.  A read lock is
 *   exchanged for a write lock for the slow path and downgraded again
 *   before returning.
 * 'tlock' is the kind of tree lock the caller holds.  Unless it is
 *   already a write lock, a non-blocking upgrade or trylock is
 *   attempted; on failure the node is queued on the bucket's dead-node
 *   list instead of being deleted.
 * 'pruning' is ISC_TRUE when the call comes from prune_tree(); it
 *   prevents a further pruning event from being dispatched.
 *
 * Returns ISC_TRUE when the node ends up unreferenced: 'nrefs == 0' on
 * the fast path, 'no_reference' on the slow path.
 *
 * NOTE(review): several short lines (early returns, 'else', closing
 * braces, the 'ev'/'db' declarations) are absent from this listing, so
 * the exact branch structure must be confirmed against the full file.
 */
1676 static isc_boolean_t
1677 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1678 rbtdb_serial_t least_serial,
1679 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1680 isc_boolean_t pruning)
1682 isc_result_t result;
1683 isc_boolean_t write_locked;
1684 rbtdb_nodelock_t *nodelock;
1685 unsigned int refs, nrefs;
1686 int bucket = node->locknum;
1687 isc_boolean_t no_reference;
1689 nodelock = &rbtdb->node_locks[bucket];
/*
 * Fast path: a clean node that still has data or children can never be
 * deleted here, so only the counters need adjusting.
 */
1691 /* Handle easy and typical case first. */
1692 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1693 dns_rbtnode_refdecrement(node, &nrefs);
1694 INSIST((int)nrefs >= 0);
1696 isc_refcount_decrement(&nodelock->references, &refs);
1697 INSIST((int)refs >= 0);
1699 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1702 /* Upgrade the lock? */
1703 if (nlock == isc_rwlocktype_read) {
1704 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1705 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1707 dns_rbtnode_refdecrement(node, &nrefs);
1708 INSIST((int)nrefs >= 0);
1710 /* Restore the lock? */
1711 if (nlock == isc_rwlocktype_read)
1712 NODE_WEAKDOWNGRADE(&nodelock->lock);
/* An unreferenced dirty node gets its stale rdatasets cleaned out. */
1716 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1717 if (IS_CACHE(rbtdb))
1718 clean_cache_node(rbtdb, node);
1720 if (least_serial == 0) {
1722 * Caller doesn't know the least serial.
1725 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1726 least_serial = rbtdb->least_serial;
1727 RBTDB_UNLOCK(&rbtdb->lock,
1728 isc_rwlocktype_read);
1730 clean_zone_node(rbtdb, node, least_serial);
1734 isc_refcount_decrement(&nodelock->references, &refs);
1735 INSIST((int)refs >= 0);
1738 * XXXDCL should this only be done for cache zones?
/* A node that still has data or children stays in the tree. */
1740 if (node->data != NULL || node->down != NULL) {
1741 /* Restore the lock? */
1742 if (nlock == isc_rwlocktype_read)
1743 NODE_WEAKDOWNGRADE(&nodelock->lock);
1748 * Attempt to switch to a write lock on the tree. If this fails,
1749 * we will add this node to a linked list of nodes in this locking
1750 * bucket which we will free later.
1752 if (tlock != isc_rwlocktype_write) {
1754 * Locking hierarchy notwithstanding, we don't need to free
1755 * the node lock before acquiring the tree write lock because
1756 * we only do a trylock.
1758 if (tlock == isc_rwlocktype_read)
1759 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1761 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1762 isc_rwlocktype_write);
1763 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1764 result == ISC_R_LOCKBUSY);
1766 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1768 write_locked = ISC_TRUE;
1770 no_reference = ISC_TRUE;
1771 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1773 * We can now delete the node if the reference counter is
1774 * zero. This should be typically the case, but a different
1775 * thread may still gain a (new) reference just before the
1776 * current thread locks the tree (e.g., in findnode()).
1780 * If this node is the only one in the level it's in, deleting
1781 * this node may recursively make its parent the only node in
1782 * the parent level; if so, and if no one is currently using
1783 * the parent node, this is almost the only opportunity to
1784 * clean it up. But the recursive cleanup is not that trivial
1785 * since the child and parent may be in different lock buckets,
1786 * which would cause a lock order reversal problem. To avoid
1787 * the trouble, we'll dispatch a separate event for batch
1788 * cleaning. We need to check whether we're deleting the node
1789 * as a result of pruning to avoid infinite dispatching.
1790 * Note: pruning happens only when a task has been set for the
1791 * rbtdb. If the user of the rbtdb chooses not to set a task,
1792 * it's their responsibility to purge stale leaves (e.g. by
1793 * periodic walk-through).
1795 if (!pruning && node->parent != NULL &&
1796 node->parent->down == node && node->left == NULL &&
1797 node->right == NULL && rbtdb->task != NULL) {
1801 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1804 sizeof(isc_event_t));
/*
 * The pruning event keeps the node and the database alive until it
 * runs: it takes a node reference and attaches to the database.
 */
1806 new_reference(rbtdb, node);
1808 attach((dns_db_t *)rbtdb, &db);
1810 isc_task_send(rbtdb->task, &ev);
1811 no_reference = ISC_FALSE;
1814 * XXX: this is a weird situation. We could
1815 * ignore this error case, but then the stale
1816 * node will unlikely be purged except via a
1817 * rare condition such as manual cleanup. So
1818 * we queue it in the deadnodes list, hoping
1819 * the memory shortage is temporary and the node
1820 * will be deleted later.
1822 isc_log_write(dns_lctx,
1823 DNS_LOGCATEGORY_DATABASE,
1824 DNS_LOGMODULE_CACHE,
1826 "decrement_reference: failed to "
1827 "allocate pruning event");
1828 INSIST(!ISC_LINK_LINKED(node, deadlink));
1829 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1833 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1834 char printname[DNS_NAME_FORMATSIZE];
1836 isc_log_write(dns_lctx,
1837 DNS_LOGCATEGORY_DATABASE,
1838 DNS_LOGMODULE_CACHE,
1840 "decrement_reference: "
1841 "delete from rbt: %p %s",
1843 dns_rbt_formatnodename(node,
1845 sizeof(printname)));
1848 delete_node(rbtdb, node);
/* No tree write lock: defer the deletion through the dead-node list. */
1850 } else if (dns_rbtnode_refcurrent(node) == 0) {
1851 INSIST(!ISC_LINK_LINKED(node, deadlink));
1852 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1854 no_reference = ISC_FALSE;
1856 /* Restore the lock? */
1857 if (nlock == isc_rwlocktype_read)
1858 NODE_WEAKDOWNGRADE(&nodelock->lock);
1861 * Relock a read lock, or unlock the write lock if no lock was held.
1863 if (tlock == isc_rwlocktype_none)
1865 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1867 if (tlock == isc_rwlocktype_read)
1869 isc_rwlock_downgrade(&rbtdb->tree_lock);
1871 return (no_reference);
1875 * Prune the tree by recursively cleaning-up single leaves. In the worst
1876 * case, the number of iterations is the number of tree levels, which is at
1877 * most the maximum number of domain name labels, i.e., 127. In practice, this
1878 * should be much smaller (only a few times), and even the worst case would be
1879 * acceptable for a single event.
/*
 * Task event handler that removes an empty node and then walks upwards,
 * removing each parent that was left without children.
 *
 * The event carries the database as its sender and the starting node as
 * its argument.  The event is freed immediately; the database reference
 * held on its behalf is released by the detach() at the end.  The tree
 * write lock is held for the whole walk and exactly one node (bucket)
 * lock at a time.
 * NOTE(review): the 'do {' opener, the 'else' branch that ends the walk
 * and the assignment that advances 'node' are not visible in this
 * listing.
 */
1882 prune_tree(isc_task_t *task, isc_event_t *event) {
1883 dns_rbtdb_t *rbtdb = event->ev_sender;
1884 dns_rbtnode_t *node = event->ev_arg;
1885 dns_rbtnode_t *parent;
1886 unsigned int locknum;
1890 isc_event_free(&event);
1892 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1893 locknum = node->locknum;
1894 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/*
 * Remember the parent first: decrement_reference() may delete 'node'.
 * The final ISC_TRUE marks this call as pruning.
 */
1896 parent = node->parent;
1897 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1898 isc_rwlocktype_write, ISC_TRUE);
1900 if (parent != NULL && parent->down == NULL) {
1902 * node was the only down child of the parent and has
1903 * just been removed. We'll then need to examine the
1904 * parent. Keep the lock if possible; otherwise,
1905 * release the old lock and acquire one for the parent.
1907 if (parent->locknum != locknum) {
1908 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1909 isc_rwlocktype_write);
1910 locknum = parent->locknum;
1911 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1912 isc_rwlocktype_write);
1916 * We need to gain a reference to the node before
1917 * decrementing it in the next iteration. In addition,
1918 * if the node is in the dead-nodes list, extract it
1919 * from the list beforehand as we do in
1920 * reactivate_node().
1922 new_reference(rbtdb, parent);
1923 if (ISC_LINK_LINKED(parent, deadlink)) {
1924 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1931 } while (node != NULL);
1932 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1933 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1935 detach((dns_db_t **)&rbtdb);
/*
 * Make 'version' the least (oldest) open version of 'rbtdb': record its
 * serial as rbtdb->least_serial and hand its whole changed list over to
 * '*cleanup_list', leaving the version's own list empty.
 */
1939 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1940 rbtdb_changedlist_t *cleanup_list)
1943 * Caller must be holding the database lock.
1946 rbtdb->least_serial = version->serial;
/* Copy the list head by value, then reset the source list. */
1947 *cleanup_list = version->changed_list;
1948 ISC_LIST_INIT(version->changed_list);
/*
 * Move every changed record of 'version' that is not dirty from the
 * version's changed list onto '*cleanup_list'.  Dirty records stay
 * where they are.  The successor is fetched before a record is unlinked
 * so the walk survives the list modification.
 */
1952 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1953 rbtdb_changed_t *changed, *next_changed;
1956 * If the changed record is dirty, then
1957 * an update created multiple versions of
1958 * a given rdataset. We keep this list
1959 * until we're the least open version, at
1960 * which point it's safe to get rid of any
1963 * If the changed record isn't dirty, then
1964 * we don't need it anymore since we're
1965 * committing and not rolling back.
1967 * The caller must be holding the database lock.
1969 for (changed = HEAD(version->changed_list);
1971 changed = next_changed) {
1972 next_changed = NEXT(changed, link);
1973 if (!changed->dirty) {
1974 UNLINK(version->changed_list,
1976 APPEND(*cleanup_list,
/*
 * Work out whether the zone in 'db' is secure as of 'version' and store
 * the answer in version->secure (and version->havensec3).
 *
 * Steps visible here:
 *  1. look for a DNSKEY rdataset at 'origin' and scan it for a zone key;
 *  2. look for an NSEC rdataset at 'origin' together with its
 *     signatures;
 *  3. call setnsec3parameters() to pick up NSEC3PARAM data;
 *  4. mark the version secure when an NSEC3 chain or NSEC is present,
 *     insecure otherwise.
 * NOTE(review): the early exit after step 1 (no zone key) and the
 * statement that sets 'hasnsec' are not visible in this listing.
 */
1983 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1991 dns_rdataset_t keyset;
1992 dns_rdataset_t nsecset, signsecset;
1993 isc_boolean_t haszonekey = ISC_FALSE;
1994 isc_boolean_t hasnsec = ISC_FALSE;
1995 isc_result_t result;
1997 dns_rdataset_init(&keyset);
1998 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1999 0, 0, &keyset, NULL);
2000 if (result == ISC_R_SUCCESS) {
2001 dns_rdata_t keyrdata = DNS_RDATA_INIT;
2002 result = dns_rdataset_first(&keyset);
2003 while (result == ISC_R_SUCCESS) {
2004 dns_rdataset_current(&keyset, &keyrdata);
2005 if (dns_zonekey_iszonekey(&keyrdata)) {
2006 haszonekey = ISC_TRUE;
2009 result = dns_rdataset_next(&keyset);
2011 dns_rdataset_disassociate(&keyset);
2014 version->secure = dns_db_insecure;
2015 version->havensec3 = ISC_FALSE;
2019 dns_rdataset_init(&nsecset);
2020 dns_rdataset_init(&signsecset);
2021 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2022 0, 0, &nsecset, &signsecset);
2023 if (result == ISC_R_SUCCESS) {
2024 if (dns_rdataset_isassociated(&signsecset)) {
2026 dns_rdataset_disassociate(&signsecset);
2028 dns_rdataset_disassociate(&nsecset);
2031 setnsec3parameters(db, version);
2034 * Do we have a valid NSEC/NSEC3 chain?
2036 if (version->havensec3 || hasnsec)
2037 version->secure = dns_db_secure;
2039 version->secure = dns_db_insecure;
2044 * Walk the origin node looking for NSEC3PARAM records.
2045 * Cache the nsec3 parameters.
/*
 * Scan the rdatasets at the database's origin node for NSEC3PARAM as
 * seen by 'version' and cache the parameters of a usable record (hash,
 * flags, iterations, salt) in 'version', setting version->havensec3.
 * havensec3 is cleared first, so it stays ISC_FALSE when nothing
 * suitable is found.
 *
 * The tree lock and the origin node's lock are both held in read mode
 * for the duration of the scan.  The slab is decoded by hand: a 16-bit
 * record count, then per record a 16-bit length followed by the rdata.
 * NOTE(review): the skip statements after the hash/flags tests, the
 * non-FIXED branches of the #if blocks and the final goto/label are not
 * visible in this listing.
 */
2049 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
2050 dns_rbtnode_t *node;
2051 dns_rdata_nsec3param_t nsec3param;
2052 dns_rdata_t rdata = DNS_RDATA_INIT;
2053 isc_region_t region;
2054 isc_result_t result;
2055 rdatasetheader_t *header, *header_next;
2056 unsigned char *raw; /* RDATASLAB */
2057 unsigned int count, length;
2058 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2060 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2061 version->havensec3 = ISC_FALSE;
2062 node = rbtdb->origin_node;
2063 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2064 isc_rwlocktype_read);
2065 for (header = node->data;
2067 header = header_next) {
2068 header_next = header->next;
/* Descend the version chain to the header this version should see. */
2070 if (header->serial <= version->serial &&
2072 if (NONEXISTENT(header))
2076 header = header->down;
2077 } while (header != NULL);
2079 if (header != NULL &&
2080 (header->type == dns_rdatatype_nsec3param)) {
2082 * Find A NSEC3PARAM with a supported algorithm.
2084 raw = (unsigned char *)header + sizeof(*header);
2085 count = raw[0] * 256 + raw[1]; /* count */
2086 #if DNS_RDATASET_FIXED
2087 raw += count * 4 + 2;
2091 while (count-- > 0U) {
2092 length = raw[0] * 256 + raw[1];
2093 #if DNS_RDATASET_FIXED
2099 region.length = length;
2101 dns_rdata_fromregion(&rdata,
2102 rbtdb->common.rdclass,
2103 dns_rdatatype_nsec3param,
2105 result = dns_rdata_tostruct(&rdata,
2108 INSIST(result == ISC_R_SUCCESS);
2109 dns_rdata_reset(&rdata);
/* Records with an unsupported hash or non-zero flags are tested here. */
2111 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2112 !dns_nsec3_supportedhash(nsec3param.hash))
2115 if (nsec3param.flags != 0)
/* Cache this record's parameters in the version. */
2118 memcpy(version->salt, nsec3param.salt,
2119 nsec3param.salt_length);
2120 version->hash = nsec3param.hash;
2121 version->salt_length = nsec3param.salt_length;
2122 version->iterations = nsec3param.iterations;
2123 version->flags = nsec3param.flags;
2124 version->havensec3 = ISC_TRUE;
2126 * Look for a better algorithm than the
2127 * unknown test algorithm.
2129 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2135 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2136 isc_rwlocktype_read);
2137 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Task event handler that purges dead nodes from every lock bucket of
 * the database passed as the event argument.  With the tree write lock
 * held, each bucket is locked in turn and cleanup_dead_nodes() is run
 * on it.
 *
 * If any bucket still has dead nodes afterwards, the same event is sent
 * again to 'task' so the remainder is handled in a later run; otherwise
 * the event is freed and the database reference held on its behalf is
 * dropped.
 * NOTE(review): the 'again = ISC_TRUE' assignment, the if/else around
 * the resend and the test on 'refs' are not visible in this listing.
 */
2142 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2143 dns_rbtdb_t *rbtdb = event->ev_arg;
2144 isc_boolean_t again = ISC_FALSE;
2145 unsigned int locknum;
2148 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2149 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2150 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2151 isc_rwlocktype_write);
2152 cleanup_dead_nodes(rbtdb, locknum);
2153 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2155 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2156 isc_rwlocktype_write);
2158 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2160 isc_task_send(task, &event);
2162 isc_event_free(&event);
2163 isc_refcount_decrement(&rbtdb->references, &refs);
2165 maybe_free_rbtdb(rbtdb);
/*
 * Close the version '*versionp' of 'db'.
 *
 * Dropping a reference that is not the last one is the common case and
 * needs no further work.  Otherwise, under the database write lock:
 *  - a writer version is either made the new current version (the old
 *    current version is unlinked from the open list and cleaned up when
 *    unused) or rolled back, in which case its changed and resigned
 *    lists are taken over for cleanup;
 *  - a reader version that is not the current one is unlinked; if it
 *    was the least open version the next greater version takes over
 *    that role, otherwise its pending cleanups are appended to that
 *    version's list.
 * After the lock is released the zone's secure status is refreshed for
 * committed writers on non-cache databases, the discarded version is
 * freed, re-signed headers are processed and every node on the cleanup
 * list has its reference dropped (after rollback_node() on rollback).
 *
 * 'commit' is consulted for writer versions only.
 * NOTE(review): many short lines ('if (commit)', 'else', returns,
 * closing braces, declarations of 'refs'/'lock'/'cur_ref') are absent
 * from this listing; confirm the branch structure against the full
 * file.
 */
2170 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2171 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2172 rbtdb_version_t *version, *cleanup_version, *least_greater;
2173 isc_boolean_t rollback = ISC_FALSE;
2174 rbtdb_changedlist_t cleanup_list;
2175 rdatasetheaderlist_t resigned_list;
2176 rbtdb_changed_t *changed, *next_changed;
2177 rbtdb_serial_t serial, least_serial;
2178 dns_rbtnode_t *rbtnode;
2180 rdatasetheader_t *header;
2181 isc_boolean_t writer;
2183 REQUIRE(VALID_RBTDB(rbtdb));
2184 version = (rbtdb_version_t *)*versionp;
2186 cleanup_version = NULL;
2187 ISC_LIST_INIT(cleanup_list);
2188 ISC_LIST_INIT(resigned_list);
2190 isc_refcount_decrement(&version->references, &refs);
2191 if (refs > 0) { /* typical and easy case first */
2193 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2194 INSIST(!version->writer);
2195 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
/* Last reference: the rest runs under the database write lock. */
2200 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2201 serial = version->serial;
2202 writer = version->writer;
2203 if (version->writer) {
2206 rbtdb_version_t *cur_version;
2208 INSIST(version->commit_ok);
2209 INSIST(version == rbtdb->future_version);
2211 * The current version is going to be replaced.
2212 * Release the (likely last) reference to it from the
2213 * DB itself and unlink it from the open list.
2215 cur_version = rbtdb->current_version;
2216 isc_refcount_decrement(&cur_version->references,
2219 if (cur_version->serial == rbtdb->least_serial)
2220 INSIST(EMPTY(cur_version->changed_list));
2221 UNLINK(rbtdb->open_versions,
2224 if (EMPTY(rbtdb->open_versions)) {
2226 * We're going to become the least open
2229 make_least_version(rbtdb, version,
2233 * Some other open version is the
2234 * least version. We can't cleanup
2235 * records that were changed in this
2236 * version because the older versions
2237 * may still be in use by an open
2240 * We can, however, discard the
2241 * changed records for things that
2242 * we've added that didn't exist in
2245 cleanup_nondirty(version, &cleanup_list);
2248 * If the (soon to be former) current version
2249 * isn't being used by anyone, we can clean
2253 cleanup_version = cur_version;
2254 APPENDLIST(version->changed_list,
2255 cleanup_version->changed_list,
2259 * Become the current version.
2261 version->writer = ISC_FALSE;
2262 rbtdb->current_version = version;
2263 rbtdb->current_serial = version->serial;
2264 rbtdb->future_version = NULL;
2267 * Keep the current version in the open list, and
2268 * gain a reference for the DB itself (see the DB
2269 * creation function below). This must be the only
2270 * case where we need to increment the counter from
2271 * zero and need to use isc_refcount_increment0().
2273 isc_refcount_increment0(&version->references,
2275 INSIST(cur_ref == 1);
2276 PREPEND(rbtdb->open_versions,
2277 rbtdb->current_version, link);
2278 resigned_list = version->resigned_list;
2279 ISC_LIST_INIT(version->resigned_list);
2282 * We're rolling back this transaction.
2284 cleanup_list = version->changed_list;
2285 ISC_LIST_INIT(version->changed_list);
2286 resigned_list = version->resigned_list;
2287 ISC_LIST_INIT(version->resigned_list);
2288 rollback = ISC_TRUE;
2289 cleanup_version = version;
2290 rbtdb->future_version = NULL;
2293 if (version != rbtdb->current_version) {
2295 * There are no external or internal references
2296 * to this version and it can be cleaned up.
2298 cleanup_version = version;
2301 * Find the version with the least serial
2302 * number greater than ours.
2304 least_greater = PREV(version, link);
2305 if (least_greater == NULL)
2306 least_greater = rbtdb->current_version;
2308 INSIST(version->serial < least_greater->serial);
2310 * Is this the least open version?
2312 if (version->serial == rbtdb->least_serial) {
2314 * Yes. Install the new least open
2317 make_least_version(rbtdb,
2322 * Add any unexecuted cleanups to
2323 * those of the least greater version.
2325 APPENDLIST(least_greater->changed_list,
2326 version->changed_list,
2329 } else if (version->serial == rbtdb->least_serial)
2330 INSIST(EMPTY(version->changed_list));
2331 UNLINK(rbtdb->open_versions, version, link);
/* Snapshot least_serial while the database lock is still held. */
2333 least_serial = rbtdb->least_serial;
2334 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2337 * Update the zone's secure status.
2339 if (writer && commit && !IS_CACHE(rbtdb))
2340 iszonesecure(db, version, rbtdb->origin_node);
2342 if (cleanup_version != NULL) {
2343 INSIST(EMPTY(cleanup_version->changed_list));
2344 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2345 sizeof(*cleanup_version));
2349 * Commit/rollback re-signed headers.
/*
 * The list is drained from the head; each header's node reference is
 * dropped under that node's write lock.
 */
2351 for (header = HEAD(resigned_list);
2353 header = HEAD(resigned_list)) {
2356 ISC_LIST_UNLINK(resigned_list, header, link);
2358 lock = &rbtdb->node_locks[header->node->locknum].lock;
2359 NODE_LOCK(lock, isc_rwlocktype_write);
2361 resign_insert(rbtdb, header->node->locknum, header);
2362 decrement_reference(rbtdb, header->node, least_serial,
2363 isc_rwlocktype_write, isc_rwlocktype_none,
2365 NODE_UNLOCK(lock, isc_rwlocktype_write);
2368 if (!EMPTY(cleanup_list)) {
2369 isc_event_t *event = NULL;
2370 isc_rwlocktype_t tlock = isc_rwlocktype_none;
/*
 * With a task available, dead-node removal is delegated to
 * cleanup_dead_nodes_callback(); without one (or when the event
 * cannot be allocated) the tree write lock is taken instead.
 */
2372 if (rbtdb->task != NULL)
2373 event = isc_event_allocate(rbtdb->common.mctx, NULL,
2374 DNS_EVENT_RBTDEADNODES,
2375 cleanup_dead_nodes_callback,
2376 rbtdb, sizeof(isc_event_t));
2377 if (event == NULL) {
2379 * We acquire a tree write lock here in order to make
2380 * sure that stale nodes will be removed in
2381 * decrement_reference(). If we didn't have the lock,
2382 * those nodes could miss the chance to be removed
2383 * until the server stops. The write lock is
2384 * expensive, but this event should be rare enough
2385 * to justify the cost.
2387 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2388 tlock = isc_rwlocktype_write;
2391 for (changed = HEAD(cleanup_list);
2393 changed = next_changed) {
2396 next_changed = NEXT(changed, link);
2397 rbtnode = changed->node;
2398 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2400 NODE_LOCK(lock, isc_rwlocktype_write);
2402 * This is a good opportunity to purge any dead nodes,
2406 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2409 rollback_node(rbtnode, serial);
2410 decrement_reference(rbtdb, rbtnode, least_serial,
2411 isc_rwlocktype_write, tlock,
2414 NODE_UNLOCK(lock, isc_rwlocktype_write);
2416 isc_mem_put(rbtdb->common.mctx, changed,
/* The queued event holds its own reference on the database. */
2419 if (event != NULL) {
2420 isc_refcount_increment(&rbtdb->references, NULL);
2421 isc_task_send(rbtdb->task, &event);
2423 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2431 * Add the necessary magic for the wildcard name 'name'
2432 * to be found in 'rbtdb'.
2434 * In order for wildcard matching to work correctly in
2435 * zone_find(), we must ensure that a node for the wildcarding
2436 * level exists in the database, and has its 'find_callback'
2437 * and 'wild' bits set.
2439 * E.g. if the wildcard name is "*.sub.example." then we
2440 * must ensure that "sub.example." exists and is marked as a wildcard level.
/*
 * Ensure that the parent of the wildcard name 'name' exists in the main
 * tree and carries the find callback flag.
 *
 * The parent is obtained by stripping the leftmost label of 'name'.
 * dns_rbt_addnode() is used for the lookup, so ISC_R_EXISTS is an
 * expected outcome; only a node created by this call has its 'nsec'
 * field initialised.  Returns ISC_R_SUCCESS on the path visible here.
 * NOTE(review): the adjustment of 'n' before the label split, the
 * failure return and the statement setting the 'wild' bit are not
 * visible in this listing.
 */
2444 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2445 isc_result_t result;
2446 dns_name_t foundname;
2447 dns_offsets_t offsets;
2449 dns_rbtnode_t *node = NULL;
2451 dns_name_init(&foundname, offsets);
2452 n = dns_name_countlabels(name);
/* Starting at label 1 skips the leading "*" label. */
2455 dns_name_getlabelsequence(name, 1, n, &foundname);
2456 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2457 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2459 if (result == ISC_R_SUCCESS)
2460 node->nsec = DNS_RBT_NSEC_NORMAL;
2461 node->find_callback = 1;
2463 return (ISC_R_SUCCESS);
/*
 * Examine suffixes of 'name' and, for each suffix that is itself a
 * wildcard name, make sure the wildcard bookkeeping exists: its parent
 * is prepared through add_wildcard_magic() and a node for the suffix is
 * added to the main tree (an already existing node is fine).
 *
 * 'n' is the label count of 'name' and 'l' that of the database origin;
 * 'i' is the length in labels of the suffix currently examined.
 * Returns ISC_R_SUCCESS on the path visible here.
 * NOTE(review): the loop header giving the range of 'i' and the failure
 * returns are not visible in this listing.
 */
2467 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2468 isc_result_t result;
2469 dns_name_t foundname;
2470 dns_offsets_t offsets;
2471 unsigned int n, l, i;
2473 dns_name_init(&foundname, offsets);
2474 n = dns_name_countlabels(name);
2475 l = dns_name_countlabels(&rbtdb->common.origin);
2478 dns_rbtnode_t *node = NULL; /* dummy */
/* Take the last 'i' labels of 'name'. */
2479 dns_name_getlabelsequence(name, n - i, i, &foundname);
2480 if (dns_name_iswildcard(&foundname)) {
2481 result = add_wildcard_magic(rbtdb, &foundname);
2482 if (result != ISC_R_SUCCESS)
2484 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2486 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2488 if (result == ISC_R_SUCCESS)
2489 node->nsec = DNS_RBT_NSEC_NORMAL;
2493 return (ISC_R_SUCCESS);
/*
 * Find the node for 'name' in the main tree of 'db', store it in
 * '*nodep' with a new reference and return ISC_R_SUCCESS.
 *
 * The lookup runs under a tree read lock.  When it fails, a partial
 * match is reported as ISC_R_NOTFOUND; on the creation path the read
 * lock is dropped, a write lock is taken and the node is added.  A
 * newly created node is registered with the RPZ CIDR table (when one
 * exists), assigned a lock bucket derived from its hash or name, and
 * given the empty-wildcard and wildcard bookkeeping.
 * reactivate_node() takes the reference before the tree lock is
 * released.
 *
 * NOTE(review): the test of 'create', the not-found return and the
 * error returns are not visible in this listing.  The result of
 * add_empty_wildcards() is not checked in the visible code, unlike
 * add_wildcard_magic() just below it -- confirm this is intentional.
 */
2497 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2498 dns_dbnode_t **nodep)
2500 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2501 dns_rbtnode_t *node = NULL;
2502 dns_name_t nodename;
2503 isc_result_t result;
2504 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2506 REQUIRE(VALID_RBTDB(rbtdb));
2508 dns_name_init(&nodename, NULL);
2509 RWLOCK(&rbtdb->tree_lock, locktype);
2510 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2511 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2512 if (result != ISC_R_SUCCESS) {
2513 RWUNLOCK(&rbtdb->tree_lock, locktype);
2515 if (result == DNS_R_PARTIALMATCH)
2516 result = ISC_R_NOTFOUND;
2520 * It would be nice to try to upgrade the lock instead of
2521 * unlocking then relocking.
2523 locktype = isc_rwlocktype_write;
2524 RWLOCK(&rbtdb->tree_lock, locktype);
/*
 * The tree was unlocked in between, so another thread may have added
 * the node already; ISC_R_EXISTS is therefore accepted further down.
 */
2526 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2527 if (result == ISC_R_SUCCESS) {
2529 if (rbtdb->rpz_cidr != NULL) {
2530 dns_fixedname_t fnamef;
2533 dns_fixedname_init(&fnamef);
2534 fname = dns_fixedname_name(&fnamef);
2535 dns_rbt_fullnamefromnode(node, fname);
2536 dns_rpz_cidr_addip(rbtdb->rpz_cidr, fname);
2539 dns_rbt_namefromnode(node, &nodename);
2540 #ifdef DNS_RBT_USEHASH
2541 node->locknum = node->hashval % rbtdb->node_lock_count;
2543 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2544 rbtdb->node_lock_count;
2546 add_empty_wildcards(rbtdb, name);
2548 if (dns_name_iswildcard(name)) {
2549 result = add_wildcard_magic(rbtdb, name);
2550 if (result != ISC_R_SUCCESS) {
2551 RWUNLOCK(&rbtdb->tree_lock, locktype);
2555 } else if (result != ISC_R_EXISTS) {
2556 RWUNLOCK(&rbtdb->tree_lock, locktype);
2560 reactivate_node(rbtdb, node, locktype);
2561 RWUNLOCK(&rbtdb->tree_lock, locktype);
2563 *nodep = (dns_dbnode_t *)node;
2565 return (ISC_R_SUCCESS);
/*
 * Look up the node for 'name' in the NSEC3 tree (rbtdb->nsec3) and hand it
 * back through '*nodep'.
 *
 * The lookup runs under the tree read lock with DNS_RBTFIND_EMPTYDATA.  If
 * it does not succeed, the read lock is dropped.  The listing then shows
 * DNS_R_PARTIALMATCH being mapped to ISC_R_NOTFOUND and, further down, the
 * tree write lock being taken so that the node can be added to rbtdb->nsec3
 * with dns_rbt_addnode().  A newly added node gets its locknum (from
 * node->hashval under DNS_RBT_USEHASH, otherwise from dns_name_hash(),
 * modulo rbtdb->node_lock_count) and nsec = DNS_RBT_NSEC_NSEC3.
 * ISC_R_EXISTS is not treated as a failure; any other failure releases the
 * tree lock.
 *
 * On the success path the node must be marked DNS_RBT_NSEC_NSEC3 (INSIST),
 * a reference is taken with new_reference() while the node lock is held via
 * NODE_STRONGLOCK, the tree lock is released, '*nodep' is set, and
 * ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the test that consults 'create' and the return statements
 * on the failure paths are not visible in this listing; presumably the add
 * step is only reached when 'create' is true -- confirm.
 */
2569 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2570 dns_dbnode_t **nodep)
2572 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2573 dns_rbtnode_t *node = NULL;
2574 dns_name_t nodename;
2575 isc_result_t result;
2576 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2578 REQUIRE(VALID_RBTDB(rbtdb));
2580 dns_name_init(&nodename, NULL);
2581 RWLOCK(&rbtdb->tree_lock, locktype);
2582 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2583 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2584 if (result != ISC_R_SUCCESS) {
2585 RWUNLOCK(&rbtdb->tree_lock, locktype);
2587 if (result == DNS_R_PARTIALMATCH)
2588 result = ISC_R_NOTFOUND;
2592 * It would be nice to try to upgrade the lock instead of
2593 * unlocking then relocking.
2595 locktype = isc_rwlocktype_write;
2596 RWLOCK(&rbtdb->tree_lock, locktype);
2598 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2599 if (result == ISC_R_SUCCESS) {
/* Pick the node lock bucket for the new node. */
2600 dns_rbt_namefromnode(node, &nodename);
2601 #ifdef DNS_RBT_USEHASH
2602 node->locknum = node->hashval % rbtdb->node_lock_count;
2604 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2605 rbtdb->node_lock_count;
2607 node->nsec = DNS_RBT_NSEC_NSEC3;
2608 } else if (result != ISC_R_EXISTS) {
2609 RWUNLOCK(&rbtdb->tree_lock, locktype);
2613 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
/* Take the caller's reference while holding the node lock. */
2615 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2616 new_reference(rbtdb, node);
2617 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2618 RWUNLOCK(&rbtdb->tree_lock, locktype);
2620 *nodep = (dns_dbnode_t *)node;
2622 return (ISC_R_SUCCESS);
/*
 * Tree-walk callback used while searching a zone: decide whether 'node' is
 * a zone cut for the search described by 'arg' (an rbtdb_search_t) and, if
 * so, record it in the search state.
 *
 * Behaviour visible in this listing:
 *   - If search->zonecut is already set, DNS_R_CONTINUE is returned at once.
 *   - Under the node read lock, the rdataset headers of 'node' are scanned
 *     for NS, DNAME and RRSIG(DNAME) entries; for each, the header chain is
 *     followed through header->down looking at header->serial against
 *     search->serial.  DNAME and RRSIG(DNAME) hits are remembered, and an NS
 *     hit is only considered when 'node' is not the origin node or the
 *     database is a stub.
 *   - One header is chosen as 'found': for a database that is neither cache
 *     nor stub an NS header takes precedence, otherwise DNAME, otherwise NS.
 *   - When something was found, a node reference is taken, search->zonecut,
 *     search->zonecut_rdataset and search->need_cleanup are set, and
 *     search->wild is cleared.  Without DNS_DBFIND_GLUEOK the result becomes
 *     DNS_R_PARTIALMATCH; the listing also shows 'name' being copied into
 *     search->zonecut_name with search->copy_name set.
 *   - When nothing was found, search->wild is set if node->wild is set and
 *     DNS_DBFIND_NOWILD was not requested.
 *
 * NOTE(review): several statements are not visible in this listing (the
 * initialisation of 'found' and 'ns_header', the inner loop header, the
 * assignments to 'found' in the NS branches, the declaration of 'zcname',
 * the branch structure around the name copy, and the final return).
 * Presumably the function returns 'result' after unlocking -- confirm
 * against the complete file.
 */
2626 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2627 rbtdb_search_t *search = arg;
2628 rdatasetheader_t *header, *header_next;
2629 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2630 rdatasetheader_t *found;
2631 isc_result_t result;
2632 dns_rbtnode_t *onode;
2635 * We only want to remember the topmost zone cut, since it's the one
2636 * that counts, so we'll just continue if we've already found a
2639 if (search->zonecut != NULL)
2640 return (DNS_R_CONTINUE);
2643 result = DNS_R_CONTINUE;
2644 onode = search->rbtdb->origin_node;
2646 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2647 isc_rwlocktype_read);
2650 * Look for an NS or DNAME rdataset active in our version.
2653 dname_header = NULL;
2654 sigdname_header = NULL;
2655 for (header = node->data; header != NULL; header = header_next) {
2656 header_next = header->next;
2657 if (header->type == dns_rdatatype_ns ||
2658 header->type == dns_rdatatype_dname ||
2659 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2661 if (header->serial <= search->serial &&
2664 * Is this a "this rdataset doesn't
2667 if (NONEXISTENT(header))
2671 header = header->down;
2672 } while (header != NULL);
2673 if (header != NULL) {
2674 if (header->type == dns_rdatatype_dname)
2675 dname_header = header;
2676 else if (header->type ==
2677 RBTDB_RDATATYPE_SIGDNAME)
2678 sigdname_header = header;
2679 else if (node != onode ||
2680 IS_STUB(search->rbtdb)) {
2682 * We've found an NS rdataset that
2683 * isn't at the origin node. We check
2684 * that they're not at the origin node,
2685 * because otherwise we'd erroneously
2686 * treat the zone top as if it were
2696 * Did we find anything?
2698 if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2699 ns_header != NULL) {
2701 * Note that NS has precedence over DNAME if both exist
2702 * in a zone. Otherwise DNAME take precedence over NS.
2705 search->zonecut_sigrdataset = NULL;
2706 } else if (dname_header != NULL) {
2707 found = dname_header;
2708 search->zonecut_sigrdataset = sigdname_header;
2709 } else if (ns_header != NULL) {
2711 search->zonecut_sigrdataset = NULL;
2714 if (found != NULL) {
2716 * We increment the reference count on node to ensure that
2717 * search->zonecut_rdataset will still be valid later.
2719 new_reference(search->rbtdb, node);
2720 search->zonecut = node;
2721 search->zonecut_rdataset = found;
2722 search->need_cleanup = ISC_TRUE;
2724 * Since we've found a zonecut, anything beneath it is
2725 * glue and is not subject to wildcard matching, so we
2726 * may clear search->wild.
2728 search->wild = ISC_FALSE;
2729 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2731 * If the caller does not want to find glue, then
2732 * this is the best answer and the search should
2735 result = DNS_R_PARTIALMATCH;
2740 * The search will continue beneath the zone cut.
2741 * This may or may not be the best match. In case it
2742 * is, we need to remember the node name.
2744 zcname = dns_fixedname_name(&search->zonecut_name);
2745 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2747 search->copy_name = ISC_TRUE;
2751 * There is no zonecut at this node which is active in this
2754 * If this is a "wild" node and the caller hasn't disabled
2755 * wildcard matching, remember that we've seen a wild node
2756 * in case we need to go searching for wildcard matches
2759 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2760 search->wild = ISC_TRUE;
2763 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2764 isc_rwlocktype_read);
/*
 * Associate the caller-supplied 'rdataset' with the slab data that follows
 * 'header' at 'node'.
 *
 * Visible effects:
 *   - a reference on 'node' is taken with new_reference();
 *   - 'rdataset' must be disassociated on entry (methods == NULL, INSIST);
 *   - methods, rdclass, type/covers (split out of header->type), trust and
 *     ttl (header->rdh_ttl - now) are filled in;
 *   - the NEGATIVE and NXDOMAIN attributes are copied from the header flags,
 *     and the listing also sets DNS_RDATASETATTR_OPTOUT;
 *   - private1/private2/private3 point at the database, the node and the
 *     raw slab (the bytes immediately after the header structure);
 *   - rdataset->count takes the current header->count, which is then
 *     incremented; a value of ISC_UINT32_MAX is replaced by 0;
 *   - iterator state (privateuint4, private5) is reset;
 *   - noqname/closest proofs and re-signing information are copied, with
 *     the matching attribute bits set when present.
 *
 * NOTE(review): the statement after the 'rdataset == NULL' test, the
 * condition guarding the OPTOUT attribute and the 'else' of the RESIGN
 * branch are not visible in this listing; presumably a NULL 'rdataset'
 * makes the function return without doing anything -- confirm.
 */
2770 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2771 rdatasetheader_t *header, isc_stdtime_t now,
2772 dns_rdataset_t *rdataset)
2774 unsigned char *raw; /* RDATASLAB */
2777 * Caller must be holding the node reader lock.
2778 * XXXJT: technically, we need a writer lock, since we'll increment
2779 * the header count below. However, since the actual counter value
2780 * doesn't matter, we prioritize performance here. (We may want to
2781 * use atomic increment when available).
2784 if (rdataset == NULL)
2787 new_reference(rbtdb, node);
2789 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2791 rdataset->methods = &rdataset_methods;
2792 rdataset->rdclass = rbtdb->common.rdclass;
2793 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2794 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2795 rdataset->ttl = header->rdh_ttl - now;
2796 rdataset->trust = header->trust;
2797 if (NEGATIVE(header))
2798 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2799 if (NXDOMAIN(header))
2800 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2802 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2803 rdataset->private1 = rbtdb;
2804 rdataset->private2 = node;
2805 raw = (unsigned char *)header + sizeof(*header);
2806 rdataset->private3 = raw;
2807 rdataset->count = header->count++;
2808 if (rdataset->count == ISC_UINT32_MAX)
2809 rdataset->count = 0;
2812 * Reset iterator state.
2814 rdataset->privateuint4 = 0;
2815 rdataset->private5 = NULL;
2818 * Add noqname proof.
2820 rdataset->private6 = header->noqname;
2821 if (rdataset->private6 != NULL)
2822 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2823 rdataset->private7 = header->closest;
2824 if (rdataset->private7 != NULL)
2825 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2828 * Copy out re-signing information.
2830 if (RESIGN(header)) {
2831 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2832 rdataset->resign = header->resign;
2834 rdataset->resign = 0;
/*
 * Produce the answer for a search that ended at the zone cut recorded in
 * 'search' (search->zonecut / search->zonecut_rdataset).
 *
 * Visible steps:
 *   - when 'foundname' is given and search->copy_name is set, the saved
 *     zone cut name is copied into 'foundname' first, so that a copy
 *     failure leaves nothing to undo;
 *   - when 'nodep' is given, search->need_cleanup is cleared because the
 *     reference already held by the search block is reused for the caller;
 *   - when 'rdataset' is given, the zone cut rdataset is bound under the
 *     node read lock, and 'sigrdataset' is bound too when both it and
 *     search->zonecut_sigrdataset are non-NULL.
 *
 * Returns DNS_R_DNAME when the zone cut rdataset type is DNAME, otherwise
 * DNS_R_DELEGATION.
 *
 * NOTE(review): the declaration of 'zcname', the statement executed when
 * dns_name_copy() fails and the assignment to '*nodep' are not visible in
 * this listing; presumably the copy failure is returned to the caller and
 * '*nodep' receives the zone cut node -- confirm.
 */
2837 static inline isc_result_t
2838 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2839 dns_name_t *foundname, dns_rdataset_t *rdataset,
2840 dns_rdataset_t *sigrdataset)
2842 isc_result_t result;
2844 rbtdb_rdatatype_t type;
2845 dns_rbtnode_t *node;
2848 * The caller MUST NOT be holding any node locks.
2851 node = search->zonecut;
2852 type = search->zonecut_rdataset->type;
2855 * If we have to set foundname, we do it before anything else.
2856 * If we were to set foundname after we had set nodep or bound the
2857 * rdataset, then we'd have to undo that work if dns_name_copy()
2858 * failed. By setting foundname first, there's nothing to undo if
2861 if (foundname != NULL && search->copy_name) {
2862 zcname = dns_fixedname_name(&search->zonecut_name);
2863 result = dns_name_copy(zcname, foundname, NULL);
2864 if (result != ISC_R_SUCCESS)
2867 if (nodep != NULL) {
2869 * Note that we don't have to increment the node's reference
2870 * count here because we're going to use the reference we
2871 * already have in the search block.
2874 search->need_cleanup = ISC_FALSE;
2876 if (rdataset != NULL) {
2877 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2878 isc_rwlocktype_read);
2879 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2880 search->now, rdataset);
2881 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2882 bind_rdataset(search->rbtdb, node,
2883 search->zonecut_sigrdataset,
2884 search->now, sigrdataset);
2885 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2886 isc_rwlocktype_read);
2889 if (type == dns_rdatatype_dname)
2890 return (DNS_R_DNAME);
2891 return (DNS_R_DELEGATION);
/*
 * Decide whether an rdataset of 'type' found at 'node', owned by 'name',
 * may be treated as glue for the zone cut recorded in 'search'.
 *
 * Type filter: NS is only considered when 'node' is the zone cut node
 * itself; apart from NS, only A, AAAA and A6 are considered.
 * Name filter: the rdata slab that follows search->zonecut_rdataset is
 * walked (a two-byte big-endian record count, then a two-byte length in
 * front of each record, with additional bytes skipped when
 * DNS_RDATASET_FIXED is set); each rdata is read as a domain name and
 * compared with 'name'.
 *
 * NOTE(review): the declaration of 'ns_name', the loop header, the
 * statements assigning 'valid' / 'region.base' and the return statements
 * are not visible in this listing; presumably the result is ISC_TRUE only
 * when some name server name equals 'name' -- confirm against the complete
 * file.
 */
2894 static inline isc_boolean_t
2895 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2896 dns_rbtnode_t *node)
2898 unsigned char *raw; /* RDATASLAB */
2899 unsigned int count, size;
2901 isc_boolean_t valid = ISC_FALSE;
2902 dns_offsets_t offsets;
2903 isc_region_t region;
2904 rdatasetheader_t *header;
2907 * No additional locking is required.
2911 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2912 * if it occurs at a zone cut, but is not valid below it.
2914 if (type == dns_rdatatype_ns) {
2915 if (node != search->zonecut) {
2918 } else if (type != dns_rdatatype_a &&
2919 type != dns_rdatatype_aaaa &&
2920 type != dns_rdatatype_a6) {
2924 header = search->zonecut_rdataset;
2925 raw = (unsigned char *)header + sizeof(*header);
2926 count = raw[0] * 256 + raw[1];
2927 #if DNS_RDATASET_FIXED
2928 raw += 2 + (4 * count);
2935 size = raw[0] * 256 + raw[1];
2936 #if DNS_RDATASET_FIXED
2942 region.length = size;
2945 * XXX Until we have rdata structures, we have no choice but
2946 * to directly access the rdata format.
2948 dns_name_init(&ns_name, offsets);
2949 dns_name_fromregion(&ns_name, &region);
2950 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * Helper for the empty-non-terminal check: starting from the position held
 * in 'chain', step forward through the tree until a node is reached that
 * has at least one header visible to the search (serial <= search->serial,
 * not IGNORE, EXISTS), then test that node's name against 'name'.
 *
 * Visible steps:
 *   - 'chain' is advanced with dns_rbtnodechain_next(); the loop continues
 *     while the result is ISC_R_SUCCESS or DNS_R_NEWORIGIN;
 *   - each node's header list is scanned under the node read lock;
 *   - afterwards, on success, the node's prefix and origin are concatenated
 *     into 'next', and dns_name_issubdomain(next, name) is evaluated.
 *
 * Note that 'chain' is modified; the caller visible in this file passes a
 * copy of the search chain or a private wildcard chain.
 *
 * NOTE(review): the third parameter (used below as 'name'), several local
 * declarations, the loop exits, the assignment to 'answer' and the return
 * are not visible in this listing; presumably ISC_TRUE is returned when the
 * next active name is a subdomain of 'name' -- confirm.
 */
2959 static inline isc_boolean_t
2960 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2963 dns_fixedname_t fnext;
2964 dns_fixedname_t forigin;
2969 dns_rbtnode_t *node;
2970 isc_result_t result;
2971 isc_boolean_t answer = ISC_FALSE;
2972 rdatasetheader_t *header;
2974 rbtdb = search->rbtdb;
2976 dns_name_init(&prefix, NULL);
2977 dns_fixedname_init(&fnext);
2978 next = dns_fixedname_name(&fnext);
2979 dns_fixedname_init(&forigin);
2980 origin = dns_fixedname_name(&forigin);
2982 result = dns_rbtnodechain_next(chain, NULL, NULL);
2983 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2985 result = dns_rbtnodechain_current(chain, &prefix,
2987 if (result != ISC_R_SUCCESS)
2989 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2990 isc_rwlocktype_read);
2991 for (header = node->data;
2993 header = header->next) {
2994 if (header->serial <= search->serial &&
2995 !IGNORE(header) && EXISTS(header))
2998 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2999 isc_rwlocktype_read);
3002 result = dns_rbtnodechain_next(chain, NULL, NULL);
3004 if (result == ISC_R_SUCCESS)
3005 result = dns_name_concatenate(&prefix, origin, next, NULL);
3006 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * Determine whether the query name 'qname' sits at or below an empty node
 * that lies between the wildcard name 'wname' and its neighbours in the
 * tree.  Works on a private copy of search->chain, so the search chain
 * itself is left untouched.
 *
 * Visible steps:
 *   1. Walking backwards with dns_rbtnodechain_prev(), find a node that has
 *      a header visible to the search (serial <= search->serial, not
 *      IGNORE, EXISTS) and build its full name in 'prev'.  If that fails,
 *      check_prev is cleared.
 *   2. Walking forwards with dns_rbtnodechain_next(), do the same to build
 *      'next'.  If that fails, check_next is cleared.
 *   3. Clone 'qname' into 'rname' and derive 'tname' from 'wname' by
 *      removing its leftmost (wildcard) label.
 *   4. Repeatedly test whether 'prev' or 'next' is a subdomain of 'rname',
 *      stripping the leftmost label of 'rname' after each round, until
 *      'rname' equals 'tname'.
 *
 * NOTE(review): several declarations, the loop exits, the assignment to
 * 'answer' inside the final loop and the return are not visible in this
 * listing; presumably ISC_TRUE is returned as soon as one of the subdomain
 * tests succeeds -- confirm against the complete file.
 */
3011 static inline isc_boolean_t
3012 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
3013 dns_fixedname_t fnext;
3014 dns_fixedname_t forigin;
3015 dns_fixedname_t fprev;
3023 dns_rbtnode_t *node;
3024 dns_rbtnodechain_t chain;
3025 isc_boolean_t check_next = ISC_TRUE;
3026 isc_boolean_t check_prev = ISC_TRUE;
3027 isc_boolean_t answer = ISC_FALSE;
3028 isc_result_t result;
3029 rdatasetheader_t *header;
3032 rbtdb = search->rbtdb;
3034 dns_name_init(&name, NULL);
3035 dns_name_init(&tname, NULL);
3036 dns_name_init(&rname, NULL);
3037 dns_fixedname_init(&fnext);
3038 next = dns_fixedname_name(&fnext);
3039 dns_fixedname_init(&fprev);
3040 prev = dns_fixedname_name(&fprev);
3041 dns_fixedname_init(&forigin);
3042 origin = dns_fixedname_name(&forigin);
3045 * Find if qname is at or below a empty node.
3046 * Use our own copy of the chain.
3049 chain = search->chain;
3052 result = dns_rbtnodechain_current(&chain, &name,
3054 if (result != ISC_R_SUCCESS)
3056 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3057 isc_rwlocktype_read);
3058 for (header = node->data;
3060 header = header->next) {
3061 if (header->serial <= search->serial &&
3062 !IGNORE(header) && EXISTS(header))
3065 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3066 isc_rwlocktype_read);
3069 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3070 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3071 if (result == ISC_R_SUCCESS)
3072 result = dns_name_concatenate(&name, origin, prev, NULL);
3073 if (result != ISC_R_SUCCESS)
3074 check_prev = ISC_FALSE;
3076 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3077 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3079 result = dns_rbtnodechain_current(&chain, &name,
3081 if (result != ISC_R_SUCCESS)
3083 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3084 isc_rwlocktype_read);
3085 for (header = node->data;
3087 header = header->next) {
3088 if (header->serial <= search->serial &&
3089 !IGNORE(header) && EXISTS(header))
3092 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3093 isc_rwlocktype_read);
3096 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3098 if (result == ISC_R_SUCCESS)
3099 result = dns_name_concatenate(&name, origin, next, NULL);
3100 if (result != ISC_R_SUCCESS)
3101 check_next = ISC_FALSE;
3103 dns_name_clone(qname, &rname);
3106 * Remove the wildcard label to find the terminal name.
3108 n = dns_name_countlabels(wname);
3109 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3112 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3113 (check_next && dns_name_issubdomain(next, &rname))) {
3118 * Remove the left hand label.
3120 n = dns_name_countlabels(&rname);
3121 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3122 } while (!dns_name_equal(&rname, &tname));
/*
 * Search the ancestor levels recorded in search->chain for a wildcard that
 * matches the query name and is active in the search version.
 *
 * Visible steps, per level (starting from search->chain.level_matches):
 *   - under the node read lock, determine whether the level node has a
 *     header visible to the search (serial <= search->serial, not IGNORE,
 *     EXISTS);
 *   - build the wildcard name for the level by prefixing dns_wildcardname
 *     to the node name and appending the names of the enclosing levels;
 *   - look that name up in rbtdb->tree with DNS_RBTFIND_EMPTYDATA using a
 *     private chain 'wchain';
 *   - if the wildcard node exists and is active (it has a visible header,
 *     or activeempty() reports an active name beneath it), then
 *     activeemtpynode() is consulted and ISC_R_NOTFOUND is returned when it
 *     reports true; otherwise the wildcard node is the match;
 *   - results other than ISC_R_NOTFOUND / DNS_R_PARTIALMATCH from the
 *     lookup are treated as errors;
 *   - if the level node itself is active, wildcards at higher levels cannot
 *     apply and the result is ISC_R_NOTFOUND.
 *
 * The caller must hold the tree lock and must not hold any node lock.
 *
 * NOTE(review): the remaining parameter(s) (used below as 'qname'), several
 * declarations, the outer loop structure, the assignments to 'wild',
 * 'active', 'done', 'j' and '*nodep', and the final return are not visible
 * in this listing -- confirm against the complete file.
 */
3126 static inline isc_result_t
3127 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3131 dns_rbtnode_t *node, *level_node, *wnode;
3132 rdatasetheader_t *header;
3133 isc_result_t result = ISC_R_NOTFOUND;
3136 dns_fixedname_t fwname;
3138 isc_boolean_t done, wild, active;
3139 dns_rbtnodechain_t wchain;
3142 * Caller must be holding the tree lock and MUST NOT be holding
3147 * Examine each ancestor level. If the level's wild bit
3148 * is set, then construct the corresponding wildcard name and
3149 * search for it. If the wildcard node exists, and is active in
3150 * this version, we're done. If not, then we next check to see
3151 * if the ancestor is active in this version. If so, then there
3152 * can be no possible wildcard match and again we're done. If not,
3153 * continue the search.
3156 rbtdb = search->rbtdb;
3157 i = search->chain.level_matches;
3161 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3162 isc_rwlocktype_read);
3165 * First we try to figure out if this node is active in
3166 * the search's version. We do this now, even though we
3167 * may not need the information, because it simplifies the
3168 * locking and code flow.
3170 for (header = node->data;
3172 header = header->next) {
3173 if (header->serial <= search->serial &&
3174 !IGNORE(header) && EXISTS(header))
3187 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3188 isc_rwlocktype_read);
3192 * Construct the wildcard name for this level.
3194 dns_name_init(&name, NULL);
3195 dns_rbt_namefromnode(node, &name);
3196 dns_fixedname_init(&fwname);
3197 wname = dns_fixedname_name(&fwname);
3198 result = dns_name_concatenate(dns_wildcardname, &name,
3201 while (result == ISC_R_SUCCESS && j != 0) {
3203 level_node = search->chain.levels[j];
3204 dns_name_init(&name, NULL);
3205 dns_rbt_namefromnode(level_node, &name);
3206 result = dns_name_concatenate(wname,
3211 if (result != ISC_R_SUCCESS)
3215 dns_rbtnodechain_init(&wchain, NULL);
3216 result = dns_rbt_findnode(rbtdb->tree, wname,
3217 NULL, &wnode, &wchain,
3218 DNS_RBTFIND_EMPTYDATA,
3220 if (result == ISC_R_SUCCESS) {
3224 * We have found the wildcard node. If it
3225 * is active in the search's version, we're
3228 lock = &rbtdb->node_locks[wnode->locknum].lock;
3229 NODE_LOCK(lock, isc_rwlocktype_read);
3230 for (header = wnode->data;
3232 header = header->next) {
3233 if (header->serial <= search->serial &&
3234 !IGNORE(header) && EXISTS(header))
3237 NODE_UNLOCK(lock, isc_rwlocktype_read);
3238 if (header != NULL ||
3239 activeempty(search, &wchain, wname)) {
3240 if (activeemtpynode(search, qname,
3242 return (ISC_R_NOTFOUND);
3245 * The wildcard node is active!
3247 * Note: result is still ISC_R_SUCCESS
3248 * so we don't have to set it.
3253 } else if (result != ISC_R_NOTFOUND &&
3254 result != DNS_R_PARTIALMATCH) {
3256 * An error has occurred. Bail out.
3264 * The level node is active. Any wildcarding
3265 * present at higher levels has no
3266 * effect and we're done.
3268 result = ISC_R_NOTFOUND;
3274 node = search->chain.levels[i];
/*
 * Check whether an NSEC3 record in the slab that follows 'header' uses the
 * same NSEC3 parameters as the version being searched.
 *
 * Requires header->type == dns_rdatatype_nsec3.  The slab starts with a
 * two-byte big-endian record count (followed by additional per-record bytes
 * when DNS_RDATASET_FIXED is set); each record is preceded by a two-byte
 * length.  Every rdata is converted with dns_rdata_tostruct() and its hash,
 * iterations, salt_length and salt bytes are compared with the values held
 * in search->rbtversion.
 *
 * NOTE(review): the pointer advances inside the loop, the assignment of
 * 'region.base' and the return statements are not visible in this listing;
 * presumably ISC_TRUE is returned on the first matching record and
 * ISC_FALSE when none matches -- confirm against the complete file.
 */
3282 static isc_boolean_t
3283 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3285 dns_rdata_t rdata = DNS_RDATA_INIT;
3286 dns_rdata_nsec3_t nsec3;
3287 unsigned char *raw; /* RDATASLAB */
3288 unsigned int rdlen, count;
3289 isc_region_t region;
3290 isc_result_t result;
3292 REQUIRE(header->type == dns_rdatatype_nsec3);
3294 raw = (unsigned char *)header + sizeof(*header);
3295 count = raw[0] * 256 + raw[1]; /* count */
3296 #if DNS_RDATASET_FIXED
3297 raw += count * 4 + 2;
3301 while (count-- > 0) {
3302 rdlen = raw[0] * 256 + raw[1];
3303 #if DNS_RDATASET_FIXED
3309 region.length = rdlen;
3310 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3311 dns_rdatatype_nsec3, &region);
3313 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3314 INSIST(result == ISC_R_SUCCESS);
3315 if (nsec3.hash == search->rbtversion->hash &&
3316 nsec3.iterations == search->rbtversion->iterations &&
3317 nsec3.salt_length == search->rbtversion->salt_length &&
3318 memcmp(nsec3.salt, search->rbtversion->salt,
3319 nsec3.salt_length) == 0)
3321 dns_rdata_reset(&rdata);
3327 * Find node of the NSEC/NSEC3 record that is 'name'.
/*
 * Move the search back to the previous candidate node that may carry the
 * covering NSEC/NSEC3 record, updating 'name', 'origin' and '*nodep'.
 *
 * Requires 'nodep' to be non-NULL and '*nodep' to be NULL on entry.
 *
 * Visible behaviour:
 *   - For NSEC3, search->chain is simply stepped back with
 *     dns_rbtnodechain_prev() and the new current position is read with
 *     dns_rbtnodechain_current().
 *   - Otherwise the auxiliary NSEC tree (search->rbtdb->nsec) is used.  On
 *     the first call ('*firstp' true) 'nsecchain' is initialised, the full
 *     name is built from 'name' and 'origin' and looked up in the NSEC
 *     tree; on an exact hit the chain is stepped back one node, and on
 *     ISC_R_NOTFOUND / DNS_R_PARTIALMATCH the chain's current position is
 *     used, with ISC_R_NOTFOUND converted to ISC_R_NOMORE.  On later calls
 *     'nsecchain' is just stepped back.  DNS_R_NEWORIGIN is treated as
 *     success.
 *   - The name obtained from the NSEC tree is then looked up in the main
 *     tree (search->rbtdb->tree), repositioning search->chain.  Results
 *     other than success, DNS_R_PARTIALMATCH and ISC_R_NOTFOUND are logged
 *     and reported as DNS_R_BADDB.
 *
 * NOTE(review): the declaration of 'target', the loop around the NSEC-tree
 * steps and most return statements are not visible in this listing;
 * presumably the NSEC-tree walk repeats until a name is also found in the
 * main tree -- confirm against the complete file.
 */
3329 static inline isc_result_t
3330 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3331 dns_name_t *name, dns_name_t *origin,
3332 dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3333 isc_boolean_t *firstp)
3335 dns_fixedname_t ftarget;
3337 dns_rbtnode_t *nsecnode;
3338 isc_result_t result;
3340 REQUIRE(nodep != NULL && *nodep == NULL);
3342 if (type == dns_rdatatype_nsec3) {
3343 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3344 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3346 result = dns_rbtnodechain_current(&search->chain, name, origin,
3351 dns_fixedname_init(&ftarget);
3352 target = dns_fixedname_name(&ftarget);
3357 * Construct the name of the second node to check.
3358 * It is the first node sought in the NSEC tree.
3360 *firstp = ISC_FALSE;
3361 dns_rbtnodechain_init(nsecchain, NULL);
3362 result = dns_name_concatenate(name, origin,
3364 if (result != ISC_R_SUCCESS)
3367 result = dns_rbt_findnode(search->rbtdb->nsec,
3369 &nsecnode, nsecchain,
3370 DNS_RBTFIND_NOOPTIONS,
3372 if (result == ISC_R_SUCCESS) {
3374 * Since this was the first loop, finding the
3375 * name in the NSEC tree implies that the first
3376 * node checked in the main tree had an
3377 * unacceptable NSEC record.
3378 * Try the previous node in the NSEC tree.
3380 result = dns_rbtnodechain_prev(nsecchain,
3382 if (result == DNS_R_NEWORIGIN)
3383 result = ISC_R_SUCCESS;
3384 } else if (result == ISC_R_NOTFOUND ||
3385 result == DNS_R_PARTIALMATCH) {
3386 result = dns_rbtnodechain_current(nsecchain,
3387 name, origin, NULL);
3388 if (result == ISC_R_NOTFOUND)
3389 result = ISC_R_NOMORE;
3393 * This is a second or later trip through the auxiliary
3394 * tree for the name of a third or earlier NSEC node in
3395 * the main tree. Previous trips through the NSEC tree
3396 * must have found nodes in the main tree with NSEC
3397 * records. Perhaps they lacked signature records.
3399 result = dns_rbtnodechain_prev(nsecchain, name, origin);
3400 if (result == DNS_R_NEWORIGIN)
3401 result = ISC_R_SUCCESS;
3403 if (result != ISC_R_SUCCESS)
3407 * Construct the name to seek in the main tree.
3409 result = dns_name_concatenate(name, origin, target, NULL);
3410 if (result != ISC_R_SUCCESS)
3414 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3415 nodep, &search->chain,
3416 DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3417 if (result == ISC_R_SUCCESS)
3421 * There should always be a node in the main tree with the
3422 * same name as the node in the auxiliary NSEC tree, except for
3423 * nodes in the auxiliary tree that are awaiting deletion.
3425 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3426 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3427 DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3428 "previous_closest_nsec(): %s",
3429 isc_result_totext(result));
3430 return (DNS_R_BADDB);
3436 * Find the NSEC/NSEC3 which is at or before the current point on the
3437 * search chain. For NSEC3 records only NSEC3 records that match the
3438 * current NSEC3PARAM record are considered.
/*
 * Starting from the current position of search->chain, walk backwards until
 * a node is found that carries a usable NSEC (or NSEC3) rdataset, and bind
 * it for the caller.
 *
 * The record types sought depend on 'tree': NSEC3 / RRSIG(NSEC3) when
 * 'tree' is the NSEC3 tree of the database, otherwise NSEC / RRSIG(NSEC).
 * A signature is required only when 'secure' equals dns_db_secure.
 *
 * Visible behaviour per candidate node (under the node read lock):
 *   - the header list is scanned for entries visible to the search version
 *     (serial <= search->serial, following header->down), remembering the
 *     NSEC-type header and its signature header;
 *   - an NSEC3 header whose parameters do not match the current version
 *     (matchparams() false while havensec3 is set) makes the node count as
 *     empty and the search moves on via previous_closest_nsec();
 *   - when the record is found, together with its signature if one is
 *     needed, the node name is concatenated into 'foundname', a node
 *     reference is taken when 'nodep' is given, and the rdataset(s) are
 *     bound with bind_rdataset();
 *   - a node with neither record is treated as empty and skipped;
 *   - a node with only one of the pair yields DNS_R_BADDB;
 *   - a node with no active data is skipped via previous_closest_nsec().
 * The loop repeats while the node counted as empty and the result is
 * ISC_R_SUCCESS.  After the loop, ISC_R_NOMORE combined with 'wraps'
 * restarts from the last node of 'tree'; a remaining ISC_R_NOMORE is
 * converted to DNS_R_BADDB.
 *
 * NOTE(review): the assignments to 'wraps', 'found', 'foundsig', 'node' and
 * 'prevnode', the labels/jumps used for restarting, some call arguments and
 * the final return are not visible in this listing -- confirm against the
 * complete file.
 */
3440 static inline isc_result_t
3441 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3442 dns_name_t *foundname, dns_rdataset_t *rdataset,
3443 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3444 dns_db_secure_t secure)
3446 dns_rbtnode_t *node, *prevnode;
3447 rdatasetheader_t *header, *header_next, *found, *foundsig;
3448 dns_rbtnodechain_t nsecchain;
3449 isc_boolean_t empty_node;
3450 isc_result_t result;
3451 dns_fixedname_t fname, forigin;
3452 dns_name_t *name, *origin;
3453 dns_rdatatype_t type;
3454 rbtdb_rdatatype_t sigtype;
3455 isc_boolean_t wraps;
3456 isc_boolean_t first = ISC_TRUE;
3457 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3459 if (tree == search->rbtdb->nsec3) {
3460 type = dns_rdatatype_nsec3;
3461 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3464 type = dns_rdatatype_nsec;
3465 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3470 * Use the auxiliary tree only starting with the second node in the
3471 * hope that the original node will be right much of the time.
3473 dns_fixedname_init(&fname);
3474 name = dns_fixedname_name(&fname);
3475 dns_fixedname_init(&forigin);
3476 origin = dns_fixedname_name(&forigin);
3480 result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3481 if (result != ISC_R_SUCCESS)
3484 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3485 isc_rwlocktype_read);
3488 empty_node = ISC_TRUE;
3489 for (header = node->data;
3491 header = header_next) {
3492 header_next = header->next;
3494 * Look for an active, extant NSEC or RRSIG NSEC.
3497 if (header->serial <= search->serial &&
3500 * Is this a "this rdataset doesn't
3503 if (NONEXISTENT(header))
3507 header = header->down;
3508 } while (header != NULL);
3509 if (header != NULL) {
3511 * We now know that there is at least one
3512 * active rdataset at this node.
3514 empty_node = ISC_FALSE;
3515 if (header->type == type) {
3517 if (foundsig != NULL)
3519 } else if (header->type == sigtype) {
3527 if (found != NULL && search->rbtversion->havensec3 &&
3528 found->type == dns_rdatatype_nsec3 &&
3529 !matchparams(found, search)) {
3530 empty_node = ISC_TRUE;
3533 result = previous_closest_nsec(type, search,
3537 } else if (found != NULL &&
3538 (foundsig != NULL || !need_sig)) {
3540 * We've found the right NSEC/NSEC3 record.
3542 * Note: for this to really be the right
3543 * NSEC record, it's essential that the NSEC
3544 * records of any nodes obscured by a zone
3545 * cut have been removed; we assume this is
3548 result = dns_name_concatenate(name, origin,
3550 if (result == ISC_R_SUCCESS) {
3551 if (nodep != NULL) {
3552 new_reference(search->rbtdb,
3556 bind_rdataset(search->rbtdb, node,
3559 if (foundsig != NULL)
3560 bind_rdataset(search->rbtdb,
3566 } else if (found == NULL && foundsig == NULL) {
3568 * This node is active, but has no NSEC or
3569 * RRSIG NSEC. That means it's glue or
3570 * other obscured zone data that isn't
3571 * relevant for our search. Treat the
3572 * node as if it were empty and keep looking.
3574 empty_node = ISC_TRUE;
3575 result = previous_closest_nsec(type, search,
3582 * We found an active node, but either the
3583 * NSEC or the RRSIG NSEC is missing. This
3586 result = DNS_R_BADDB;
3590 * This node isn't active. We've got to keep
3593 result = previous_closest_nsec(type, search,
3594 name, origin, &prevnode,
3595 &nsecchain, &first);
3597 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3598 isc_rwlocktype_read);
3601 } while (empty_node && result == ISC_R_SUCCESS);
3604 dns_rbtnodechain_invalidate(&nsecchain);
3606 if (result == ISC_R_NOMORE && wraps) {
3607 result = dns_rbtnodechain_last(&search->chain, tree,
3609 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3616 * If the result is ISC_R_NOMORE, then we got to the beginning of
3617 * the database and didn't find a NSEC record. This shouldn't
3620 if (result == ISC_R_NOMORE)
3621 result = DNS_R_BADDB;
3627 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3628 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3629 dns_dbnode_t **nodep, dns_name_t *foundname,
3630 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3632 dns_rbtnode_t *node = NULL;
3633 isc_result_t result;
3634 rbtdb_search_t search;
3635 isc_boolean_t cname_ok = ISC_TRUE;
3636 isc_boolean_t close_version = ISC_FALSE;
3637 isc_boolean_t maybe_zonecut = ISC_FALSE;
3638 isc_boolean_t at_zonecut = ISC_FALSE;
3640 isc_boolean_t empty_node;
3641 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3642 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3643 rbtdb_rdatatype_t sigtype;
3644 isc_boolean_t active;
3645 dns_rbtnodechain_t chain;
3649 search.rbtdb = (dns_rbtdb_t *)db;
3651 REQUIRE(VALID_RBTDB(search.rbtdb));
3654 * We don't care about 'now'.
3659 * If the caller didn't supply a version, attach to the current
3662 if (version == NULL) {
3663 currentversion(db, &version);
3664 close_version = ISC_TRUE;
3667 search.rbtversion = version;
3668 search.serial = search.rbtversion->serial;
3669 search.options = options;
3670 search.copy_name = ISC_FALSE;
3671 search.need_cleanup = ISC_FALSE;
3672 search.wild = ISC_FALSE;
3673 search.zonecut = NULL;
3674 dns_fixedname_init(&search.zonecut_name);
3675 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3679 * 'wild' will be true iff. we've matched a wildcard.
3683 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3686 * Search down from the root of the tree. If, while going down, we
3687 * encounter a callback node, zone_zonecut_callback() will search the
3688 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3690 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3692 result = dns_rbt_findnode(tree, name, foundname, &node,
3693 &search.chain, DNS_RBTFIND_EMPTYDATA,
3694 zone_zonecut_callback, &search);
3696 if (result == DNS_R_PARTIALMATCH) {
3698 if (search.zonecut != NULL) {
3699 result = setup_delegation(&search, nodep, foundname,
3700 rdataset, sigrdataset);
3706 * At least one of the levels in the search chain
3707 * potentially has a wildcard. For each such level,
3708 * we must see if there's a matching wildcard active
3709 * in the current version.
3711 result = find_wildcard(&search, &node, name);
3712 if (result == ISC_R_SUCCESS) {
3713 result = dns_name_copy(name, foundname, NULL);
3714 if (result != ISC_R_SUCCESS)
3719 else if (result != ISC_R_NOTFOUND)
3723 chain = search.chain;
3724 active = activeempty(&search, &chain, name);
3727 * If we're here, then the name does not exist, is not
3728 * beneath a zonecut, and there's no matching wildcard.
3730 if ((search.rbtversion->secure == dns_db_secure &&
3731 !search.rbtversion->havensec3) ||
3732 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3733 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3735 result = find_closest_nsec(&search, nodep, foundname,
3736 rdataset, sigrdataset, tree,
3737 search.rbtversion->secure);
3738 if (result == ISC_R_SUCCESS)
3739 result = active ? DNS_R_EMPTYNAME :
3742 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3744 } else if (result != ISC_R_SUCCESS)
3749 * We have found a node whose name is the desired name, or we
3750 * have matched a wildcard.
3753 if (search.zonecut != NULL) {
3755 * If we're beneath a zone cut, we don't want to look for
3756 * CNAMEs because they're not legitimate zone glue.
3758 cname_ok = ISC_FALSE;
3761 * The node may be a zone cut itself. If it might be one,
3762 * make sure we check for it later.
3764 * DS records live above the zone cut in ordinary zone so
3765 * we want to ignore any referral.
2767 * Stub zones don't have anything "above" the delegation so
3768 * we always return a referral.
3770 if (node->find_callback &&
3771 ((node != search.rbtdb->origin_node &&
3772 !dns_rdatatype_atparent(type)) ||
3773 IS_STUB(search.rbtdb)))
3774 maybe_zonecut = ISC_TRUE;
3778 * Certain DNSSEC types are not subject to CNAME matching
3779 * (RFC4035, section 2.5 and RFC3007).
3781 * We don't check for RRSIG, because we don't store RRSIG records
3784 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3785 cname_ok = ISC_FALSE;
3788 * We now go looking for rdata...
3791 lock = &search.rbtdb->node_locks[node->locknum].lock;
3792 NODE_LOCK(lock, isc_rwlocktype_read);
3796 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3800 empty_node = ISC_TRUE;
3801 for (header = node->data; header != NULL; header = header_next) {
3802 header_next = header->next;
3804 * Look for an active, extant rdataset.
3807 if (header->serial <= search.serial &&
3810 * Is this a "this rdataset doesn't
3813 if (NONEXISTENT(header))
3817 header = header->down;
3818 } while (header != NULL);
3819 if (header != NULL) {
3821 * We now know that there is at least one active
3822 * rdataset at this node.
3824 empty_node = ISC_FALSE;
3827 * Do special zone cut handling, if requested.
3829 if (maybe_zonecut &&
3830 header->type == dns_rdatatype_ns) {
3832 * We increment the reference count on node to
3833 * ensure that search->zonecut_rdataset will
3834 * still be valid later.
3836 new_reference(search.rbtdb, node);
3837 search.zonecut = node;
3838 search.zonecut_rdataset = header;
3839 search.zonecut_sigrdataset = NULL;
3840 search.need_cleanup = ISC_TRUE;
3841 maybe_zonecut = ISC_FALSE;
3842 at_zonecut = ISC_TRUE;
3844 * It is not clear if KEY should still be
3845 * allowed at the parent side of the zone
3846 * cut or not. It is needed for RFC3007
3847 * validated updates.
3849 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3850 && type != dns_rdatatype_nsec
3851 && type != dns_rdatatype_key) {
3853 * Glue is not OK, but any answer we
3854 * could return would be glue. Return
3860 if (found != NULL && foundsig != NULL)
3866 * If the NSEC3 record doesn't match the chain
3867 * we are using behave as if it isn't here.
3869 if (header->type == dns_rdatatype_nsec3 &&
3870 !matchparams(header, &search)) {
3871 NODE_UNLOCK(lock, isc_rwlocktype_read);
3875 * If we found a type we were looking for,
3878 if (header->type == type ||
3879 type == dns_rdatatype_any ||
3880 (header->type == dns_rdatatype_cname &&
3883 * We've found the answer!
3886 if (header->type == dns_rdatatype_cname &&
3889 * We may be finding a CNAME instead
3890 * of the desired type.
3892 * If we've already got the CNAME RRSIG,
3893 * use it, otherwise change sigtype
3894 * so that we find it.
3896 if (cnamesig != NULL)
3897 foundsig = cnamesig;
3900 RBTDB_RDATATYPE_SIGCNAME;
3903 * If we've got all we need, end the search.
3905 if (!maybe_zonecut && foundsig != NULL)
3907 } else if (header->type == sigtype) {
3909 * We've found the RRSIG rdataset for our
3910 * target type. Remember it.
3914 * If we've got all we need, end the search.
3916 if (!maybe_zonecut && found != NULL)
3918 } else if (header->type == dns_rdatatype_nsec &&
3919 !search.rbtversion->havensec3) {
3921 * Remember a NSEC rdataset even if we're
3922 * not specifically looking for it, because
3923 * we might need it later.
3925 nsecheader = header;
3926 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3927 !search.rbtversion->havensec3) {
3929 * If we need the NSEC rdataset, we'll also
3930 * need its signature.
3933 } else if (cname_ok &&
3934 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3936 * If we get a CNAME match, we'll also need
3946 * We have an exact match for the name, but there are no
3947 * active rdatasets in the desired version. That means that
3948 * this node doesn't exist in the desired version, and that
3949 * we really have a partial match.
3952 NODE_UNLOCK(lock, isc_rwlocktype_read);
3958 * If we didn't find what we were looking for...
3960 if (found == NULL) {
3961 if (search.zonecut != NULL) {
3963 * We were trying to find glue at a node beneath a
3964 * zone cut, but didn't.
3966 * Return the delegation.
3968 NODE_UNLOCK(lock, isc_rwlocktype_read);
3969 result = setup_delegation(&search, nodep, foundname,
3970 rdataset, sigrdataset);
3974 * The desired type doesn't exist.
3976 result = DNS_R_NXRRSET;
3977 if (search.rbtversion->secure == dns_db_secure &&
3978 !search.rbtversion->havensec3 &&
3979 (nsecheader == NULL || nsecsig == NULL)) {
3981 * The zone is secure but there's no NSEC,
3982 * or the NSEC has no signature!
3985 result = DNS_R_BADDB;
3989 NODE_UNLOCK(lock, isc_rwlocktype_read);
3990 result = find_closest_nsec(&search, nodep, foundname,
3991 rdataset, sigrdataset,
3993 search.rbtversion->secure);
3994 if (result == ISC_R_SUCCESS)
3995 result = DNS_R_EMPTYWILD;
3998 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
4002 * There's no NSEC record, and we were told
4005 result = DNS_R_BADDB;
4008 if (nodep != NULL) {
4009 new_reference(search.rbtdb, node);
4012 if ((search.rbtversion->secure == dns_db_secure &&
4013 !search.rbtversion->havensec3) ||
4014 (search.options & DNS_DBFIND_FORCENSEC) != 0)
4016 bind_rdataset(search.rbtdb, node, nsecheader,
4018 if (nsecsig != NULL)
4019 bind_rdataset(search.rbtdb, node,
4020 nsecsig, 0, sigrdataset);
4023 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4028 * We found what we were looking for, or we found a CNAME.
4031 if (type != found->type &&
4032 type != dns_rdatatype_any &&
4033 found->type == dns_rdatatype_cname) {
4035 * We weren't doing an ANY query and we found a CNAME instead
4036 * of the type we were looking for, so we need to indicate
4037 * that result to the caller.
4039 result = DNS_R_CNAME;
4040 } else if (search.zonecut != NULL) {
4042 * If we're beneath a zone cut, we must indicate that the
4043 * result is glue, unless we're actually at the zone cut
4044 * and the type is NSEC or KEY.
4046 if (search.zonecut == node) {
4048 * It is not clear if KEY should still be
4049 * allowed at the parent side of the zone
4050 * cut or not. It is needed for RFC3007
4051 * validated updates.
4053 if (type == dns_rdatatype_nsec ||
4054 type == dns_rdatatype_nsec3 ||
4055 type == dns_rdatatype_key)
4056 result = ISC_R_SUCCESS;
4057 else if (type == dns_rdatatype_any)
4058 result = DNS_R_ZONECUT;
4060 result = DNS_R_GLUE;
4062 result = DNS_R_GLUE;
4064 * We might have found data that isn't glue, but was occluded
4065 * by a dynamic update. If the caller cares about this, they
4066 * will have told us to validate glue.
4068 * XXX We should cache the glue validity state!
4070 if (result == DNS_R_GLUE &&
4071 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4072 !valid_glue(&search, foundname, type, node)) {
4073 NODE_UNLOCK(lock, isc_rwlocktype_read);
4074 result = setup_delegation(&search, nodep, foundname,
4075 rdataset, sigrdataset);
4080 * An ordinary successful query!
4082 result = ISC_R_SUCCESS;
4085 if (nodep != NULL) {
4087 new_reference(search.rbtdb, node);
4089 search.need_cleanup = ISC_FALSE;
4093 if (type != dns_rdatatype_any) {
4094 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4095 if (foundsig != NULL)
4096 bind_rdataset(search.rbtdb, node, foundsig, 0,
4101 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4104 NODE_UNLOCK(lock, isc_rwlocktype_read);
4107 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4110 * If we found a zonecut but aren't going to use it, we have to
4113 if (search.need_cleanup) {
4114 node = search.zonecut;
4115 INSIST(node != NULL);
4116 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4118 NODE_LOCK(lock, isc_rwlocktype_read);
4119 decrement_reference(search.rbtdb, node, 0,
4120 isc_rwlocktype_read, isc_rwlocktype_none,
4122 NODE_UNLOCK(lock, isc_rwlocktype_read);
4126 closeversion(db, &version, ISC_FALSE);
4128 dns_rbtnodechain_reset(&search.chain);
/*
 * "Find zone cut" entry point that is not implemented in this form: the
 * body reports a fatal error ("zone_findzonecut() called!") and then
 * returns ISC_R_NOTIMPLEMENTED.  None of the arguments are used for any
 * lookup.  Compare cache_findzonecut(), which has the same parameter list
 * and performs a real search.
 */
4134 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4135 isc_stdtime_t now, dns_dbnode_t **nodep,
4136 dns_name_t *foundname,
4137 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4146 UNUSED(sigrdataset);
4148 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4151 return (ISC_R_NOTIMPLEMENTED);
/*
 * Per-node callback that cache_find() hands to dns_rbt_findnode().
 *
 * 'arg' is the rbtdb_search_t of the lookup in progress; it must not
 * already have a zone cut recorded (search->zonecut == NULL).
 *
 * Under the node lock, the node's rdataset headers are scanned for a
 * DNAME and an RRSIG DNAME header.  Headers whose rdh_ttl is not later
 * than search->now are treated as stale: once they are also older than
 * the RBTDB_VIRTUAL window and write access to the node lock can be
 * obtained, they are either unlinked and freed (node reference count is
 * zero) or flagged RDATASET_ATTR_STALE.
 *
 * When a DNAME header was found and its trust level is not "pending"
 * (or DNS_DBFIND_PENDINGOK is set), a reference to the node is taken,
 * the node and headers are recorded in the search as the zone cut,
 * search->need_cleanup is set, and the result is DNS_R_PARTIALMATCH.
 * The other visible outcome is DNS_R_CONTINUE.
 */
4155 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4156 rbtdb_search_t *search = arg;
4157 rdatasetheader_t *header, *header_prev, *header_next;
4158 rdatasetheader_t *dname_header, *sigdname_header;
4159 isc_result_t result;
4161 isc_rwlocktype_t locktype;
4165 REQUIRE(search->zonecut == NULL);
4168 * Keep compiler silent.
4172 lock = &(search->rbtdb->node_locks[node->locknum].lock);
/* Begin as a reader; the stale-header path below may upgrade to writer. */
4173 locktype = isc_rwlocktype_read;
4174 NODE_LOCK(lock, locktype);
4177 * Look for a DNAME or RRSIG DNAME rdataset.
4179 dname_header = NULL;
4180 sigdname_header = NULL;
4182 for (header = node->data; header != NULL; header = header_next) {
4183 header_next = header->next;
4184 if (header->rdh_ttl <= search->now) {
4186 * This rdataset is stale. If no one else is
4187 * using the node, we can clean it up right
4188 * now, otherwise we mark it as stale, and
4189 * the node as dirty, so it will get cleaned
4192 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4193 (locktype == isc_rwlocktype_write ||
4194 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4196 * We update the node's status only when we
4197 * can get write access; otherwise, we leave
4198 * others to this work. Periodical cleaning
4199 * will eventually take the job as the last
4201 * We won't downgrade the lock, since other
4202 * rdatasets are probably stale, too.
4204 locktype = isc_rwlocktype_write;
4206 if (dns_rbtnode_refcurrent(node) == 0) {
4210 * header->down can be non-NULL if the
4211 * refcount has just decremented to 0
4212 * but decrement_reference() has not
4213 * performed clean_cache_node(), in
4214 * which case we need to purge the
4215 * stale headers first.
4217 mctx = search->rbtdb->common.mctx;
4218 clean_stale_headers(search->rbtdb,
4221 if (header_prev != NULL)
4225 node->data = header->next;
4226 free_rdataset(search->rbtdb, mctx,
4229 header->attributes |=
4230 RDATASET_ATTR_STALE;
4232 header_prev = header;
4235 header_prev = header;
4236 } else if (header->type == dns_rdatatype_dname &&
4238 dname_header = header;
4239 header_prev = header;
4240 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4242 sigdname_header = header;
4243 header_prev = header;
4245 header_prev = header;
4248 if (dname_header != NULL &&
4249 (!DNS_TRUST_PENDING(dname_header->trust) ||
4250 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4252 * We increment the reference count on node to ensure that
4253 * search->zonecut_rdataset will still be valid later.
4255 new_reference(search->rbtdb, node);
4256 INSIST(!ISC_LINK_LINKED(node, deadlink));
4257 search->zonecut = node;
4258 search->zonecut_rdataset = dname_header;
4259 search->zonecut_sigrdataset = sigdname_header;
/* cache_find() drops the reference taken above if the cut goes unused. */
4260 search->need_cleanup = ISC_TRUE;
4261 result = DNS_R_PARTIALMATCH;
4263 result = DNS_R_CONTINUE;
4265 NODE_UNLOCK(lock, locktype);
/*
 * Look for NS (and RRSIG NS) rdatasets at 'node' and, when none is found
 * there, at the nodes recorded in search->chain.levels[], working from
 * search->chain.level_matches downwards.  The caller must hold the tree
 * lock.
 *
 * Each node is examined under its node lock.  Headers whose rdh_ttl is
 * not later than search->now are treated as stale and are freed or
 * flagged RDATASET_ATTR_STALE when write access can be obtained; extant
 * headers of type NS / RRSIG NS are the ones of interest.
 *
 * When an NS header is found:
 *   - 'foundname', if non-NULL, is built first from the node's name and
 *     the names of the chain levels, so that a name failure leaves
 *     nothing to undo;
 *   - the result becomes DNS_R_DELEGATION;
 *   - a node reference is taken when 'nodep' is non-NULL;
 *   - 'rdataset' (and 'sigrdataset' when a signature header exists) are
 *     bound with search->now;
 *   - headers for which need_headerupdate() is true are refreshed with
 *     update_header() under a write lock.
 *
 * The result is initialised to ISC_R_NOTFOUND.
 */
4270 static inline isc_result_t
4271 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4272 dns_dbnode_t **nodep, dns_name_t *foundname,
4273 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4276 dns_rbtnode_t *level_node;
4277 rdatasetheader_t *header, *header_prev, *header_next;
4278 rdatasetheader_t *found, *foundsig;
4279 isc_result_t result = ISC_R_NOTFOUND;
4284 isc_rwlocktype_t locktype;
4287 * Caller must be holding the tree lock.
4290 rbtdb = search->rbtdb;
4291 i = search->chain.level_matches;
4294 locktype = isc_rwlocktype_read;
4295 lock = &rbtdb->node_locks[node->locknum].lock;
4296 NODE_LOCK(lock, locktype);
4299 * Look for NS and RRSIG NS rdatasets.
4304 for (header = node->data;
4306 header = header_next) {
4307 header_next = header->next;
4308 if (header->rdh_ttl <= search->now) {
4310 * This rdataset is stale. If no one else is
4311 * using the node, we can clean it up right
4312 * now, otherwise we mark it as stale, and
4313 * the node as dirty, so it will get cleaned
4316 if ((header->rdh_ttl <= search->now -
4318 (locktype == isc_rwlocktype_write ||
4319 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4321 * We update the node's status only
4322 * when we can get write access.
4324 locktype = isc_rwlocktype_write;
4326 if (dns_rbtnode_refcurrent(node)
4330 m = search->rbtdb->common.mctx;
4331 clean_stale_headers(
4334 if (header_prev != NULL)
4340 free_rdataset(rbtdb, m,
4343 header->attributes |=
4344 RDATASET_ATTR_STALE;
4346 header_prev = header;
4349 header_prev = header;
4350 } else if (EXISTS(header)) {
4352 * We've found an extant rdataset. See if
4353 * we're interested in it.
4355 if (header->type == dns_rdatatype_ns) {
4357 if (foundsig != NULL)
4359 } else if (header->type ==
4360 RBTDB_RDATATYPE_SIGNS) {
4365 header_prev = header;
4367 header_prev = header;
4370 if (found != NULL) {
4372 * If we have to set foundname, we do it before
4373 * anything else. If we were to set foundname after
4374 * we had set nodep or bound the rdataset, then we'd
4375 * have to undo that work if dns_name_concatenate()
4376 * failed. By setting foundname first, there's
4377 * nothing to undo if we have trouble.
4379 if (foundname != NULL) {
4380 dns_name_init(&name, NULL);
4381 dns_rbt_namefromnode(node, &name);
4382 result = dns_name_copy(&name, foundname, NULL);
4383 while (result == ISC_R_SUCCESS && i > 0) {
4385 level_node = search->chain.levels[i];
4386 dns_name_init(&name, NULL);
4387 dns_rbt_namefromnode(level_node,
4390 dns_name_concatenate(foundname,
4395 if (result != ISC_R_SUCCESS) {
4400 result = DNS_R_DELEGATION;
4401 if (nodep != NULL) {
4402 new_reference(search->rbtdb, node);
4405 bind_rdataset(search->rbtdb, node, found, search->now,
4407 if (foundsig != NULL)
4408 bind_rdataset(search->rbtdb, node, foundsig,
4409 search->now, sigrdataset);
4410 if (need_headerupdate(found, search->now) ||
4411 (foundsig != NULL &&
4412 need_headerupdate(foundsig, search->now))) {
4413 if (locktype != isc_rwlocktype_write) {
/*
 * The read lock is released and the lock re-taken as a writer, so
 * need_headerupdate() is evaluated again below before updating.
 */
4414 NODE_UNLOCK(lock, locktype);
4415 NODE_LOCK(lock, isc_rwlocktype_write);
4416 locktype = isc_rwlocktype_write;
4419 if (need_headerupdate(found, search->now))
4420 update_header(search->rbtdb, found,
4422 if (foundsig != NULL &&
4423 need_headerupdate(foundsig, search->now)) {
4424 update_header(search->rbtdb, foundsig,
4431 NODE_UNLOCK(lock, locktype);
4433 if (found == NULL && i > 0) {
4435 node = search->chain.levels[i];
/*
 * Search for an NSEC rdataset (and its RRSIG) beginning at the node the
 * search chain currently points to and stepping to earlier nodes with
 * dns_rbtnodechain_prev() for as long as the nodes visited are empty.
 *
 * Each node is examined under its node lock.  Headers whose rdh_ttl is
 * not later than 'now' are treated as stale and are freed or flagged
 * RDATASET_ATTR_STALE when write access can be obtained.  Headers that
 * are NONEXISTENT, or whose RBTDB_RDATATYPE_BASE() is 0, do not make a
 * node count as non-empty.
 *
 * Outcomes visible here:
 *   - an NSEC header is found: the node name and origin are
 *     concatenated, the rdatasets are bound, a node reference is taken,
 *     and the result is DNS_R_COVERINGNSEC;
 *   - the node is non-empty but has no NSEC: ISC_R_NOTFOUND;
 *   - the node is empty: the result is that of the step to the previous
 *     node, and the loop continues while that step succeeds.
 */
4445 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4446 isc_stdtime_t now, dns_name_t *foundname,
4447 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4449 dns_rbtnode_t *node;
4450 rdatasetheader_t *header, *header_next, *header_prev;
4451 rdatasetheader_t *found, *foundsig;
4452 isc_boolean_t empty_node;
4453 isc_result_t result;
4454 dns_fixedname_t fname, forigin;
4455 dns_name_t *name, *origin;
4456 rbtdb_rdatatype_t matchtype, sigmatchtype;
4458 isc_rwlocktype_t locktype;
4460 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4461 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4462 dns_rdatatype_nsec);
4466 dns_fixedname_init(&fname);
4467 name = dns_fixedname_name(&fname);
4468 dns_fixedname_init(&forigin);
4469 origin = dns_fixedname_name(&forigin);
4470 result = dns_rbtnodechain_current(&search->chain, name,
4472 if (result != ISC_R_SUCCESS)
4474 locktype = isc_rwlocktype_read;
4475 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4476 NODE_LOCK(lock, locktype);
4479 empty_node = ISC_TRUE;
4481 for (header = node->data;
4483 header = header_next) {
4484 header_next = header->next;
4485 if (header->rdh_ttl <= now) {
4487 * This rdataset is stale. If no one else is
4488 * using the node, we can clean it up right
4489 * now, otherwise we mark it as stale, and the
4490 * node as dirty, so it will get cleaned up
4493 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4494 (locktype == isc_rwlocktype_write ||
4495 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4497 * We update the node's status only
4498 * when we can get write access.
4500 locktype = isc_rwlocktype_write;
4502 if (dns_rbtnode_refcurrent(node)
4506 m = search->rbtdb->common.mctx;
4507 clean_stale_headers(
4510 if (header_prev != NULL)
4514 node->data = header->next;
4515 free_rdataset(search->rbtdb, m,
4518 header->attributes |=
4519 RDATASET_ATTR_STALE;
4521 header_prev = header;
4524 header_prev = header;
4527 if (NONEXISTENT(header) ||
4528 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4529 header_prev = header;
4532 empty_node = ISC_FALSE;
4533 if (header->type == matchtype)
4535 else if (header->type == sigmatchtype)
4537 header_prev = header;
4539 if (found != NULL) {
4540 result = dns_name_concatenate(name, origin,
4542 if (result != ISC_R_SUCCESS)
4544 bind_rdataset(search->rbtdb, node, found,
4546 if (foundsig != NULL)
4547 bind_rdataset(search->rbtdb, node, foundsig,
4549 new_reference(search->rbtdb, node);
4551 result = DNS_R_COVERINGNSEC;
4552 } else if (!empty_node) {
/* A populated node without an NSEC ends the backwards walk. */
4553 result = ISC_R_NOTFOUND;
4555 result = dns_rbtnodechain_prev(&search->chain, NULL,
4558 NODE_UNLOCK(lock, locktype);
4559 } while (empty_node && result == ISC_R_SUCCESS);
4564 * Mark a database for response policy rewriting.
/*
 * 'db' must be a valid rbtdb.  Passes the database's rpz_cidr and the
 * caller's 'st' to dns_rpz_enabled() while holding the tree lock for
 * reading.
 */
4568 get_rpz_enabled(dns_db_t *db, dns_rpz_st_t *st)
4572 rbtdb = (dns_rbtdb_t *)db;
4573 REQUIRE(VALID_RBTDB(rbtdb));
/* rpz_cidr is only read here, so a read lock on the tree is taken. */
4574 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4575 dns_rpz_enabled(rbtdb->rpz_cidr, st);
4576 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4580 * Search the CIDR block tree of a response policy tree of trees for all of
4581 * the IP addresses in an A or AAAA rdataset.
4582 * Among the policies for all IPv4 and IPv6 addresses for a name, choose
4583 * the longest prefix. Among those with the longest prefix, the first
4584 * configured policy. Among answers with the longest prefixes for
4585 * two or more IP addresses in the A and AAAA rdatasets the lexically
/*
 * For every address in 'ardataset' (A or AAAA records), look the address
 * up in the database's rpz_cidr tree and, when the hit is at least as
 * good as the match already recorded in st->m, replace that match.
 *
 * 'db' must be a valid rbtdb.  The tree lock is held for reading for the
 * whole scan.  If the database has no rpz_cidr tree the function returns
 * ISC_R_UNEXPECTED; the visible normal exit returns ISC_R_SUCCESS.
 *
 * Per address:
 *   1. The rdata is converted to an isc_netaddr_t and passed to
 *      dns_rpz_cidr_find(), which yields 'qname' and the matching
 *      'prefix'.
 *   2. The candidate is compared against st->m.prefix and
 *      st->m.rpz->num when st->m already holds a match.
 *   3. The node for 'qname' is looked up in the main tree; a failure is
 *      logged as "rpz_findips findnode(%s): %s".
 *   4. An rdataset of the queried address type is sought at the node,
 *      then a CNAME.  A non-CNAME hit is DNS_RPZ_POLICY_RECORD; a CNAME
 *      is decoded with dns_rpz_decode_cname() and reported as
 *      DNS_R_CNAME when it decodes to DNS_RPZ_POLICY_RECORD.  The
 *      remaining visible case records DNS_R_NXRRSET with
 *      DNS_RPZ_TTL_DEFAULT.
 *   5. rpz->policy overrides the decoded policy unless rpz->policy is
 *      DNS_RPZ_POLICY_GIVEN or the decoded policy is
 *      DNS_RPZ_POLICY_NO_OP.
 *   6. If the candidate wins, whatever st->m was holding is released
 *      and st->m is filled in from the candidate; the rdataset and node
 *      are attached only for DNS_RPZ_POLICY_RECORD results other than
 *      DNS_R_NXRRSET.
 *
 * NOTE(review): if the two early comparisons against st->m in step 2
 * reject the candidate (confirm), then whenever the prefixes are equal
 * `rpz->num <= st->m.rpz->num` already holds at the final replacement
 * test, so the dns_name_compare() tie-break looks unreachable.  Verify
 * whether `<` was intended there.
 */
4589 rpz_findips(dns_rpz_zone_t *rpz, dns_rpz_type_t rpz_type,
4590 dns_zone_t *zone, dns_db_t *db, dns_dbversion_t *version,
4591 dns_rdataset_t *ardataset, dns_rpz_st_t *st)
4595 struct in6_addr in6a;
4596 isc_netaddr_t netaddr;
4597 dns_fixedname_t selfnamef, qnamef;
4598 dns_name_t *selfname, *qname;
4599 dns_rbtnode_t *node;
4600 dns_rdataset_t zrdataset;
4601 dns_rpz_cidr_bits_t prefix;
4602 isc_result_t result;
4603 dns_rpz_policy_t rpz_policy;
4606 rbtdb = (dns_rbtdb_t *)db;
4607 REQUIRE(VALID_RBTDB(rbtdb));
4608 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4610 if (rbtdb->rpz_cidr == NULL) {
4611 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4613 dns_zone_detach(&zone);
/*
 * NOTE(review): this error path detaches the zone passed in by the
 * caller; confirm the caller expects to give up that reference here.
 */
4614 return (ISC_R_UNEXPECTED);
4617 dns_fixedname_init(&selfnamef);
4618 dns_fixedname_init(&qnamef);
4619 selfname = dns_fixedname_name(&selfnamef);
4620 qname = dns_fixedname_name(&qnamef);
4622 for (result = dns_rdataset_first(ardataset);
4623 result == ISC_R_SUCCESS;
4624 result = dns_rdataset_next(ardataset)) {
4625 dns_rdata_t rdata = DNS_RDATA_INIT;
4626 dns_rdataset_current(ardataset, &rdata);
4627 switch (rdata.type) {
4628 case dns_rdatatype_a:
4629 INSIST(rdata.length == 4);
4630 memcpy(&ina.s_addr, rdata.data, 4);
4631 isc_netaddr_fromin(&netaddr, &ina);
4633 case dns_rdatatype_aaaa:
4634 INSIST(rdata.length == 16);
4635 memcpy(in6a.s6_addr, rdata.data, 16);
4636 isc_netaddr_fromin6(&netaddr, &in6a);
4642 result = dns_rpz_cidr_find(rbtdb->rpz_cidr, &netaddr, rpz_type,
4643 selfname, qname, &prefix);
4644 if (result != ISC_R_SUCCESS)
4648 * Choose the policy with the longest matching prefix.
4649 * Between policies with the same prefix, choose the first
4652 if (st->m.policy != DNS_RPZ_POLICY_MISS) {
4653 if (prefix < st->m.prefix)
4655 if (prefix == st->m.prefix &&
4656 rpz->num > st->m.rpz->num)
4661 * We have rpz_st an entry with a prefix at least as long as
4662 * the prefix of the entry we had before. Find the node
4663 * corresponding to CDIR tree entry.
4666 result = dns_rbt_findnode(rbtdb->tree, qname, NULL,
4667 &node, NULL, 0, NULL, NULL);
4668 if (result != ISC_R_SUCCESS) {
4669 char namebuf[DNS_NAME_FORMATSIZE];
4671 dns_name_format(qname, namebuf, sizeof(namebuf));
4672 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4673 DNS_LOGMODULE_CACHE, DNS_RPZ_ERROR_LEVEL,
4674 "rpz_findips findnode(%s): %s",
4675 namebuf, isc_result_totext(result));
4679 * First look for a simple rewrite of the IP address.
4680 * If that fails, look for a CNAME. If we cannot find
4681 * a CNAME or the CNAME is neither of the special forms
4682 * "*" or ".", treat it like a real CNAME.
4684 dns_rdataset_init(&zrdataset);
4685 result = dns_db_findrdataset(db, node, version, ardataset->type,
4686 0, 0, &zrdataset, NULL);
4687 if (result != ISC_R_SUCCESS)
4688 result = dns_db_findrdataset(db, node, version,
4689 dns_rdatatype_cname,
4690 0, 0, &zrdataset, NULL);
4691 if (result == ISC_R_SUCCESS) {
4692 if (zrdataset.type != dns_rdatatype_cname) {
4693 rpz_policy = DNS_RPZ_POLICY_RECORD;
4695 rpz_policy = dns_rpz_decode_cname(&zrdataset,
4697 if (rpz_policy == DNS_RPZ_POLICY_RECORD)
4698 result = DNS_R_CNAME;
4700 ttl = zrdataset.ttl;
4702 rpz_policy = DNS_RPZ_POLICY_RECORD;
4703 result = DNS_R_NXRRSET;
4704 ttl = DNS_RPZ_TTL_DEFAULT;
4708 * Use an overriding action specified in the configuration file
4710 if (rpz->policy != DNS_RPZ_POLICY_GIVEN &&
4711 rpz_policy != DNS_RPZ_POLICY_NO_OP)
4712 rpz_policy = rpz->policy;
4715 * We know the new prefix is at least as long as the current.
4716 * Prefer the new answer if the new prefix is longer.
4717 * Prefer the zone configured first if the prefixes are equal.
4718 * With two actions from the same zone, prefer the action
4719 * on the "smallest" name.
4721 if (st->m.policy == DNS_RPZ_POLICY_MISS ||
4722 prefix > st->m.prefix ||
4723 rpz->num <= st->m.rpz->num ||
4724 0 > dns_name_compare(qname, st->qname)) {
/* Release everything the previously recorded match was holding. */
4725 if (dns_rdataset_isassociated(st->m.rdataset))
4726 dns_rdataset_disassociate(st->m.rdataset);
4727 if (st->m.node != NULL)
4728 dns_db_detachnode(st->m.db, &st->m.node);
4729 if (st->m.db != NULL)
4730 dns_db_detach(&st->m.db);
4731 if (st->m.zone != NULL)
4732 dns_zone_detach(&st->m.zone);
4734 st->m.type = rpz_type;
4735 st->m.prefix = prefix;
4736 st->m.policy = rpz_policy;
4738 st->m.result = result;
/* NOTE(review): the dns_name_copy() result is not checked here. */
4739 dns_name_copy(qname, st->qname, NULL);
4740 if (rpz_policy == DNS_RPZ_POLICY_RECORD &&
4741 result != DNS_R_NXRRSET) {
4742 dns_rdataset_clone(&zrdataset,st->m.rdataset);
4743 dns_db_attachnode(db, node, &st->m.node);
4745 dns_db_attach(db, &st->m.db);
4746 dns_zone_attach(zone, &st->m.zone);
4748 if (dns_rdataset_isassociated(&zrdataset))
4749 dns_rdataset_disassociate(&zrdataset);
4752 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4753 return (ISC_R_SUCCESS);
/*
 * Look up 'name' / 'type' in a cache database.
 *
 * 'db' must be a valid rbtdb and 'version' must be NULL.  The tree lock
 * is held for reading during the search.
 *
 * Outline of the visible logic:
 *   - dns_rbt_findnode() descends the tree with DNS_RBTFIND_EMPTYDATA
 *     and cache_zonecut_callback(), which may record a DNAME zone cut in
 *     the search state.
 *   - On DNS_R_PARTIALMATCH: a covering NSEC is tried when
 *     DNS_DBFIND_COVERINGNSEC is set; a recorded zone cut is turned into
 *     a delegation with setup_delegation(); find_deepest_zonecut() is
 *     the remaining visible fallback.
 *   - KEY and NSEC queries never match a CNAME.
 *   - On an exact match the node's headers are scanned under the node
 *     lock.  Headers whose rdh_ttl is not later than 'now' are stale and
 *     are freed or flagged RDATASET_ATTR_STALE when write access can be
 *     obtained.  Extant headers are sorted into: the answer (or a CNAME),
 *     its RRSIG, a negative cache entry, NS, RRSIG NS and RRSIG CNAME.
 *   - If no answer was found, or the answer's trust level (additional,
 *     glue, pending) is not permitted by 'options', an NS rdataset at
 *     the node yields DNS_R_DELEGATION; without one the search goes on
 *     to the deepest zone cut.
 *   - Otherwise the result is DNS_R_NCACHENXDOMAIN or
 *     DNS_R_NCACHENXRRSET for a negative entry, DNS_R_CNAME when a CNAME
 *     was found for a non-ANY query of another type, or ISC_R_SUCCESS.
 *     The rdatasets are bound unless this is a positive ANY answer.
 *   - Headers for which need_headerupdate() is true are refreshed with
 *     update_header() under a write lock.
 *   - After the tree lock is released, a zone-cut reference that was
 *     taken but not used (search.need_cleanup) is dropped.
 */
4758 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4759 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4760 dns_dbnode_t **nodep, dns_name_t *foundname,
4761 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4763 dns_rbtnode_t *node = NULL;
4764 isc_result_t result;
4765 rbtdb_search_t search;
4766 isc_boolean_t cname_ok = ISC_TRUE;
4767 isc_boolean_t empty_node;
4769 isc_rwlocktype_t locktype;
4770 rdatasetheader_t *header, *header_prev, *header_next;
4771 rdatasetheader_t *found, *nsheader;
4772 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4773 rdatasetheader_t *update, *updatesig;
4774 rbtdb_rdatatype_t sigtype, negtype;
4778 search.rbtdb = (dns_rbtdb_t *)db;
4780 REQUIRE(VALID_RBTDB(search.rbtdb));
4781 REQUIRE(version == NULL);
4784 isc_stdtime_get(&now);
4786 search.rbtversion = NULL;
4788 search.options = options;
4789 search.copy_name = ISC_FALSE;
4790 search.need_cleanup = ISC_FALSE;
4791 search.wild = ISC_FALSE;
4792 search.zonecut = NULL;
4793 dns_fixedname_init(&search.zonecut_name);
4794 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4799 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4802 * Search down from the root of the tree. If, while going down, we
4803 * encounter a callback node, cache_zonecut_callback() will search the
4804 * rdatasets at the zone cut for a DNAME rdataset.
4806 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4807 &search.chain, DNS_RBTFIND_EMPTYDATA,
4808 cache_zonecut_callback, &search);
4810 if (result == DNS_R_PARTIALMATCH) {
4811 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4812 result = find_coveringnsec(&search, nodep, now,
4813 foundname, rdataset,
4815 if (result == DNS_R_COVERINGNSEC)
4818 if (search.zonecut != NULL) {
4819 result = setup_delegation(&search, nodep, foundname,
4820 rdataset, sigrdataset);
4824 result = find_deepest_zonecut(&search, node, nodep,
4825 foundname, rdataset,
4829 } else if (result != ISC_R_SUCCESS)
4833 * Certain DNSSEC types are not subject to CNAME matching
4834 * (RFC4035, section 2.5 and RFC3007).
4836 * We don't check for RRSIG, because we don't store RRSIG records
4839 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4840 cname_ok = ISC_FALSE;
4843 * We now go looking for rdata...
4846 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4847 locktype = isc_rwlocktype_read;
4848 NODE_LOCK(lock, locktype);
4852 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
/* negtype is the header type matched below as a negative cache entry. */
4853 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4857 empty_node = ISC_TRUE;
4859 for (header = node->data; header != NULL; header = header_next) {
4860 header_next = header->next;
4861 if (header->rdh_ttl <= now) {
4863 * This rdataset is stale. If no one else is using the
4864 * node, we can clean it up right now, otherwise we
4865 * mark it as stale, and the node as dirty, so it will
4866 * get cleaned up later.
4868 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4869 (locktype == isc_rwlocktype_write ||
4870 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4872 * We update the node's status only when we
4873 * can get write access.
4875 locktype = isc_rwlocktype_write;
4877 if (dns_rbtnode_refcurrent(node) == 0) {
4880 mctx = search.rbtdb->common.mctx;
4881 clean_stale_headers(search.rbtdb, mctx,
4883 if (header_prev != NULL)
4887 node->data = header->next;
4888 free_rdataset(search.rbtdb, mctx,
4891 header->attributes |=
4892 RDATASET_ATTR_STALE;
4894 header_prev = header;
4897 header_prev = header;
4898 } else if (EXISTS(header)) {
4900 * We now know that there is at least one active
4901 * non-stale rdataset at this node.
4903 empty_node = ISC_FALSE;
4906 * If we found a type we were looking for, remember
4909 if (header->type == type ||
4910 (type == dns_rdatatype_any &&
4911 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4912 (cname_ok && header->type ==
4913 dns_rdatatype_cname)) {
4915 * We've found the answer.
4918 if (header->type == dns_rdatatype_cname &&
4922 * If we've already got the
4923 * CNAME RRSIG, use it.
4925 foundsig = cnamesig;
4927 } else if (header->type == sigtype) {
4929 * We've found the RRSIG rdataset for our
4930 * target type. Remember it.
4933 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4934 header->type == negtype) {
4936 * We've found a negative cache entry.
4939 } else if (header->type == dns_rdatatype_ns) {
4941 * Remember a NS rdataset even if we're
4942 * not specifically looking for it, because
4943 * we might need it later.
4946 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4948 * If we need the NS rdataset, we'll also
4949 * need its signature.
4952 } else if (cname_ok &&
4953 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4955 * If we get a CNAME match, we'll also need
4960 header_prev = header;
4962 header_prev = header;
4967 * We have an exact match for the name, but there are no
4968 * extant rdatasets. That means that this node doesn't
4969 * meaningfully exist, and that we really have a partial match.
4971 NODE_UNLOCK(lock, locktype);
4976 * If we didn't find what we were looking for...
4978 if (found == NULL ||
4979 (DNS_TRUST_ADDITIONAL(found->trust) &&
4980 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4981 (found->trust == dns_trust_glue &&
4982 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4983 (DNS_TRUST_PENDING(found->trust) &&
4984 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4986 * If there is an NS rdataset at this node, then this is the
4989 if (nsheader != NULL) {
4990 if (nodep != NULL) {
4991 new_reference(search.rbtdb, node);
4992 INSIST(!ISC_LINK_LINKED(node, deadlink));
4995 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4997 if (need_headerupdate(nsheader, search.now))
4999 if (nssig != NULL) {
5000 bind_rdataset(search.rbtdb, node, nssig,
5001 search.now, sigrdataset);
5002 if (need_headerupdate(nssig, search.now))
5005 result = DNS_R_DELEGATION;
5010 * Go find the deepest zone cut.
5012 NODE_UNLOCK(lock, locktype);
5017 * We found what we were looking for, or we found a CNAME.
5020 if (nodep != NULL) {
5021 new_reference(search.rbtdb, node);
5022 INSIST(!ISC_LINK_LINKED(node, deadlink));
5026 if (NEGATIVE(found)) {
5028 * We found a negative cache entry.
5030 if (NXDOMAIN(found))
5031 result = DNS_R_NCACHENXDOMAIN;
5033 result = DNS_R_NCACHENXRRSET;
5034 } else if (type != found->type &&
5035 type != dns_rdatatype_any &&
5036 found->type == dns_rdatatype_cname) {
5038 * We weren't doing an ANY query and we found a CNAME instead
5039 * of the type we were looking for, so we need to indicate
5040 * that result to the caller.
5042 result = DNS_R_CNAME;
5045 * An ordinary successful query!
5047 result = ISC_R_SUCCESS;
5050 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5051 result == DNS_R_NCACHENXRRSET) {
5052 bind_rdataset(search.rbtdb, node, found, search.now,
5054 if (need_headerupdate(found, search.now))
5056 if (!NEGATIVE(found) && foundsig != NULL) {
5057 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5059 if (need_headerupdate(foundsig, search.now))
5060 updatesig = foundsig;
5065 if ((update != NULL || updatesig != NULL) &&
5066 locktype != isc_rwlocktype_write) {
/*
 * The read lock is released and the lock re-taken as a writer, so
 * need_headerupdate() is evaluated again below before updating.
 */
5067 NODE_UNLOCK(lock, locktype);
5068 NODE_LOCK(lock, isc_rwlocktype_write);
5069 locktype = isc_rwlocktype_write;
5072 if (update != NULL && need_headerupdate(update, search.now))
5073 update_header(search.rbtdb, update, search.now);
5074 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5075 update_header(search.rbtdb, updatesig, search.now);
5077 NODE_UNLOCK(lock, locktype);
5080 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5083 * If we found a zonecut but aren't going to use it, we have to
5086 if (search.need_cleanup) {
5087 node = search.zonecut;
5088 INSIST(node != NULL);
5089 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5091 NODE_LOCK(lock, isc_rwlocktype_read);
5092 decrement_reference(search.rbtdb, node, 0,
5093 isc_rwlocktype_read, isc_rwlocktype_none,
5095 NODE_UNLOCK(lock, isc_rwlocktype_read);
5098 dns_rbtnodechain_reset(&search.chain);
/*
 * Find an NS rdataset (and its RRSIG) for 'name' in a cache database.
 *
 * 'db' must be a valid rbtdb.  DNS_DBFIND_NOEXACT in 'options' is passed
 * on to the tree search as DNS_RBTFIND_NOEXACT.  The tree lock is held
 * for reading during the search and no per-node callback is used.
 *
 *   - On DNS_R_PARTIALMATCH the answer comes from
 *     find_deepest_zonecut().
 *   - On an exact match the node's headers are scanned under the node
 *     lock.  Headers whose rdh_ttl is not later than 'now' are stale and
 *     are freed or flagged RDATASET_ATTR_STALE when write access can be
 *     obtained; extant NS and RRSIG NS headers are remembered.
 *   - When an NS header was found, a node reference is taken if 'nodep'
 *     is non-NULL, the rdatasets are bound with search.now, and headers
 *     for which need_headerupdate() is true are refreshed under a write
 *     lock.
 *
 * This path never records a zone cut needing cleanup
 * (INSIST(!search.need_cleanup)).  A DNS_R_DELEGATION result is
 * converted to ISC_R_SUCCESS before the function finishes.
 */
5104 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
5105 isc_stdtime_t now, dns_dbnode_t **nodep,
5106 dns_name_t *foundname,
5107 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
5109 dns_rbtnode_t *node = NULL;
5111 isc_result_t result;
5112 rbtdb_search_t search;
5113 rdatasetheader_t *header, *header_prev, *header_next;
5114 rdatasetheader_t *found, *foundsig;
5115 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
5116 isc_rwlocktype_t locktype;
5118 search.rbtdb = (dns_rbtdb_t *)db;
5120 REQUIRE(VALID_RBTDB(search.rbtdb));
5123 isc_stdtime_get(&now);
5125 search.rbtversion = NULL;
5127 search.options = options;
5128 search.copy_name = ISC_FALSE;
5129 search.need_cleanup = ISC_FALSE;
5130 search.wild = ISC_FALSE;
5131 search.zonecut = NULL;
5132 dns_fixedname_init(&search.zonecut_name);
5133 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
5136 if ((options & DNS_DBFIND_NOEXACT) != 0)
5137 rbtoptions |= DNS_RBTFIND_NOEXACT;
5139 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5142 * Search down from the root of the tree.
5144 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5145 &search.chain, rbtoptions, NULL, &search);
5147 if (result == DNS_R_PARTIALMATCH) {
5149 result = find_deepest_zonecut(&search, node, nodep, foundname,
5150 rdataset, sigrdataset);
5152 } else if (result != ISC_R_SUCCESS)
5156 * We now go looking for an NS rdataset at the node.
5159 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5160 locktype = isc_rwlocktype_read;
5161 NODE_LOCK(lock, locktype);
5166 for (header = node->data; header != NULL; header = header_next) {
5167 header_next = header->next;
5168 if (header->rdh_ttl <= now) {
5170 * This rdataset is stale. If no one else is using the
5171 * node, we can clean it up right now, otherwise we
5172 * mark it as stale, and the node as dirty, so it will
5173 * get cleaned up later.
5175 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5176 (locktype == isc_rwlocktype_write ||
5177 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5179 * We update the node's status only when we
5180 * can get write access.
5182 locktype = isc_rwlocktype_write;
5184 if (dns_rbtnode_refcurrent(node) == 0) {
5187 mctx = search.rbtdb->common.mctx;
5188 clean_stale_headers(search.rbtdb, mctx,
5190 if (header_prev != NULL)
5194 node->data = header->next;
5195 free_rdataset(search.rbtdb, mctx,
5198 header->attributes |=
5199 RDATASET_ATTR_STALE;
5201 header_prev = header;
5204 header_prev = header;
5205 } else if (EXISTS(header)) {
5207 * If we found a type we were looking for, remember
5210 if (header->type == dns_rdatatype_ns) {
5212 * Remember a NS rdataset even if we're
5213 * not specifically looking for it, because
5214 * we might need it later.
5217 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5219 * If we need the NS rdataset, we'll also
5220 * need its signature.
5224 header_prev = header;
5226 header_prev = header;
5229 if (found == NULL) {
5231 * No NS records here.
5233 NODE_UNLOCK(lock, locktype);
5237 if (nodep != NULL) {
5238 new_reference(search.rbtdb, node);
5239 INSIST(!ISC_LINK_LINKED(node, deadlink));
5243 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
5244 if (foundsig != NULL)
5245 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5248 if (need_headerupdate(found, search.now) ||
5249 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
5250 if (locktype != isc_rwlocktype_write) {
/*
 * The read lock is released and the lock re-taken as a writer, so
 * need_headerupdate() is evaluated again below before updating.
 */
5251 NODE_UNLOCK(lock, locktype);
5252 NODE_LOCK(lock, isc_rwlocktype_write);
5253 locktype = isc_rwlocktype_write;
5256 if (need_headerupdate(found, search.now))
5257 update_header(search.rbtdb, found, search.now);
5258 if (foundsig != NULL &&
5259 need_headerupdate(foundsig, search.now)) {
5260 update_header(search.rbtdb, foundsig, search.now);
5264 NODE_UNLOCK(lock, locktype);
5267 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5269 INSIST(!search.need_cleanup);
5271 dns_rbtnodechain_reset(&search.chain);
5273 if (result == DNS_R_DELEGATION)
5274 result = ISC_R_SUCCESS;
/*
 * Attach to a node: take one more reference on source for the caller.
 *
 * Requires a valid rbtdb, and that targetp is non-NULL and currently
 * points to NULL.  The node reference count is incremented while holding
 * the strong lock of the lock bucket selected by node->locknum.
 *
 * NOTE(review): the declaration of refs and the store into *targetp are
 * not visible in this listing -- presumably *targetp is set to source;
 * confirm against the full source.
 */
5280 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5281 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5282 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5285 REQUIRE(VALID_RBTDB(rbtdb));
5286 REQUIRE(targetp != NULL && *targetp == NULL);
/* The increment is bracketed by the strong lock of this node bucket. */
5288 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5289 dns_rbtnode_refincrement(node, &refs);
5291 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Detach from a node: drop the reference held through *targetp.
 *
 * Requires a valid rbtdb, and that targetp and *targetp are non-NULL.
 *
 * The reference is released with decrement_reference() under the read
 * lock of the node lock bucket.  When that call returns true and the
 * bucket has no references left while it is marked exiting, the bucket
 * is recorded as inactive.  Later, under the database write lock,
 * want_free is set once rbtdb->active is zero; the visible tail of the
 * function logs the database origin (or a placeholder when the origin
 * name is not dynamic) at debug level 1 and calls free_rbtdb().
 *
 * NOTE(review): the conditions guarding the database-lock section and
 * the free_rbtdb() call, any update of rbtdb->active, and the clearing
 * of *targetp are not visible in this listing -- presumably they are
 * gated on inactive and want_free respectively; confirm.
 */
5297 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5298 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5299 dns_rbtnode_t *node;
5300 isc_boolean_t want_free = ISC_FALSE;
5301 isc_boolean_t inactive = ISC_FALSE;
5302 rbtdb_nodelock_t *nodelock;
5304 REQUIRE(VALID_RBTDB(rbtdb));
5305 REQUIRE(targetp != NULL && *targetp != NULL);
5307 node = (dns_rbtnode_t *)(*targetp);
5308 nodelock = &rbtdb->node_locks[node->locknum];
/* Only a read lock on the bucket is taken for the decrement. */
5310 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5312 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5313 isc_rwlocktype_none, ISC_FALSE)) {
5314 if (isc_refcount_current(&nodelock->references) == 0 &&
5315 nodelock->exiting) {
5316 inactive = ISC_TRUE;
5320 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
/* rbtdb->active is only examined under the database write lock. */
5325 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5327 if (rbtdb->active == 0)
5328 want_free = ISC_TRUE;
5329 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5331 char buf[DNS_NAME_FORMATSIZE];
5332 if (dns_name_dynamic(&rbtdb->common.origin))
5333 dns_name_format(&rbtdb->common.origin, buf,
5336 strcpy(buf, "<UNKNOWN>");
5337 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5338 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5339 "calling free_rbtdb(%s)", buf);
5340 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Mark expired rdatasets at a node as stale, and under memory pressure
 * forcibly expire some nodes.
 *
 * Parameters:
 *   db   - the database; must be a valid rbtdb.
 *   node - the node to examine; the existing notes below say the caller
 *          must hold a tree lock and holds a reference on the node.
 *   now  - current time; isc_stdtime_get() is called to fill it in
 *          (NOTE(review): presumably only when now is 0 -- the guard is
 *          not visible in this listing).
 *
 * When the memory context reports overmem, a random value is drawn and
 * force_expire is set for nodes with no down pointer when the value is a
 * multiple of 4; logging is enabled only if the log context would log at
 * debug level 2.
 *
 * With the node write lock held, each top-level header is checked:
 *   - ttl at or before now - RBTDB_VIRTUAL: marked RDATASET_ATTR_STALE;
 *   - otherwise, if force_expire: headers without RETAIN get ttl 0 and
 *     are marked stale, the others are only logged as reprieved;
 *   - otherwise, if still overmem and logging: logged as saved.
 *
 * Returns ISC_R_SUCCESS on the visible return path.
 */
5346 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5347 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5348 dns_rbtnode_t *rbtnode = node;
5349 rdatasetheader_t *header;
5350 isc_boolean_t force_expire = ISC_FALSE;
5352 * These are the category and module used by the cache cleaner.
5354 isc_boolean_t log = ISC_FALSE;
5355 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5356 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5357 int level = ISC_LOG_DEBUG(2);
5358 char printname[DNS_NAME_FORMATSIZE];
5360 REQUIRE(VALID_RBTDB(rbtdb));
5363 * Caller must hold a tree lock.
5367 isc_stdtime_get(&now);
5369 if (isc_mem_isovermem(rbtdb->common.mctx)) {
5372 isc_random_get(&val);
5374 * XXXDCL Could stand to have a better policy, like LRU.
5376 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5379 * Note that 'log' can be true IFF overmem is also true.
5380 * overmem can currently only be true for cache
5381 * databases -- hence all of the "overmem cache" log strings.
5383 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5385 isc_log_write(dns_lctx, category, module, level,
5386 "overmem cache: %s %s",
5387 force_expire ? "FORCE" : "check",
5388 dns_rbt_formatnodename(rbtnode,
5390 sizeof(printname)));
5394 * We may not need write access, but this code path is not performance
5395 * sensitive, so it should be okay to always lock as a writer.
5397 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5398 isc_rwlocktype_write);
5400 for (header = rbtnode->data; header != NULL; header = header->next)
5401 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5403 * We don't check if refcurrent(rbtnode) == 0 and try
5404 * to free like we do in cache_find(), because
5405 * refcurrent(rbtnode) must be non-zero. This is so
5406 * because 'node' is an argument to the function.
5408 header->attributes |= RDATASET_ATTR_STALE;
5411 isc_log_write(dns_lctx, category, module,
5412 level, "overmem cache: stale %s",
5414 } else if (force_expire) {
5415 if (! RETAIN(header)) {
5416 set_ttl(rbtdb, header, 0);
5417 header->attributes |= RDATASET_ATTR_STALE;
5420 isc_log_write(dns_lctx, category, module,
5421 level, "overmem cache: "
5422 "reprieve by RETAIN() %s",
5425 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5426 isc_log_write(dns_lctx, category, module, level,
5427 "overmem cache: saved %s", printname);
5429 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5430 isc_rwlocktype_write);
5432 return (ISC_R_SUCCESS);
/*
 * Over-memory notification callback for this database implementation.
 * As the comment in the body states, it is deliberately empty.
 *
 * NOTE(review): the rest of the body is not visible in this listing, so
 * how the db and overmem arguments are consumed cannot be confirmed here.
 */
5436 overmem(dns_db_t *db, isc_boolean_t overmem) {
5437 /* This is an empty callback. See adb.c:water() */
/*
 * Debugging aid: write a textual description of a node to out.
 *
 * Requires a valid rbtdb.  The node read lock is held for the whole dump.
 *
 * Output: one line with the node address, its current reference count
 * and lock number, followed by either an (empty) marker when the node
 * has no data, or, for every top-level header, the type and then one
 * entry per header on its down chain showing serial, ttl, trust and
 * attributes (additional fields may be printed by lines not visible in
 * this listing).
 *
 * top_next is captured before the down chain is walked because the walk
 * reuses and finally nulls the current pointer.
 */
5446 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5447 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5448 dns_rbtnode_t *rbtnode = node;
5449 isc_boolean_t first;
5451 REQUIRE(VALID_RBTDB(rbtdb));
5453 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5454 isc_rwlocktype_read);
5456 fprintf(out, "node %p, %u references, locknum = %u\n",
5457 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5459 if (rbtnode->data != NULL) {
5460 rdatasetheader_t *current, *top_next;
5462 for (current = rbtnode->data; current != NULL;
5463 current = top_next) {
5464 top_next = current->next;
5466 fprintf(out, "\ttype %u", current->type);
5472 "\tserial = %lu, ttl = %u, "
5473 "trust = %u, attributes = %u, "
5475 (unsigned long)current->serial,
5478 current->attributes,
5480 current = current->down;
5481 } while (current != NULL);
5484 fprintf(out, "(empty)\n");
5486 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5487 isc_rwlocktype_read);
/*
 * Create a database iterator over this rbtdb.
 *
 * Parameters:
 *   db        - the database; must be a valid rbtdb.
 *   options   - DNS_DB_RELATIVENAMES, DNS_DB_NSEC3ONLY and DNS_DB_NONSEC3
 *               are the bits examined here.
 *   iteratorp - receives the new iterator.
 *
 * The iterator is allocated from the database memory context and holds
 * its own attachment to db.  It starts paused, with no tree lock held,
 * no current node and an empty deletion list.  Two node chains are
 * initialised; the NSEC3 chain is made current when DNS_DB_NSEC3ONLY is
 * set (NOTE(review): the else keyword before the second assignment is
 * not visible in this listing -- presumably the main chain is the
 * alternative, not an unconditional overwrite).
 *
 * Returns ISC_R_NOMEMORY if the allocation fails, else ISC_R_SUCCESS.
 */
5491 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5493 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5494 rbtdb_dbiterator_t *rbtdbiter;
5496 REQUIRE(VALID_RBTDB(rbtdb));
5498 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5499 if (rbtdbiter == NULL)
5500 return (ISC_R_NOMEMORY);
5502 rbtdbiter->common.methods = &dbiterator_methods;
/* common.db is cleared first, then filled in by dns_db_attach(). */
5503 rbtdbiter->common.db = NULL;
5504 dns_db_attach(db, &rbtdbiter->common.db);
5505 rbtdbiter->common.relative_names =
5506 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5507 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5508 rbtdbiter->common.cleaning = ISC_FALSE;
5509 rbtdbiter->paused = ISC_TRUE;
5510 rbtdbiter->tree_locked = isc_rwlocktype_none;
5511 rbtdbiter->result = ISC_R_SUCCESS;
5512 dns_fixedname_init(&rbtdbiter->name);
5513 dns_fixedname_init(&rbtdbiter->origin);
5514 rbtdbiter->node = NULL;
5515 rbtdbiter->delete = 0;
5516 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5517 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5518 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5519 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5520 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5521 if (rbtdbiter->nsec3only)
5522 rbtdbiter->current = &rbtdbiter->nsec3chain;
5524 rbtdbiter->current = &rbtdbiter->chain;
5526 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5528 return (ISC_R_SUCCESS);
/*
 * Look up one rdataset (and optionally its RRSIG) at a node of a zone
 * database, as seen from a given version.
 *
 * Parameters:
 *   db          - the database; must be a valid rbtdb.
 *   node        - the node to search.
 *   version     - version to search; when NULL the current version is
 *                 opened here and closed again before returning.
 *   type/covers - the rdataset type wanted; type must not be ANY.
 *   now         - passed through to bind_rdataset().
 *   rdataset    - bound to the match when one is found.
 *   sigrdataset - bound to the matching RRSIG set when one is found.
 *
 * Under the node read lock every top-level header is visited; its down
 * chain is walked to select a header whose serial is not newer than the
 * version serial, and a header flagged NONEXISTENT is treated specially
 * (NOTE(review): the full selection condition and the NONEXISTENT
 * handling are on lines not visible in this listing -- presumably
 * ignored headers are skipped and a nonexistent header counts as no
 * data; confirm).  A surviving header is remembered as the match or as
 * the signature according to its type.
 *
 * Returns ISC_R_NOTFOUND or ISC_R_SUCCESS; the condition selecting
 * between them is not visible here (presumably whether a match was
 * found).
 */
5532 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5533 dns_rdatatype_t type, dns_rdatatype_t covers,
5534 isc_stdtime_t now, dns_rdataset_t *rdataset,
5535 dns_rdataset_t *sigrdataset)
5537 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5538 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5539 rdatasetheader_t *header, *header_next, *found, *foundsig;
5540 rbtdb_serial_t serial;
5541 rbtdb_version_t *rbtversion = version;
5542 isc_boolean_t close_version = ISC_FALSE;
5543 rbtdb_rdatatype_t matchtype, sigmatchtype;
5545 REQUIRE(VALID_RBTDB(rbtdb));
5546 REQUIRE(type != dns_rdatatype_any);
/* No version supplied: open the current one and remember to close it. */
5548 if (rbtversion == NULL) {
5549 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5550 close_version = ISC_TRUE;
5552 serial = rbtversion->serial;
5555 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5556 isc_rwlocktype_read);
5560 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5562 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5566 for (header = rbtnode->data; header != NULL; header = header_next) {
5567 header_next = header->next;
5569 if (header->serial <= serial &&
5572 * Is this a "this rdataset doesn't
5575 if (NONEXISTENT(header))
5579 header = header->down;
5580 } while (header != NULL);
5581 if (header != NULL) {
5583 * We have an active, extant rdataset. If it's a
5584 * type we're looking for, remember it.
5586 if (header->type == matchtype) {
5588 if (foundsig != NULL)
5590 } else if (header->type == sigmatchtype) {
5597 if (found != NULL) {
5598 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5599 if (foundsig != NULL)
5600 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5604 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5605 isc_rwlocktype_read);
5608 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5612 return (ISC_R_NOTFOUND);
5614 return (ISC_R_SUCCESS);
/*
 * Look up one rdataset (and optionally its RRSIG) at a node of a cache
 * database.
 *
 * Parameters:
 *   db          - the database; must be a valid rbtdb.
 *   node        - the node to search.
 *   version     - not referenced in the visible lines.
 *   type/covers - the rdataset type wanted; type must not be ANY.
 *   now         - current time; isc_stdtime_get() fills it in
 *                 (NOTE(review): presumably only when now is 0 -- the
 *                 guard is not visible in this listing).
 *   rdataset    - bound to the match when one is found.
 *   sigrdataset - bound to the RRSIG set, but only when the match is
 *                 not a negative entry.
 *
 * The node lock starts as a read lock.  For a header whose ttl has
 * passed, an upgrade to a write lock is attempted only once the ttl is
 * at or before now - RBTDB_VIRTUAL; if write access is available the
 * header is marked RDATASET_ATTR_STALE.  Unexpired headers that exist
 * are matched against the wanted type, the negative-cache types
 * (RBTDB_RDATATYPE_NCACHEANY or the negative type for this rdatatype)
 * and the signature type.
 *
 * Returns ISC_R_NOTFOUND when nothing matched (the guard is not visible
 * here), DNS_R_NCACHENXDOMAIN or DNS_R_NCACHENXRRSET for a negative
 * entry, and otherwise the initial result value ISC_R_SUCCESS
 * (the final return statement is not visible in this listing).
 */
5618 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5619 dns_rdatatype_t type, dns_rdatatype_t covers,
5620 isc_stdtime_t now, dns_rdataset_t *rdataset,
5621 dns_rdataset_t *sigrdataset)
5623 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5624 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5625 rdatasetheader_t *header, *header_next, *found, *foundsig;
5626 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5627 isc_result_t result;
5629 isc_rwlocktype_t locktype;
5631 REQUIRE(VALID_RBTDB(rbtdb));
5632 REQUIRE(type != dns_rdatatype_any);
5636 result = ISC_R_SUCCESS;
5639 isc_stdtime_get(&now);
5641 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5642 locktype = isc_rwlocktype_read;
5643 NODE_LOCK(lock, locktype);
5647 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5648 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5650 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5654 for (header = rbtnode->data; header != NULL; header = header_next) {
5655 header_next = header->next;
5656 if (header->rdh_ttl <= now) {
5657 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5658 (locktype == isc_rwlocktype_write ||
5659 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5661 * We update the node's status only when we
5662 * can get write access.
5664 locktype = isc_rwlocktype_write;
5667 * We don't check if refcurrent(rbtnode) == 0
5668 * and try to free like we do in cache_find(),
5669 * because refcurrent(rbtnode) must be
5670 * non-zero. This is so because 'node' is an
5671 * argument to the function.
5673 header->attributes |= RDATASET_ATTR_STALE;
5676 } else if (EXISTS(header)) {
5677 if (header->type == matchtype)
5679 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5680 header->type == negtype)
5682 else if (header->type == sigmatchtype)
5686 if (found != NULL) {
5687 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5688 if (!NEGATIVE(found) && foundsig != NULL)
5689 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5693 NODE_UNLOCK(lock, locktype);
5696 return (ISC_R_NOTFOUND);
5698 if (NEGATIVE(found)) {
5700 * We found a negative cache entry.
5702 if (NXDOMAIN(found))
5703 result = DNS_R_NCACHENXDOMAIN;
5705 result = DNS_R_NCACHENXRRSET;
/*
 * Create an iterator over all rdatasets at a node.
 *
 * Parameters:
 *   db        - the database; must be a valid rbtdb.
 *   node      - the node whose rdatasets will be iterated.
 *   version   - version to iterate; consulted only for databases that do
 *               not have DNS_DBATTR_CACHE set.
 *   now       - stored in the iterator; isc_stdtime_get() may fill it in
 *               (NOTE(review): the condition is not visible in this
 *               listing -- presumably for caches when now is 0).
 *   iteratorp - receives the new iterator.
 *
 * For a non-cache database the iterator pins a version: the current
 * version is looked up when none was supplied, and a reference-count
 * increment on the version is also visible (presumably for the case
 * where the caller did supply one -- confirm).  The node reference count
 * is incremented under the strong node lock so the node outlives the
 * iterator, and the iterator starts with no current header.
 *
 * Returns ISC_R_NOMEMORY if the allocation fails, else ISC_R_SUCCESS.
 */
5712 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5713 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5715 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5716 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5717 rbtdb_version_t *rbtversion = version;
5718 rbtdb_rdatasetiter_t *iterator;
5721 REQUIRE(VALID_RBTDB(rbtdb));
5723 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5724 if (iterator == NULL)
5725 return (ISC_R_NOMEMORY);
5727 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5729 if (rbtversion == NULL)
5731 (dns_dbversion_t **) (void *)(&rbtversion));
5735 isc_refcount_increment(&rbtversion->references,
5741 isc_stdtime_get(&now);
5745 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5746 iterator->common.methods = &rdatasetiter_methods;
5747 iterator->common.db = db;
5748 iterator->common.node = node;
5749 iterator->common.version = (dns_dbversion_t *)rbtversion;
5750 iterator->common.now = now;
/* The node reference is taken under the strong lock of its bucket. */
5752 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5754 dns_rbtnode_refincrement(rbtnode, &refs);
5757 iterator->current = NULL;
5759 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5761 *iteratorp = (dns_rdatasetiter_t *)iterator;
5763 return (ISC_R_SUCCESS);
/*
 * Report whether a node holds both a CNAME and other data in the version
 * identified by serial.
 *
 * The existing note in the body says the caller must hold the node lock.
 *
 * Each top-level header is classified by type.  For a CNAME header, and
 * for any header whose base type is not KEY, SIG, NSEC or RRSIG, the
 * down chain is walked to find a header whose serial is not newer than
 * serial, with NONEXISTENT headers treated specially (NOTE(review): the
 * rest of the selection condition and the NONEXISTENT handling are on
 * lines not visible in this listing).  other_data is set when such other
 * data is found; the corresponding assignment to cname is not visible
 * here.
 *
 * The visible final test is cname && other_data (presumably returning
 * true as soon as both are seen -- confirm against the full source).
 */
5766 static isc_boolean_t
5767 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5768 rdatasetheader_t *header, *header_next;
5769 isc_boolean_t cname, other_data;
5770 dns_rdatatype_t rdtype;
5773 * The caller must hold the node lock.
5777 * Look for CNAME and "other data" rdatasets active in our version.
5780 other_data = ISC_FALSE;
5781 for (header = node->data; header != NULL; header = header_next) {
5782 header_next = header->next;
5783 if (header->type == dns_rdatatype_cname) {
5785 * Look for an active extant CNAME.
5788 if (header->serial <= serial &&
5791 * Is this a "this rdataset doesn't
5794 if (NONEXISTENT(header))
5798 header = header->down;
5799 } while (header != NULL);
5804 * Look for active extant "other data".
5806 * "Other data" is any rdataset whose type is not
5807 * KEY, NSEC, SIG or RRSIG.
5809 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5810 if (rdtype != dns_rdatatype_key &&
5811 rdtype != dns_rdatatype_sig &&
5812 rdtype != dns_rdatatype_nsec &&
5813 rdtype != dns_rdatatype_rrsig) {
5815 * Is it active and extant?
5818 if (header->serial <= serial &&
5821 * Is this a "this rdataset
5822 * doesn't exist" record?
5824 if (NONEXISTENT(header))
5828 header = header->down;
5829 } while (header != NULL);
5831 other_data = ISC_TRUE;
5836 if (cname && other_data)
/*
 * Insert newheader into the heap rbtdb->heaps[idx].
 *
 * Preconditions enforced with INSIST: the database is not a cache, the
 * header is not already on a heap (heap_index is 0) and it is not linked
 * on a list.
 *
 * NOTE(review): the return statement is not visible in this listing --
 * presumably the isc_heap_insert() result is returned; confirm.
 */
5843 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5844 isc_result_t result;
5846 INSIST(!IS_CACHE(rbtdb));
5847 INSIST(newheader->heap_index == 0);
5848 INSIST(!ISC_LINK_LINKED(newheader, link));
5850 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * Add the rdataset header newheader to rbtnode.
 *
 * The existing note in the body says the caller must hold the node lock.
 *
 * Parameters:
 *   rbtdb         - the database.
 *   rbtnode       - node receiving the header.
 *   rbtversion    - version being modified; NULL selects the cache code
 *                   paths (trust comparison, negative-cache handling).
 *   newheader     - header to add.  On the paths visible here that do
 *                   not link it into the node it is released with
 *                   free_rdataset().
 *   options       - DNS_DBADD_MERGE (requires a version), DNS_DBADD_FORCE
 *                   (treat the new data as dns_trust_ultimate),
 *                   DNS_DBADD_EXACT and DNS_DBADD_EXACTTTL (merge flags).
 *   loading       - true while loading; no changed record is created
 *                   and a replaced header is freed immediately.
 *   addedrdataset - when non-NULL, bound to the header that ends up
 *                   representing the data (the new one, or the existing
 *                   one when the add is a no-op).
 *   now           - current time used for ttl comparisons.
 *
 * Outline of the visible logic:
 *   1. For a versioned, non-loading add, record the node as changed;
 *      failure returns ISC_R_NOMEMORY.
 *   2. For a cache add of existing data: a negative entry covering ANY
 *      expires every other header, otherwise a matching RRSIG header is
 *      remembered; a positive entry is checked against an unexpired
 *      NXDOMAIN/NODATA(ANY) entry, which wins if it has higher trust
 *      (DNS_R_UNCHANGED) and is expired otherwise.
 *   3. Find the top-level header of the same type (or its negative
 *      counterpart) and skip IGNORE headers on its down chain.
 *   4. If one exists: deleting something already nonexistent, or adding
 *      lower-trust data over unexpired cache data, is DNS_R_UNCHANGED;
 *      with merge the slabs are combined; equal unexpired NS, A and AAAA
 *      cache data is kept, taking only a shorter ttl and any missing
 *      noqname/closest proofs; otherwise the new header replaces or is
 *      stacked above the old one and is entered in the per-bucket list
 *      and heap (cache) or resign heap (zone).
 *   5. If none exists: a deletion is DNS_R_UNCHANGED; otherwise the new
 *      header is linked in and registered the same way.
 *   6. A versioned add that leaves CNAME and other data together returns
 *      DNS_R_CNAMEANDOTHER; otherwise ISC_R_SUCCESS.
 *
 * NOTE(review): many guard lines and else branches of this function are
 * not visible in this listing (for example the handling of a failed
 * merge and the loading/non-loading split when linking).  Treat the
 * outline above as a reading aid and confirm details against the full
 * source.
 */
5855 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5856 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5857 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5859 rbtdb_changed_t *changed = NULL;
5860 rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5861 unsigned char *merged;
5862 isc_result_t result;
5863 isc_boolean_t header_nx;
5864 isc_boolean_t newheader_nx;
5865 isc_boolean_t merge;
5866 dns_rdatatype_t rdtype, covers;
5867 rbtdb_rdatatype_t negtype, sigtype;
5872 * Add an rdatasetheader_t to a node.
5876 * Caller must be holding the node lock.
5879 if ((options & DNS_DBADD_MERGE) != 0) {
5880 REQUIRE(rbtversion != NULL);
5885 if ((options & DNS_DBADD_FORCE) != 0)
5886 trust = dns_trust_ultimate;
5888 trust = newheader->trust;
5890 if (rbtversion != NULL && !loading) {
5892 * We always add a changed record, even if no changes end up
5893 * being made to this node, because it's harmless and
5894 * simplifies the code.
5896 changed = add_changed(rbtdb, rbtversion, rbtnode);
5897 if (changed == NULL) {
5898 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5899 return (ISC_R_NOMEMORY);
5903 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5904 topheader_prev = NULL;
5907 if (rbtversion == NULL && !newheader_nx) {
5908 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5909 if (NEGATIVE(newheader)) {
5911 * We're adding a negative cache entry.
5913 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5914 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5916 for (topheader = rbtnode->data;
5918 topheader = topheader->next) {
5920 * If we're adding an negative cache entry
5921 * which covers all types (NXDOMAIN,
5922 * NODATA(QTYPE=ANY)).
5924 * We make all other data stale so that the
5925 * only rdataset that can be found at this
5926 * node is the negative cache entry.
5928 * Otherwise look for any RRSIGs of the
5929 * given type so they can be marked stale
5932 if (covers == dns_rdatatype_any) {
5933 set_ttl(rbtdb, topheader, 0);
5934 topheader->attributes |=
5935 RDATASET_ATTR_STALE;
5937 } else if (topheader->type == sigtype)
5938 sigheader = topheader;
5940 if (covers == dns_rdatatype_any)
5942 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5945 * We're adding something that isn't a
5946 * negative cache entry. Look for an extant
5947 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5950 for (topheader = rbtnode->data;
5952 topheader = topheader->next) {
5953 if (topheader->type ==
5954 RBTDB_RDATATYPE_NCACHEANY)
5957 if (topheader != NULL && EXISTS(topheader) &&
5958 topheader->rdh_ttl > now) {
5962 if (trust < topheader->trust) {
5964 * The NXDOMAIN/NODATA(QTYPE=ANY)
5967 free_rdataset(rbtdb,
5970 if (addedrdataset != NULL)
5971 bind_rdataset(rbtdb, rbtnode,
5974 return (DNS_R_UNCHANGED);
5977 * The new rdataset is better. Expire the
5978 * NXDOMAIN/NODATA(QTYPE=ANY).
5980 set_ttl(rbtdb, topheader, 0);
5981 topheader->attributes |= RDATASET_ATTR_STALE;
5986 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5990 for (topheader = rbtnode->data;
5992 topheader = topheader->next) {
5993 if (topheader->type == newheader->type ||
5994 topheader->type == negtype)
5996 topheader_prev = topheader;
6001 * If header isn't NULL, we've found the right type. There may be
6002 * IGNORE rdatasets between the top of the chain and the first real
6003 * data. We skip over them.
6006 while (header != NULL && IGNORE(header))
6007 header = header->down;
6008 if (header != NULL) {
6009 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6012 * Deleting an already non-existent rdataset has no effect.
6014 if (header_nx && newheader_nx) {
6015 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6016 return (DNS_R_UNCHANGED);
6020 * Trying to add an rdataset with lower trust to a cache DB
6021 * has no effect, provided that the cache data isn't stale.
6023 if (rbtversion == NULL && trust < header->trust &&
6024 (header->rdh_ttl > now || header_nx)) {
6025 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6026 if (addedrdataset != NULL)
6027 bind_rdataset(rbtdb, rbtnode, header, now,
6029 return (DNS_R_UNCHANGED);
6033 * Don't merge if a nonexistent rdataset is involved.
6035 if (merge && (header_nx || newheader_nx))
6039 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6040 * that is the union of 'newheader' and 'header'.
6043 unsigned int flags = 0;
6044 INSIST(rbtversion->serial >= header->serial);
6046 result = ISC_R_SUCCESS;
6048 if ((options & DNS_DBADD_EXACT) != 0)
6049 flags |= DNS_RDATASLAB_EXACT;
6050 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6051 newheader->rdh_ttl != header->rdh_ttl)
6052 result = DNS_R_NOTEXACT;
6053 else if (newheader->rdh_ttl != header->rdh_ttl)
6054 flags |= DNS_RDATASLAB_FORCE;
6055 if (result == ISC_R_SUCCESS)
6056 result = dns_rdataslab_merge(
6057 (unsigned char *)header,
6058 (unsigned char *)newheader,
6059 (unsigned int)(sizeof(*newheader)),
6061 rbtdb->common.rdclass,
6062 (dns_rdatatype_t)header->type,
6064 if (result == ISC_R_SUCCESS) {
6066 * If 'header' has the same serial number as
6067 * we do, we could clean it up now if we knew
6068 * that our caller had no references to it.
6069 * We don't know this, however, so we leave it
6070 * alone. It will get cleaned up when
6071 * clean_zone_node() runs.
6073 free_rdataset(rbtdb, rbtdb->common.mctx,
6075 newheader = (rdatasetheader_t *)merged;
6076 init_rdataset(rbtdb, newheader);
6077 if (loading && RESIGN(newheader) &&
6079 header->resign < newheader->resign)
6080 newheader->resign = header->resign;
6082 free_rdataset(rbtdb, rbtdb->common.mctx,
6088 * Don't replace existing NS, A and AAAA RRsets
6089 * in the cache if they are already exist. This
6090 * prevents named being locked to old servers.
6091 * Don't lower trust of existing record if the
6094 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6095 header->type == dns_rdatatype_ns &&
6096 !header_nx && !newheader_nx &&
6097 header->trust >= newheader->trust &&
6098 dns_rdataslab_equalx((unsigned char *)header,
6099 (unsigned char *)newheader,
6100 (unsigned int)(sizeof(*newheader)),
6101 rbtdb->common.rdclass,
6102 (dns_rdatatype_t)header->type)) {
6104 * Honour the new ttl if it is less than the
6107 if (header->rdh_ttl > newheader->rdh_ttl)
6108 set_ttl(rbtdb, header, newheader->rdh_ttl);
6109 if (header->noqname == NULL &&
6110 newheader->noqname != NULL) {
6111 header->noqname = newheader->noqname;
6112 newheader->noqname = NULL;
6114 if (header->closest == NULL &&
6115 newheader->closest != NULL) {
6116 header->closest = newheader->closest;
6117 newheader->closest = NULL;
6119 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6120 if (addedrdataset != NULL)
6121 bind_rdataset(rbtdb, rbtnode, header, now,
6123 return (ISC_R_SUCCESS);
6125 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6126 (header->type == dns_rdatatype_a ||
6127 header->type == dns_rdatatype_aaaa) &&
6128 !header_nx && !newheader_nx &&
6129 header->trust >= newheader->trust &&
6130 dns_rdataslab_equal((unsigned char *)header,
6131 (unsigned char *)newheader,
6132 (unsigned int)(sizeof(*newheader)))) {
6134 * Honour the new ttl if it is less than the
6137 if (header->rdh_ttl > newheader->rdh_ttl)
6138 set_ttl(rbtdb, header, newheader->rdh_ttl);
6139 if (header->noqname == NULL &&
6140 newheader->noqname != NULL) {
6141 header->noqname = newheader->noqname;
6142 newheader->noqname = NULL;
6144 if (header->closest == NULL &&
6145 newheader->closest != NULL) {
6146 header->closest = newheader->closest;
6147 newheader->closest = NULL;
6149 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6150 if (addedrdataset != NULL)
6151 bind_rdataset(rbtdb, rbtnode, header, now,
6153 return (ISC_R_SUCCESS);
6155 INSIST(rbtversion == NULL ||
6156 rbtversion->serial >= topheader->serial);
6157 if (topheader_prev != NULL)
6158 topheader_prev->next = newheader;
6160 rbtnode->data = newheader;
6161 newheader->next = topheader->next;
6164 * There are no other references to 'header' when
6165 * loading, so we MAY clean up 'header' now.
6166 * Since we don't generate changed records when
6167 * loading, we MUST clean up 'header' now.
6169 newheader->down = NULL;
6170 free_rdataset(rbtdb, rbtdb->common.mctx, header);
6172 newheader->down = topheader;
6173 topheader->next = newheader;
6175 if (changed != NULL)
6176 changed->dirty = ISC_TRUE;
6177 if (rbtversion == NULL) {
6178 set_ttl(rbtdb, header, 0);
6179 header->attributes |= RDATASET_ATTR_STALE;
6180 if (sigheader != NULL) {
6181 set_ttl(rbtdb, sigheader, 0);
6182 sigheader->attributes |=
6183 RDATASET_ATTR_STALE;
6186 idx = newheader->node->locknum;
6187 if (IS_CACHE(rbtdb)) {
6188 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6191 * XXXMLG We don't check the return value
6192 * here. If it fails, we will not do TTL
6193 * based expiry on this node. However, we
6194 * will do it on the LRU side, so memory
6195 * will not leak... for long.
6197 isc_heap_insert(rbtdb->heaps[idx], newheader);
6198 } else if (RESIGN(newheader))
6199 resign_insert(rbtdb, idx, newheader);
6203 * No non-IGNORED rdatasets of the given type exist at
6208 * If we're trying to delete the type, don't bother.
6211 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6212 return (DNS_R_UNCHANGED);
6215 if (topheader != NULL) {
6217 * We have an list of rdatasets of the given type,
6218 * but they're all marked IGNORE. We simply insert
6219 * the new rdataset at the head of the list.
6221 * Ignored rdatasets cannot occur during loading, so
6225 INSIST(rbtversion == NULL ||
6226 rbtversion->serial >= topheader->serial);
6227 if (topheader_prev != NULL)
6228 topheader_prev->next = newheader;
6230 rbtnode->data = newheader;
6231 newheader->next = topheader->next;
6232 newheader->down = topheader;
6233 topheader->next = newheader;
6235 if (changed != NULL)
6236 changed->dirty = ISC_TRUE;
6239 * No rdatasets of the given type exist at the node.
6241 newheader->next = rbtnode->data;
6242 newheader->down = NULL;
6243 rbtnode->data = newheader;
6245 idx = newheader->node->locknum;
6246 if (IS_CACHE(rbtdb)) {
6247 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6249 isc_heap_insert(rbtdb->heaps[idx], newheader);
6250 } else if (RESIGN(newheader)) {
6251 resign_insert(rbtdb, idx, newheader);
6256 * Check if the node now contains CNAME and other data.
6258 if (rbtversion != NULL &&
6259 cname_and_other_data(rbtnode, rbtversion->serial))
6260 return (DNS_R_CNAMEANDOTHER);
6262 if (addedrdataset != NULL)
6263 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6265 return (ISC_R_SUCCESS);
/*
 * Classify whether an rdataset type at a node counts as a delegation.
 *
 * For a cache database only DNAME is tested.  For other databases the
 * test is DNAME, or NS at a node that is either not the origin node or
 * belongs to a stub database.
 *
 * NOTE(review): the return statements are not visible in this listing --
 * presumably the function returns true when the tested condition holds
 * and false otherwise; confirm.
 */
6268 static inline isc_boolean_t
6269 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6270 rbtdb_rdatatype_t type)
6272 if (IS_CACHE(rbtdb)) {
6273 if (type == dns_rdatatype_dname)
6277 } else if (type == dns_rdatatype_dname ||
6278 (type == dns_rdatatype_ns &&
6279 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * Copy the noqname proof attached to rdataset into newheader->noqname.
 *
 * The proof (owner name, negative rdataset and its signature rdataset)
 * is fetched with dns_rdataset_getnoqname(), which is required to
 * succeed (RUNTIME_CHECK).  A struct noqname is allocated from the
 * database memory context, the name is duplicated into it and both
 * rdatasets are converted to slabs whose base pointers are stored in the
 * neg and negsig fields.  The temporary rdatasets are disassociated on
 * both the success path and the cleanup path.
 *
 * Returns ISC_R_SUCCESS once newheader->noqname has been set.  On the
 * failure path the partially built structure is released with
 * free_noqname() (NOTE(review): the jumps to the cleanup code and the
 * final return are not visible in this listing -- presumably the failing
 * result, e.g. ISC_R_NOMEMORY, is returned; confirm).
 */
6284 static inline isc_result_t
6285 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6286 dns_rdataset_t *rdataset)
6288 struct noqname *noqname;
6289 isc_mem_t *mctx = rbtdb->common.mctx;
6291 dns_rdataset_t neg, negsig;
6292 isc_result_t result;
6295 dns_name_init(&name, NULL);
6296 dns_rdataset_init(&neg);
6297 dns_rdataset_init(&negsig);
6299 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6300 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6302 noqname = isc_mem_get(mctx, sizeof(*noqname));
6303 if (noqname == NULL) {
6304 result = ISC_R_NOMEMORY;
/* Fields are cleared before any step that may fail and reach cleanup. */
6307 dns_name_init(&noqname->name, NULL);
6308 noqname->neg = NULL;
6309 noqname->negsig = NULL;
6310 noqname->type = neg.type;
6311 result = dns_name_dup(&name, mctx, &noqname->name);
6312 if (result != ISC_R_SUCCESS)
6314 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6315 if (result != ISC_R_SUCCESS)
6317 noqname->neg = r.base;
6318 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6319 if (result != ISC_R_SUCCESS)
6321 noqname->negsig = r.base;
6322 dns_rdataset_disassociate(&neg);
6323 dns_rdataset_disassociate(&negsig);
6324 newheader->noqname = noqname;
6325 return (ISC_R_SUCCESS);
6328 dns_rdataset_disassociate(&neg);
6329 dns_rdataset_disassociate(&negsig);
6330 free_noqname(mctx, &noqname);
/*
 * Copy the closest-encloser proof attached to rdataset into
 * newheader->closest.
 *
 * This mirrors addnoqname(): the proof is fetched with
 * dns_rdataset_getclosest(), which is required to succeed
 * (RUNTIME_CHECK); a struct noqname is allocated from the database
 * memory context, the name is duplicated into it and the negative and
 * signature rdatasets are converted to slabs stored in the neg and
 * negsig fields.  The temporary rdatasets are disassociated on both the
 * success path and the cleanup path.
 *
 * Returns ISC_R_SUCCESS once newheader->closest has been set.  On the
 * failure path the partially built structure is released with
 * free_noqname() (NOTE(review): the jumps to the cleanup code and the
 * final return are not visible in this listing -- presumably the failing
 * result is returned; confirm).
 */
6334 static inline isc_result_t
6335 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6336 dns_rdataset_t *rdataset)
6338 struct noqname *closest;
6339 isc_mem_t *mctx = rbtdb->common.mctx;
6341 dns_rdataset_t neg, negsig;
6342 isc_result_t result;
6345 dns_name_init(&name, NULL);
6346 dns_rdataset_init(&neg);
6347 dns_rdataset_init(&negsig);
6349 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6350 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6352 closest = isc_mem_get(mctx, sizeof(*closest));
6353 if (closest == NULL) {
6354 result = ISC_R_NOMEMORY;
/* Fields are cleared before any step that may fail and reach cleanup. */
6357 dns_name_init(&closest->name, NULL);
6358 closest->neg = NULL;
6359 closest->negsig = NULL;
6360 closest->type = neg.type;
6361 result = dns_name_dup(&name, mctx, &closest->name);
6362 if (result != ISC_R_SUCCESS)
6364 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6365 if (result != ISC_R_SUCCESS)
6367 closest->neg = r.base;
6368 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6369 if (result != ISC_R_SUCCESS)
6371 closest->negsig = r.base;
6372 dns_rdataset_disassociate(&neg);
6373 dns_rdataset_disassociate(&negsig);
6374 newheader->closest = closest;
6375 return (ISC_R_SUCCESS);
6378 dns_rdataset_disassociate(&neg);
6379 dns_rdataset_disassociate(&negsig);
6380 free_noqname(mctx, &closest);
/*
 * Declaration of the zone method table ahead of its use: addrdataset()
 * below compares rbtdb->common.methods against its address to recognise
 * a zone database.  The initialiser is not part of this declaration.
 */
6384 static dns_dbmethods_t zone_methods;
6387 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6388 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6389 dns_rdataset_t *addedrdataset)
6391 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6392 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6393 rbtdb_version_t *rbtversion = version;
6394 isc_region_t region;
6395 rdatasetheader_t *newheader;
6396 rdatasetheader_t *header;
6397 isc_result_t result;
6398 isc_boolean_t delegating;
6399 isc_boolean_t newnsec;
6400 isc_boolean_t tree_locked = ISC_FALSE;
6401 isc_boolean_t cache_is_overmem = ISC_FALSE;
6403 REQUIRE(VALID_RBTDB(rbtdb));
6405 if (rbtdb->common.methods == &zone_methods)
6406 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6407 (rdataset->type == dns_rdatatype_nsec3 ||
6408 rdataset->covers == dns_rdatatype_nsec3)) ||
6409 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6410 rdataset->type != dns_rdatatype_nsec3 &&
6411 rdataset->covers != dns_rdatatype_nsec3)));
6413 if (rbtversion == NULL) {
6415 isc_stdtime_get(&now);
6419 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6421 sizeof(rdatasetheader_t));
6422 if (result != ISC_R_SUCCESS)
6425 newheader = (rdatasetheader_t *)region.base;
6426 init_rdataset(rbtdb, newheader);
6427 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6428 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6430 newheader->attributes = 0;
6431 newheader->noqname = NULL;
6432 newheader->closest = NULL;
6433 newheader->count = init_count++;
6434 newheader->trust = rdataset->trust;
6435 newheader->additional_auth = NULL;
6436 newheader->additional_glue = NULL;
6437 newheader->last_used = now;
6438 newheader->node = rbtnode;
6439 if (rbtversion != NULL) {
6440 newheader->serial = rbtversion->serial;
6443 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6444 newheader->attributes |= RDATASET_ATTR_RESIGN;
6445 newheader->resign = rdataset->resign;
6447 newheader->resign = 0;
6449 newheader->serial = 1;
6450 newheader->resign = 0;
6451 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6452 newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6453 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6454 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6455 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6456 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6457 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6458 result = addnoqname(rbtdb, newheader, rdataset);
6459 if (result != ISC_R_SUCCESS) {
6460 free_rdataset(rbtdb, rbtdb->common.mctx,
6465 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6466 result = addclosest(rbtdb, newheader, rdataset);
6467 if (result != ISC_R_SUCCESS) {
6468 free_rdataset(rbtdb, rbtdb->common.mctx,
6476 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6477 * just DNAME for the cache), then we need to set the callback bit
6480 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6481 delegating = ISC_TRUE;
6483 delegating = ISC_FALSE;
6486 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6488 if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6489 rdataset->type == dns_rdatatype_nsec)
6492 newnsec = ISC_FALSE;
6495 * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6496 * or the DB is a cache in an overmem state, hold an exclusive lock on
6497 * the tree. In the latter case the lock does not necessarily have to
6498 * be acquired but it will help purge stale entries more effectively.
6500 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6501 cache_is_overmem = ISC_TRUE;
6502 if (delegating || newnsec || cache_is_overmem) {
6503 tree_locked = ISC_TRUE;
6504 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6507 if (cache_is_overmem)
6508 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6510 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6511 isc_rwlocktype_write);
6513 if (rbtdb->rrsetstats != NULL) {
6514 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6515 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6518 if (IS_CACHE(rbtdb)) {
6520 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6522 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6523 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6524 expire_header(rbtdb, header, tree_locked);
6527 * If we've been holding a write lock on the tree just for
6528 * cleaning, we can release it now. However, we still need the
6531 if (tree_locked && !delegating && !newnsec) {
6532 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6533 tree_locked = ISC_FALSE;
6537 result = ISC_R_SUCCESS;
6539 dns_fixedname_t fname;
6541 dns_rbtnode_t *nsecnode;
6543 dns_fixedname_init(&fname);
6544 name = dns_fixedname_name(&fname);
6545 dns_rbt_fullnamefromnode(rbtnode, name);
6547 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6548 if (result == ISC_R_SUCCESS) {
6549 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6550 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6551 } else if (result == ISC_R_EXISTS) {
6552 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6553 result = ISC_R_SUCCESS;
6557 if (result == ISC_R_SUCCESS)
6558 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6559 ISC_FALSE, addedrdataset, now);
6560 if (result == ISC_R_SUCCESS && delegating)
6561 rbtnode->find_callback = 1;
6563 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6564 isc_rwlocktype_write);
6567 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6570 * Update the zone's secure status. If version is non-NULL
6571 * this is deferred until closeversion() is called.
6573 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6574 iszonesecure(db, version, rbtdb->origin_node);
/*
 * subtractrdataset() -- remove the rdata listed in rdataset from the
 * matching rdataset stored at node, within the given version.
 *
 * NOTE(review): this listing omits short source lines (lone braces, else,
 * break, goto, return, labels), so branch structure is described only
 * where the visible lines establish it.
 *
 * Visible flow:
 *  - The input is converted to a slab with room reserved for an
 *    rdatasetheader_t and the header fields are initialised; the serial is
 *    taken from rbtversion->serial, so version is dereferenced
 *    unconditionally.
 *  - Under the node write lock, add_changed() is called for this version
 *    and node; if it returns NULL the new header is freed, the lock is
 *    released and ISC_R_NOMEMORY is returned.
 *  - The header chain starting at rbtnode->data is searched for an entry
 *    of the same type; headers for which IGNORE() is true are skipped by
 *    following ->down.
 *  - If a header for which EXISTS() is true is found,
 *    dns_rdataslab_subtract() builds the remaining data.  With
 *    DNS_DBSUB_EXACT a TTL mismatch yields DNS_R_NOTEXACT instead.
 *    DNS_R_NXRRSET from the subtraction leads to a fresh header marked
 *    RDATASET_ATTR_NONEXISTENT.  The new header is then linked in front of
 *    topheader and changed->dirty is set.
 *  - Otherwise the new header is freed and the result becomes
 *    DNS_R_NOTEXACT or DNS_R_UNCHANGED (presumably chosen by the
 *    DNS_DBSUB_EXACT test through an else that is not visible here).
 *  - On ISC_R_SUCCESS, newrdataset (when non-NULL) is bound to the new
 *    header before the node lock is released.
 *
 * NOTE(review): the final iszonesecure() call is guarded by
 * version == NULL, yet rbtversion->serial is read earlier without a NULL
 * check -- confirm whether a NULL version can ever reach this function.
 */
6580 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6581 dns_rdataset_t *rdataset, unsigned int options,
6582 dns_rdataset_t *newrdataset)
6584 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6585 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6586 rbtdb_version_t *rbtversion = version;
6587 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6588 unsigned char *subresult;
6589 isc_region_t region;
6590 isc_result_t result;
6591 rbtdb_changed_t *changed;
6593 REQUIRE(VALID_RBTDB(rbtdb));
/*
 * Zone databases only: NSEC3 data (type or covers) is accepted exactly on
 * nodes marked DNS_RBT_NSEC_NSEC3, and other data only on other nodes.
 */
6595 if (rbtdb->common.methods == &zone_methods)
6596 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6597 (rdataset->type == dns_rdatatype_nsec3 ||
6598 rdataset->covers == dns_rdatatype_nsec3)) ||
6599 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6600 rdataset->type != dns_rdatatype_nsec3 &&
6601 rdataset->covers != dns_rdatatype_nsec3)));
/* Build a slab from the input with space reserved for the header. */
6603 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6605 sizeof(rdatasetheader_t));
6606 if (result != ISC_R_SUCCESS)
6608 newheader = (rdatasetheader_t *)region.base;
6609 init_rdataset(rbtdb, newheader);
6610 set_ttl(rbtdb, newheader, rdataset->ttl);
6611 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6613 newheader->attributes = 0;
6614 newheader->serial = rbtversion->serial;
6615 newheader->trust = 0;
6616 newheader->noqname = NULL;
6617 newheader->closest = NULL;
6618 newheader->count = init_count++;
6619 newheader->additional_auth = NULL;
6620 newheader->additional_glue = NULL;
6621 newheader->last_used = 0;
6622 newheader->node = rbtnode;
6623 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6624 newheader->attributes |= RDATASET_ATTR_RESIGN;
6625 newheader->resign = rdataset->resign;
6627 newheader->resign = 0;
/* The header chain is inspected and modified under the node write lock. */
6629 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6630 isc_rwlocktype_write);
6632 changed = add_changed(rbtdb, rbtversion, rbtnode);
6633 if (changed == NULL) {
6634 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6635 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6636 isc_rwlocktype_write);
6637 return (ISC_R_NOMEMORY);
/* Look for the top-of-chain header whose type matches the new header. */
6640 topheader_prev = NULL;
6641 for (topheader = rbtnode->data;
6643 topheader = topheader->next) {
6644 if (topheader->type == newheader->type)
6646 topheader_prev = topheader;
6649 * If header isn't NULL, we've found the right type. There may be
6650 * IGNORE rdatasets between the top of the chain and the first real
6651 * data. We skip over them.
6654 while (header != NULL && IGNORE(header))
6655 header = header->down;
6656 if (header != NULL && EXISTS(header)) {
6657 unsigned int flags = 0;
6659 result = ISC_R_SUCCESS;
6660 if ((options & DNS_DBSUB_EXACT) != 0) {
6661 flags |= DNS_RDATASLAB_EXACT;
6662 if (newheader->rdh_ttl != header->rdh_ttl)
6663 result = DNS_R_NOTEXACT;
6665 if (result == ISC_R_SUCCESS)
6666 result = dns_rdataslab_subtract(
6667 (unsigned char *)header,
6668 (unsigned char *)newheader,
6669 (unsigned int)(sizeof(*newheader)),
6671 rbtdb->common.rdclass,
6672 (dns_rdatatype_t)header->type,
6674 if (result == ISC_R_SUCCESS) {
6675 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6676 newheader = (rdatasetheader_t *)subresult;
6677 init_rdataset(rbtdb, newheader);
6679 * We have to set the serial since the rdataslab
6680 * subtraction routine copies the reserved portion of
6681 * header, not newheader.
6683 newheader->serial = rbtversion->serial;
6685 * XXXJT: dns_rdataslab_subtract() copied the pointers
6686 * to additional info. We need to clear these fields
6687 * to avoid having duplicated references.
6689 newheader->additional_auth = NULL;
6690 newheader->additional_glue = NULL;
6691 } else if (result == DNS_R_NXRRSET) {
6693 * This subtraction would remove all of the rdata;
6694 * add a nonexistent header instead.
6696 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6697 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6698 if (newheader == NULL) {
6699 result = ISC_R_NOMEMORY;
6702 set_ttl(rbtdb, newheader, 0);
6703 newheader->type = topheader->type;
6704 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6705 newheader->trust = 0;
6706 newheader->serial = rbtversion->serial;
6707 newheader->noqname = NULL;
6708 newheader->closest = NULL;
6709 newheader->count = 0;
6710 newheader->additional_auth = NULL;
6711 newheader->additional_glue = NULL;
6712 newheader->node = rbtnode;
6713 newheader->resign = 0;
6714 newheader->last_used = 0;
6716 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6721 * If we're here, we want to link newheader in front of
6724 INSIST(rbtversion->serial >= topheader->serial);
6725 if (topheader_prev != NULL)
6726 topheader_prev->next = newheader;
6728 rbtnode->data = newheader;
6729 newheader->next = topheader->next;
6730 newheader->down = topheader;
6731 topheader->next = newheader;
6733 changed->dirty = ISC_TRUE;
6736 * The rdataset doesn't exist, so we don't need to do anything
6737 * to satisfy the deletion request.
6739 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6740 if ((options & DNS_DBSUB_EXACT) != 0)
6741 result = DNS_R_NOTEXACT;
6743 result = DNS_R_UNCHANGED;
6746 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6747 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6750 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6751 isc_rwlocktype_write);
6754 * Update the zone's secure status. If version is non-NULL
6755 * this is deferred until closeversion() is called.
6757 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6758 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * deleterdataset() -- record the deletion of the rdataset identified by
 * type and covers at node by adding a header marked
 * RDATASET_ATTR_NONEXISTENT.
 *
 * dns_rdatatype_any, and dns_rdatatype_rrsig with covers == 0, are
 * rejected with ISC_R_NOTIMPLEMENTED.  ISC_R_NOMEMORY is returned when the
 * header cannot be allocated.  Otherwise the header is passed to add()
 * with DNS_DBADD_FORCE while the node write lock is held.
 *
 * The serial comes from rbtversion when it is non-NULL; the assignment of
 * 0 is presumably the else branch (the else line is not visible in this
 * listing).
 *
 * After a successful add() with version == NULL on a non-cache database,
 * iszonesecure() is called for the current version and the origin node.
 */
6764 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6765 dns_rdatatype_t type, dns_rdatatype_t covers)
6767 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6768 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6769 rbtdb_version_t *rbtversion = version;
6770 isc_result_t result;
6771 rdatasetheader_t *newheader;
6773 REQUIRE(VALID_RBTDB(rbtdb));
/* Requests that do not name one concrete rdataset are not supported. */
6775 if (type == dns_rdatatype_any)
6776 return (ISC_R_NOTIMPLEMENTED);
6777 if (type == dns_rdatatype_rrsig && covers == 0)
6778 return (ISC_R_NOTIMPLEMENTED);
/* Build a data-less header that stands for the deleted rdataset. */
6780 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6781 if (newheader == NULL)
6782 return (ISC_R_NOMEMORY);
6783 set_ttl(rbtdb, newheader, 0);
6784 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6785 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6786 newheader->trust = 0;
6787 newheader->noqname = NULL;
6788 newheader->closest = NULL;
6789 newheader->additional_auth = NULL;
6790 newheader->additional_glue = NULL;
6791 if (rbtversion != NULL)
6792 newheader->serial = rbtversion->serial;
6794 newheader->serial = 0;
6795 newheader->count = 0;
6796 newheader->last_used = 0;
6797 newheader->node = rbtnode;
/* add() is called with the node write lock held. */
6799 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6800 isc_rwlocktype_write);
6802 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6803 ISC_FALSE, NULL, 0);
6805 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6806 isc_rwlocktype_write);
6809 * Update the zone's secure status. If version is non-NULL
6810 * this is deferred until closeversion() is called.
6812 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6813 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6819 * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
/*
 * loadnode() -- add name to the main tree and, when an NSEC record is
 * being loaded, to the auxiliary NSEC tree as well.
 *
 * NOTE(review): this listing omits short source lines (lone braces, the
 * test that guards the first early return, preprocessor lines), so only
 * the visible statements are described.
 *
 * rbtdb    database whose trees are updated.
 * name     owner name to add.
 * nodep    receives the main-tree node from dns_rbt_addnode().
 * hasnsec  presumably selects whether the NSEC tree is updated; the test
 *          that uses it is not visible here -- TODO confirm.
 *
 * Returns the main-tree result (ISC_R_SUCCESS or ISC_R_EXISTS) when the
 * NSEC tree did not need updating or was updated, and the main-tree
 * failure code when the first add fails.  If adding to the NSEC tree
 * fails with anything other than ISC_R_EXISTS, the main-tree node is
 * deleted again and the NSEC-tree failure code is returned, so callers do
 * not go on to use a node that no longer exists.
 */
6822 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6823 isc_boolean_t hasnsec)
6825 isc_result_t noderesult, nsecresult;
6826 dns_rbtnode_t *nsecnode;
6828 noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
/* A newly created main-tree node is also reported to the RPZ CIDR data. */
6831 if (noderesult == ISC_R_SUCCESS)
6832 dns_rpz_cidr_addip(rbtdb->rpz_cidr, name);
6836 return (noderesult);
6837 if (noderesult == ISC_R_EXISTS) {
6839 * Add a node to the auxiliary NSEC tree for an old node
6840 * just now getting an NSEC record.
6842 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6843 return (noderesult);
6844 } else if (noderesult != ISC_R_SUCCESS) {
6845 return (noderesult);
6849 * Build the auxiliary tree for NSECs as we go.
6850 * This tree speeds searches for closest NSECs that would otherwise
6851 * need to examine many irrelevant nodes in large TLDs.
6853 * Add nodes to the auxiliary tree after corresponding nodes have
6854 * been added to the main tree.
6857 nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6858 if (nsecresult == ISC_R_SUCCESS) {
6859 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6860 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6861 return (noderesult);
6864 if (nsecresult == ISC_R_EXISTS) {
6866 isc_log_write(dns_lctx,
6867 DNS_LOGCATEGORY_DATABASE,
6868 DNS_LOGMODULE_CACHE,
6870 "addnode: NSEC node already exists");
6872 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6873 return (noderesult);
/*
 * The NSEC-tree add failed.  Keep its failure code in nsecresult so that
 * it can be logged and returned; noderesult is reused for the result of
 * removing the main-tree node.
 * NOTE(review): the node is removed even when the main-tree add reported
 * ISC_R_EXISTS -- confirm that deleting a pre-existing node is intended.
 */
6876 noderesult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6877 if (noderesult != ISC_R_SUCCESS)
6878 isc_log_write(dns_lctx,
6879 DNS_LOGCATEGORY_DATABASE,
6880 DNS_LOGMODULE_CACHE,
6882 "loading_addrdataset: "
6883 "dns_rbt_deletenode: %s after "
6884 "dns_rbt_addnode(NSEC): %s",
6885 isc_result_totext(noderesult),
6886 isc_result_totext(nsecresult));
6887 return (nsecresult);
/*
 * loading_addrdataset() -- add one rdataset for name while a database
 * load is in progress.  beginload() hands this function out through its
 * addp argument; arg is used as the rbtdb_load_t load context.
 *
 * NOTE(review): this listing omits short source lines (lone braces, else,
 * return), so only the visible statements are described.
 *
 * Visible checks and steps:
 *  - An SOA rdataset whose owner is not the origin of a non-cache
 *    database is rejected with DNS_R_NOTZONETOP.
 *  - add_empty_wildcards() is called unless the data is NSEC3 (by type or
 *    by covers).
 *  - For a wildcard owner name, NS data yields DNS_R_INVALIDNS and NSEC3
 *    data yields DNS_R_INVALIDNSEC3; otherwise add_wildcard_magic() runs.
 *  - The node is obtained from the nsec3 tree for NSEC3 data, or from
 *    loadnode() otherwise.
 *  - When the node was newly created (ISC_R_SUCCESS) its locknum is
 *    derived from a hash modulo node_lock_count.
 *  - The rdataset is converted to a slab, a header with serial 1 and a
 *    TTL of rdataset->ttl + loadctx->now is filled in, and add() is called
 *    with DNS_DBADD_MERGE against the current version.
 *  - On success for a delegating type the node find_callback bit is set;
 *    DNS_R_UNCHANGED from add() is converted to ISC_R_SUCCESS.
 */
6891 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6892 rbtdb_load_t *loadctx = arg;
6893 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6894 dns_rbtnode_t *node;
6895 isc_result_t result;
6896 isc_region_t region;
6897 rdatasetheader_t *newheader;
6900 * This routine does no node locking. See comments in
6901 * 'load' below for more information on loading and
6907 * SOA records are only allowed at top of zone.
6909 if (rdataset->type == dns_rdatatype_soa &&
6910 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6911 return (DNS_R_NOTZONETOP);
6913 if (rdataset->type != dns_rdatatype_nsec3 &&
6914 rdataset->covers != dns_rdatatype_nsec3)
6915 add_empty_wildcards(rbtdb, name);
6917 if (dns_name_iswildcard(name)) {
6919 * NS record owners cannot legally be wild cards.
6921 if (rdataset->type == dns_rdatatype_ns)
6922 return (DNS_R_INVALIDNS);
6924 * NSEC3 record owners cannot legally be wild cards.
6926 if (rdataset->type == dns_rdatatype_nsec3)
6927 return (DNS_R_INVALIDNSEC3);
6928 result = add_wildcard_magic(rbtdb, name);
6929 if (result != ISC_R_SUCCESS)
/*
 * Choose the tree: NSEC3 data goes to the nsec3 tree, NSEC data goes
 * through loadnode() with hasnsec true, and the last call passes hasnsec
 * false (presumably the else branch; the else line is not visible here).
 */
6934 if (rdataset->type == dns_rdatatype_nsec3 ||
6935 rdataset->covers == dns_rdatatype_nsec3) {
6936 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6937 if (result == ISC_R_SUCCESS)
6938 node->nsec = DNS_RBT_NSEC_NSEC3;
6939 } else if (rdataset->type == dns_rdatatype_nsec) {
6940 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6942 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6944 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
/* Only a newly created node gets its lock bucket assigned here. */
6946 if (result == ISC_R_SUCCESS) {
6947 dns_name_t foundname;
6948 dns_name_init(&foundname, NULL);
6949 dns_rbt_namefromnode(node, &foundname);
6950 #ifdef DNS_RBT_USEHASH
6951 node->locknum = node->hashval % rbtdb->node_lock_count;
6953 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6954 rbtdb->node_lock_count;
/* Build a slab from the input with space reserved for the header. */
6958 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6960 sizeof(rdatasetheader_t));
6961 if (result != ISC_R_SUCCESS)
6963 newheader = (rdatasetheader_t *)region.base;
6964 init_rdataset(rbtdb, newheader);
6965 set_ttl(rbtdb, newheader,
6966 rdataset->ttl + loadctx->now); /* XXX overflow check */
6967 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6969 newheader->attributes = 0;
6970 newheader->trust = rdataset->trust;
6971 newheader->serial = 1;
6972 newheader->noqname = NULL;
6973 newheader->closest = NULL;
6974 newheader->count = init_count++;
6975 newheader->additional_auth = NULL;
6976 newheader->additional_glue = NULL;
6977 newheader->last_used = 0;
6978 newheader->node = node;
6979 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6980 newheader->attributes |= RDATASET_ATTR_RESIGN;
6981 newheader->resign = rdataset->resign;
6983 newheader->resign = 0;
/* Merge into whatever is already stored for this type at the node. */
6985 result = add(rbtdb, node, rbtdb->current_version, newheader,
6986 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6987 if (result == ISC_R_SUCCESS &&
6988 delegating_type(rbtdb, node, rdataset->type))
6989 node->find_callback = 1;
6990 else if (result == DNS_R_UNCHANGED)
6991 result = ISC_R_SUCCESS;
/*
 * beginload() -- start loading data into the database.
 *
 * Allocates an rbtdb_load_t from the database memory context (returning
 * ISC_R_NOMEMORY on failure) and records the database in it.  For a cache
 * the current time is stored in loadctx->now; what is stored otherwise is
 * not visible in this listing.  Under the database write lock the
 * attributes are checked with REQUIRE (the compared value is not visible
 * here) and RBTDB_ATTR_LOADING is set.  *addp is set to
 * loading_addrdataset and ISC_R_SUCCESS is returned.
 *
 * NOTE(review): the store through dbloadp is not visible in this listing;
 * endload() reads the context back through its own dbloadp argument.
 */
6997 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6998 rbtdb_load_t *loadctx;
7001 rbtdb = (dns_rbtdb_t *)db;
7003 REQUIRE(VALID_RBTDB(rbtdb));
7005 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
7006 if (loadctx == NULL)
7007 return (ISC_R_NOMEMORY);
7009 loadctx->rbtdb = rbtdb;
7010 if (IS_CACHE(rbtdb))
7011 isc_stdtime_get(&loadctx->now);
/* The loading state flag is changed under the database write lock. */
7015 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7017 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7019 rbtdb->attributes |= RBTDB_ATTR_LOADING;
7021 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7023 *addp = loading_addrdataset;
7026 return (ISC_R_SUCCESS);
/*
 * endload() -- finish a load started by beginload().
 *
 * Requires a non-NULL dbloadp and a load context that belongs to this
 * database (the line that reads the context out of dbloadp is not visible
 * in this listing).  Under the database write lock it requires that
 * RBTDB_ATTR_LOADING is set and RBTDB_ATTR_LOADED is clear, then clears
 * the former and sets the latter.  For a non-cache database
 * iszonesecure() is then called for the current version and origin node.
 * The load context is returned to the database memory context and
 * ISC_R_SUCCESS is returned.
 */
7030 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
7031 rbtdb_load_t *loadctx;
7032 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7034 REQUIRE(VALID_RBTDB(rbtdb));
7035 REQUIRE(dbloadp != NULL);
7037 REQUIRE(loadctx->rbtdb == rbtdb);
/* Flip the state from loading to loaded under the database write lock. */
7039 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7041 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7042 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7044 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7045 rbtdb->attributes |= RBTDB_ATTR_LOADED;
7047 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7050 * If there's a KEY rdataset at the zone origin containing a
7051 * zone key, we consider the zone secure.
7053 if (! IS_CACHE(rbtdb))
7054 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7058 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7060 return (ISC_R_SUCCESS);
/*
 * dump() -- write the given version of the database to filename.
 *
 * The visible lines contain two alternative bodies: one forwards to
 * dns_master_dump2() with dns_master_style_default and the requested
 * master format, the other marks masterformat as unused and returns
 * ISC_R_NOTIMPLEMENTED.
 * NOTE(review): the two bodies are presumably selected by preprocessor
 * conditionals that are not visible in this listing -- TODO confirm.
 */
7064 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7065 dns_masterformat_t masterformat) {
7068 rbtdb = (dns_rbtdb_t *)db;
7070 REQUIRE(VALID_RBTDB(rbtdb));
7073 return (dns_master_dump2(rbtdb->common.mctx, db, version,
7074 &dns_master_style_default,
7075 filename, masterformat));
7079 UNUSED(masterformat);
7081 return (ISC_R_NOTIMPLEMENTED);
/*
 * delete_callback() -- free a chain of rdataset headers.  This function is
 * passed to dns_rbt_create() together with the database, so arg is the
 * owning dns_rbtdb_t.
 *
 * The chain is walked through ->next and each header is released with
 * free_rdataset() while the write lock of the bucket given by
 * current->node->locknum is held.
 * NOTE(review): the statement that initialises current (presumably from
 * data) and the loop-advance statement are not visible in this listing.
 */
7086 delete_callback(void *data, void *arg) {
7087 dns_rbtdb_t *rbtdb = arg;
7088 rdatasetheader_t *current, *next;
7089 unsigned int locknum;
7092 locknum = current->node->locknum;
7093 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7094 while (current != NULL) {
/* Read the successor before the current header is freed. */
7095 next = current->next;
7096 free_rdataset(rbtdb, rbtdb->common.mctx, current);
7099 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/*
 * issecure() -- report whether the current version is marked
 * dns_db_secure.  The flag is read while the tree read lock is held.
 * (The return statement is not visible in this listing.)
 */
7102 static isc_boolean_t
7103 issecure(dns_db_t *db) {
7105 isc_boolean_t secure;
7107 rbtdb = (dns_rbtdb_t *)db;
7109 REQUIRE(VALID_RBTDB(rbtdb));
7111 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7112 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7113 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * isdnssec() -- report whether the current version has any secure status
 * other than dns_db_insecure.  The flag is read while the tree read lock
 * is held.  (The return statement is not visible in this listing.)
 */
7118 static isc_boolean_t
7119 isdnssec(dns_db_t *db) {
7121 isc_boolean_t dnssec;
7123 rbtdb = (dns_rbtdb_t *)db;
7125 REQUIRE(VALID_RBTDB(rbtdb));
7127 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7128 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7129 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * nodecount() -- obtain the node count of the main tree via
 * dns_rbt_nodecount() while the tree read lock is held.  Only rbtdb->tree
 * is counted in the visible lines.  (The declarations and the return
 * statement are not visible in this listing.)
 */
7135 nodecount(dns_db_t *db) {
7139 rbtdb = (dns_rbtdb_t *)db;
7141 REQUIRE(VALID_RBTDB(rbtdb));
7143 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7144 count = dns_rbt_nodecount(rbtdb->tree);
7145 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * settask() -- replace the task associated with the database.
 *
 * Under the database write lock any existing task is detached and the new
 * task is attached.
 * NOTE(review): the attach is presumably guarded by a test on task that
 * is not visible in this listing -- TODO confirm.
 */
7151 settask(dns_db_t *db, isc_task_t *task) {
7154 rbtdb = (dns_rbtdb_t *)db;
7156 REQUIRE(VALID_RBTDB(rbtdb));
7158 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7159 if (rbtdb->task != NULL)
7160 isc_task_detach(&rbtdb->task);
7162 isc_task_attach(task, &rbtdb->task);
7163 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * ispersistent() -- persistence query for this database implementation.
 * NOTE(review): the body of this function is not present in this listing,
 * so the value it returns cannot be documented from here.
 */
7166 static isc_boolean_t
7167 ispersistent(dns_db_t *db) {
/*
 * getoriginnode() -- hand out a reference to the origin node.
 *
 * Requires that nodep is non-NULL and that *nodep is NULL.  When the
 * database has an origin node, a new reference is taken under the strong
 * lock of the node bucket and *nodep is set to the origin node.  The
 * remaining visible lines insist that the database is a cache and set the
 * result to ISC_R_NOTFOUND; this is presumably the else branch for a
 * missing origin node (the else line is not visible in this listing).
 */
7173 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7174 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7175 dns_rbtnode_t *onode;
7176 isc_result_t result = ISC_R_SUCCESS;
7178 REQUIRE(VALID_RBTDB(rbtdb));
7179 REQUIRE(nodep != NULL && *nodep == NULL);
7181 /* Note that the access to origin_node doesn't require a DB lock */
7182 onode = (dns_rbtnode_t *)rbtdb->origin_node;
7183 if (onode != NULL) {
7184 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7185 new_reference(rbtdb, onode);
7186 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7188 *nodep = rbtdb->origin_node;
7190 INSIST(IS_CACHE(rbtdb));
7191 result = ISC_R_NOTFOUND;
/*
 * getnsec3parameters() -- copy the NSEC3 parameters recorded for a
 * version into the caller-supplied outputs.
 *
 * A NULL version selects the current version.  The result starts as
 * ISC_R_NOTFOUND and becomes ISC_R_SUCCESS only when the version has
 * havensec3 set.  When both salt and salt_length are non-NULL the caller
 * buffer must be at least as large as the stored salt (enforced with
 * REQUIRE) and the salt is copied; salt_length and iterations are stored
 * when their pointers are non-NULL.  All reads happen under the tree read
 * lock.
 * NOTE(review): the NULL tests that presumably guard the stores to hash
 * and flags are not visible in this listing -- TODO confirm.
 */
7198 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7199 isc_uint8_t *flags, isc_uint16_t *iterations,
7200 unsigned char *salt, size_t *salt_length)
7203 isc_result_t result = ISC_R_NOTFOUND;
7204 rbtdb_version_t *rbtversion = version;
7206 rbtdb = (dns_rbtdb_t *)db;
7208 REQUIRE(VALID_RBTDB(rbtdb));
7210 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7212 if (rbtversion == NULL)
7213 rbtversion = rbtdb->current_version;
7215 if (rbtversion->havensec3) {
7217 *hash = rbtversion->hash;
7218 if (salt != NULL && salt_length != NULL) {
/* The caller buffer must be able to hold the whole stored salt. */
7219 REQUIRE(*salt_length >= rbtversion->salt_length);
7220 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
7222 if (salt_length != NULL)
7223 *salt_length = rbtversion->salt_length;
7224 if (iterations != NULL)
7225 *iterations = rbtversion->iterations;
7227 *flags = rbtversion->flags;
7228 result = ISC_R_SUCCESS;
7230 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * setsigningtime() -- change the re-signing time of the rdataset bound to
 * rdataset and keep the per-bucket heap consistent with it.  Zone
 * databases only (REQUIRE on !IS_CACHE).
 *
 * The header is located through rdataset->private3.
 * NOTE(review): any adjustment of that pointer is not visible in this
 * listing.
 *
 * Under the node write lock the new time is stored.  If the header is
 * already in the heap (heap_index != 0) it is either removed from the
 * heap or repositioned with isc_heap_increased() when the new time is
 * earlier and isc_heap_decreased() when it is later; the test that
 * selects removal is not visible here.  If the header is not in the heap
 * and resign is non-zero, RDATASET_ATTR_RESIGN is set and the header is
 * inserted with resign_insert(), whose result becomes the return value.
 */
7236 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7237 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7238 isc_stdtime_t oldresign;
7239 isc_result_t result = ISC_R_SUCCESS;
7240 rdatasetheader_t *header;
7242 REQUIRE(VALID_RBTDB(rbtdb));
7243 REQUIRE(!IS_CACHE(rbtdb));
7244 REQUIRE(rdataset != NULL);
7246 header = rdataset->private3;
7249 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7250 isc_rwlocktype_write);
/* Remember the old time so the heap can be adjusted in the right direction. */
7252 oldresign = header->resign;
7253 header->resign = resign;
7254 if (header->heap_index != 0) {
7255 INSIST(RESIGN(header));
7257 isc_heap_delete(rbtdb->heaps[header->node->locknum],
7258 header->heap_index);
7259 header->heap_index = 0;
7260 } else if (resign < oldresign)
7261 isc_heap_increased(rbtdb->heaps[header->node->locknum],
7262 header->heap_index);
7263 else if (resign > oldresign)
7264 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7265 header->heap_index);
7266 } else if (resign && header->heap_index == 0) {
7267 header->attributes |= RDATASET_ATTR_RESIGN;
7268 result = resign_insert(rbtdb, header->node->locknum, header);
7270 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7271 isc_rwlocktype_write);
/*
 * getsigningtime() -- find the header with the earliest re-signing time
 * across all lock buckets and bind it to rdataset.
 *
 * With the tree read lock held, each bucket is read-locked in turn and
 * the element at heap index 1 is examined.  Candidates are compared with
 * isc_serial_lt() on their resign values.  When a better candidate is
 * found, the bucket lock of the previous best header is released; a
 * bucket whose element does not replace the best is unlocked again.
 * After the scan the chosen header is bound to rdataset, its full name is
 * written to foundname when that is non-NULL, its bucket lock is
 * released, and the result is set to ISC_R_SUCCESS.  The initial result
 * is ISC_R_NOTFOUND.
 * NOTE(review): the tests on this and header being NULL, the assignments
 * of header, and the jump taken when nothing was found are not visible in
 * this listing.
 */
7276 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7277 dns_name_t *foundname)
7279 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7280 rdatasetheader_t *header = NULL, *this;
7282 isc_result_t result = ISC_R_NOTFOUND;
7283 unsigned int locknum;
7285 REQUIRE(VALID_RBTDB(rbtdb));
7287 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7289 for (i = 0; i < rbtdb->node_lock_count; i++) {
7290 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7291 this = isc_heap_element(rbtdb->heaps[i], 1);
7293 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7294 isc_rwlocktype_read);
7299 else if (isc_serial_lt(this->resign, header->resign)) {
/* A better candidate was found: drop the lock of the previous best. */
7300 locknum = header->node->locknum;
7301 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7302 isc_rwlocktype_read);
7305 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7306 isc_rwlocktype_read);
7312 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7314 if (foundname != NULL)
7315 dns_rbt_fullnamefromnode(header->node, foundname);
7317 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7318 isc_rwlocktype_read);
7320 result = ISC_R_SUCCESS;
7323 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * resigned() -- note that the rdataset bound to rdataset has been
 * re-signed in the given writer version.
 *
 * Requires that version is the future version of the database and that
 * it is a writer.  With the tree write lock and the node write lock held,
 * a new node reference is taken, the header is removed from the heap of
 * its bucket, its heap_index is reset to 0, and the header is appended to
 * the resigned_list of the version.
 * NOTE(review): the header is read from rdataset->private3; any
 * adjustment of that pointer is not visible in this listing.
 */
7329 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7331 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7332 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7333 dns_rbtnode_t *node;
7334 rdatasetheader_t *header;
7336 REQUIRE(VALID_RBTDB(rbtdb));
7337 REQUIRE(rdataset != NULL);
7338 REQUIRE(rbtdb->future_version == rbtversion);
7339 REQUIRE(rbtversion->writer);
7341 node = rdataset->private2;
7342 header = rdataset->private3;
7345 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7346 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7347 isc_rwlocktype_write);
7349 * Delete from heap and save to re-signed list so that it can
7350 * be restored if we backout of this change.
7352 new_reference(rbtdb, node);
7353 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7354 header->heap_index = 0;
7355 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7357 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7358 isc_rwlocktype_write);
7359 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
/*
 * getrrsetstats() -- return the rrset statistics object of the database.
 * Only valid for cache databases (enforced with REQUIRE).  The pointer is
 * returned as stored, with no locking and no attach in this function.
 */
7362 static dns_stats_t *
7363 getrrsetstats(dns_db_t *db) {
7364 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7366 REQUIRE(VALID_RBTDB(rbtdb));
7367 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7369 return (rbtdb->rrsetstats);
/*
 * Method table installed as common.methods for databases that are not
 * caches (zone and stub types) by the database creation code.
 * NOTE(review): the initializer entries are not present in this listing.
 */
7372 static dns_dbmethods_t zone_methods = {
/*
 * Method table installed as common.methods for cache databases by the
 * database creation code.
 * NOTE(review): the initializer entries are not present in this listing.
 */
7418 static dns_dbmethods_t cache_methods = {
/*
 * Database creation entry point.  The function-name lines are not present
 * in this listing; presumably dns_rbtdb_create / dns_rbtdb64_create,
 * selected by DNS_RBTDB_VERSION64 -- TODO confirm.
 *
 * NOTE(review): this listing omits short source lines (lone braces, else,
 * goto, return, cleanup labels), so only visible statements are described.
 *
 * mctx      memory context for the database object and most of its parts.
 * origin    origin name; a copy is stored in common.origin.
 * type      dns_dbtype_cache selects cache_methods, anything else
 *           zone_methods (dns_dbtype_stub also sets DNS_DBATTR_STUB).
 * rdclass   stored in common.rdclass.
 * argv      argv[0], when used, supplies the memory context for the heaps
 *           (hmctx); otherwise hmctx is mctx.  The guarding test is not
 *           visible here.
 * dbp       receives the new database on success.
 *
 * Construction order in the visible lines: database lock, tree lock, node
 * lock array, (cache only) rrset statistics and per-bucket rdataset lists,
 * the heaps array and one isc_heap_create() call per bucket (ordered by
 * ttl_sooner for a cache and resign_sooner otherwise), dead-node lists,
 * per-bucket locks and reference counters, attachment to mctx and hmctx,
 * origin name copy, the main, NSEC and NSEC3 trees, response-policy CIDR
 * data for non-root zone databases, origin nodes in the main and NSEC3
 * trees for non-cache databases, the database reference count, and the
 * initial version with serial 1.
 *
 * Failures before the memory contexts are attached jump to the cleanup
 * code at the end of the function; later failures call free_rbtdb().
 * Returns ISC_R_SUCCESS on success, ISC_R_NOMEMORY for allocation
 * failures, ISC_R_RANGE for a cache with fewer than two node locks, or
 * the failing result of a sub-step.
 */
7460 #ifdef DNS_RBTDB_VERSION64
7465 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7466 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7467 void *driverarg, dns_db_t **dbp)
7470 isc_result_t result;
7473 isc_boolean_t (*sooner)(void *, void *);
7474 isc_mem_t *hmctx = mctx;
7476 /* Keep the compiler happy. */
7479 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7481 return (ISC_R_NOMEMORY);
7484 * If argv[0] exists, it points to a memory context to use for heap
7487 hmctx = (isc_mem_t *) argv[0];
7489 memset(rbtdb, '\0', sizeof(*rbtdb));
7490 dns_name_init(&rbtdb->common.origin, NULL);
7491 rbtdb->common.attributes = 0;
7492 if (type == dns_dbtype_cache) {
7493 rbtdb->common.methods = &cache_methods;
7494 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7495 } else if (type == dns_dbtype_stub) {
7496 rbtdb->common.methods = &zone_methods;
7497 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7499 rbtdb->common.methods = &zone_methods;
7500 rbtdb->common.rdclass = rdclass;
7501 rbtdb->common.mctx = NULL;
7503 result = RBTDB_INITLOCK(&rbtdb->lock);
7504 if (result != ISC_R_SUCCESS)
7507 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7508 if (result != ISC_R_SUCCESS)
7512 * Initialize node_lock_count in a generic way to support future
7513 * extension which allows the user to specify this value on creation.
7514 * Note that when specified for a cache DB it must be larger than 1
7515 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7517 if (rbtdb->node_lock_count == 0) {
7518 if (IS_CACHE(rbtdb))
7519 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7521 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7522 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7523 result = ISC_R_RANGE;
7524 goto cleanup_tree_lock;
7526 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7527 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7528 sizeof(rbtdb_nodelock_t));
7529 if (rbtdb->node_locks == NULL) {
7530 result = ISC_R_NOMEMORY;
7531 goto cleanup_tree_lock;
7534 rbtdb->rrsetstats = NULL;
7535 if (IS_CACHE(rbtdb)) {
7536 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7537 if (result != ISC_R_SUCCESS)
7538 goto cleanup_node_locks;
7539 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7540 sizeof(rdatasetheaderlist_t));
7541 if (rbtdb->rdatasets == NULL) {
7542 result = ISC_R_NOMEMORY;
7543 goto cleanup_rrsetstats;
7545 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7546 ISC_LIST_INIT(rbtdb->rdatasets[i]);
7548 rbtdb->rdatasets = NULL;
/* The heaps array and the heaps themselves come from hmctx, not mctx. */
7553 rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7554 sizeof(isc_heap_t *));
7555 if (rbtdb->heaps == NULL) {
7556 result = ISC_R_NOMEMORY;
7557 goto cleanup_rdatasets;
7559 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7560 rbtdb->heaps[i] = NULL;
7561 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7562 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7563 result = isc_heap_create(hmctx, sooner, set_index, 0,
7565 if (result != ISC_R_SUCCESS)
7570 * Create deadnode lists.
7572 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7573 sizeof(rbtnodelist_t));
7574 if (rbtdb->deadnodes == NULL) {
7575 result = ISC_R_NOMEMORY;
7578 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7579 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7581 rbtdb->active = rbtdb->node_lock_count;
7583 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7584 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7585 if (result == ISC_R_SUCCESS) {
7586 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7587 if (result != ISC_R_SUCCESS)
7588 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7590 if (result != ISC_R_SUCCESS) {
7592 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7593 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7594 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7596 goto cleanup_deadnodes;
7598 rbtdb->node_locks[i].exiting = ISC_FALSE;
7602 * Attach to the mctx. The database will persist so long as there
7603 * are references to it, and attaching to the mctx ensures that our
7604 * mctx won't disappear out from under us.
7606 isc_mem_attach(mctx, &rbtdb->common.mctx);
7607 isc_mem_attach(hmctx, &rbtdb->hmctx);
7610 * Must be initialized before free_rbtdb() is called.
7612 isc_ondestroy_init(&rbtdb->common.ondest);
7615 * Make a copy of the origin name.
7617 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7618 if (result != ISC_R_SUCCESS) {
7619 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7624 * Make the Red-Black Trees.
7626 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7627 if (result != ISC_R_SUCCESS) {
7628 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7632 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7633 if (result != ISC_R_SUCCESS) {
7634 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7638 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7639 if (result != ISC_R_SUCCESS) {
7640 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7646 * Get ready for response policy IP address searching if at least one
7647 * zone has been configured as a response policy zone and this
7648 * is not a cache zone.
7649 * It would be better to know that this database is for a policy
7650 * zone named for a view, but that would require knowledge from
7651 * above such as an argv[] set from data in the zone.
7653 if (type == dns_dbtype_zone && !dns_name_equal(origin, dns_rootname)) {
7654 result = dns_rpz_new_cidr(mctx, origin, &rbtdb->rpz_cidr);
7655 if (result != ISC_R_SUCCESS) {
7656 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7663 * In order to set the node callback bit correctly in zone databases,
7664 * we need to know if the node has the origin name of the zone.
7665 * In loading_addrdataset() we could simply compare the new name
7666 * to the origin name, but this is expensive. Also, we don't know the
7667 * node name in addrdataset(), so we need another way of knowing the
7670 * We now explicitly create a node for the zone's origin, and then
7671 * we simply remember the node's address. This is safe, because
7672 * the top-of-zone node can never be deleted, nor can its address
7675 if (!IS_CACHE(rbtdb)) {
7676 dns_rbtnode_t *nsec3node;
7678 rbtdb->origin_node = NULL;
7679 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7680 &rbtdb->origin_node);
7681 if (result != ISC_R_SUCCESS) {
7682 INSIST(result != ISC_R_EXISTS);
7683 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7686 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7688 * We need to give the origin node the right locknum.
7690 dns_name_init(&name, NULL);
7691 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7692 #ifdef DNS_RBT_USEHASH
7693 rbtdb->origin_node->locknum =
7694 rbtdb->origin_node->hashval %
7695 rbtdb->node_lock_count;
7697 rbtdb->origin_node->locknum =
7698 dns_name_hash(&name, ISC_TRUE) %
7699 rbtdb->node_lock_count;
7702 * Add an apex node to the NSEC3 tree so that NSEC3 searches
7703 * return partial matches when there is only a single NSEC3
7704 * record in the tree.
7707 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7709 if (result != ISC_R_SUCCESS) {
7710 INSIST(result != ISC_R_EXISTS);
7711 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7714 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
7716 * We need to give the nsec3 origin node the right locknum.
7718 dns_name_init(&name, NULL);
7719 dns_rbt_namefromnode(nsec3node, &name);
7720 #ifdef DNS_RBT_USEHASH
7721 nsec3node->locknum = nsec3node->hashval %
7722 rbtdb->node_lock_count;
7724 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7725 rbtdb->node_lock_count;
7730 * Misc. Initialization.
7732 result = isc_refcount_init(&rbtdb->references, 1);
7733 if (result != ISC_R_SUCCESS) {
7734 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7737 rbtdb->attributes = 0;
7741 * Version Initialization.
7743 rbtdb->current_serial = 1;
7744 rbtdb->least_serial = 1;
7745 rbtdb->next_serial = 2;
7746 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7747 if (rbtdb->current_version == NULL) {
7748 isc_refcount_decrement(&rbtdb->references, NULL);
7749 isc_refcount_destroy(&rbtdb->references);
7750 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7751 return (ISC_R_NOMEMORY);
7753 rbtdb->current_version->secure = dns_db_insecure;
7754 rbtdb->current_version->havensec3 = ISC_FALSE;
7755 rbtdb->current_version->flags = 0;
7756 rbtdb->current_version->iterations = 0;
7757 rbtdb->current_version->hash = 0;
7758 rbtdb->current_version->salt_length = 0;
7759 memset(rbtdb->current_version->salt, 0,
7760 sizeof(rbtdb->current_version->salt));
7761 rbtdb->future_version = NULL;
7762 ISC_LIST_INIT(rbtdb->open_versions);
7764 * Keep the current version in the open list so that list operation
7765 * won't happen in normal lookup operations.
7767 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7769 rbtdb->common.magic = DNS_DB_MAGIC;
7770 rbtdb->common.impmagic = RBTDB_MAGIC;
7772 *dbp = (dns_db_t *)rbtdb;
7774 return (ISC_R_SUCCESS);
7777 isc_mem_put(mctx, rbtdb->deadnodes,
7778 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7781 if (rbtdb->heaps != NULL) {
7782 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7783 if (rbtdb->heaps[i] != NULL)
7784 isc_heap_destroy(&rbtdb->heaps[i]);
/*
 * The heaps array was obtained from hmctx above, so it is returned to
 * hmctx; hmctx differs from mctx when argv[0] supplies a heap context.
 */
7785 isc_mem_put(hmctx, rbtdb->heaps,
7786 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7790 if (rbtdb->rdatasets != NULL)
7791 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7792 sizeof(rdatasetheaderlist_t));
7794 if (rbtdb->rrsetstats != NULL)
7795 dns_stats_detach(&rbtdb->rrsetstats);
7798 isc_mem_put(mctx, rbtdb->node_locks,
7799 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7802 isc_rwlock_destroy(&rbtdb->tree_lock);
7805 RBTDB_DESTROYLOCK(&rbtdb->lock);
7808 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7814 * Slabbed Rdataset Methods
/*
 * Drop the node reference held by a slab-backed rdataset.
 *
 * The owning database and node are read from rdataset->private1 and
 * rdataset->private2 and passed to detachnode().
 */
7818 rdataset_disassociate(dns_rdataset_t *rdataset) {
7819 dns_db_t *db = rdataset->private1;
7820 dns_dbnode_t *node = rdataset->private2;
7822 detachnode(db, &node);
/*
 * Move the rdataset cursor to the first rdata in the slab that
 * rdataset->private3 points at.
 *
 * The rdata count is read from the first two bytes of the slab, most
 * significant byte first.  On the ISC_R_NOMORE path private5 is cleared
 * (the test guarding that path is not visible in this listing;
 * presumably it is a zero count -- TODO confirm).  On the
 * ISC_R_SUCCESS path private5 receives the cursor and privateuint4 the
 * count of rdata beyond the cursor.
 *
 * In DNS_RDATASET_FIXED builds, when DNS_RDATASETATTR_LOADORDER is not
 * set, the cursor is advanced past 2 + 4 * count bytes first.
 */
7826 rdataset_first(dns_rdataset_t *rdataset) {
7827 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7830 count = raw[0] * 256 + raw[1];
7832 rdataset->private5 = NULL;
7833 return (ISC_R_NOMORE);
7836 #if DNS_RDATASET_FIXED
7837 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7838 raw += 2 + (4 * count);
7844 * The privateuint4 field is the number of rdata beyond the
7845 * cursor position, so we decrement the total count by one
7846 * before storing it.
7848 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7849 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7850 * to the first entry in the offset table.
7853 rdataset->privateuint4 = count;
7854 rdataset->private5 = raw;
7856 return (ISC_R_SUCCESS);
/*
 * Advance the rdataset cursor by one rdata.
 *
 * The remaining-rdata counter is taken from rdataset->privateuint4 and
 * written back after use; ISC_R_NOMORE is returned on the exhausted
 * path (the test itself is not visible in this listing).  Otherwise the
 * cursor in private5 is moved forward and ISC_R_SUCCESS is returned.
 *
 * The record length is a two-byte value, most significant byte first.
 * With DNS_RDATASET_FIXED the per-record header is 4 bytes
 * (length + order); without it the header is 2 bytes (length only).
 */
7860 rdataset_next(dns_rdataset_t *rdataset) {
7862 unsigned int length;
7863 unsigned char *raw; /* RDATASLAB */
7865 count = rdataset->privateuint4;
7867 return (ISC_R_NOMORE);
7869 rdataset->privateuint4 = count;
7872 * Skip forward one record (length + 4) or one offset (4).
7874 raw = rdataset->private5;
7875 #if DNS_RDATASET_FIXED
7876 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7878 length = raw[0] * 256 + raw[1];
7880 #if DNS_RDATASET_FIXED
7882 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7884 rdataset->private5 = raw + 2; /* length(2) */
7887 return (ISC_R_SUCCESS);
/*
 * Fill 'rdata' from the record at the current cursor position
 * (rdataset->private5, which must not be NULL).
 *
 * In DNS_RDATASET_FIXED builds with DNS_RDATASETATTR_LOADORDER set, the
 * cursor points at an offset-table entry: a 4-byte big-endian offset is
 * decoded and 'raw' is rebased to the start of the slab (private3);
 * presumably the offset is then added -- that statement is not visible
 * in this listing.
 *
 * The record length is two bytes, most significant byte first.  For
 * RRSIG rdatasets the DNS_RDATASLAB_OFFLINE bit of the byte at 'raw' is
 * translated into DNS_RDATA_OFFLINE, which is OR-ed into rdata->flags
 * after dns_rdata_fromregion() has populated 'rdata'.
 */
7891 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7892 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7893 #if DNS_RDATASET_FIXED
7894 unsigned int offset;
7896 unsigned int length;
7898 unsigned int flags = 0;
7900 REQUIRE(raw != NULL);
7903 * Find the start of the record if not already in private5
7904 * then skip the length and order fields.
7906 #if DNS_RDATASET_FIXED
7907 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7908 offset = (raw[0] << 24) + (raw[1] << 16) +
7909 (raw[2] << 8) + raw[3];
7910 raw = rdataset->private3;
7914 length = raw[0] * 256 + raw[1];
7915 #if DNS_RDATASET_FIXED
7920 if (rdataset->type == dns_rdatatype_rrsig) {
7921 if (*raw & DNS_RDATASLAB_OFFLINE)
7922 flags |= DNS_RDATA_OFFLINE;
7928 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7929 rdata->flags |= flags;
/*
 * Clone a slab-backed rdataset.
 *
 * An additional node reference is taken with attachnode() using the
 * database and node stored in source->private1 / source->private2, and
 * the iterator state of 'target' (privateuint4, private5) is reset.
 * The statement copying 'source' into 'target' is not visible in this
 * listing -- TODO confirm against the full file.
 */
7933 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7934 dns_db_t *db = source->private1;
7935 dns_dbnode_t *node = source->private2;
7936 dns_dbnode_t *cloned_node = NULL;
7938 attachnode(db, node, &cloned_node);
7942 * Reset iterator state.
7944 target->privateuint4 = 0;
7945 target->private5 = NULL;
/*
 * Read the number of rdata stored in the slab at rdataset->private3.
 * The count occupies the first two bytes, most significant byte first.
 * (The return statement is not visible in this listing.)
 */
7949 rdataset_count(dns_rdataset_t *rdataset) {
7950 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7953 count = raw[0] * 256 + raw[1];
/*
 * Bind 'nsec' and 'nsecsig' to the no-qname proof attached to
 * 'rdataset' and clone the proof owner name into 'name'.
 *
 * rdataset: source rdataset; private6 holds the struct noqname.
 * name:     receives a clone of noqname->name.
 * nsec:     bound to noqname->neg, typed noqname->type.
 * nsecsig:  bound to noqname->negsig, typed RRSIG covering
 *           noqname->type.
 *
 * Each bound rdataset gets its own node reference via attachnode() and
 * inherits ttl and trust from 'rdataset'.  Returns ISC_R_SUCCESS.
 *
 * Fix: the second group of private6/private7 resets used to clear the
 * fields of 'nsec' again, leaving those of 'nsecsig' untouched.  They
 * now clear the fields of 'nsecsig', so neither output rdataset carries
 * stale proof pointers.
 */
7959 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7960 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7962 dns_db_t *db = rdataset->private1;
7963 dns_dbnode_t *node = rdataset->private2;
7964 dns_dbnode_t *cloned_node;
7965 struct noqname *noqname = rdataset->private6;
7968 attachnode(db, node, &cloned_node);
7969 nsec->methods = &rdataset_methods;
7970 nsec->rdclass = db->rdclass;
7971 nsec->type = noqname->type;
7973 nsec->ttl = rdataset->ttl;
7974 nsec->trust = rdataset->trust;
7975 nsec->private1 = rdataset->private1;
7976 nsec->private2 = rdataset->private2;
7977 nsec->private3 = noqname->neg;
7978 nsec->privateuint4 = 0;
7979 nsec->private5 = NULL;
7980 nsec->private6 = NULL;
7981 nsec->private7 = NULL;
7984 attachnode(db, node, &cloned_node);
7985 nsecsig->methods = &rdataset_methods;
7986 nsecsig->rdclass = db->rdclass;
7987 nsecsig->type = dns_rdatatype_rrsig;
7988 nsecsig->covers = noqname->type;
7989 nsecsig->ttl = rdataset->ttl;
7990 nsecsig->trust = rdataset->trust;
7991 nsecsig->private1 = rdataset->private1;
7992 nsecsig->private2 = rdataset->private2;
7993 nsecsig->private3 = noqname->negsig;
7994 nsecsig->privateuint4 = 0;
7995 nsecsig->private5 = NULL;
/* The signature rdataset has no nested proofs of its own. */
7996 nsecsig->private6 = NULL;
7997 nsecsig->private7 = NULL;
7999 dns_name_clone(&noqname->name, name);
8001 return (ISC_R_SUCCESS);
/*
 * Bind 'nsec' and 'nsecsig' to the closest-encloser proof attached to
 * 'rdataset' and clone the proof owner name into 'name'.
 *
 * rdataset: source rdataset; private7 holds the struct noqname that
 *           describes the closest-encloser proof.
 * name:     receives a clone of closest->name.
 * nsec:     bound to closest->neg, typed closest->type.
 * nsecsig:  bound to closest->negsig, typed RRSIG covering
 *           closest->type.
 *
 * Each bound rdataset gets its own node reference via attachnode() and
 * inherits ttl and trust from 'rdataset'.  Returns ISC_R_SUCCESS.
 *
 * Fix: the second group of private6/private7 resets used to clear the
 * fields of 'nsec' again, leaving those of 'nsecsig' untouched.  They
 * now clear the fields of 'nsecsig'.
 */
8005 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
8006 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8008 dns_db_t *db = rdataset->private1;
8009 dns_dbnode_t *node = rdataset->private2;
8010 dns_dbnode_t *cloned_node;
8011 struct noqname *closest = rdataset->private7;
8014 attachnode(db, node, &cloned_node);
8015 nsec->methods = &rdataset_methods;
8016 nsec->rdclass = db->rdclass;
8017 nsec->type = closest->type;
8019 nsec->ttl = rdataset->ttl;
8020 nsec->trust = rdataset->trust;
8021 nsec->private1 = rdataset->private1;
8022 nsec->private2 = rdataset->private2;
8023 nsec->private3 = closest->neg;
8024 nsec->privateuint4 = 0;
8025 nsec->private5 = NULL;
8026 nsec->private6 = NULL;
8027 nsec->private7 = NULL;
8030 attachnode(db, node, &cloned_node);
8031 nsecsig->methods = &rdataset_methods;
8032 nsecsig->rdclass = db->rdclass;
8033 nsecsig->type = dns_rdatatype_rrsig;
8034 nsecsig->covers = closest->type;
8035 nsecsig->ttl = rdataset->ttl;
8036 nsecsig->trust = rdataset->trust;
8037 nsecsig->private1 = rdataset->private1;
8038 nsecsig->private2 = rdataset->private2;
8039 nsecsig->private3 = closest->negsig;
8040 nsecsig->privateuint4 = 0;
8041 nsecsig->private5 = NULL;
/* The signature rdataset has no nested proofs of its own. */
8042 nsecsig->private6 = NULL;
8043 nsecsig->private7 = NULL;
8045 dns_name_clone(&closest->name, name);
8047 return (ISC_R_SUCCESS);
/*
 * Set the trust level of an rdataset and of its backing header.
 *
 * Both header->trust and rdataset->trust are assigned 'trust' while the
 * lock of the owning node (selected by rbtnode->locknum) is held in
 * write mode.
 *
 * NOTE(review): 'header' is initialised from private3, which other
 * functions in this file treat as the raw slab pointer; any adjustment
 * to reach the header itself is not visible in this listing.
 */
8051 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8052 dns_rbtdb_t *rbtdb = rdataset->private1;
8053 dns_rbtnode_t *rbtnode = rdataset->private2;
8054 rdatasetheader_t *header = rdataset->private3;
8057 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8058 isc_rwlocktype_write);
8059 header->trust = rdataset->trust = trust;
8060 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8061 isc_rwlocktype_write);
/*
 * Expire the header that backs 'rdataset'.
 *
 * expire_header() is called with ISC_FALSE as its third argument while
 * the lock of the owning node is held in write mode.
 *
 * NOTE(review): as in rdataset_settrust(), 'header' starts out as
 * private3; any adjustment to reach the header is not visible here.
 */
8065 rdataset_expire(dns_rdataset_t *rdataset) {
8066 dns_rbtdb_t *rbtdb = rdataset->private1;
8067 dns_rbtnode_t *rbtnode = rdataset->private2;
8068 rdatasetheader_t *header = rdataset->private3;
8071 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8072 isc_rwlocktype_write);
8073 expire_header(rbtdb, header, ISC_FALSE);
8074 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8075 isc_rwlocktype_write);
8079 * Rdataset Iterator Methods
/*
 * Destroy an rdataset iterator.
 *
 * If the iterator holds a version it is closed with closeversion()
 * (commit flag ISC_FALSE).  The node reference is released with
 * detachnode() and the iterator memory is returned to the memory
 * context of its database.
 */
8083 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8084 rbtdb_rdatasetiter_t *rbtiterator;
8086 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
8088 if (rbtiterator->common.version != NULL)
8089 closeversion(rbtiterator->common.db,
8090 &rbtiterator->common.version, ISC_FALSE);
8091 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
8092 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8093 sizeof(*rbtiterator));
/*
 * Position the rdataset iterator on the first visible rdataset of its
 * node.
 *
 * For cache databases the iterator timestamp (common.now) is used; the
 * other visible branch takes the serial from the iterator version.
 *
 * With the node lock held in read mode, the top-level header list is
 * walked via 'next'; within each entry the 'down' chain is followed
 * looking for a header with serial <= the chosen serial that is not
 * marked IGNORE.  Such a header is tested for NONEXISTENT or for being
 * past its TTL (now > rdh_ttl); the statements acting on that test are
 * not visible in this listing.
 *
 * The header found (or NULL) is stored in rbtiterator->current; the
 * function has ISC_R_NOMORE and ISC_R_SUCCESS return paths.
 */
8099 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8100 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8101 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8102 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8103 rbtdb_version_t *rbtversion = rbtiterator->common.version;
8104 rdatasetheader_t *header, *top_next;
8105 rbtdb_serial_t serial;
8108 if (IS_CACHE(rbtdb)) {
8110 now = rbtiterator->common.now;
8112 serial = rbtversion->serial;
8116 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8117 isc_rwlocktype_read);
8119 for (header = rbtnode->data; header != NULL; header = top_next) {
8120 top_next = header->next;
8122 if (header->serial <= serial && !IGNORE(header)) {
8124 * Is this a "this rdataset doesn't exist"
8125 * record? Or is it too old in the cache?
8127 * Note: unlike everywhere else, we
8128 * check for now > header->rdh_ttl instead
8129 * of now >= header->rdh_ttl. This allows
8130 * ANY and RRSIG queries for 0 TTL
8131 * rdatasets to work.
8133 if (NONEXISTENT(header) ||
8134 (now != 0 && now > header->rdh_ttl))
8138 header = header->down;
8139 } while (header != NULL);
8144 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8145 isc_rwlocktype_read);
8147 rbtiterator->current = header;
8150 return (ISC_R_NOMORE);
8152 return (ISC_R_SUCCESS);
/*
 * Advance the rdataset iterator to the next visible rdataset of its
 * node.
 *
 * Starts from rbtiterator->current; there is an early ISC_R_NOMORE
 * return whose guarding test is not visible in this listing.  For cache
 * databases the iterator timestamp is used; the other visible branch
 * takes the serial from the iterator version.
 *
 * With the node lock held in read mode, the walk continues from
 * header->next.  Entries whose type equals the type of the current
 * header, or the computed counterpart 'negtype', are skipped.  For the
 * remaining entries the 'down' chain is searched for a header with
 * serial <= the chosen serial; such a header is tested for the
 * RDATASET_ATTR_NONEXISTENT attribute or for being past its TTL
 * (now > rdh_ttl).  The statements acting on that test are not visible
 * in this listing.
 *
 * The header found (or NULL) is stored in rbtiterator->current; the
 * function has ISC_R_NOMORE and ISC_R_SUCCESS return paths.
 */
8156 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8157 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8158 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8159 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8160 rbtdb_version_t *rbtversion = rbtiterator->common.version;
8161 rdatasetheader_t *header, *top_next;
8162 rbtdb_serial_t serial;
8164 rbtdb_rdatatype_t type, negtype;
8165 dns_rdatatype_t rdtype, covers;
8167 header = rbtiterator->current;
8169 return (ISC_R_NOMORE);
8171 if (IS_CACHE(rbtdb)) {
8173 now = rbtiterator->common.now;
8175 serial = rbtversion->serial;
8179 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8180 isc_rwlocktype_read);
8182 type = header->type;
8183 rdtype = RBTDB_RDATATYPE_BASE(header->type);
8184 if (NEGATIVE(header)) {
8185 covers = RBTDB_RDATATYPE_EXT(header->type);
8186 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8188 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
8189 for (header = header->next; header != NULL; header = top_next) {
8190 top_next = header->next;
8192 * If not walking back up the down list.
8194 if (header->type != type && header->type != negtype) {
8196 if (header->serial <= serial &&
8199 * Is this a "this rdataset doesn't
8202 * Note: unlike everywhere else, we
8203 * check for now > header->ttl instead
8204 * of now >= header->ttl. This allows
8205 * ANY and RRSIG queries for 0 TTL
8206 * rdatasets to work.
8208 if ((header->attributes &
8209 RDATASET_ATTR_NONEXISTENT) != 0 ||
8210 (now != 0 && now > header->rdh_ttl))
8214 header = header->down;
8215 } while (header != NULL);
8221 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8222 isc_rwlocktype_read);
8224 rbtiterator->current = header;
8227 return (ISC_R_NOMORE);
8229 return (ISC_R_SUCCESS);
/*
 * Bind 'rdataset' to the header the iterator currently points at.
 *
 * rbtiterator->current must not be NULL.  bind_rdataset() is called
 * with the iterator timestamp while the node lock is held in read mode.
 */
8233 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8234 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8235 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8236 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8237 rdatasetheader_t *header;
8239 header = rbtiterator->current;
8240 REQUIRE(header != NULL);
8242 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8243 isc_rwlocktype_read);
8245 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8248 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8249 isc_rwlocktype_read);
8254 * Database Iterator Methods
/*
 * Take a reference on the node the database iterator currently points
 * at by calling reactivate_node() with the iterator tree-lock state.
 * The tree lock must be held (tree_locked != isc_rwlocktype_none).
 * Any early exit for a NULL node is not visible in this listing.
 */
8258 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8259 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8260 dns_rbtnode_t *node = rbtdbiter->node;
8265 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8266 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
/*
 * Release the iterator reference on its current node and clear
 * rbtdbiter->node.
 *
 * decrement_reference() is called with the node lock held in read mode
 * and is told the current tree-lock state.  Any early exit for a NULL
 * node is not visible in this listing.
 */
8270 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8271 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8272 dns_rbtnode_t *node = rbtdbiter->node;
8278 lock = &rbtdb->node_locks[node->locknum].lock;
8279 NODE_LOCK(lock, isc_rwlocktype_read);
8280 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8281 rbtdbiter->tree_locked, ISC_FALSE);
8282 NODE_UNLOCK(lock, isc_rwlocktype_read);
8284 rbtdbiter->node = NULL;
/*
 * Release the references held on nodes queued in rbtdbiter->deletions.
 *
 * Nothing is done unless rbtdbiter->delete is non-zero.  Otherwise:
 *  - a debug-level message with the queue length and the node count of
 *    the main tree is logged;
 *  - a held tree read lock is released (and remembered) and the tree
 *    lock is taken in write mode;
 *  - for every queued node decrement_reference() is called under that
 *    node lock in read mode;
 *  - the queue length is reset to zero and the write lock released;
 *  - the read lock is re-acquired if it was held on entry; the
 *    assignment of isc_rwlocktype_none presumably belongs to the
 *    opposite branch, whose 'else' is not visible in this listing.
 */
8288 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8289 dns_rbtnode_t *node;
8290 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8291 isc_boolean_t was_read_locked = ISC_FALSE;
8295 if (rbtdbiter->delete != 0) {
8297 * Note that "%d node of %d in tree" can report things like
8298 * "flush_deletions: 59 nodes of 41 in tree". This means
8299 * That some nodes appear on the deletions list more than
8300 * once. Only the last occurence will actually be deleted.
8302 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8303 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8304 "flush_deletions: %d nodes of %d in tree",
8306 dns_rbt_nodecount(rbtdb->tree));
8308 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8309 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8310 was_read_locked = ISC_TRUE;
8312 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8313 rbtdbiter->tree_locked = isc_rwlocktype_write;
8315 for (i = 0; i < rbtdbiter->delete; i++) {
8316 node = rbtdbiter->deletions[i];
8317 lock = &rbtdb->node_locks[node->locknum].lock;
8319 NODE_LOCK(lock, isc_rwlocktype_read);
8320 decrement_reference(rbtdb, node, 0,
8321 isc_rwlocktype_read,
8322 rbtdbiter->tree_locked, ISC_FALSE);
8323 NODE_UNLOCK(lock, isc_rwlocktype_read);
8326 rbtdbiter->delete = 0;
8328 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8329 if (was_read_locked) {
8330 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8331 rbtdbiter->tree_locked = isc_rwlocktype_read;
8334 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * Resume a paused database iterator.
 *
 * Requires that the iterator is paused and holds no tree lock.  The
 * tree lock is taken in read mode, the lock state is recorded in
 * tree_locked, and the paused flag is cleared.
 */
8340 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8341 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8343 REQUIRE(rbtdbiter->paused);
8344 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8346 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8347 rbtdbiter->tree_locked = isc_rwlocktype_read;
8349 rbtdbiter->paused = ISC_FALSE;
/*
 * Destroy a database iterator.
 *
 * Order of operations as visible here:
 *  - release the tree read lock if held (the INSIST presumably belongs
 *    to the opposite branch, whose 'else' is not visible);
 *  - drop the reference on the current node and flush the deferred
 *    deletions;
 *  - attach a temporary reference to the database in 'db' before
 *    detaching the iterator reference, presumably so that db->mctx is
 *    still usable for the final isc_mem_put() -- TODO confirm;
 *  - reset both node chains and free the iterator.
 * The release of the temporary 'db' reference is not visible in this
 * listing.
 */
8353 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8354 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8355 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8356 dns_db_t *db = NULL;
8358 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8359 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8360 rbtdbiter->tree_locked = isc_rwlocktype_none;
8362 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8364 dereference_iter_node(rbtdbiter);
8366 flush_deletions(rbtdbiter);
8368 dns_db_attach(rbtdbiter->common.db, &db);
8369 dns_db_detach(&rbtdbiter->common.db);
8371 dns_rbtnodechain_reset(&rbtdbiter->chain);
8372 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8373 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * Position the database iterator on the first node.
 *
 * If an earlier operation left a result other than ISC_R_SUCCESS or
 * ISC_R_NOMORE, that result is returned unchanged.  Otherwise the
 * iterator is resumed if paused, the current node reference is dropped
 * and both node chains are reset.
 *
 * With nsec3only set, the NSEC3 tree is used.  Otherwise the main tree
 * is tried first, and if it reports ISC_R_NOTFOUND and nonsec3 is not
 * set, the NSEC3 chain becomes current and is tried as well.
 *
 * On ISC_R_SUCCESS or DNS_R_NEWORIGIN the node is fetched from the
 * current chain; on success new_origin is set and a reference is taken.
 * Otherwise the result must be ISC_R_NOTFOUND (empty tree) and is
 * mapped to ISC_R_NOMORE.  The final result is stored in
 * rbtdbiter->result.
 */
8380 dbiterator_first(dns_dbiterator_t *iterator) {
8381 isc_result_t result;
8382 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8383 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8384 dns_name_t *name, *origin;
8386 if (rbtdbiter->result != ISC_R_SUCCESS &&
8387 rbtdbiter->result != ISC_R_NOMORE)
8388 return (rbtdbiter->result);
8390 if (rbtdbiter->paused)
8391 resume_iteration(rbtdbiter);
8393 dereference_iter_node(rbtdbiter);
8395 name = dns_fixedname_name(&rbtdbiter->name);
8396 origin = dns_fixedname_name(&rbtdbiter->origin);
8397 dns_rbtnodechain_reset(&rbtdbiter->chain);
8398 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8400 if (rbtdbiter->nsec3only) {
8401 rbtdbiter->current = &rbtdbiter->nsec3chain;
8402 result = dns_rbtnodechain_first(rbtdbiter->current,
8403 rbtdb->nsec3, name, origin);
8405 rbtdbiter->current = &rbtdbiter->chain;
8406 result = dns_rbtnodechain_first(rbtdbiter->current,
8407 rbtdb->tree, name, origin);
8408 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8409 rbtdbiter->current = &rbtdbiter->nsec3chain;
8410 result = dns_rbtnodechain_first(rbtdbiter->current,
8415 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8416 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8417 NULL, &rbtdbiter->node);
8418 if (result == ISC_R_SUCCESS) {
8419 rbtdbiter->new_origin = ISC_TRUE;
8420 reference_iter_node(rbtdbiter);
8423 INSIST(result == ISC_R_NOTFOUND);
8424 result = ISC_R_NOMORE; /* The tree is empty. */
8427 rbtdbiter->result = result;
/*
 * Position the database iterator on the last node.
 *
 * If an earlier operation left a result other than ISC_R_SUCCESS or
 * ISC_R_NOMORE, that result is returned unchanged.  Otherwise the
 * iterator is resumed if paused, the current node reference is dropped
 * and both node chains are reset.
 *
 * The NSEC3 tree is consulted only when nsec3only is set and nonsec3 is
 * not; the main tree is consulted when nsec3only is not set and no node
 * has been found yet.
 *
 * NOTE(review): with neither flag set this starts from the last node of
 * the main tree, while dbiterator_next() moves from the main tree into
 * the NSEC3 tree.  Confirm that the 'nsec3only &&' term in the first
 * test is intended.
 *
 * On ISC_R_SUCCESS or DNS_R_NEWORIGIN the node is fetched from the
 * current chain; on success new_origin is set and a reference is taken.
 * Otherwise the result must be ISC_R_NOTFOUND (empty tree) and is
 * mapped to ISC_R_NOMORE.  The final result is stored in
 * rbtdbiter->result.
 */
8433 dbiterator_last(dns_dbiterator_t *iterator) {
8434 isc_result_t result;
8435 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8436 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8437 dns_name_t *name, *origin;
8439 if (rbtdbiter->result != ISC_R_SUCCESS &&
8440 rbtdbiter->result != ISC_R_NOMORE)
8441 return (rbtdbiter->result);
8443 if (rbtdbiter->paused)
8444 resume_iteration(rbtdbiter);
8446 dereference_iter_node(rbtdbiter);
8448 name = dns_fixedname_name(&rbtdbiter->name);
8449 origin = dns_fixedname_name(&rbtdbiter->origin);
8450 dns_rbtnodechain_reset(&rbtdbiter->chain);
8451 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8453 result = ISC_R_NOTFOUND;
8454 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8455 rbtdbiter->current = &rbtdbiter->nsec3chain;
8456 result = dns_rbtnodechain_last(rbtdbiter->current,
8457 rbtdb->nsec3, name, origin);
8459 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8460 rbtdbiter->current = &rbtdbiter->chain;
8461 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8464 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8465 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8466 NULL, &rbtdbiter->node);
8467 if (result == ISC_R_SUCCESS) {
8468 rbtdbiter->new_origin = ISC_TRUE;
8469 reference_iter_node(rbtdbiter);
8472 INSIST(result == ISC_R_NOTFOUND);
8473 result = ISC_R_NOMORE; /* The tree is empty. */
8476 rbtdbiter->result = result;
/*
 * Position the database iterator on the node named 'name'.
 *
 * If an earlier operation left a result other than ISC_R_SUCCESS,
 * ISC_R_NOTFOUND or ISC_R_NOMORE, that result is returned unchanged.
 * Otherwise the iterator is resumed if paused, the current node
 * reference is dropped and both node chains are reset.
 *
 * Lookup, using dns_rbt_findnode() with DNS_RBTFIND_EMPTYDATA:
 *  - nsec3only: search the NSEC3 tree;
 *  - nonsec3:   search the main tree;
 *  - otherwise: search the main tree; on DNS_R_PARTIALMATCH also
 *    search the NSEC3 tree and, if that succeeds, switch the current
 *    chain to the NSEC3 chain.
 *
 * Two consecutive result-handling sequences follow.  Presumably they
 * are alternatives selected by preprocessor conditionals that are not
 * visible in this listing -- TODO confirm.  The first maps
 * DNS_R_PARTIALMATCH to ISC_R_NOTFOUND with no current node; the second
 * accepts a partial match and records ISC_R_SUCCESS for it.
 */
8482 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8483 isc_result_t result;
8484 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8485 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8486 dns_name_t *iname, *origin;
8488 if (rbtdbiter->result != ISC_R_SUCCESS &&
8489 rbtdbiter->result != ISC_R_NOTFOUND &&
8490 rbtdbiter->result != ISC_R_NOMORE)
8491 return (rbtdbiter->result);
8493 if (rbtdbiter->paused)
8494 resume_iteration(rbtdbiter);
8496 dereference_iter_node(rbtdbiter);
8498 iname = dns_fixedname_name(&rbtdbiter->name);
8499 origin = dns_fixedname_name(&rbtdbiter->origin);
8500 dns_rbtnodechain_reset(&rbtdbiter->chain);
8501 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8503 if (rbtdbiter->nsec3only) {
8504 rbtdbiter->current = &rbtdbiter->nsec3chain;
8505 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8508 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8509 } else if (rbtdbiter->nonsec3) {
8510 rbtdbiter->current = &rbtdbiter->chain;
8511 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8514 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8517 * Stay on main chain if not found on either chain.
8519 rbtdbiter->current = &rbtdbiter->chain;
8520 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8523 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8524 if (result == DNS_R_PARTIALMATCH) {
8525 dns_rbtnode_t *node = NULL;
8526 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8527 &node, &rbtdbiter->nsec3chain,
8528 DNS_RBTFIND_EMPTYDATA,
8530 if (result == ISC_R_SUCCESS) {
8531 rbtdbiter->node = node;
8532 rbtdbiter->current = &rbtdbiter->nsec3chain;
8538 if (result == ISC_R_SUCCESS) {
8539 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8541 if (result == ISC_R_SUCCESS) {
8542 rbtdbiter->new_origin = ISC_TRUE;
8543 reference_iter_node(rbtdbiter);
8545 } else if (result == DNS_R_PARTIALMATCH) {
8546 result = ISC_R_NOTFOUND;
8547 rbtdbiter->node = NULL;
8550 rbtdbiter->result = result;
8552 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8553 isc_result_t tresult;
8554 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8556 if (tresult == ISC_R_SUCCESS) {
8557 rbtdbiter->new_origin = ISC_TRUE;
8558 reference_iter_node(rbtdbiter);
8561 rbtdbiter->node = NULL;
8564 rbtdbiter->node = NULL;
8566 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8567 ISC_R_SUCCESS : result;
/*
 * Move the database iterator to the previous node.
 *
 * Requires a current node.  A stored result other than ISC_R_SUCCESS is
 * returned unchanged.  The iterator is resumed if paused, then the
 * current chain is stepped backwards.
 *
 * When the NSEC3 chain is current and exhausted (ISC_R_NOMORE) and
 * neither nsec3only nor nonsec3 is set, iteration continues from the
 * last node of the main tree; ISC_R_NOTFOUND there is mapped to
 * ISC_R_NOMORE.
 *
 * The old node reference is dropped.  On DNS_R_NEWORIGIN or
 * ISC_R_SUCCESS, new_origin records whether the origin changed and the
 * node is fetched from the current chain; on success a reference is
 * taken.  The final result is stored in rbtdbiter->result.
 */
8574 dbiterator_prev(dns_dbiterator_t *iterator) {
8575 isc_result_t result;
8576 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8577 dns_name_t *name, *origin;
8578 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8580 REQUIRE(rbtdbiter->node != NULL);
8582 if (rbtdbiter->result != ISC_R_SUCCESS)
8583 return (rbtdbiter->result);
8585 if (rbtdbiter->paused)
8586 resume_iteration(rbtdbiter);
8588 name = dns_fixedname_name(&rbtdbiter->name);
8589 origin = dns_fixedname_name(&rbtdbiter->origin);
8590 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8591 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8592 !rbtdbiter->nonsec3 &&
8593 &rbtdbiter->nsec3chain == rbtdbiter->current) {
8594 rbtdbiter->current = &rbtdbiter->chain;
8595 dns_rbtnodechain_reset(rbtdbiter->current);
8596 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8598 if (result == ISC_R_NOTFOUND)
8599 result = ISC_R_NOMORE;
8602 dereference_iter_node(rbtdbiter);
8604 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8605 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8606 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8607 NULL, &rbtdbiter->node);
8610 if (result == ISC_R_SUCCESS)
8611 reference_iter_node(rbtdbiter);
8613 rbtdbiter->result = result;
/*
 * Move the database iterator to the next node.
 *
 * Requires a current node.  A stored result other than ISC_R_SUCCESS is
 * returned unchanged.  The iterator is resumed if paused, then the
 * current chain is stepped forwards.
 *
 * When the main chain is current and exhausted (ISC_R_NOMORE) and
 * neither nsec3only nor nonsec3 is set, iteration continues from the
 * first node of the NSEC3 tree; ISC_R_NOTFOUND there is mapped to
 * ISC_R_NOMORE.
 *
 * The old node reference is dropped.  On DNS_R_NEWORIGIN or
 * ISC_R_SUCCESS, new_origin records whether the origin changed and the
 * node is fetched from the current chain; on success a reference is
 * taken.  The final result is stored in rbtdbiter->result.
 */
8619 dbiterator_next(dns_dbiterator_t *iterator) {
8620 isc_result_t result;
8621 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8622 dns_name_t *name, *origin;
8623 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8625 REQUIRE(rbtdbiter->node != NULL);
8627 if (rbtdbiter->result != ISC_R_SUCCESS)
8628 return (rbtdbiter->result);
8630 if (rbtdbiter->paused)
8631 resume_iteration(rbtdbiter);
8633 name = dns_fixedname_name(&rbtdbiter->name);
8634 origin = dns_fixedname_name(&rbtdbiter->origin);
8635 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8636 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8637 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8638 rbtdbiter->current = &rbtdbiter->nsec3chain;
8639 dns_rbtnodechain_reset(rbtdbiter->current);
8640 result = dns_rbtnodechain_first(rbtdbiter->current,
8641 rbtdb->nsec3, name, origin);
8642 if (result == ISC_R_NOTFOUND)
8643 result = ISC_R_NOMORE;
8646 dereference_iter_node(rbtdbiter);
8648 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8649 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8650 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8651 NULL, &rbtdbiter->node);
8653 if (result == ISC_R_SUCCESS)
8654 reference_iter_node(rbtdbiter);
8656 rbtdbiter->result = result;
/*
 * Return the node the database iterator currently points at.
 *
 * Requires a stored result of ISC_R_SUCCESS and a current node.  The
 * iterator is resumed if paused.
 *
 * Name handling depends on common.relative_names; the visible absolute
 * path concatenates the node name and the origin into 'name'.  The
 * relative-name statement and the failure handling after the
 * concatenation are not visible in this listing.  DNS_R_NEWORIGIN is
 * chosen when relative names are in use and new_origin is set,
 * otherwise ISC_R_SUCCESS.
 *
 * A new reference on the node is taken under the strong node lock and
 * the node is stored in *nodep.
 *
 * When iterator->cleaning is set and the result is ISC_R_SUCCESS:
 *  - a full deletion batch (DELETION_BATCH_MAX) is flushed first;
 *  - expirenode() is called on the node;
 *  - if that succeeds and the node has no 'down' pointer, the node is
 *    appended to the deletions array and its reference count is
 *    incremented under the strong node lock.
 */
8662 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8665 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8666 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8667 dns_rbtnode_t *node = rbtdbiter->node;
8668 isc_result_t result;
8669 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8670 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8672 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8673 REQUIRE(rbtdbiter->node != NULL);
8675 if (rbtdbiter->paused)
8676 resume_iteration(rbtdbiter);
8679 if (rbtdbiter->common.relative_names)
8681 result = dns_name_concatenate(nodename, origin, name, NULL);
8682 if (result != ISC_R_SUCCESS)
8684 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8685 result = DNS_R_NEWORIGIN;
8687 result = ISC_R_SUCCESS;
8689 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8690 new_reference(rbtdb, node);
8691 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8693 *nodep = rbtdbiter->node;
8695 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8696 isc_result_t expire_result;
8699 * If the deletion array is full, flush it before trying
8700 * to expire the current node. The current node can't
8701 * fully deleted while the iteration cursor is still on it.
8703 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8704 flush_deletions(rbtdbiter);
8706 expire_result = expirenode(iterator->db, *nodep, 0);
8709 * expirenode() currently always returns success.
8711 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8714 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8715 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8716 dns_rbtnode_refincrement(node, &refs);
8718 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the database iterator, releasing the tree lock.
 *
 * A stored result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Pausing an already paused iterator succeeds without
 * further action.  Otherwise the paused flag is set, a held tree lock
 * (which must be a read lock) is released, and the deferred deletions
 * are flushed.  Returns ISC_R_SUCCESS.
 */
8726 dbiterator_pause(dns_dbiterator_t *iterator) {
8727 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8728 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8730 if (rbtdbiter->result != ISC_R_SUCCESS &&
8731 rbtdbiter->result != ISC_R_NOMORE)
8732 return (rbtdbiter->result);
8734 if (rbtdbiter->paused)
8735 return (ISC_R_SUCCESS);
8737 rbtdbiter->paused = ISC_TRUE;
8739 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8740 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8741 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8742 rbtdbiter->tree_locked = isc_rwlocktype_none;
8745 flush_deletions(rbtdbiter);
8747 return (ISC_R_SUCCESS);
/*
 * Copy the iterator's current origin into 'name'.
 *
 * A stored result other than ISC_R_SUCCESS is returned unchanged;
 * otherwise the result of dns_name_copy() is returned.
 */
8751 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8752 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8753 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8755 if (rbtdbiter->result != ISC_R_SUCCESS)
8756 return (rbtdbiter->result);
8758 return (dns_name_copy(origin, name, NULL));
8762 * Additional cache routines.
/*
 * Look up the cached additional-section data for the rdata at the
 * current cursor position of 'rdataset'.
 *
 * The early ISC_R_NOTIMPLEMENTED return presumably belongs to a build
 * variant selected by preprocessor lines that are not visible in this
 * listing -- TODO confirm.
 *
 * The header is located immediately before the raw slab
 * (raw - sizeof(*header)).  The array index is
 * total_count - current_count - 1, where total_count is the two-byte
 * count at the start of the slab and current_count comes from
 * privateuint4.
 *
 * With the node lock held in read mode, the array is chosen by 'type'
 * (additional_auth or additional_glue; the fromcache case body is not
 * visible here).  A missing array or entry yields ISC_R_NOTFOUND and
 * counts a query miss (for a missing array, only when 'type' is not
 * fromcache).  Otherwise the entry is attached, the node lock is
 * released, dns_acache_getentry() is called and the entry is detached.
 */
8765 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8766 dns_rdatatype_t qtype, dns_acache_t *acache,
8767 dns_zone_t **zonep, dns_db_t **dbp,
8768 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8769 dns_name_t *fname, dns_message_t *msg,
8785 return (ISC_R_NOTIMPLEMENTED);
8787 dns_rbtdb_t *rbtdb = rdataset->private1;
8788 dns_rbtnode_t *rbtnode = rdataset->private2;
8789 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8790 unsigned int current_count = rdataset->privateuint4;
8792 rdatasetheader_t *header;
8793 nodelock_t *nodelock;
8794 unsigned int total_count;
8795 acachectl_t *acarray;
8796 dns_acacheentry_t *entry;
8797 isc_result_t result;
8799 UNUSED(qtype); /* we do not use this value at least for now */
8802 header = (struct rdatasetheader *)(raw - sizeof(*header));
8804 total_count = raw[0] * 256 + raw[1];
8805 INSIST(total_count > current_count);
8806 count = total_count - current_count - 1;
8810 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8811 NODE_LOCK(nodelock, isc_rwlocktype_read);
8814 case dns_rdatasetadditional_fromauth:
8815 acarray = header->additional_auth;
8817 case dns_rdatasetadditional_fromcache:
8820 case dns_rdatasetadditional_fromglue:
8821 acarray = header->additional_glue;
8827 if (acarray == NULL) {
8828 if (type != dns_rdatasetadditional_fromcache)
8829 dns_acache_countquerymiss(acache);
8830 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8831 return (ISC_R_NOTFOUND);
8834 if (acarray[count].entry == NULL) {
8835 dns_acache_countquerymiss(acache);
8836 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8837 return (ISC_R_NOTFOUND);
8841 dns_acache_attachentry(acarray[count].entry, &entry);
8843 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8845 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8846 nodep, fname, msg, now);
8848 dns_acache_detachentry(&entry);
/*
 * Callback run for an additional-cache entry; 'arg' must be non-NULL
 * and supplies the acache_cbarg_t registered with the entry (the
 * statement extracting it is not visible in this listing).  Per the
 * in-body comment, the caller must hold the entry lock.
 *
 * With the node lock held in write mode, the array matching cbarg->type
 * is selected from cbarg->header.  If the slot at cbarg->count still
 * refers to 'entry', the slot is cleared (entry and cbarg) and the
 * callback argument is freed; the second isc_mem_put() presumably
 * belongs to the opposite branch, whose 'else' is not visible.  The
 * entry is then detached.
 *
 * After the node lock is released, the node and database references
 * held on behalf of the callback argument are dropped.
 */
8854 acache_callback(dns_acacheentry_t *entry, void **arg) {
8856 dns_rbtnode_t *rbtnode;
8857 nodelock_t *nodelock;
8858 acachectl_t *acarray = NULL;
8859 acache_cbarg_t *cbarg;
8862 REQUIRE(arg != NULL);
8866 * The caller must hold the entry lock.
8869 rbtdb = (dns_rbtdb_t *)cbarg->db;
8870 rbtnode = (dns_rbtnode_t *)cbarg->node;
8872 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8873 NODE_LOCK(nodelock, isc_rwlocktype_write);
8875 switch (cbarg->type) {
8876 case dns_rdatasetadditional_fromauth:
8877 acarray = cbarg->header->additional_auth;
8879 case dns_rdatasetadditional_fromglue:
8880 acarray = cbarg->header->additional_glue;
8886 count = cbarg->count;
8887 if (acarray != NULL && acarray[count].entry == entry) {
8888 acarray[count].entry = NULL;
8889 INSIST(acarray[count].cbarg == cbarg);
8890 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8891 acarray[count].cbarg = NULL;
8893 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8895 dns_acache_detachentry(&entry);
8897 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8899 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8900 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an additional-cache entry and release its callback argument.
 *
 * mctx:   memory context the callback argument is returned to.
 * entry:  entry to cancel via dns_acache_cancelentry().
 * cbargp: points at the callback argument; both the pointer and its
 *         target must be non-NULL.
 *
 * The node and database references stored in the callback argument are
 * released before it is freed.  The statements that load 'cbarg' from
 * *cbargp and clear *cbargp are not visible in this listing.
 */
8908 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8909 acache_cbarg_t **cbargp)
8911 acache_cbarg_t *cbarg;
8913 REQUIRE(mctx != NULL);
8914 REQUIRE(entry != NULL);
8915 REQUIRE(cbargp != NULL && *cbargp != NULL);
8919 dns_acache_cancelentry(entry);
8920 dns_db_detachnode(cbarg->db, &cbarg->node);
8921 dns_db_detach(&cbarg->db);
8923 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Store additional-section cache data for the rdata at the current
 * cursor position of 'rdataset'.
 *
 * The early ISC_R_NOTIMPLEMENTED return presumably belongs to a build
 * variant selected by preprocessor lines that are not visible in this
 * listing -- TODO confirm.
 *
 * Steps visible here:
 *  - type fromcache: nothing to do, ISC_R_SUCCESS is returned;
 *  - the header is located immediately before the raw slab and the
 *    slot index is total_count - current_count - 1;
 *  - a callback argument is allocated (ISC_R_NOMEMORY on failure) and
 *    filled with the type, index and header plus new references to the
 *    database and node;
 *  - an acache entry is created with acache_callback() as its callback
 *    and populated with dns_acache_setentry();
 *  - under the node lock in write mode, the array for 'type' is taken
 *    from the header or, if absent, allocated with total_count slots,
 *    cleared, and stored back into the header;
 *  - an existing entry in the slot is remembered, the new entry and
 *    callback argument are installed, and after the lock is released
 *    the old entry is cancelled and detached.
 *
 * The trailing statements form the failure path: if an entry was
 * created it is cancelled and detached; the reference releases and
 * isc_mem_put() of the callback argument presumably belong to the
 * opposite branch, whose 'else' is not visible in this listing.
 */
8930 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8931 dns_rdatatype_t qtype, dns_acache_t *acache,
8932 dns_zone_t *zone, dns_db_t *db,
8933 dns_dbversion_t *version, dns_dbnode_t *node,
8947 return (ISC_R_NOTIMPLEMENTED);
8949 dns_rbtdb_t *rbtdb = rdataset->private1;
8950 dns_rbtnode_t *rbtnode = rdataset->private2;
8951 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8952 unsigned int current_count = rdataset->privateuint4;
8953 rdatasetheader_t *header;
8954 unsigned int total_count, count;
8955 nodelock_t *nodelock;
8956 isc_result_t result;
8957 acachectl_t *acarray;
8958 dns_acacheentry_t *newentry, *oldentry = NULL;
8959 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8963 if (type == dns_rdatasetadditional_fromcache)
8964 return (ISC_R_SUCCESS);
8966 header = (struct rdatasetheader *)(raw - sizeof(*header));
8968 total_count = raw[0] * 256 + raw[1];
8969 INSIST(total_count > current_count);
8970 count = total_count - current_count - 1; /* should be private data */
8972 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8973 if (newcbarg == NULL)
8974 return (ISC_R_NOMEMORY);
8975 newcbarg->type = type;
8976 newcbarg->count = count;
8977 newcbarg->header = header;
8978 newcbarg->db = NULL;
8979 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8980 newcbarg->node = NULL;
8981 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8984 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8985 acache_callback, newcbarg, &newentry);
8986 if (result != ISC_R_SUCCESS)
8988 /* Set cache data in the new entry. */
8989 result = dns_acache_setentry(acache, newentry, zone, db,
8990 version, node, fname);
8991 if (result != ISC_R_SUCCESS)
8994 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8995 NODE_LOCK(nodelock, isc_rwlocktype_write);
8999 case dns_rdatasetadditional_fromauth:
9000 acarray = header->additional_auth;
9002 case dns_rdatasetadditional_fromglue:
9003 acarray = header->additional_glue;
9009 if (acarray == NULL) {
9012 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
9013 sizeof(acachectl_t));
9015 if (acarray == NULL) {
9016 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9020 for (i = 0; i < total_count; i++) {
9021 acarray[i].entry = NULL;
9022 acarray[i].cbarg = NULL;
9026 case dns_rdatasetadditional_fromauth:
9027 header->additional_auth = acarray;
9029 case dns_rdatasetadditional_fromglue:
9030 header->additional_glue = acarray;
9036 if (acarray[count].entry != NULL) {
9038 * Swap the entry. Delay cleaning-up the old entry since
9039 * it would require a node lock.
9041 oldentry = acarray[count].entry;
9042 INSIST(acarray[count].cbarg != NULL);
9043 oldcbarg = acarray[count].cbarg;
9045 acarray[count].entry = newentry;
9046 acarray[count].cbarg = newcbarg;
9048 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9050 if (oldentry != NULL) {
9051 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9052 dns_acache_detachentry(&oldentry);
9055 return (ISC_R_SUCCESS);
9058 if (newcbarg != NULL) {
9059 if (newentry != NULL) {
9060 acache_cancelentry(rbtdb->common.mctx, newentry,
9062 dns_acache_detachentry(&newentry);
9064 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9065 dns_db_detach(&newcbarg->db);
9066 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Drop the additional-data cache entry stored for the current rdata of
 * 'rdataset' under the given 'type' (auth or glue), if there is one.
 *
 * Values returned by the statements visible here:
 *	ISC_R_NOTIMPLEMENTED	the first return below; NOTE(review): presumably
 *				selected by a build-time conditional -- confirm
 *	ISC_R_SUCCESS		'type' is dns_rdatasetadditional_fromcache, or
 *				an entry was found and released
 *	ISC_R_NOTFOUND		there is no entry array for 'type', or the slot
 *				for this rdata is empty
 *
 * 'qtype' is not used.
 */
9076 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9077 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9085 return (ISC_R_NOTIMPLEMENTED);
9087 dns_rbtdb_t *rbtdb = rdataset->private1;
9088 dns_rbtnode_t *rbtnode = rdataset->private2;
9089 unsigned char *raw = rdataset->private3; /* RDATASLAB */
9090 unsigned int current_count = rdataset->privateuint4;
9091 rdatasetheader_t *header;
9092 nodelock_t *nodelock;
9093 unsigned int total_count, count;
9094 acachectl_t *acarray;
9095 dns_acacheentry_t *entry;
9096 acache_cbarg_t *cbarg;
9098 UNUSED(qtype); /* we do not use this value at least for now */
/* Nothing is stored here for the "fromcache" type, so there is nothing to drop. */
9101 if (type == dns_rdatasetadditional_fromcache)
9102 return (ISC_R_SUCCESS);
/* The rdataset header is located immediately before the raw slab data. */
9104 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* The first two bytes of the slab form a 16-bit count, high byte first. */
9106 total_count = raw[0] * 256 + raw[1];
9107 INSIST(total_count > current_count);
/* Slot index used below to address the entry array. */
9108 count = total_count - current_count - 1;
/* The entry arrays hang off the header; take the node lock for writing. */
9113 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9114 NODE_LOCK(nodelock, isc_rwlocktype_write);
/* Pick the entry array that corresponds to 'type'. */
9117 case dns_rdatasetadditional_fromauth:
9118 acarray = header->additional_auth;
9120 case dns_rdatasetadditional_fromglue:
9121 acarray = header->additional_glue;
/* No array has been set on this header for 'type'. */
9127 if (acarray == NULL) {
9128 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9129 return (ISC_R_NOTFOUND);
/* An array exists but the slot for this rdata holds no entry. */
9132 entry = acarray[count].entry;
9133 if (entry == NULL) {
9134 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9135 return (ISC_R_NOTFOUND);
/* Empty the slot while the lock is held, keeping local copies of its contents. */
9138 acarray[count].entry = NULL;
9139 cbarg = acarray[count].cbarg;
9140 acarray[count].cbarg = NULL;
9142 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* The entry is cancelled and detached only after the node lock is released. */
9144 if (entry != NULL) {
9146 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9147 dns_acache_detachentry(&entry);
9150 return (ISC_R_SUCCESS);
9155 * Routines for LRU-based cache management.
9159 * See if a given cache entry that is being reused needs to be updated
9160 * in the LRU-list. From the LRU management point of view, this function is
9161 * expected to return true for almost all cases. When used with threads,
9162 * however, this may cause a non-negligible performance penalty because a
9163 * writer lock will have to be acquired before updating the list.
9164 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9165 * function returns true if the entry has not been updated for some period of
9166 * time. We differentiate the NS or glue address case and the others since
9167 * experiments have shown that the former tends to be accessed relatively
9168 * infrequently and the cost of a cache miss is higher (e.g., a missing NS record
9169 * may cause external queries at a higher level zone, involving more
9172 * Caller must hold the node (read or write) lock.
/*
 * Decide whether 'header' should have its LRU position refreshed at time
 * 'now'.  The age thresholds below apply only when DNS_RBTDB_LIMITLRUUPDATE
 * is non-zero at compile time.
 */
9174 static inline isc_boolean_t
9175 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
/*
 * Test for headers flagged NONEXISTENT or STALE.
 * NOTE(review): the statement controlled by this test is not visible here;
 * presumably such headers are never refreshed -- confirm.
 */
9176 if ((header->attributes &
9177 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9180 #if DNS_RBTDB_LIMITLRUUPDATE
/* NS rdatasets, and A/AAAA rdatasets whose trust level is glue. */
9181 if (header->type == dns_rdatatype_ns ||
9182 (header->trust == dns_trust_glue &&
9183 (header->type == dns_rdatatype_a ||
9184 header->type == dns_rdatatype_aaaa))) {
9186 * Glue records are updated if at least 60 seconds have passed
9187 * since the previous update time.
/* True once 'now' is at least 60 seconds past 'last_used'. */
9189 return (header->last_used + 60 <= now);
9192 /* Other records are updated if 5 minutes have passed. */
9193 return (header->last_used + 300 <= now);
9202 * Update the timestamp of a given cache entry and move it to the head
9203 * of the corresponding LRU list.
9205 * Caller must hold the node (write) lock.
9207 * Note that we do NOT touch the heap here, as the TTL has not changed.
/*
 * Set 'header->last_used' to 'now' and requeue 'header' at the front of
 * the rbtdb->rdatasets[] list selected by its node's lock number.
 * Requires that 'rbtdb' is a cache and that 'header' is currently linked.
 */
9210 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9213 INSIST(IS_CACHE(rbtdb));
9215 /* To be checked: can we really assume this? XXXMLG */
9216 INSIST(ISC_LINK_LINKED(header, link));
/* Unlink, stamp, then prepend: the header ends up first in its list. */
9218 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9219 header->last_used = now;
9220 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9224 * Purge some expired and/or stale (i.e. unused for some period) cache entries
9225 * under an overmem condition. To recover from this condition quickly, up to
9226 * 2 entries will be purged. This process is triggered while adding a new
9227 * entry, and we specifically avoid purging entries in the same LRU bucket as
9228 * the one to which the new entry will belong. Otherwise, we might purge
9229 * entries of the same name of different RR types while adding RRsets from a
9230 * single response (consider the case where we're adding A and AAAA glue records
9231 * of the same NS name).
/*
 * Visit the node-lock buckets that follow 'locknum_start' (wrapping around
 * and stopping before 'locknum_start' itself), expiring headers in each one
 * for as long as 'purgecount' stays positive.  'now' is compared against
 * header TTLs; 'tree_locked' is passed through to expire_header().
 */
9234 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9235 isc_stdtime_t now, isc_boolean_t tree_locked)
9237 rdatasetheader_t *header, *header_prev;
9238 unsigned int locknum;
/* Start one bucket past 'locknum_start'; the modulo wraps the index around. */
9241 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9242 locknum != locknum_start && purgecount > 0;
9243 locknum = (locknum + 1) % rbtdb->node_lock_count) {
/* Each bucket is processed with its node lock held for writing. */
9244 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9245 isc_rwlocktype_write);
/*
 * Examine element 1 of this bucket's heap and expire it when its TTL is
 * at or before 'now - RBTDB_VIRTUAL'.
 * NOTE(review): presumably element 1 is the heap's top entry -- confirm.
 */
9247 header = isc_heap_element(rbtdb->heaps[locknum], 1);
9248 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
9249 expire_header(rbtdb, header, tree_locked);
/*
 * Walk this bucket's list from the tail towards the head.  The
 * predecessor is saved first because the header is unlinked inside
 * the loop body.
 */
9253 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9254 header != NULL && purgecount > 0;
9255 header = header_prev) {
9256 header_prev = ISC_LIST_PREV(header, link);
9258 * Unlink the entry at this point to avoid checking it
9259 * again even if it's currently used someone else and
9260 * cannot be purged at this moment. This entry won't be
9261 * referenced any more (so unlinking is safe) since the
9262 * TTL was reset to 0.
9264 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9266 expire_header(rbtdb, header, tree_locked);
/* Done with this bucket; release its node lock. */
9270 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9271 isc_rwlocktype_write);
9276 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9277 isc_boolean_t tree_locked)
9279 set_ttl(rbtdb, header, 0);
9280 header->attributes |= RDATASET_ATTR_STALE;
9281 header->node->dirty = 1;
9284 * Caller must hold the node (write) lock.
9287 if (dns_rbtnode_refcurrent(header->node) == 0) {
9289 * If no one else is using the node, we can clean it up now.
9290 * We first need to gain a new reference to the node to meet a
9291 * requirement of decrement_reference().
9293 new_reference(rbtdb, header->node);
9294 decrement_reference(rbtdb, header->node, 0,
9295 isc_rwlocktype_write,
9296 tree_locked ? isc_rwlocktype_write :
9297 isc_rwlocktype_none, ISC_FALSE);