2 * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.270.12.26 2010-12-02 05:09:58 marka Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
65 #include <dns/zonekey.h>
67 #ifdef DNS_RBTDB_VERSION64
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
80 * Note that "impmagic" is not the first four bytes of the struct, so
81 * ISC_MAGIC_VALID cannot be used.
83 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
84 (rbtdb)->common.impmagic == RBTDB_MAGIC)
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t rbtdb_serial_t;
89 * Make casting easier in symbolic debuggers by using different names
90 * for the 64 bit version.
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
96 typedef isc_uint32_t rbtdb_serial_t;
99 typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * Helpers for the packed rbtdb_rdatatype_t value.  BASE yields the low
 * 16 bits, EXT yields the bits above bit 15, and VALUE builds a packed
 * value from a base 'b' and an extension 'e' placed in the upper half.
 *
 * NOTE(review): in VALUE the shift "(e) << 16" is evaluated before the
 * cast to rbtdb_rdatatype_t.  If 'e' is a 16-bit type promoted to int,
 * values with the top bit set would be shifted into the sign bit --
 * confirm whether the cast should be applied to (e) first.
 */
101 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF))
102 #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16))
103 #define RBTDB_RDATATYPE_VALUE(b, e) ((rbtdb_rdatatype_t)((e) << 16) | (b))
105 #define RBTDB_RDATATYPE_SIGNSEC \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
119 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120 * Using rwlock is effective with regard to lookup performance only when
121 * it is implemented in an efficient way.
122 * Otherwise, it is generally wise to stick to the simple locking since rwlock
123 * would require more memory or can even make lookups slower due to its own
124 * overhead (when it internally calls mutex locks).
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
129 #define DNS_RBTDB_USERWLOCK 0
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
138 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t) LOCK(l)
141 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
145 * Since node locking is sensitive to both performance and memory footprint,
146 * we need some trick here. If we have both high-performance rwlock and
147 * high performance and small-memory reference counters, we use rwlock for
148 * node lock and isc_refcount for node references. In this case, we don't have
149 * to protect the access to the counters by locks.
150 * Otherwise, we simply use ordinary mutex lock for node locking, and use
151 * simple integers as reference counters which is protected by the lock.
152 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153 * NODE_UNLOCK. In some other cases, however, we need to protect reference
154 * counters first and then protect other parts of a node as read-only data.
155 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156 * provided for these special cases. When we can use the efficient backend
157 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159 * section including the access to the reference counter.
160 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161 * section is also protected by NODE_STRONGLOCK().
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
166 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t) RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
172 #define NODE_STRONGLOCK(l) ((void)0)
173 #define NODE_STRONGUNLOCK(l) ((void)0)
174 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
178 typedef isc_mutex_t nodelock_t;
180 #define NODE_INITLOCK(l) isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
182 #define NODE_LOCK(l, t) LOCK(l)
183 #define NODE_UNLOCK(l, t) UNLOCK(l)
184 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
186 #define NODE_STRONGLOCK(l) LOCK(l)
187 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t) ((void)0)
189 #define NODE_WEAKUNLOCK(l, t) ((void)0)
190 #define NODE_WEAKDOWNGRADE(l) ((void)0)
194 * Whether to rate-limit updating the LRU to avoid possible thread contention.
195 * Our performance measurement has shown the cost is marginal, so it's defined
196 * to be 0 by default either with or without threads.
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 * Allow clients with a virtual time of up to 5 minutes in the past to see
204 * records that would otherwise have expired.
206 #define RBTDB_VIRTUAL 300
212 dns_rdatatype_t type;
215 typedef struct acachectl acachectl_t;
217 typedef struct rdatasetheader {
219 * Locked by the owning node's lock.
221 rbtdb_serial_t serial;
223 rbtdb_rdatatype_t type;
224 isc_uint16_t attributes;
226 struct noqname *noqname;
227 struct noqname *closest;
229 * We don't use the LIST macros, because the LIST structure has
230 * both head and tail pointers, and is doubly linked.
233 struct rdatasetheader *next;
235 * If this is the top header for an rdataset, 'next' points
236 * to the top header for the next rdataset (i.e., the next type).
237 * Otherwise, it points up to the header whose down pointer points
241 struct rdatasetheader *down;
243 * Points to the header for the next older version of
249 * Monotonically increased every time this rdataset is bound so that
250 * it is used as the base of the starting point in DNS responses
251 * when the "cyclic" rrset-order is required. Since the ordering
252 * should not be so crucial, no lock is set for the counter for
253 * performance reasons.
256 acachectl_t *additional_auth;
257 acachectl_t *additional_glue;
260 isc_stdtime_t last_used;
261 ISC_LINK(struct rdatasetheader) link;
263 unsigned int heap_index;
265 * Used for TTL-based cache cleaning.
267 isc_stdtime_t resign;
270 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
273 #define RDATASET_ATTR_NONEXISTENT 0x0001
274 #define RDATASET_ATTR_STALE 0x0002
275 #define RDATASET_ATTR_IGNORE 0x0004
276 #define RDATASET_ATTR_RETAIN 0x0008
277 #define RDATASET_ATTR_NXDOMAIN 0x0010
278 #define RDATASET_ATTR_RESIGN 0x0020
279 #define RDATASET_ATTR_STATCOUNT 0x0040
280 #define RDATASET_ATTR_OPTOUT 0x0080
282 typedef struct acache_cbarg {
283 dns_rdatasetadditional_t type;
287 rdatasetheader_t *header;
291 dns_acacheentry_t *entry;
292 acache_cbarg_t *cbarg;
297 * When the cache will pre-expire data (due to memory low or other
298 * situations) before the rdataset's TTL has expired, it MUST
299 * respect the RETAIN bit and not expire the data until its TTL is
303 #undef IGNORE /* WIN32 winbase.h defines this. */
/*
 * Predicates over a header's 'attributes' bit field.  Each macro tests
 * exactly one RDATASET_ATTR_* flag and evaluates to non-zero when the
 * flag is set, except EXISTS, which is true when
 * RDATASET_ATTR_NONEXISTENT is clear (i.e. the negation of NONEXISTENT).
 * 'header' is evaluated once and must be a pointer to a structure with
 * an 'attributes' member.
 */
305 #define EXISTS(header) \
306 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
307 #define NONEXISTENT(header) \
308 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
309 #define IGNORE(header) \
310 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
311 #define RETAIN(header) \
312 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
313 #define NXDOMAIN(header) \
314 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
315 #define RESIGN(header) \
316 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
317 #define OPTOUT(header) \
318 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
320 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
323 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
324 * There is a tradeoff issue about configuring this value: if this is too
325 * small, it may cause heavier contention between threads; if this is too large,
326 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
327 * The default value should work well for most environments, but this can
328 * also be configurable at compilation time via the
329 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
330 * 1 due to the assumption of overmem_purge().
332 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
333 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
334 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
340 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
344 /* Protected in the refcount routines. */
345 isc_refcount_t references;
346 /* Locked by lock. */
347 isc_boolean_t exiting;
350 typedef struct rbtdb_changed {
351 dns_rbtnode_t * node;
353 ISC_LINK(struct rbtdb_changed) link;
356 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
364 typedef struct rbtdb_version {
366 rbtdb_serial_t serial;
368 * Protected in the refcount routines.
369 * XXXJT: should we change the lock policy based on the refcount
372 isc_refcount_t references;
373 /* Locked by database lock. */
374 isc_boolean_t writer;
375 isc_boolean_t commit_ok;
376 rbtdb_changedlist_t changed_list;
377 rdatasetheaderlist_t resigned_list;
378 ISC_LINK(struct rbtdb_version) link;
379 dns_db_secure_t secure;
380 isc_boolean_t havensec3;
381 /* NSEC3 parameters */
384 isc_uint16_t iterations;
385 isc_uint8_t salt_length;
386 unsigned char salt[DNS_NSEC3_SALTSIZE];
389 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
394 #if DNS_RBTDB_USERWLOCK
399 isc_rwlock_t tree_lock;
400 unsigned int node_lock_count;
401 rbtdb_nodelock_t * node_locks;
402 dns_rbtnode_t * origin_node;
403 dns_stats_t * rrsetstats; /* cache DB only */
404 /* Locked by lock. */
406 isc_refcount_t references;
407 unsigned int attributes;
408 rbtdb_serial_t current_serial;
409 rbtdb_serial_t least_serial;
410 rbtdb_serial_t next_serial;
411 rbtdb_version_t * current_version;
412 rbtdb_version_t * future_version;
413 rbtdb_versionlist_t open_versions;
415 dns_dbnode_t *soanode;
416 dns_dbnode_t *nsnode;
419 * This is a linked list used to implement the LRU cache. There will
420 * be node_lock_count linked lists here. Nodes in bucket 1 will be
421 * placed on the linked list rdatasets[1].
423 rdatasetheaderlist_t *rdatasets;
426 * Temporary storage for stale cache nodes and dynamically deleted
427 * nodes that await being cleaned up.
429 rbtnodelist_t *deadnodes;
432 * Heaps. Each of these is used for TTL based expiry.
436 /* Locked by tree_lock. */
441 unsigned int quantum;
444 #define RBTDB_ATTR_LOADED 0x01
445 #define RBTDB_ATTR_LOADING 0x02
452 rbtdb_version_t * rbtversion;
453 rbtdb_serial_t serial;
454 unsigned int options;
455 dns_rbtnodechain_t chain;
456 isc_boolean_t copy_name;
457 isc_boolean_t need_cleanup;
459 dns_rbtnode_t * zonecut;
460 rdatasetheader_t * zonecut_rdataset;
461 rdatasetheader_t * zonecut_sigrdataset;
462 dns_fixedname_t zonecut_name;
474 static void rdataset_disassociate(dns_rdataset_t *rdataset);
475 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
476 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
477 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
478 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
479 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
480 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
483 dns_rdataset_t *negsig);
484 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
487 dns_rdataset_t *negsig);
488 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
489 dns_rdatasetadditional_t type,
490 dns_rdatatype_t qtype,
491 dns_acache_t *acache,
494 dns_dbversion_t **versionp,
495 dns_dbnode_t **nodep,
499 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
500 dns_rdatasetadditional_t type,
501 dns_rdatatype_t qtype,
502 dns_acache_t *acache,
505 dns_dbversion_t *version,
508 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
509 dns_rdataset_t *rdataset,
510 dns_rdatasetadditional_t type,
511 dns_rdatatype_t qtype);
512 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
514 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
516 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
517 isc_boolean_t tree_locked);
518 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
519 isc_stdtime_t now, isc_boolean_t tree_locked);
520 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
521 rdatasetheader_t *newheader);
522 static void prune_tree(isc_task_t *task, isc_event_t *event);
523 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
524 static void rdataset_expire(dns_rdataset_t *rdataset);
526 static dns_rdatasetmethods_t rdataset_methods = {
527 rdataset_disassociate,
537 rdataset_getadditional,
538 rdataset_setadditional,
539 rdataset_putadditional,
544 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
545 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
546 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
547 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
548 dns_rdataset_t *rdataset);
550 static dns_rdatasetitermethods_t rdatasetiter_methods = {
551 rdatasetiter_destroy,
/*
 * rbtdb-specific rdataset iterator: the generic dns_rdatasetiter_t state
 * ('common') followed by a pointer to a rdatasetheader_t ('current').
 * NOTE(review): 'current' is presumably the header at the iterator's
 * present position -- confirm against rdatasetiter_first/next.
 */
557 typedef struct rbtdb_rdatasetiter {
558 dns_rdatasetiter_t common;
559 rdatasetheader_t * current;
560 } rbtdb_rdatasetiter_t;
562 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
563 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
564 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
565 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
567 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
568 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
569 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
570 dns_dbnode_t **nodep,
572 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
573 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
576 static dns_dbiteratormethods_t dbiterator_methods = {
588 #define DELETION_BATCH_MAX 64
591 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
/*
 * rbtdb-specific database iterator.  Embeds the generic dns_dbiterator_t
 * as 'common' and keeps two node chains ('chain' and 'nsec3chain') with
 * 'current' pointing at a dns_rbtnodechain_t.
 * NOTE(review): some members of the original structure are not visible
 * in this excerpt.
 */
593 typedef struct rbtdb_dbiterator {
594 dns_dbiterator_t common;
/* When ISC_TRUE the tree lock is not being held. */
595 isc_boolean_t paused;
596 isc_boolean_t new_origin;
/* Lock type recorded for the tree lock. */
597 isc_rwlocktype_t tree_locked;
599 dns_fixedname_t name;
600 dns_fixedname_t origin;
601 dns_rbtnodechain_t chain;
602 dns_rbtnodechain_t nsec3chain;
603 dns_rbtnodechain_t *current;
/* Fixed-size batch of node pointers, at most DELETION_BATCH_MAX entries. */
605 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
607 isc_boolean_t nsec3only;
608 isc_boolean_t nonsec3;
609 } rbtdb_dbiterator_t;
612 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
613 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
615 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
617 static void overmem(dns_db_t *db, isc_boolean_t overmem);
618 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
619 isc_boolean_t *nsec3createflag);
622 * 'init_count' is used to initialize 'newheader->count' which in turn
623 * is used to determine where in the cycle rrset-order cyclic starts.
624 * We don't lock this as we don't care about simultaneous updates.
627 * Both init_count and header->count can be ISC_UINT32_MAX.
628 * The count on the returned rdataset however can't be as
629 * that indicates that the database does not implement cyclic
632 static unsigned int init_count;
637 * If a routine is going to lock more than one lock in this module, then
638 * the locking must be done in the following order:
642 * Node Lock (Only one from the set may be locked at one time by
647 * Failure to follow this hierarchy can result in deadlock.
653 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * Take an additional reference on the database 'source'.
 *
 * 'source' must be a valid rbtdb (checked with VALID_RBTDB); its
 * 'references' counter is incremented.
 * NOTE(review): the remainder of this function (including how 'targetp'
 * is filled in) is not visible in this excerpt.
 */
662 attach(dns_db_t *source, dns_db_t **targetp) {
663 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
665 REQUIRE(VALID_RBTDB(rbtdb));
667 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * Task event handler that calls free_rbtdb().
 *
 * The database is taken from event->ev_arg, and the event itself is
 * handed on to free_rbtdb() with logging enabled (log == ISC_TRUE).
 */
673 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
674 dns_rbtdb_t *rbtdb = event->ev_arg;
678 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * Adjust the rrset statistics counter that corresponds to 'header'.
 *
 * The statistics type is derived from the header: NXDOMAIN headers get
 * the NXDOMAIN attribute; headers whose base type is 0 get the NXRRSET
 * attribute and use the extended type as the base; all others use the
 * base type directly.  The counter in rbtdb->rrsetstats is then
 * incremented or decremented.
 * NOTE(review): the test on 'increment' that selects between the two
 * calls is not visible in this excerpt.
 *
 * Requires that 'rbtdb' is a cache database.
 */
682 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
683 isc_boolean_t increment)
685 dns_rdatastatstype_t statattributes = 0;
686 dns_rdatastatstype_t base = 0;
687 dns_rdatastatstype_t type;
689 /* At the moment we count statistics only for cache DB */
690 INSIST(IS_CACHE(rbtdb));
/* Classify the header to pick the statistics attribute and base type. */
692 if (NXDOMAIN(header))
693 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
694 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
695 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
696 base = RBTDB_RDATATYPE_EXT(header->type);
698 base = RBTDB_RDATATYPE_BASE(header->type);
700 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
702 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
704 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * Store 'newttl' in header->rdh_ttl and, for cache databases, notify the
 * heap that holds the header that its key changed.
 *
 * Nothing beyond the assignment happens when the database is not a
 * cache, when the header is not on a heap (heap_index == 0), when the
 * TTL did not actually change, or when no heap exists for the header's
 * lock bucket (header->node->locknum).
 * NOTE(review): the condition choosing between isc_heap_increased() and
 * isc_heap_decreased() is not visible in this excerpt.
 */
708 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
/* Remember the previous TTL so an unchanged value can be detected below. */
713 oldttl = header->rdh_ttl;
714 header->rdh_ttl = newttl;
716 if (!IS_CACHE(rbtdb))
720 * It's possible the rbtdb is not a cache. If this is the case,
721 * we will not have a heap, and we move on. If we do, though,
722 * we might need to adjust things.
724 if (header->heap_index == 0 || newttl == oldttl)
/* The heap is selected by the lock bucket of the owning node. */
726 idx = header->node->locknum;
727 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
729 heap = rbtdb->heaps[idx];
732 isc_heap_increased(heap, header->heap_index);
734 isc_heap_decreased(heap, header->heap_index);
738 * These functions allow the heap code to rank the priority of each
739 * element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * Heap ordering callback keyed on TTL: compares the 'rdh_ttl' fields of
 * the two rdatasetheader_t objects passed as 'v1' and 'v2'.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
742 ttl_sooner(void *v1, void *v2) {
743 rdatasetheader_t *h1 = v1;
744 rdatasetheader_t *h2 = v2;
746 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * Heap ordering callback keyed on re-signing time: compares the 'resign'
 * fields of the two rdatasetheader_t objects passed as 'v1' and 'v2'.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
752 resign_sooner(void *v1, void *v2) {
753 rdatasetheader_t *h1 = v1;
754 rdatasetheader_t *h2 = v2;
756 if (h1->resign < h2->resign)
762 * This function sets the heap index into the header.
/*
 * Heap index callback: records 'index' in the 'heap_index' member of the
 * rdatasetheader_t passed as 'what'.
 */
765 set_index(void *what, unsigned int index) {
766 rdatasetheader_t *h = what;
768 h->heap_index = index;
772 * Work out how many nodes can be deleted in the time between two
773 * requests to the nameserver. Smooth the resulting number and use it
774 * as an estimate for the number of nodes to be deleted in the next
/*
 * Compute a new deletion quantum from the previous one ('old') and the
 * time elapsed since 'start'.
 *
 * The target interval is 1000000 / pps microseconds, where pps is read
 * from dns_pps.  The raw estimate old * interval / usecs is then smoothed
 * as (new + 3 * old) / 4, and the result is logged at debug level 1.
 * NOTE(review): the guards for pps == 0 and usecs == 0, the clamping of
 * the result and the return statement are not visible in this excerpt.
 * NOTE(review): the log format uses "%d"; the declaration of 'new' is
 * not visible here -- confirm the specifier matches its type.
 */
778 adjust_quantum(unsigned int old, isc_time_t *start) {
779 unsigned int pps = dns_pps; /* packets per second */
780 unsigned int interval;
789 interval = 1000000 / pps; /* interval in usec */
/* Time actually spent since 'start', in microseconds. */
792 usecs = isc_time_microdiff(&end, start);
795 * We were unable to measure the amount of time taken.
796 * Double the nodes deleted next time.
/* Scale the old quantum by the ratio of target interval to time taken. */
803 new = old * interval;
804 new /= (unsigned int)usecs;
/* Weighted average: one part new estimate, three parts old value. */
811 new = (new + old * 3) / 4;
813 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
814 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * Release every resource owned by 'rbtdb' and finally the structure
 * itself.
 *
 * Visible steps, in order:
 *  - for class-IN cache databases, call overmem();
 *  - drop and free the current version (no future version may exist);
 *  - unlink all nodes still on the per-bucket dead-node lists;
 *  - destroy the main tree and the NSEC3 tree with dns_rbt_destroy2(),
 *    limited by rbtdb->quantum (100 when a task is attached, else 0);
 *    when that returns ISC_R_QUOTA the quantum is re-estimated with
 *    adjust_quantum() and a DNS_EVENT_FREESTORAGE event is sent to
 *    rbtdb->task;
 *  - free the origin name, node locks, LRU lists, dead-node buckets,
 *    heaps, statistics, locks and the task reference;
 *  - clear the magic numbers, return the memory and notify 'ondest'.
 *
 * NOTE(review): several lines (local declarations, the early return after
 * isc_task_send(), the 'log' test) are not visible in this excerpt;
 * presumably the event causes free_rbtdb_callback() to resume the work.
 */
820 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
822 isc_ondestroy_t ondest;
824 char buf[DNS_NAME_FORMATSIZE];
827 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
828 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
830 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
831 REQUIRE(rbtdb->future_version == NULL);
/* Release the reference held on the current version and free it. */
833 if (rbtdb->current_version != NULL) {
836 isc_refcount_decrement(&rbtdb->current_version->references,
839 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
840 isc_refcount_destroy(&rbtdb->current_version->references);
841 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
842 sizeof(rbtdb_version_t));
846 * We assume the number of remaining dead nodes is reasonably small;
847 * the overhead of unlinking all nodes here should be negligible.
849 for (i = 0; i < rbtdb->node_lock_count; i++) {
852 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
853 while (node != NULL) {
854 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
855 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* A quantum of 0 is used when there is no task to continue the work. */
860 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
/* Tear down the main tree, possibly in several quantum-limited passes. */
862 if (rbtdb->tree != NULL) {
863 isc_time_now(&start);
864 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
865 if (result == ISC_R_QUOTA) {
866 INSIST(rbtdb->task != NULL);
867 if (rbtdb->quantum != 0)
868 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
871 event = isc_event_allocate(rbtdb->common.mctx,
873 DNS_EVENT_FREESTORAGE,
876 sizeof(isc_event_t));
879 isc_task_send(rbtdb->task, &event);
882 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
/* Same procedure for the NSEC3 tree. */
885 if (rbtdb->nsec3 != NULL) {
886 isc_time_now(&start);
887 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
888 if (result == ISC_R_QUOTA) {
889 INSIST(rbtdb->task != NULL);
890 if (rbtdb->quantum != 0)
891 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
894 event = isc_event_allocate(rbtdb->common.mctx,
896 DNS_EVENT_FREESTORAGE,
899 sizeof(isc_event_t));
902 isc_task_send(rbtdb->task, &event);
905 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
909 isc_event_free(&event);
/* Format the origin for the log message; a fixed placeholder is used otherwise. */
911 if (dns_name_dynamic(&rbtdb->common.origin))
912 dns_name_format(&rbtdb->common.origin, buf,
915 strcpy(buf, "<UNKNOWN>");
916 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
917 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
918 "done free_rbtdb(%s)", buf);
920 if (dns_name_dynamic(&rbtdb->common.origin))
921 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
/* Destroy the per-bucket reference counters and node locks. */
922 for (i = 0; i < rbtdb->node_lock_count; i++) {
923 isc_refcount_destroy(&rbtdb->node_locks[i].references);
924 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
928 * Clean up LRU / re-signing order lists.
930 if (rbtdb->rdatasets != NULL) {
931 for (i = 0; i < rbtdb->node_lock_count; i++)
932 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
933 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
934 rbtdb->node_lock_count *
935 sizeof(rdatasetheaderlist_t));
938 * Clean up dead node buckets.
940 if (rbtdb->deadnodes != NULL) {
941 for (i = 0; i < rbtdb->node_lock_count; i++)
942 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
943 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
944 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
947 * Clean up heap objects.
949 if (rbtdb->heaps != NULL) {
950 for (i = 0; i < rbtdb->node_lock_count; i++)
951 isc_heap_destroy(&rbtdb->heaps[i]);
952 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
953 rbtdb->node_lock_count *
954 sizeof(isc_heap_t *));
957 if (rbtdb->rrsetstats != NULL)
958 dns_stats_detach(&rbtdb->rrsetstats);
960 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
961 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
962 isc_rwlock_destroy(&rbtdb->tree_lock);
963 isc_refcount_destroy(&rbtdb->references);
964 if (rbtdb->task != NULL)
965 isc_task_detach(&rbtdb->task);
967 RBTDB_DESTROYLOCK(&rbtdb->lock);
/* Invalidate the object before its memory is returned. */
968 rbtdb->common.magic = 0;
969 rbtdb->common.impmagic = 0;
/* Copy 'ondest' out first: 'rbtdb' is freed by the next call. */
970 ondest = rbtdb->common.ondest;
971 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
972 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * Free the database if nothing is using it any more.
 *
 * Detaches the cached SOA and NS nodes, marks every node-lock bucket as
 * 'exiting' under its lock, and subtracts the number of inactive buckets
 * from rbtdb->active under the database lock.  When 'active' reaches
 * zero, free_rbtdb() is called after logging the origin name.
 * NOTE(review): the code that counts 'inactive' buckets and the test on
 * 'want_free' are not visible in this excerpt.
 */
976 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
977 isc_boolean_t want_free = ISC_FALSE;
979 unsigned int inactive = 0;
981 /* XXX check for open versions here */
983 if (rbtdb->soanode != NULL)
984 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
985 if (rbtdb->nsnode != NULL)
986 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
989 * Even though there are no external direct references, there still
990 * may be nodes in use.
992 for (i = 0; i < rbtdb->node_lock_count; i++) {
993 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
994 rbtdb->node_locks[i].exiting = ISC_TRUE;
995 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
996 if (isc_refcount_current(&rbtdb->node_locks[i].references)
/* 'active' is updated only while holding the database lock. */
1002 if (inactive != 0) {
1003 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1004 rbtdb->active -= inactive;
1005 if (rbtdb->active == 0)
1006 want_free = ISC_TRUE;
1007 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1009 char buf[DNS_NAME_FORMATSIZE];
1010 if (dns_name_dynamic(&rbtdb->common.origin))
1011 dns_name_format(&rbtdb->common.origin, buf,
1014 strcpy(buf, "<UNKNOWN>");
1015 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1016 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1017 "calling free_rbtdb(%s)", buf);
1018 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Drop one reference on the database pointed to by '*dbp'.
 *
 * The handle must be a valid rbtdb.  The 'references' counter is
 * decremented and maybe_free_rbtdb() is called.
 * NOTE(review): the test on 'refs' guarding the maybe_free_rbtdb() call
 * and the clearing of '*dbp' are not visible in this excerpt.
 */
1024 detach(dns_db_t **dbp) {
1025 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1028 REQUIRE(VALID_RBTDB(rbtdb));
1030 isc_refcount_decrement(&rbtdb->references, &refs);
1033 maybe_free_rbtdb(rbtdb);
/*
 * Return a referenced handle to the database's current version.
 *
 * Under the database read lock, rbtdb->current_version is read and its
 * reference count incremented; the version is then stored in '*versionp'.
 * 'db' must be a valid rbtdb.
 */
1039 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1040 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1041 rbtdb_version_t *version;
1044 REQUIRE(VALID_RBTDB(rbtdb));
/* Take the reference while the lock is held. */
1046 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1047 version = rbtdb->current_version;
1048 isc_refcount_increment(&version->references, &refs);
1049 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1051 *versionp = (dns_dbversion_t *)version;
/*
 * Allocate and initialise a version object from 'mctx'.
 *
 * The new version gets the given 'serial', a reference counter
 * initialised to 'references', the given 'writer' flag, commit_ok set to
 * ISC_FALSE, and empty changed/resigned lists.  If initialising the
 * reference counter fails, the memory is returned to 'mctx'.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers in this file treat a NULL result as allocation failure.
 */
1054 static inline rbtdb_version_t *
1055 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1056 unsigned int references, isc_boolean_t writer)
1058 isc_result_t result;
1059 rbtdb_version_t *version;
1061 version = isc_mem_get(mctx, sizeof(*version));
1062 if (version == NULL)
1064 version->serial = serial;
1065 result = isc_refcount_init(&version->references, references);
/* Undo the allocation when the counter could not be set up. */
1066 if (result != ISC_R_SUCCESS) {
1067 isc_mem_put(mctx, version, sizeof(*version));
1070 version->writer = writer;
1071 version->commit_ok = ISC_FALSE;
1072 ISC_LIST_INIT(version->changed_list);
1073 ISC_LIST_INIT(version->resigned_list);
1074 ISC_LINK_INIT(version, link);
/*
 * Open a new version of the database.
 *
 * Requires a valid rbtdb, '*versionp' == NULL, and that no future version
 * is already open.  Under the database write lock a version with serial
 * rbtdb->next_serial and one reference is allocated, marked commit_ok,
 * and given the 'secure' and NSEC3 settings of the current version (the
 * NSEC3 fields are zeroed when the current version has none).
 * next_serial is then advanced and the version recorded as
 * rbtdb->future_version.
 *
 * Returns ISC_R_SUCCESS with the version in '*versionp', or
 * ISC_R_NOMEMORY if the version could not be allocated.
 */
1080 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1081 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1082 rbtdb_version_t *version;
1084 REQUIRE(VALID_RBTDB(rbtdb));
1085 REQUIRE(versionp != NULL && *versionp == NULL);
1086 REQUIRE(rbtdb->future_version == NULL);
1088 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1089 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1090 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1092 if (version != NULL) {
1093 version->commit_ok = ISC_TRUE;
/* Inherit security and NSEC3 state from the current version. */
1094 version->secure = rbtdb->current_version->secure;
1095 version->havensec3 = rbtdb->current_version->havensec3;
1096 if (version->havensec3) {
1097 version->flags = rbtdb->current_version->flags;
1098 version->iterations =
1099 rbtdb->current_version->iterations;
1100 version->hash = rbtdb->current_version->hash;
1101 version->salt_length =
1102 rbtdb->current_version->salt_length;
1103 memcpy(version->salt, rbtdb->current_version->salt,
1104 version->salt_length);
1107 version->iterations = 0;
1109 version->salt_length = 0;
1110 memset(version->salt, 0, sizeof(version->salt));
1112 rbtdb->next_serial++;
1113 rbtdb->future_version = version;
1115 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1117 if (version == NULL)
1118 return (ISC_R_NOMEMORY);
1120 *versionp = version;
1122 return (ISC_R_SUCCESS);
/*
 * Take an additional reference on version 'source' and return it through
 * '*targetp'.  'db' must be a valid rbtdb; the version's reference
 * counter is incremented.
 */
1126 attachversion(dns_db_t *db, dns_dbversion_t *source,
1127 dns_dbversion_t **targetp)
1129 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1130 rbtdb_version_t *rbtversion = source;
1133 REQUIRE(VALID_RBTDB(rbtdb));
1135 isc_refcount_increment(&rbtversion->references, &refs);
1138 *targetp = rbtversion;
/*
 * Record 'node' on the changed list of the writer 'version'.
 *
 * A rbtdb_changed_t is allocated before the database write lock is
 * taken.  On success the node's reference count is incremented and the
 * record (with dirty == ISC_FALSE) is appended to version->changed_list.
 * The visible code also sets version->commit_ok to ISC_FALSE.
 * NOTE(review): whether that assignment is limited to the allocation
 * failure path, and the return statement, are not visible in this
 * excerpt.
 *
 * Requires that 'version' is a writer.
 */
1141 static rbtdb_changed_t *
1142 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1143 dns_rbtnode_t *node)
1145 rbtdb_changed_t *changed;
1149 * Caller must be holding the node lock if its reference must be
1150 * protected by the lock.
1153 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1155 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1157 REQUIRE(version->writer);
1159 if (changed != NULL) {
/* The changed record holds its own reference on the node. */
1160 dns_rbtnode_refincrement(node, &refs);
1162 changed->node = node;
1163 changed->dirty = ISC_FALSE;
1164 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1166 version->commit_ok = ISC_FALSE;
1168 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Free the additional-cache control array attached to 'header'.
 *
 * The number of entries is read from the first two bytes of the data
 * that immediately follows the header (big-endian 16-bit count).  Every
 * entry must already be empty (entry == NULL and cbarg == NULL); the
 * array of 'count' acachectl_t elements is then returned to 'mctx'.
 * NOTE(review): the remaining parameters and any early return for a NULL
 * array are not visible in this excerpt.
 */
1174 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1179 unsigned char *raw; /* RDATASLAB */
1182 * The caller must be holding the corresponding node lock.
/* The slab data starts right after the header structure. */
1188 raw = (unsigned char *)header + sizeof(*header);
1189 count = raw[0] * 256 + raw[1];
1192 * Sanity check: since an additional cache entry has a reference to
1193 * the original DB node (in the callback arg), there should be no
1194 * acache entries when the node can be freed.
1196 for (i = 0; i < count; i++)
1197 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1199 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * Free the noqname structure pointed to by '*noqname'.
 *
 * Releases the name if it was dynamically allocated, the 'neg' and
 * 'negsig' slabs when present (sized with dns_rdataslab_size()), and then
 * the structure itself, all back to 'mctx'.
 * NOTE(review): whether '*noqname' is reset to NULL afterwards is not
 * visible in this excerpt.
 */
1203 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1205 if (dns_name_dynamic(&(*noqname)->name))
1206 dns_name_free(&(*noqname)->name, mctx);
1207 if ((*noqname)->neg != NULL)
1208 isc_mem_put(mctx, (*noqname)->neg,
1209 dns_rdataslab_size((*noqname)->neg, 0));
1210 if ((*noqname)->negsig != NULL)
1211 isc_mem_put(mctx, (*noqname)->negsig,
1212 dns_rdataslab_size((*noqname)->negsig, 0));
1213 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * Initialise the list link of header 'h'.
 *
 * NOTE(review): the fprintf() below looks like trace output for class-IN
 * cache databases; any preprocessor guard around it, and any further
 * field initialisation, is not visible in this excerpt.
 */
1218 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1220 ISC_LINK_INIT(h, link);
1224 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1225 fprintf(stderr, "initialized header: %p\n", h);
/*
 * Allocate a rdatasetheader_t from 'mctx' and pass it to init_rdataset().
 *
 * NOTE(review): the NULL check after isc_mem_get(), any preprocessor
 * guard around the fprintf() trace output, and the return statement are
 * not visible in this excerpt.
 */
1231 static inline rdatasetheader_t *
1232 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1234 rdatasetheader_t *h;
1236 h = isc_mem_get(mctx, sizeof(*h));
1241 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1242 fprintf(stderr, "allocated header: %p\n", h);
1244 init_rdataset(rbtdb, h);
/*
 * Release the header 'rdataset' and everything hanging off it.
 *
 * In order: decrement the rrset statistics if the header exists and was
 * counted (RDATASET_ATTR_STATCOUNT); unlink it from the LRU list of its
 * lock bucket if linked (cache databases only); remove it from the
 * bucket's heap if it has a heap index; free the noqname/closest proofs
 * and both additional-cache arrays; finally return the memory to 'mctx'.
 * The size freed is sizeof(*rdataset) for NONEXISTENT headers and the
 * slab size reported by dns_rdataslab_size() otherwise.
 */
1249 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1254 if (EXISTS(rdataset) &&
1255 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1256 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
/* LRU list and heap are both indexed by the owning node's lock bucket. */
1259 idx = rdataset->node->locknum;
1260 if (ISC_LINK_LINKED(rdataset, link)) {
1261 INSIST(IS_CACHE(rbtdb));
1262 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1264 if (rdataset->heap_index != 0)
1265 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1266 rdataset->heap_index = 0;
1268 if (rdataset->noqname != NULL)
1269 free_noqname(mctx, &rdataset->noqname);
1270 if (rdataset->closest != NULL)
1271 free_noqname(mctx, &rdataset->closest);
1273 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1274 free_acachearray(mctx, rdataset, rdataset->additional_glue);
/* A nonexistent header has no slab data behind it. */
1276 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1277 size = sizeof(*rdataset);
1279 size = dns_rdataslab_size((unsigned char *)rdataset,
1281 isc_mem_put(mctx, rdataset, size);
/*
 * Mark every rdataset header of 'node' whose serial equals 'serial' as
 * IGNORE, both in the top-level list and in each header's 'down' chain.
 * 'make_dirty' records whether any header was marked.
 * NOTE(review): how 'make_dirty' is applied to the node is not visible in
 * this excerpt.
 */
1285 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1286 rdatasetheader_t *header, *dcurrent;
1287 isc_boolean_t make_dirty = ISC_FALSE;
1290 * Caller must hold the node lock.
1294 * We set the IGNORE attribute on rdatasets with serial number
1295 * 'serial'. When the reference count goes to zero, these rdatasets
1296 * will be cleaned up; until that time, they will be ignored.
/* Outer loop: top-level headers linked through 'next'. */
1298 for (header = node->data; header != NULL; header = header->next) {
1299 if (header->serial == serial) {
1300 header->attributes |= RDATASET_ATTR_IGNORE;
1301 make_dirty = ISC_TRUE;
/* Inner loop: older versions of the same header linked through 'down'. */
1303 for (dcurrent = header->down;
1305 dcurrent = dcurrent->down) {
1306 if (dcurrent->serial == serial) {
1307 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1308 make_dirty = ISC_TRUE;
/*
 * Free every header on the 'down' chain of 'top'.  'top' itself is not
 * freed by the code visible here.
 */
1317 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1319 rdatasetheader_t *d, *down_next;
/* Save the successor before freeing, since 'd' is gone afterwards. */
1321 for (d = top->down; d != NULL; d = down_next) {
1322 down_next = d->down;
1323 free_rdataset(rbtdb, mctx, d);
/*
 * Clean the rdataset headers of a cache node: for each top-level header,
 * free its 'down' chain, and unlink and free the header itself when it is
 * NONEXISTENT or STALE.
 * NOTE(review): the initialization and update of 'top_prev' are not
 * visible in this excerpt.
 */
1329 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1330 rdatasetheader_t *current, *top_prev, *top_next;
1331 isc_mem_t *mctx = rbtdb->common.mctx;
1334 * Caller must be holding the node lock.
/* 'top_next' is saved first because 'current' may be freed below. */
1338 for (current = node->data; current != NULL; current = top_next) {
1339 top_next = current->next;
1340 clean_stale_headers(rbtdb, mctx, current);
1342 * If current is nonexistent or stale, we can clean it up.
1344 if ((current->attributes &
1345 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
/* Unlink from the predecessor, or from the node when it is the head. */
1346 if (top_prev != NULL)
1347 top_prev->next = current->next;
1349 node->data = current->next;
1350 free_rdataset(rbtdb, mctx, current);
/*
 * Clean the rdataset headers of a zone node.  For each top-level header
 * this removes redundant or IGNOREd versions from its 'down' chain, removes
 * or replaces an IGNOREd top-level header, frees versions older than
 * 'least_serial', and frees a NONEXISTENT top-level header.  'still_dirty'
 * is set when a header still has a 'down' chain afterwards.
 * 'least_serial' must be non-zero.
 * NOTE(review): several control-flow lines (initialization of 'dparent'
 * and 'top_prev', else/continue/break lines, and the final use of
 * 'still_dirty') are not visible in this excerpt.
 */
1358 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1359 rbtdb_serial_t least_serial)
1361 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1362 rdatasetheader_t *top_prev, *top_next;
1363 isc_mem_t *mctx = rbtdb->common.mctx;
1364 isc_boolean_t still_dirty = ISC_FALSE;
1367 * Caller must be holding the node lock.
1369 REQUIRE(least_serial != 0);
/* 'top_next' is saved first because 'current' may be freed below. */
1372 for (current = node->data; current != NULL; current = top_next) {
1373 top_next = current->next;
1376 * First, we clean up any instances of multiple rdatasets
1377 * with the same serial number, or that have the IGNORE
1381 for (dcurrent = current->down;
1383 dcurrent = down_next) {
1384 down_next = dcurrent->down;
/* Serials must not increase when walking down the chain. */
1385 INSIST(dcurrent->serial <= dparent->serial);
1386 if (dcurrent->serial == dparent->serial ||
/* Splice 'dcurrent' out of the chain before freeing it. */
1388 if (down_next != NULL)
1389 down_next->next = dparent;
1390 dparent->down = down_next;
1391 free_rdataset(rbtdb, mctx, dcurrent);
1397 * We've now eliminated all IGNORE datasets with the possible
1398 * exception of current, which we now check.
1400 if (IGNORE(current)) {
1401 down_next = current->down;
/* No older version exists: drop 'current' from the top-level list. */
1402 if (down_next == NULL) {
1403 if (top_prev != NULL)
1404 top_prev->next = current->next;
1406 node->data = current->next;
1407 free_rdataset(rbtdb, mctx, current);
1409 * current no longer exists, so we can
1410 * just continue with the loop.
1415 * Pull up current->down, making it the new
1418 if (top_prev != NULL)
1419 top_prev->next = down_next;
1421 node->data = down_next;
1422 down_next->next = top_next;
1423 free_rdataset(rbtdb, mctx, current);
1424 current = down_next;
1429 * We now try to find the first down node less than the
1433 for (dcurrent = current->down;
1435 dcurrent = down_next) {
1436 down_next = dcurrent->down;
1437 if (dcurrent->serial < least_serial)
1443 * If there is a such an rdataset, delete it and any older
1446 if (dcurrent != NULL) {
1448 down_next = dcurrent->down;
1449 INSIST(dcurrent->serial <= least_serial);
1450 free_rdataset(rbtdb, mctx, dcurrent);
1451 dcurrent = down_next;
1452 } while (dcurrent != NULL);
/* Everything below 'dparent' has been freed; terminate the chain. */
1453 dparent->down = NULL;
1457 * Note. The serial number of 'current' might be less than
1458 * least_serial too, but we cannot delete it because it is
1459 * the most recent version, unless it is a NONEXISTENT
/* Older versions remain, so the node needs another cleaning pass later. */
1462 if (current->down != NULL) {
1463 still_dirty = ISC_TRUE;
1467 * If this is a NONEXISTENT rdataset, we can delete it.
1469 if (NONEXISTENT(current)) {
1470 if (top_prev != NULL)
1471 top_prev->next = current->next;
1473 node->data = current->next;
1474 free_rdataset(rbtdb, mctx, current);
1484 * Clean up dead nodes. These are nodes which have no references, and
1485 * have no data. They are dead but we could not or chose not to delete
1486 * them when we deleted all the data at that node because we did not want
1487 * to wait for the tree write lock.
1489 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1492 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1493 dns_rbtnode_t *node;
1494 isc_result_t result;
/* Budget for one call; the loop condition below stops once it reaches 0. */
1495 int count = 10; /* XXXJT: should be adjustable */
/* Always work from the head of the bucket's dead-node list. */
1497 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1498 while (node != NULL && count > 0) {
1499 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1502 * Since we're holding a tree write lock, it should be
1503 * impossible for this node to be referenced by others.
1505 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1506 node->data == NULL);
1508 INSIST(!ISC_LINK_LINKED(node, deadlink));
/*
 * The node is deleted from either the NSEC3 tree or the main tree.
 * NOTE(review): the condition selecting the tree is not visible in this
 * excerpt.
 */
1510 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1513 result = dns_rbt_deletenode(rbtdb->tree, node,
/* A failed deletion is only logged as a warning; the loop continues. */
1515 if (result != ISC_R_SUCCESS)
1516 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1517 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1518 "cleanup_dead_nodes: "
1519 "dns_rbt_deletenode: %s",
1520 isc_result_totext(result));
1521 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1527 * Caller must be holding the node lock if its reference must be protected
/*
 * Add one reference to 'node'.  When this is the node's first reference,
 * the reference counter of the node's lock bucket is incremented as well.
 */
1531 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1532 unsigned int lockrefs, noderefs;
1533 isc_refcount_t *lockref;
/* 'noderefs' receives the node's reference count after the increment. */
1535 dns_rbtnode_refincrement0(node, &noderefs);
1536 if (noderefs == 1) { /* this is the first reference to the node */
1537 lockref = &rbtdb->node_locks[node->locknum].references;
1538 isc_refcount_increment0(lockref, &lockrefs);
1539 INSIST(lockrefs != 0);
1541 INSIST(noderefs != 0);
1545 * This function is assumed to be called when a node is newly referenced
1546 * and can be in the deadnode list. In that case the node must be retrieved
1547 * from the list because it is going to be used. In addition, if the caller
1548 * happens to hold a write lock on the tree, it's a good chance to purge dead
1550 * Note: while a new reference is gained in multiple places, there are only very
1551 * few cases where the node can be in the deadnode list (only empty nodes can
1552 * have been added to the list).
/*
 * 'treelocktype' is the kind of tree lock the caller currently holds; dead
 * nodes of the bucket are purged only when it is a write lock.
 */
1555 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1556 isc_rwlocktype_t treelocktype)
1558 isc_boolean_t need_relock = ISC_FALSE;
/* Take the new reference under the "strong" node lock. */
1560 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1561 new_reference(rbtdb, node);
/*
 * First pass, under a read lock: decide whether a write lock is needed,
 * either to take the node off the dead list or to purge other dead nodes.
 */
1563 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1564 isc_rwlocktype_read);
1565 if (ISC_LINK_LINKED(node, deadlink))
1566 need_relock = ISC_TRUE;
1567 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1568 treelocktype == isc_rwlocktype_write)
1569 need_relock = ISC_TRUE;
1570 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1571 isc_rwlocktype_read);
/*
 * Second pass, under a write lock; the dead-list membership is checked
 * again because the lock was released in between.
 * NOTE(review): presumably guarded by 'need_relock'; the guard line is not
 * visible in this excerpt.
 */
1573 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1574 isc_rwlocktype_write);
1575 if (ISC_LINK_LINKED(node, deadlink))
1576 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1578 if (treelocktype == isc_rwlocktype_write)
1579 cleanup_dead_nodes(rbtdb, node->locknum);
1580 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1581 isc_rwlocktype_write);
1584 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1588 * Caller must be holding the node lock; either the "strong", read or write
1589 * lock. Note that the lock must be held even when node references are
1590 * atomically modified; in that case the decrement operation itself does not
1591 * have to be protected, but we must avoid a race condition where multiple
1592 * threads are decreasing the reference to zero simultaneously and at least
1593 * one of them is going to free the node.
1594 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * Parameters as used below:
 *   least_serial  serial passed to clean_zone_node(); 0 means the caller
 *                 does not know it and it is read from the database.
 *   nlock         node lock type held by the caller; a read lock is
 *                 swapped for a write lock and restored before returning.
 *   tlock         tree lock type held by the caller (none, read or write).
 *   pruning       true when called from prune_tree(); suppresses the
 *                 dispatch of another pruning event.
 */
1597 static isc_boolean_t
1598 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1599 rbtdb_serial_t least_serial,
1600 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1601 isc_boolean_t pruning)
1603 isc_result_t result;
1604 isc_boolean_t write_locked;
1605 rbtdb_nodelock_t *nodelock;
1606 unsigned int refs, nrefs;
1607 int bucket = node->locknum;
1608 isc_boolean_t no_reference;
1610 nodelock = &rbtdb->node_locks[bucket];
/*
 * A clean node that still has data or a subtree will not be deleted, so
 * only the counters need adjusting.
 * NOTE(review): the condition guarding the bucket-counter decrement is not
 * visible in this excerpt.
 */
1612 /* Handle easy and typical case first. */
1613 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1614 dns_rbtnode_refdecrement(node, &nrefs);
1615 INSIST((int)nrefs >= 0);
1617 isc_refcount_decrement(&nodelock->references, &refs);
1618 INSIST((int)refs >= 0);
1620 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
/* The read lock is released and a write lock acquired (not atomic). */
1623 /* Upgrade the lock? */
1624 if (nlock == isc_rwlocktype_read) {
1625 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1626 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1628 dns_rbtnode_refdecrement(node, &nrefs);
1629 INSIST((int)nrefs >= 0);
1631 /* Restore the lock? */
1632 if (nlock == isc_rwlocktype_read)
1633 NODE_WEAKDOWNGRADE(&nodelock->lock);
/*
 * Clean a dirty, unreferenced node: cache nodes via clean_cache_node(),
 * zone nodes via clean_zone_node() with the least serial.
 */
1637 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1638 if (IS_CACHE(rbtdb))
1639 clean_cache_node(rbtdb, node);
1641 if (least_serial == 0) {
1643 * Caller doesn't know the least serial.
/* Read it from the database under the database read lock. */
1646 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1647 least_serial = rbtdb->least_serial;
1648 RBTDB_UNLOCK(&rbtdb->lock,
1649 isc_rwlocktype_read);
1651 clean_zone_node(rbtdb, node, least_serial);
/* Drop the reference held on the node's lock bucket. */
1655 isc_refcount_decrement(&nodelock->references, &refs);
1656 INSIST((int)refs >= 0);
1659 * XXXDCL should this only be done for cache zones?
/* A node that still has data or a subtree is kept in the tree. */
1661 if (node->data != NULL || node->down != NULL) {
1662 /* Restore the lock? */
1663 if (nlock == isc_rwlocktype_read)
1664 NODE_WEAKDOWNGRADE(&nodelock->lock);
1669 * Attempt to switch to a write lock on the tree. If this fails,
1670 * we will add this node to a linked list of nodes in this locking
1671 * bucket which we will free later.
1673 if (tlock != isc_rwlocktype_write) {
1675 * Locking hierarchy notwithstanding, we don't need to free
1676 * the node lock before acquiring the tree write lock because
1677 * we only do a trylock.
/* With a read lock try an upgrade; otherwise try to take the write lock. */
1679 if (tlock == isc_rwlocktype_read)
1680 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1682 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1683 isc_rwlocktype_write);
1684 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1685 result == ISC_R_LOCKBUSY);
1687 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1689 write_locked = ISC_TRUE;
1691 no_reference = ISC_TRUE;
1692 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1694 * We can now delete the node if the reference counter is
1695 * zero. This should be typically the case, but a different
1696 * thread may still gain a (new) reference just before the
1697 * current thread locks the tree (e.g., in findnode()).
1701 * If this node is the only one in the level it's in, deleting
1702 * this node may recursively make its parent the only node in
1703 * the parent level; if so, and if no one is currently using
1704 * the parent node, this is almost the only opportunity to
1705 * clean it up. But the recursive cleanup is not that trivial
1706 * since the child and parent may be in different lock buckets,
1707 * which would cause a lock order reversal problem. To avoid
1708 * the trouble, we'll dispatch a separate event for batch
1709 * cleaning. We need to check whether we're deleting the node
1710 * as a result of pruning to avoid infinite dispatching.
1711 * Note: pruning happens only when a task has been set for the
1712 * rbtdb. If the user of the rbtdb chooses not to set a task,
1713 * it's their responsibility to purge stale leaves (e.g. by
1714 * periodic walk-through).
/* The node is the only one on its level and a task is available. */
1716 if (!pruning && node->parent != NULL &&
1717 node->parent->down == node && node->left == NULL &&
1718 node->right == NULL && rbtdb->task != NULL) {
1722 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1725 sizeof(isc_event_t));
/*
 * Before sending the event, a node reference and a database reference
 * are taken; prune_tree() releases both.
 */
1727 new_reference(rbtdb, node);
1729 attach((dns_db_t *)rbtdb, &db);
1731 isc_task_send(rbtdb->task, &ev);
1732 no_reference = ISC_FALSE;
1735 * XXX: this is a weird situation. We could
1736 * ignore this error case, but then the stale
1737 * node will unlikely be purged except via a
1738 * rare condition such as manual cleanup. So
1739 * we queue it in the deadnodes list, hoping
1740 * the memory shortage is temporary and the node
1741 * will be deleted later.
1743 isc_log_write(dns_lctx,
1744 DNS_LOGCATEGORY_DATABASE,
1745 DNS_LOGMODULE_CACHE,
1747 "decrement_reference: failed to "
1748 "allocate pruning event");
1749 INSIST(!ISC_LINK_LINKED(node, deadlink));
1750 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
/* Direct deletion path: log at debug level 1 if enabled, then delete. */
1754 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1755 char printname[DNS_NAME_FORMATSIZE];
1757 isc_log_write(dns_lctx,
1758 DNS_LOGCATEGORY_DATABASE,
1759 DNS_LOGMODULE_CACHE,
1761 "decrement_reference: "
1762 "delete from rbt: %p %s",
1764 dns_rbt_formatnodename(node,
1766 sizeof(printname)));
1769 INSIST(!ISC_LINK_LINKED(node, deadlink));
/*
 * Delete from either the NSEC3 tree or the main tree.
 * NOTE(review): the selecting condition is not visible in this excerpt.
 */
1771 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1774 result = dns_rbt_deletenode(rbtdb->tree, node,
1776 if (result != ISC_R_SUCCESS) {
1777 isc_log_write(dns_lctx,
1778 DNS_LOGCATEGORY_DATABASE,
1779 DNS_LOGMODULE_CACHE,
1781 "decrement_reference: "
1782 "dns_rbt_deletenode: %s",
1783 isc_result_totext(result));
/*
 * Otherwise an unreferenced node is queued on the bucket's dead-node list
 * for deletion at a later time.
 */
1786 } else if (dns_rbtnode_refcurrent(node) == 0) {
1787 INSIST(!ISC_LINK_LINKED(node, deadlink));
1788 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1790 no_reference = ISC_FALSE;
1792 /* Restore the lock? */
1793 if (nlock == isc_rwlocktype_read)
1794 NODE_WEAKDOWNGRADE(&nodelock->lock);
1797 * Relock a read lock, or unlock the write lock if no lock was held.
1799 if (tlock == isc_rwlocktype_none)
1801 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1803 if (tlock == isc_rwlocktype_read)
1805 isc_rwlock_downgrade(&rbtdb->tree_lock);
1807 return (no_reference);
1811 * Prune the tree by recursively cleaning up single leaves. In the worst
1812 * case, the number of iterations is the number of tree levels, which is at
1813 * most the maximum number of domain name labels, i.e., 127. In practice, this
1814 * should be much smaller (only a few times), and even the worst case would be
1815 * acceptable for a single event.
/*
 * Task event handler.  The event's sender is the database and its argument
 * is the node to prune.  The event is freed, the tree and node write locks
 * are taken, and the node's reference is released with 'pruning' set to
 * true.  When the parent is left without a 'down' subtree, the parent is
 * prepared for the same treatment.  The database is detached at the end.
 */
1818 prune_tree(isc_task_t *task, isc_event_t *event) {
1819 dns_rbtdb_t *rbtdb = event->ev_sender;
1820 dns_rbtnode_t *node = event->ev_arg;
1821 dns_rbtnode_t *parent;
1822 unsigned int locknum;
/* 'rbtdb' and 'node' were copied out above, so the event can go now. */
1826 isc_event_free(&event);
1828 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1829 locknum = node->locknum;
1830 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/* Remember the parent before the node's reference is released. */
1832 parent = node->parent;
1833 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1834 isc_rwlocktype_write, ISC_TRUE);
1836 if (parent != NULL && parent->down == NULL) {
1838 * node was the only down child of the parent and has
1839 * just been removed. We'll then need to examine the
1840 * parent. Keep the lock if possible; otherwise,
1841 * release the old lock and acquire one for the parent.
/* Only one node lock is held at any time while switching buckets. */
1843 if (parent->locknum != locknum) {
1844 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1845 isc_rwlocktype_write);
1846 locknum = parent->locknum;
1847 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1848 isc_rwlocktype_write);
1852 * We need to gain a reference to the node before
1853 * decrementing it in the next iteration. In addition,
1854 * if the node is in the dead-nodes list, extract it
1855 * from the list beforehand as we do in
1856 * reactivate_node().
1858 new_reference(rbtdb, parent);
1859 if (ISC_LINK_LINKED(parent, deadlink)) {
1860 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
/*
 * NOTE(review): the statements that update 'node' for the next iteration
 * are not visible in this excerpt.
 */
1867 } while (node != NULL);
1868 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1869 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
/* Release the database reference; see attach() in decrement_reference(). */
1871 detach((dns_db_t **)&rbtdb);
/*
 * Make 'version' the least open version: record its serial as the
 * database's least serial and move its changed list into '*cleanup_list',
 * leaving the version's own list empty.
 */
1875 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1876 rbtdb_changedlist_t *cleanup_list)
1879 * Caller must be holding the database lock.
1882 rbtdb->least_serial = version->serial;
/* The list head is copied, then the source is reset to an empty list. */
1883 *cleanup_list = version->changed_list;
1884 ISC_LIST_INIT(version->changed_list);
/*
 * Move every changed record of 'version' that is not dirty from the
 * version's changed list to '*cleanup_list'.  Dirty records stay on the
 * version's list.
 */
1888 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1889 rbtdb_changed_t *changed, *next_changed;
1892 * If the changed record is dirty, then
1893 * an update created multiple versions of
1894 * a given rdataset. We keep this list
1895 * until we're the least open version, at
1896 * which point it's safe to get rid of any
1899 * If the changed record isn't dirty, then
1900 * we don't need it anymore since we're
1901 * committing and not rolling back.
1903 * The caller must be holding the database lock.
/* The successor is saved first because 'changed' may be unlinked below. */
1905 for (changed = HEAD(version->changed_list);
1907 changed = next_changed) {
1908 next_changed = NEXT(changed, link);
1909 if (!changed->dirty) {
1910 UNLINK(version->changed_list,
1912 APPEND(*cleanup_list,
/*
 * Determine the security status of the zone at 'origin' for 'version' and
 * store it in version->secure (dns_db_secure, dns_db_partial or
 * dns_db_insecure).  The decision uses the presence of a zone key in the
 * DNSKEY rdataset, the NSEC rdataset at the origin together with its
 * signature, and the NSEC3 parameters found by setnsec3parameters().
 */
1919 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1920 dns_rdataset_t keyset;
1921 dns_rdataset_t nsecset, signsecset;
1922 dns_rdata_t rdata = DNS_RDATA_INIT;
1923 isc_boolean_t haszonekey = ISC_FALSE;
1924 isc_boolean_t hasnsec = ISC_FALSE;
1925 isc_boolean_t hasoptbit = ISC_FALSE;
1926 isc_boolean_t nsec3createflag = ISC_FALSE;
1927 isc_result_t result;
/* Step 1: scan the DNSKEY rdataset at the origin for a zone key. */
1929 dns_rdataset_init(&keyset);
1930 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1931 0, 0, &keyset, NULL);
1932 if (result == ISC_R_SUCCESS) {
1933 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1934 result = dns_rdataset_first(&keyset);
1935 while (result == ISC_R_SUCCESS) {
1936 dns_rdataset_current(&keyset, &keyrdata);
1937 if (dns_zonekey_iszonekey(&keyrdata)) {
1938 haszonekey = ISC_TRUE;
1941 result = dns_rdataset_next(&keyset);
1943 dns_rdataset_disassociate(&keyset);
/*
 * NOTE(review): presumably executed only when no zone key was found; the
 * guarding condition is not visible in this excerpt.
 */
1946 version->secure = dns_db_insecure;
1947 version->havensec3 = ISC_FALSE;
/* Step 2: look for the NSEC rdataset at the origin and its signature. */
1951 dns_rdataset_init(&nsecset);
1952 dns_rdataset_init(&signsecset);
1953 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1954 0, 0, &nsecset, &signsecset);
1955 if (result == ISC_R_SUCCESS) {
1956 if (dns_rdataset_isassociated(&signsecset)) {
1958 result = dns_rdataset_first(&nsecset);
1959 if (result == ISC_R_SUCCESS) {
1960 dns_rdataset_current(&nsecset, &rdata);
1961 hasoptbit = dns_nsec_typepresent(&rdata,
1964 dns_rdataset_disassociate(&signsecset);
1966 dns_rdataset_disassociate(&nsecset);
/* Step 3: refresh the NSEC3 parameters cached in 'version'. */
1969 setnsec3parameters(db, version, &nsec3createflag);
1972 * Do we have a valid NSEC/NSEC3 chain?
1974 if (version->havensec3 || (hasnsec && !hasoptbit))
1975 version->secure = dns_db_secure;
1977 * Do we have a NSEC/NSEC3 chain under creation?
1979 else if (hasoptbit || nsec3createflag)
1980 version->secure = dns_db_partial;
1982 version->secure = dns_db_insecure;
1986 * Walk the origin node looking for NSEC3PARAM records.
1987 * Cache the nsec3 parameters.
/*
 * On return, version->havensec3 says whether usable parameters were found;
 * if so, the hash, salt, salt length, iterations and flags are copied into
 * 'version'.  '*nsec3createflag' is set to true when a record carrying
 * DNS_NSEC3FLAG_CREATE is seen.  The tree lock and the origin node's lock
 * are held for reading during the walk.
 */
1990 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1991 isc_boolean_t *nsec3createflag)
1993 dns_rbtnode_t *node;
1994 dns_rdata_nsec3param_t nsec3param;
1995 dns_rdata_t rdata = DNS_RDATA_INIT;
1996 isc_region_t region;
1997 isc_result_t result;
1998 rdatasetheader_t *header, *header_next;
1999 unsigned char *raw; /* RDATASLAB */
2000 unsigned int count, length;
2001 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2003 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2004 version->havensec3 = ISC_FALSE;
2005 node = rbtdb->origin_node;
2006 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2007 isc_rwlocktype_read);
/* Walk the top-level headers stored at the origin node. */
2008 for (header = node->data;
2010 header = header_next) {
2011 header_next = header->next;
/*
 * Walk down the version chain to the header that applies to 'version'
 * (serial not newer than the version's).
 * NOTE(review): the rest of this condition and the loop's exit statements
 * are not visible in this excerpt.
 */
2013 if (header->serial <= version->serial &&
2015 if (NONEXISTENT(header))
2019 header = header->down;
2020 } while (header != NULL);
2022 if (header != NULL &&
2023 header->type == dns_rdatatype_nsec3param) {
2025 * Find A NSEC3PARAM with a supported algorithm.
/* The slab follows the header; its first two bytes hold a 16-bit count. */
2027 raw = (unsigned char *)header + sizeof(*header);
2028 count = raw[0] * 256 + raw[1]; /* count */
2029 #if DNS_RDATASET_FIXED
2030 raw += count * 4 + 2;
/* Each record starts with a 16-bit length, most significant byte first. */
2034 while (count-- > 0U) {
2035 length = raw[0] * 256 + raw[1];
2036 #if DNS_RDATASET_FIXED
2042 region.length = length;
2044 dns_rdata_fromregion(&rdata,
2045 rbtdb->common.rdclass,
2046 dns_rdatatype_nsec3param,
2048 result = dns_rdata_tostruct(&rdata,
2051 INSIST(result == ISC_R_SUCCESS);
2052 dns_rdata_reset(&rdata);
/* Test for a hash that is neither the unknown test value nor supported. */
2054 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2055 !dns_nsec3_supportedhash(nsec3param.hash))
2058 #ifdef RFC5155_STRICT
2059 if (nsec3param.flags != 0)
2062 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2064 *nsec3createflag = ISC_TRUE;
2065 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
/* Cache this record's parameters in 'version'. */
2070 memcpy(version->salt, nsec3param.salt,
2071 nsec3param.salt_length);
2072 version->hash = nsec3param.hash;
2073 version->salt_length = nsec3param.salt_length;
2074 version->iterations = nsec3param.iterations;
2075 version->flags = nsec3param.flags;
2076 version->havensec3 = ISC_TRUE;
2078 * Look for a better algorithm than the
2079 * unknown test algorithm.
2081 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2087 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2088 isc_rwlocktype_read);
2089 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Task event handler that purges dead nodes from every lock bucket.  The
 * event argument is the database.  The handler either sends the event to
 * the task again or frees it, drops a database reference and calls
 * maybe_free_rbtdb().
 * NOTE(review): the use of 'again' and the conditions selecting between
 * those two outcomes are not visible in this excerpt.
 */
2093 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2094 dns_rbtdb_t *rbtdb = event->ev_arg;
2095 isc_boolean_t again = ISC_FALSE;
2096 unsigned int locknum;
/* The tree write lock is held across the sweep of all buckets. */
2099 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2100 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2101 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2102 isc_rwlocktype_write);
2103 cleanup_dead_nodes(rbtdb, locknum);
/* Check whether this bucket still has dead nodes after the purge. */
2104 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2106 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2107 isc_rwlocktype_write);
2109 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2111 isc_task_send(task, &event);
2113 isc_event_free(&event);
2114 isc_refcount_decrement(&rbtdb->references, &refs);
2116 maybe_free_rbtdb(rbtdb);
/*
 * Close the version '*versionp' of database 'db'.  One reference to the
 * version is released.  For a writer version, 'commit' selects between
 * making it the current version and rolling it back.  Afterwards the
 * re-signed headers and the changed-node records collected for cleanup are
 * processed.
 * NOTE(review): a number of control-flow lines (else branches, early
 * returns, the clearing of '*versionp') are not visible in this excerpt.
 */
2121 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2122 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2123 rbtdb_version_t *version, *cleanup_version, *least_greater;
2124 isc_boolean_t rollback = ISC_FALSE;
2125 rbtdb_changedlist_t cleanup_list;
2126 rdatasetheaderlist_t resigned_list;
2127 rbtdb_changed_t *changed, *next_changed;
2128 rbtdb_serial_t serial, least_serial;
2129 dns_rbtnode_t *rbtnode;
2131 rdatasetheader_t *header;
2132 isc_boolean_t writer;
2134 REQUIRE(VALID_RBTDB(rbtdb));
2135 version = (rbtdb_version_t *)*versionp;
2137 cleanup_version = NULL;
2138 ISC_LIST_INIT(cleanup_list);
2139 ISC_LIST_INIT(resigned_list);
/* Release the caller's reference; 'refs' receives the remaining count. */
2141 isc_refcount_decrement(&version->references, &refs);
2142 if (refs > 0) { /* typical and easy case first */
/* A version that is still referenced must not be a writer version. */
2144 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2145 INSIST(!version->writer);
2146 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
/* The version bookkeeping below runs under the database write lock. */
2151 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2152 serial = version->serial;
2153 writer = version->writer;
2154 if (version->writer) {
2157 rbtdb_version_t *cur_version;
2159 INSIST(version->commit_ok);
2160 INSIST(version == rbtdb->future_version);
2162 * The current version is going to be replaced.
2163 * Release the (likely last) reference to it from the
2164 * DB itself and unlink it from the open list.
2166 cur_version = rbtdb->current_version;
2167 isc_refcount_decrement(&cur_version->references,
2170 if (cur_version->serial == rbtdb->least_serial)
2171 INSIST(EMPTY(cur_version->changed_list));
2172 UNLINK(rbtdb->open_versions,
2175 if (EMPTY(rbtdb->open_versions)) {
2177 * We're going to become the least open
2180 make_least_version(rbtdb, version,
2184 * Some other open version is the
2185 * least version. We can't cleanup
2186 * records that were changed in this
2187 * version because the older versions
2188 * may still be in use by an open
2191 * We can, however, discard the
2192 * changed records for things that
2193 * we've added that didn't exist in
2196 cleanup_nondirty(version, &cleanup_list);
2199 * If the (soon to be former) current version
2200 * isn't being used by anyone, we can clean
2204 cleanup_version = cur_version;
2205 APPENDLIST(version->changed_list,
2206 cleanup_version->changed_list,
2210 * Become the current version.
2212 version->writer = ISC_FALSE;
2213 rbtdb->current_version = version;
2214 rbtdb->current_serial = version->serial;
2215 rbtdb->future_version = NULL;
2218 * Keep the current version in the open list, and
2219 * gain a reference for the DB itself (see the DB
2220 * creation function below). This must be the only
2221 * case where we need to increment the counter from
2222 * zero and need to use isc_refcount_increment0().
2224 isc_refcount_increment0(&version->references,
2226 INSIST(cur_ref == 1);
2227 PREPEND(rbtdb->open_versions,
2228 rbtdb->current_version, link);
/* Take over the version's list of re-signed headers for processing below. */
2229 resigned_list = version->resigned_list;
2230 ISC_LIST_INIT(version->resigned_list);
2233 * We're rolling back this transaction.
/* All changed records and re-signed headers of the version are taken over. */
2235 cleanup_list = version->changed_list;
2236 ISC_LIST_INIT(version->changed_list);
2237 resigned_list = version->resigned_list;
2238 ISC_LIST_INIT(version->resigned_list);
2239 rollback = ISC_TRUE;
2240 cleanup_version = version;
2241 rbtdb->future_version = NULL;
/* Handling of a version that is not a writer version starts here. */
2244 if (version != rbtdb->current_version) {
2246 * There are no external or internal references
2247 * to this version and it can be cleaned up.
2249 cleanup_version = version;
2252 * Find the version with the least serial
2253 * number greater than ours.
2255 least_greater = PREV(version, link);
2256 if (least_greater == NULL)
2257 least_greater = rbtdb->current_version;
2259 INSIST(version->serial < least_greater->serial);
2261 * Is this the least open version?
2263 if (version->serial == rbtdb->least_serial) {
2265 * Yes. Install the new least open
2268 make_least_version(rbtdb,
2273 * Add any unexecuted cleanups to
2274 * those of the least greater version.
2276 APPENDLIST(least_greater->changed_list,
2277 version->changed_list,
2280 } else if (version->serial == rbtdb->least_serial)
2281 INSIST(EMPTY(version->changed_list));
2282 UNLINK(rbtdb->open_versions, version, link);
/* Snapshot the least serial while the database lock is still held. */
2284 least_serial = rbtdb->least_serial;
2285 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2288 * Update the zone's secure status.
2290 if (writer && commit && !IS_CACHE(rbtdb))
2291 iszonesecure(db, version, rbtdb->origin_node);
/* Free the version structure selected for cleanup; its list must be empty. */
2293 if (cleanup_version != NULL) {
2294 INSIST(EMPTY(cleanup_version->changed_list));
2295 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2296 sizeof(*cleanup_version));
2300 * Commit/rollback re-signed headers.
/*
 * Each header is taken off the list and handled under its node's write
 * lock; the node reference it held is then released.
 * NOTE(review): the condition guarding resign_insert() is not visible in
 * this excerpt.
 */
2302 for (header = HEAD(resigned_list);
2304 header = HEAD(resigned_list)) {
2307 ISC_LIST_UNLINK(resigned_list, header, link);
2309 lock = &rbtdb->node_locks[header->node->locknum].lock;
2310 NODE_LOCK(lock, isc_rwlocktype_write);
2312 resign_insert(rbtdb, header->node->locknum, header);
2313 decrement_reference(rbtdb, header->node, least_serial,
2314 isc_rwlocktype_write, isc_rwlocktype_none,
2316 NODE_UNLOCK(lock, isc_rwlocktype_write);
2319 if (!EMPTY(cleanup_list)) {
2320 isc_event_t *event = NULL;
2321 isc_rwlocktype_t tlock = isc_rwlocktype_none;
/* With a task available, dead-node cleanup is deferred to an event. */
2323 if (rbtdb->task != NULL)
2324 event = isc_event_allocate(rbtdb->common.mctx, NULL,
2325 DNS_EVENT_RBTDEADNODES,
2326 cleanup_dead_nodes_callback,
2327 rbtdb, sizeof(isc_event_t));
2328 if (event == NULL) {
2330 * We acquire a tree write lock here in order to make
2331 * sure that stale nodes will be removed in
2332 * decrement_reference(). If we didn't have the lock,
2333 * those nodes could miss the chance to be removed
2334 * until the server stops. The write lock is
2335 * expensive, but this event should be rare enough
2336 * to justify the cost.
2338 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2339 tlock = isc_rwlocktype_write;
/* Process and free every changed record on the cleanup list. */
2342 for (changed = HEAD(cleanup_list);
2344 changed = next_changed) {
2347 next_changed = NEXT(changed, link);
2348 rbtnode = changed->node;
2349 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2351 NODE_LOCK(lock, isc_rwlocktype_write);
2353 * This is a good opportunity to purge any dead nodes,
2357 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
/*
 * NOTE(review): presumably rollback_node() runs only when 'rollback' is
 * set; the guard is not visible in this excerpt.
 */
2360 rollback_node(rbtnode, serial);
2361 decrement_reference(rbtdb, rbtnode, least_serial,
2362 isc_rwlocktype_write, tlock,
2365 NODE_UNLOCK(lock, isc_rwlocktype_write);
2367 isc_mem_put(rbtdb->common.mctx, changed,
/*
 * The event carries a database reference, which
 * cleanup_dead_nodes_callback() releases.
 */
2370 if (event != NULL) {
2371 isc_refcount_increment(&rbtdb->references, NULL);
2372 isc_task_send(rbtdb->task, &event);
2374 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2382 * Add the necessary magic for the wildcard name 'name'
2383 * to be found in 'rbtdb'.
2385 * In order for wildcard matching to work correctly in
2386 * zone_find(), we must ensure that a node for the wildcarding
2387 * level exists in the database, and has its 'find_callback'
2388 * and 'wild' bits set.
2390 * E.g. if the wildcard name is "*.sub.example." then we
2391 * must ensure that "sub.example." exists and is marked as
/*
 * Returns ISC_R_SUCCESS once the node has been marked.
 * NOTE(review): the adjustment of 'n', the error return and the setting of
 * the 'wild' bit are not visible in this excerpt.
 */
2395 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2396 isc_result_t result;
2397 dns_name_t foundname;
2398 dns_offsets_t offsets;
2400 dns_rbtnode_t *node = NULL;
2402 dns_name_init(&foundname, offsets);
2403 n = dns_name_countlabels(name);
/* Take the labels of 'name' starting at index 1, skipping the first one. */
2406 dns_name_getlabelsequence(name, 1, n, &foundname);
/* An already existing node is fine; only other results are failures. */
2407 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2408 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2411 node->find_callback = 1;
2413 return (ISC_R_SUCCESS);
/*
 * Examine trailing label sequences of 'name'.  For each one that is a
 * wildcard name, call add_wildcard_magic() for it and add a node for the
 * wildcard name itself to the main tree.  Returns ISC_R_SUCCESS when no
 * step failed.
 * NOTE(review): the loop header, and therefore the exact range of 'i'
 * relative to the origin's label count 'l', is not visible in this
 * excerpt.
 */
2417 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2418 isc_result_t result;
2419 dns_name_t foundname;
2420 dns_offsets_t offsets;
2421 unsigned int n, l, i;
2423 dns_name_init(&foundname, offsets);
2424 n = dns_name_countlabels(name);
2425 l = dns_name_countlabels(&rbtdb->common.origin);
2428 dns_rbtnode_t *node = NULL; /* dummy */
/* 'foundname' becomes the last 'i' labels of 'name'. */
2429 dns_name_getlabelsequence(name, n - i, i, &foundname);
2430 if (dns_name_iswildcard(&foundname)) {
2431 result = add_wildcard_magic(rbtdb, &foundname);
2432 if (result != ISC_R_SUCCESS)
/* An already existing node is not treated as an error. */
2434 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2436 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2442 return (ISC_R_SUCCESS);
/*
 * Look up 'name' in the main tree of 'db' and return the node through
 * '*nodep' with a new reference (taken in reactivate_node()).  The lookup
 * runs under the tree read lock.  If it fails, the read lock is dropped and
 * the node is added under the tree write lock; a newly added node gets its
 * lock bucket number assigned and the wildcard bookkeeping is performed.
 * NOTE(review): the test of 'create' and the early returns are not visible
 * in this excerpt; presumably DNS_R_PARTIALMATCH is mapped to
 * ISC_R_NOTFOUND only when 'create' is false -- confirm.
 */
2446 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2447 dns_dbnode_t **nodep)
2449 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2450 dns_rbtnode_t *node = NULL;
2451 dns_name_t nodename;
2452 isc_result_t result;
2453 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2455 REQUIRE(VALID_RBTDB(rbtdb));
2457 dns_name_init(&nodename, NULL);
2458 RWLOCK(&rbtdb->tree_lock, locktype);
2459 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2460 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2461 if (result != ISC_R_SUCCESS) {
2462 RWUNLOCK(&rbtdb->tree_lock, locktype);
2464 if (result == DNS_R_PARTIALMATCH)
2465 result = ISC_R_NOTFOUND;
2469 * It would be nice to try to upgrade the lock instead of
2470 * unlocking then relocking.
2472 locktype = isc_rwlocktype_write;
2473 RWLOCK(&rbtdb->tree_lock, locktype);
2475 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2476 if (result == ISC_R_SUCCESS) {
/* New node: derive its lock bucket from a hash modulo the bucket count. */
2477 dns_rbt_namefromnode(node, &nodename);
2478 #ifdef DNS_RBT_USEHASH
2479 node->locknum = node->hashval % rbtdb->node_lock_count;
2481 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2482 rbtdb->node_lock_count;
/* The result of add_empty_wildcards() is not checked here. */
2485 add_empty_wildcards(rbtdb, name);
2487 if (dns_name_iswildcard(name)) {
2488 result = add_wildcard_magic(rbtdb, name);
2489 if (result != ISC_R_SUCCESS) {
2490 RWUNLOCK(&rbtdb->tree_lock, locktype);
/* ISC_R_EXISTS is accepted; any other result releases the tree lock. */
2494 } else if (result != ISC_R_EXISTS) {
2495 RWUNLOCK(&rbtdb->tree_lock, locktype);
/* Take the caller's reference, passing the tree lock type currently held. */
2499 reactivate_node(rbtdb, node, locktype);
2500 RWUNLOCK(&rbtdb->tree_lock, locktype);
2502 *nodep = (dns_dbnode_t *)node;
2504 return (ISC_R_SUCCESS);
/*
 * Counterpart of findnode() for the NSEC3 tree: locate the node for
 * 'name' in rbtdb->nsec3 and hand it back through '*nodep'.
 *
 * The lookup runs under the tree read lock.  When it does not succeed the
 * read lock is dropped, DNS_R_PARTIALMATCH is mapped to ISC_R_NOTFOUND,
 * and the creation path re-acquires the tree lock for writing and calls
 * dns_rbt_addnode() on the NSEC3 tree, assigning 'locknum' for a newly
 * added node.  No wildcard processing is done here.
 * NOTE(review): the creation path is presumably taken only when 'create'
 * is true -- confirm.
 *
 * On the success path the node must carry the 'nsec3' flag (INSIST), a
 * reference is taken with new_reference() under the node's strong lock,
 * the tree lock is released and ISC_R_SUCCESS is returned.
 */
2508 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2509 dns_dbnode_t **nodep)
2511 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2512 dns_rbtnode_t *node = NULL;
2513 dns_name_t nodename;
2514 isc_result_t result;
2515 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2517 REQUIRE(VALID_RBTDB(rbtdb));
2519 dns_name_init(&nodename, NULL);
2520 RWLOCK(&rbtdb->tree_lock, locktype);
2521 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2522 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2523 if (result != ISC_R_SUCCESS) {
2524 RWUNLOCK(&rbtdb->tree_lock, locktype);
2526 if (result == DNS_R_PARTIALMATCH)
2527 result = ISC_R_NOTFOUND;
2531 * It would be nice to try to upgrade the lock instead of
2532 * unlocking then relocking.
2534 locktype = isc_rwlocktype_write;
2535 RWLOCK(&rbtdb->tree_lock, locktype);
2537 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2538 if (result == ISC_R_SUCCESS) {
2539 dns_rbt_namefromnode(node, &nodename);
2540 #ifdef DNS_RBT_USEHASH
2541 node->locknum = node->hashval % rbtdb->node_lock_count;
2543 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2544 rbtdb->node_lock_count;
2547 } else if (result != ISC_R_EXISTS) {
2548 RWUNLOCK(&rbtdb->tree_lock, locktype);
2552 INSIST(node->nsec3);
2553 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2554 new_reference(rbtdb, node);
2555 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2556 RWUNLOCK(&rbtdb->tree_lock, locktype);
2558 *nodep = (dns_dbnode_t *)node;
2560 return (ISC_R_SUCCESS);
/*
 * Tree-descent callback (zone_find() passes it to dns_rbt_findnode());
 * 'arg' is the rbtdb_search_t describing the search in progress.
 *
 * If a zone cut has already been recorded in search->zonecut the callback
 * returns DNS_R_CONTINUE straight away.  Otherwise, holding the node read
 * lock, it scans the node's rdataset headers of type NS, DNAME and
 * RBTDB_RDATATYPE_SIGDNAME for a version visible at search->serial.
 * An NS header is only considered when the node is not the origin node or
 * the database is a stub.  DNAME takes precedence over NS.
 *
 * When a cut is found the node is referenced, the search block is updated
 * (zonecut, zonecut_rdataset, zonecut_sigrdataset, need_cleanup) and
 * search->wild is cleared.  Without DNS_DBFIND_GLUEOK the result becomes
 * DNS_R_PARTIALMATCH; the node name is copied into search->zonecut_name
 * and copy_name is set on the path where the search continues below the
 * cut (presumably the GLUEOK case -- confirm).
 *
 * When no cut is found, search->wild is set if the node has its 'wild'
 * flag and DNS_DBFIND_NOWILD is not requested.
 */
2564 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2565 rbtdb_search_t *search = arg;
2566 rdatasetheader_t *header, *header_next;
2567 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2568 rdatasetheader_t *found;
2569 isc_result_t result;
2570 dns_rbtnode_t *onode;
2573 * We only want to remember the topmost zone cut, since it's the one
2574 * that counts, so we'll just continue if we've already found a
2577 if (search->zonecut != NULL)
2578 return (DNS_R_CONTINUE);
2581 result = DNS_R_CONTINUE;
2582 onode = search->rbtdb->origin_node;
2584 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2585 isc_rwlocktype_read);
2588 * Look for an NS or DNAME rdataset active in our version.
2591 dname_header = NULL;
2592 sigdname_header = NULL;
2593 for (header = node->data; header != NULL; header = header_next) {
2594 header_next = header->next;
2595 if (header->type == dns_rdatatype_ns ||
2596 header->type == dns_rdatatype_dname ||
2597 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2599 if (header->serial <= search->serial &&
2602 * Is this a "this rdataset doesn't
2605 if (NONEXISTENT(header))
2609 header = header->down;
2610 } while (header != NULL);
2611 if (header != NULL) {
2612 if (header->type == dns_rdatatype_dname)
2613 dname_header = header;
2614 else if (header->type ==
2615 RBTDB_RDATATYPE_SIGDNAME)
2616 sigdname_header = header;
2617 else if (node != onode ||
2618 IS_STUB(search->rbtdb)) {
2620 * We've found an NS rdataset that
2621 * isn't at the origin node. We check
2622 * that they're not at the origin node,
2623 * because otherwise we'd erroneously
2624 * treat the zone top as if it were
2634 * Did we find anything?
2636 if (dname_header != NULL) {
2638 * Note that DNAME has precedence over NS if both exist.
2640 found = dname_header;
2641 search->zonecut_sigrdataset = sigdname_header;
2642 } else if (ns_header != NULL) {
2644 search->zonecut_sigrdataset = NULL;
2647 if (found != NULL) {
2649 * We increment the reference count on node to ensure that
2650 * search->zonecut_rdataset will still be valid later.
2652 new_reference(search->rbtdb, node);
2653 search->zonecut = node;
2654 search->zonecut_rdataset = found;
2655 search->need_cleanup = ISC_TRUE;
2657 * Since we've found a zonecut, anything beneath it is
2658 * glue and is not subject to wildcard matching, so we
2659 * may clear search->wild.
2661 search->wild = ISC_FALSE;
2662 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2664 * If the caller does not want to find glue, then
2665 * this is the best answer and the search should
2668 result = DNS_R_PARTIALMATCH;
2673 * The search will continue beneath the zone cut.
2674 * This may or may not be the best match. In case it
2675 * is, we need to remember the node name.
2677 zcname = dns_fixedname_name(&search->zonecut_name);
2678 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2680 search->copy_name = ISC_TRUE;
2684 * There is no zonecut at this node which is active in this
2687 * If this is a "wild" node and the caller hasn't disabled
2688 * wildcard matching, remember that we've seen a wild node
2689 * in case we need to go searching for wildcard matches
2692 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2693 search->wild = ISC_TRUE;
2696 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2697 isc_rwlocktype_read);
/*
 * Associate 'rdataset' with the rdataset described by 'header' on 'node'.
 *
 * A NULL 'rdataset' is tested for first (presumably a no-op return --
 * confirm).  Otherwise:
 *  - a reference on 'node' is taken with new_reference();
 *  - 'rdataset' must currently be disassociated (methods == NULL);
 *  - class comes from the database, type/covers are split out of
 *    header->type, and the TTL is header->rdh_ttl minus 'now';
 *  - NXDOMAIN / OPTOUT / NOQNAME / CLOSEST / RESIGN attributes are set
 *    from the header;
 *  - private1/private2/private3 hold the database, the node and a pointer
 *    to the bytes immediately following the header structure;
 *  - 'count' receives header->count, which is post-incremented; a value of
 *    ISC_UINT32_MAX is reported as 0;
 *  - the iterator state (privateuint4, private5) is reset.
 */
2703 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2704 rdatasetheader_t *header, isc_stdtime_t now,
2705 dns_rdataset_t *rdataset)
2707 unsigned char *raw; /* RDATASLAB */
2710 * Caller must be holding the node reader lock.
2711 * XXXJT: technically, we need a writer lock, since we'll increment
2712 * the header count below. However, since the actual counter value
2713 * doesn't matter, we prioritize performance here. (We may want to
2714 * use atomic increment when available).
2717 if (rdataset == NULL)
2720 new_reference(rbtdb, node);
2722 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2724 rdataset->methods = &rdataset_methods;
2725 rdataset->rdclass = rbtdb->common.rdclass;
2726 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2727 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2728 rdataset->ttl = header->rdh_ttl - now;
2729 rdataset->trust = header->trust;
2730 if (NXDOMAIN(header))
2731 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2733 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2734 rdataset->private1 = rbtdb;
2735 rdataset->private2 = node;
2736 raw = (unsigned char *)header + sizeof(*header);
2737 rdataset->private3 = raw;
2738 rdataset->count = header->count++;
2739 if (rdataset->count == ISC_UINT32_MAX)
2740 rdataset->count = 0;
2743 * Reset iterator state.
2745 rdataset->privateuint4 = 0;
2746 rdataset->private5 = NULL;
2749 * Add noqname proof.
2751 rdataset->private6 = header->noqname;
2752 if (rdataset->private6 != NULL)
2753 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2754 rdataset->private7 = header->closest;
2755 if (rdataset->private7 != NULL)
2756 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2759 * Copy out re-signing information.
2761 if (RESIGN(header)) {
2762 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2763 rdataset->resign = header->resign;
2765 rdataset->resign = 0;
/*
 * Turn the zone cut recorded in 'search' into the answer for the caller.
 *
 * The zone cut node and the type of its rdataset are read from the search
 * block.  If 'foundname' is supplied and search->copy_name is set, the
 * saved zone cut name is copied into it before anything else is done.
 * When 'nodep' is supplied the reference already held by the search block
 * is reused and search->need_cleanup is cleared.  When 'rdataset' is
 * supplied, the zone cut rdataset (and, if both are available, its
 * signature rdataset) is bound under the node read lock using
 * search->now.
 *
 * Returns DNS_R_DNAME when the zone cut rdataset is a DNAME, otherwise
 * DNS_R_DELEGATION.
 */
2768 static inline isc_result_t
2769 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2770 dns_name_t *foundname, dns_rdataset_t *rdataset,
2771 dns_rdataset_t *sigrdataset)
2773 isc_result_t result;
2775 rbtdb_rdatatype_t type;
2776 dns_rbtnode_t *node;
2779 * The caller MUST NOT be holding any node locks.
2782 node = search->zonecut;
2783 type = search->zonecut_rdataset->type;
2786 * If we have to set foundname, we do it before anything else.
2787 * If we were to set foundname after we had set nodep or bound the
2788 * rdataset, then we'd have to undo that work if dns_name_copy()
2789 * failed. By setting foundname first, there's nothing to undo if
2792 if (foundname != NULL && search->copy_name) {
2793 zcname = dns_fixedname_name(&search->zonecut_name);
2794 result = dns_name_copy(zcname, foundname, NULL);
2795 if (result != ISC_R_SUCCESS)
2798 if (nodep != NULL) {
2800 * Note that we don't have to increment the node's reference
2801 * count here because we're going to use the reference we
2802 * already have in the search block.
2805 search->need_cleanup = ISC_FALSE;
2807 if (rdataset != NULL) {
2808 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2809 isc_rwlocktype_read);
2810 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2811 search->now, rdataset);
2812 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2813 bind_rdataset(search->rbtdb, node,
2814 search->zonecut_sigrdataset,
2815 search->now, sigrdataset);
2816 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2817 isc_rwlocktype_read);
2820 if (type == dns_rdatatype_dname)
2821 return (DNS_R_DNAME);
2822 return (DNS_R_DELEGATION);
/*
 * Decide whether an rdataset of 'type' at 'node' (owner name 'name') is
 * acceptable glue for the zone cut recorded in 'search'.
 *
 * Type filter: NS is examined only with respect to the zone cut node
 * itself; any other type must be A, AAAA or A6.
 *
 * Name filter: the raw rdata slab that follows the zone cut rdataset
 * header is read directly -- a 16-bit big-endian record count, then for
 * each record a 16-bit big-endian length -- and each record is parsed as
 * a name and compared with 'name' using dns_name_compare().
 * With DNS_RDATASET_FIXED the slab carries extra per-record data that is
 * skipped (2 + 4 * count bytes after the count).
 *
 * 'valid' starts as ISC_FALSE; presumably it is set on a name match and
 * returned -- confirm.
 *
 * NOTE(review): the token '®ion' below looks like a mis-encoded
 * '&region' -- confirm against the upstream source.
 */
2825 static inline isc_boolean_t
2826 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2827 dns_rbtnode_t *node)
2829 unsigned char *raw; /* RDATASLAB */
2830 unsigned int count, size;
2832 isc_boolean_t valid = ISC_FALSE;
2833 dns_offsets_t offsets;
2834 isc_region_t region;
2835 rdatasetheader_t *header;
2838 * No additional locking is required.
2842 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2843 * if it occurs at a zone cut, but is not valid below it.
2845 if (type == dns_rdatatype_ns) {
2846 if (node != search->zonecut) {
2849 } else if (type != dns_rdatatype_a &&
2850 type != dns_rdatatype_aaaa &&
2851 type != dns_rdatatype_a6) {
2855 header = search->zonecut_rdataset;
2856 raw = (unsigned char *)header + sizeof(*header);
2857 count = raw[0] * 256 + raw[1];
2858 #if DNS_RDATASET_FIXED
2859 raw += 2 + (4 * count);
2866 size = raw[0] * 256 + raw[1];
2867 #if DNS_RDATASET_FIXED
2873 region.length = size;
2876 * XXX Until we have rdata structures, we have no choice but
2877 * to directly access the rdata format.
2879 dns_name_init(&ns_name, offsets);
2880 dns_name_fromregion(&ns_name, ®ion);
2881 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * Starting from the position of 'chain', step forward with
 * dns_rbtnodechain_next() until a node is reached that has at least one
 * rdataset header which is visible at search->serial, not ignored and
 * existing (checked under that node's read lock).  The full name of that
 * node is then built by concatenating its prefix with the chain origin,
 * and it is tested with dns_name_issubdomain() against 'name'.
 *
 * 'answer' starts as ISC_FALSE; presumably it is set when the next active
 * name is a subdomain of 'name' and then returned -- confirm.
 *
 * Note that 'chain' itself is advanced; zone_find() passes a copy of its
 * search chain.
 */
2890 static inline isc_boolean_t
2891 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2894 dns_fixedname_t fnext;
2895 dns_fixedname_t forigin;
2900 dns_rbtnode_t *node;
2901 isc_result_t result;
2902 isc_boolean_t answer = ISC_FALSE;
2903 rdatasetheader_t *header;
2905 rbtdb = search->rbtdb;
2907 dns_name_init(&prefix, NULL);
2908 dns_fixedname_init(&fnext);
2909 next = dns_fixedname_name(&fnext);
2910 dns_fixedname_init(&forigin);
2911 origin = dns_fixedname_name(&forigin);
2913 result = dns_rbtnodechain_next(chain, NULL, NULL);
2914 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2916 result = dns_rbtnodechain_current(chain, &prefix,
2918 if (result != ISC_R_SUCCESS)
2920 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2921 isc_rwlocktype_read);
2922 for (header = node->data;
2924 header = header->next) {
2925 if (header->serial <= search->serial &&
2926 !IGNORE(header) && EXISTS(header))
2929 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2930 isc_rwlocktype_read);
2933 result = dns_rbtnodechain_next(chain, NULL, NULL);
2935 if (result == ISC_R_SUCCESS)
2936 result = dns_name_concatenate(&prefix, origin, next, NULL);
2937 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * Check whether 'qname' sits at or below an empty node that lies between
 * it and the wildcard 'wname'.  (The identifier is spelled "emtpy" in the
 * source; callers use that spelling.)
 *
 * Working on a private copy of search->chain, the function:
 *  1. walks backwards with dns_rbtnodechain_prev() to a node holding an
 *     rdataset header that is visible at search->serial, not ignored and
 *     existing, and builds its full name in 'prev' (check_prev is cleared
 *     if that fails);
 *  2. walks forwards with dns_rbtnodechain_next() in the same way and
 *     builds 'next' (check_next is cleared if that fails);
 *  3. starting from a clone of 'qname', repeatedly tests whether 'prev' or
 *     'next' is a subdomain of the current name and then strips the
 *     leftmost label, stopping when the name equals 'wname' without its
 *     leading wildcard label.
 *
 * 'answer' starts as ISC_FALSE; presumably it is set when one of the
 * subdomain tests succeeds and then returned -- confirm.
 */
2942 static inline isc_boolean_t
2943 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2944 dns_fixedname_t fnext;
2945 dns_fixedname_t forigin;
2946 dns_fixedname_t fprev;
2954 dns_rbtnode_t *node;
2955 dns_rbtnodechain_t chain;
2956 isc_boolean_t check_next = ISC_TRUE;
2957 isc_boolean_t check_prev = ISC_TRUE;
2958 isc_boolean_t answer = ISC_FALSE;
2959 isc_result_t result;
2960 rdatasetheader_t *header;
2963 rbtdb = search->rbtdb;
2965 dns_name_init(&name, NULL);
2966 dns_name_init(&tname, NULL);
2967 dns_name_init(&rname, NULL);
2968 dns_fixedname_init(&fnext);
2969 next = dns_fixedname_name(&fnext);
2970 dns_fixedname_init(&fprev);
2971 prev = dns_fixedname_name(&fprev);
2972 dns_fixedname_init(&forigin);
2973 origin = dns_fixedname_name(&forigin);
2976 * Find if qname is at or below a empty node.
2977 * Use our own copy of the chain.
2980 chain = search->chain;
2983 result = dns_rbtnodechain_current(&chain, &name,
2985 if (result != ISC_R_SUCCESS)
2987 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2988 isc_rwlocktype_read);
2989 for (header = node->data;
2991 header = header->next) {
2992 if (header->serial <= search->serial &&
2993 !IGNORE(header) && EXISTS(header))
2996 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2997 isc_rwlocktype_read);
3000 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3001 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3002 if (result == ISC_R_SUCCESS)
3003 result = dns_name_concatenate(&name, origin, prev, NULL);
3004 if (result != ISC_R_SUCCESS)
3005 check_prev = ISC_FALSE;
3007 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3008 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3010 result = dns_rbtnodechain_current(&chain, &name,
3012 if (result != ISC_R_SUCCESS)
3014 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3015 isc_rwlocktype_read);
3016 for (header = node->data;
3018 header = header->next) {
3019 if (header->serial <= search->serial &&
3020 !IGNORE(header) && EXISTS(header))
3023 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3024 isc_rwlocktype_read);
3027 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3029 if (result == ISC_R_SUCCESS)
3030 result = dns_name_concatenate(&name, origin, next, NULL);
3031 if (result != ISC_R_SUCCESS)
3032 check_next = ISC_FALSE;
3034 dns_name_clone(qname, &rname);
3037 * Remove the wildcard label to find the terminal name.
3039 n = dns_name_countlabels(wname);
3040 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3043 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3044 (check_next && dns_name_issubdomain(next, &rname))) {
3049 * Remove the left hand label.
3051 n = dns_name_countlabels(&rname);
3052 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3053 } while (!dns_name_equal(&rname, &tname));
/*
 * After a failed exact lookup, search the ancestor levels recorded in
 * search->chain for a wildcard that matches the query name.
 *
 * Levels are visited starting from search->chain.level_matches.  For each
 * level the node is inspected under its read lock to see whether it has an
 * rdataset header visible at search->serial that is not ignored and
 * exists.  The wildcard name for the level is built by concatenating
 * dns_wildcardname with the node name and then with the names of the
 * enclosing chain levels, and is looked up in rbtdb->tree with
 * DNS_RBTFIND_EMPTYDATA.
 *
 * If the wildcard node is found and is active in this version (or
 * activeempty() reports an active name beneath it), ISC_R_NOTFOUND is
 * returned when activeemtpynode() reports true for the query name;
 * otherwise the wildcard is accepted with 'result' still ISC_R_SUCCESS.
 * Lookup results other than ISC_R_NOTFOUND / DNS_R_PARTIALMATCH are
 * treated as errors.  An active level node ends the search with
 * ISC_R_NOTFOUND, since wildcards at higher levels can no longer apply.
 *
 * NOTE(review): the accepted wildcard node is presumably stored through
 * 'nodep' -- confirm.
 */
3057 static inline isc_result_t
3058 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3062 dns_rbtnode_t *node, *level_node, *wnode;
3063 rdatasetheader_t *header;
3064 isc_result_t result = ISC_R_NOTFOUND;
3067 dns_fixedname_t fwname;
3069 isc_boolean_t done, wild, active;
3070 dns_rbtnodechain_t wchain;
3073 * Caller must be holding the tree lock and MUST NOT be holding
3078 * Examine each ancestor level. If the level's wild bit
3079 * is set, then construct the corresponding wildcard name and
3080 * search for it. If the wildcard node exists, and is active in
3081 * this version, we're done. If not, then we next check to see
3082 * if the ancestor is active in this version. If so, then there
3083 * can be no possible wildcard match and again we're done. If not,
3084 * continue the search.
3087 rbtdb = search->rbtdb;
3088 i = search->chain.level_matches;
3092 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3093 isc_rwlocktype_read);
3096 * First we try to figure out if this node is active in
3097 * the search's version. We do this now, even though we
3098 * may not need the information, because it simplifies the
3099 * locking and code flow.
3101 for (header = node->data;
3103 header = header->next) {
3104 if (header->serial <= search->serial &&
3105 !IGNORE(header) && EXISTS(header))
3118 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3119 isc_rwlocktype_read);
3123 * Construct the wildcard name for this level.
3125 dns_name_init(&name, NULL);
3126 dns_rbt_namefromnode(node, &name);
3127 dns_fixedname_init(&fwname);
3128 wname = dns_fixedname_name(&fwname);
3129 result = dns_name_concatenate(dns_wildcardname, &name,
3132 while (result == ISC_R_SUCCESS && j != 0) {
3134 level_node = search->chain.levels[j];
3135 dns_name_init(&name, NULL);
3136 dns_rbt_namefromnode(level_node, &name);
3137 result = dns_name_concatenate(wname,
3142 if (result != ISC_R_SUCCESS)
3146 dns_rbtnodechain_init(&wchain, NULL);
3147 result = dns_rbt_findnode(rbtdb->tree, wname,
3148 NULL, &wnode, &wchain,
3149 DNS_RBTFIND_EMPTYDATA,
3151 if (result == ISC_R_SUCCESS) {
3155 * We have found the wildcard node. If it
3156 * is active in the search's version, we're
3159 lock = &rbtdb->node_locks[wnode->locknum].lock;
3160 NODE_LOCK(lock, isc_rwlocktype_read);
3161 for (header = wnode->data;
3163 header = header->next) {
3164 if (header->serial <= search->serial &&
3165 !IGNORE(header) && EXISTS(header))
3168 NODE_UNLOCK(lock, isc_rwlocktype_read);
3169 if (header != NULL ||
3170 activeempty(search, &wchain, wname)) {
3171 if (activeemtpynode(search, qname,
3173 return (ISC_R_NOTFOUND);
3176 * The wildcard node is active!
3178 * Note: result is still ISC_R_SUCCESS
3179 * so we don't have to set it.
3184 } else if (result != ISC_R_NOTFOUND &&
3185 result != DNS_R_PARTIALMATCH) {
3187 * An error has occurred. Bail out.
3195 * The level node is active. Any wildcarding
3196 * present at higher levels has no
3197 * effect and we're done.
3199 result = ISC_R_NOTFOUND;
3205 node = search->chain.levels[i];
/*
 * Check whether any NSEC3 record in the rdataset behind 'header' uses the
 * same NSEC3 parameters as the version being searched.
 *
 * 'header' must be of type NSEC3 (REQUIRE).  The raw slab following the
 * header is walked directly: a 16-bit big-endian record count, then for
 * each record a 16-bit big-endian length followed by the rdata (with
 * DNS_RDATASET_FIXED, additional per-record data is skipped).  Each
 * record is converted with dns_rdata_tostruct() and its hash, iterations,
 * salt length and salt bytes are compared with those stored in
 * search->rbtversion.
 *
 * NOTE(review): presumably returns ISC_TRUE on the first matching record
 * and ISC_FALSE when none match -- confirm.  The token '®ion' below
 * looks like a mis-encoded '&region'.
 */
3213 static isc_boolean_t
3214 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3216 dns_rdata_t rdata = DNS_RDATA_INIT;
3217 dns_rdata_nsec3_t nsec3;
3218 unsigned char *raw; /* RDATASLAB */
3219 unsigned int rdlen, count;
3220 isc_region_t region;
3221 isc_result_t result;
3223 REQUIRE(header->type == dns_rdatatype_nsec3);
3225 raw = (unsigned char *)header + sizeof(*header);
3226 count = raw[0] * 256 + raw[1]; /* count */
3227 #if DNS_RDATASET_FIXED
3228 raw += count * 4 + 2;
3232 while (count-- > 0) {
3233 rdlen = raw[0] * 256 + raw[1];
3234 #if DNS_RDATASET_FIXED
3240 region.length = rdlen;
3241 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3242 dns_rdatatype_nsec3, ®ion);
3244 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3245 INSIST(result == ISC_R_SUCCESS);
3246 if (nsec3.hash == search->rbtversion->hash &&
3247 nsec3.iterations == search->rbtversion->iterations &&
3248 nsec3.salt_length == search->rbtversion->salt_length &&
3249 memcmp(nsec3.salt, search->rbtversion->salt,
3250 nsec3.salt_length) == 0)
3252 dns_rdata_reset(&rdata);
3258 * Find node of the NSEC/NSEC3 record that is 'name'.
/*
 * Walk backwards through 'tree' from the current position of
 * search->chain until a node carrying the wanted denial-of-existence
 * record is found, and bind it for the caller.
 *
 * The record type searched for is NSEC3 (with RBTDB_RDATATYPE_SIGNSEC3 as
 * signature type) when 'tree' is the database's NSEC3 tree, and NSEC
 * (with RBTDB_RDATATYPE_SIGNSEC) otherwise.  A signature is required only
 * when 'secure' equals dns_db_secure.
 *
 * For every node visited, under its read lock:
 *  - an NSEC3 record whose parameters fail matchparams() (when the
 *    version has NSEC3) makes the node count as empty and the walk moves
 *    to the previous node;
 *  - if the record (and its signature, when needed) is present, the full
 *    node name is built into 'foundname', a reference is taken for
 *    'nodep' when requested, and the rdataset(s) are bound;
 *  - a node with active data but neither record nor signature is treated
 *    as empty and the walk continues;
 *  - a node with only one of record/signature yields DNS_R_BADDB;
 *  - a node with no active data is skipped.
 *
 * If the walk runs off the start of the tree (ISC_R_NOMORE) and 'wraps'
 * is set, it restarts from dns_rbtnodechain_last().  A final
 * ISC_R_NOMORE is converted to DNS_R_BADDB.
 */
3260 static inline isc_result_t
3261 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3262 dns_name_t *foundname, dns_rdataset_t *rdataset,
3263 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3264 dns_db_secure_t secure)
3266 dns_rbtnode_t *node;
3267 rdatasetheader_t *header, *header_next, *found, *foundsig;
3268 isc_boolean_t empty_node;
3269 isc_result_t result;
3270 dns_fixedname_t fname, forigin;
3271 dns_name_t *name, *origin;
3272 dns_rdatatype_t type;
3273 rbtdb_rdatatype_t sigtype;
3274 isc_boolean_t wraps;
3275 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3277 if (tree == search->rbtdb->nsec3) {
3278 type = dns_rdatatype_nsec3;
3279 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3282 type = dns_rdatatype_nsec;
3283 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3290 dns_fixedname_init(&fname);
3291 name = dns_fixedname_name(&fname);
3292 dns_fixedname_init(&forigin);
3293 origin = dns_fixedname_name(&forigin);
3294 result = dns_rbtnodechain_current(&search->chain, name,
3296 if (result != ISC_R_SUCCESS)
3298 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3299 isc_rwlocktype_read);
3302 empty_node = ISC_TRUE;
3303 for (header = node->data;
3305 header = header_next) {
3306 header_next = header->next;
3308 * Look for an active, extant NSEC or RRSIG NSEC.
3311 if (header->serial <= search->serial &&
3314 * Is this a "this rdataset doesn't
3317 if (NONEXISTENT(header))
3321 header = header->down;
3322 } while (header != NULL);
3323 if (header != NULL) {
3325 * We now know that there is at least one
3326 * active rdataset at this node.
3328 empty_node = ISC_FALSE;
3329 if (header->type == type) {
3331 if (foundsig != NULL)
3333 } else if (header->type == sigtype) {
3341 if (found != NULL && search->rbtversion->havensec3 &&
3342 found->type == dns_rdatatype_nsec3 &&
3343 !matchparams(found, search)) {
3344 empty_node = ISC_TRUE;
3347 result = dns_rbtnodechain_prev(&search->chain,
3349 } else if (found != NULL &&
3350 (foundsig != NULL || !need_sig))
3353 * We've found the right NSEC/NSEC3 record.
3355 * Note: for this to really be the right
3356 * NSEC record, it's essential that the NSEC
3357 * records of any nodes obscured by a zone
3358 * cut have been removed; we assume this is
3361 result = dns_name_concatenate(name, origin,
3363 if (result == ISC_R_SUCCESS) {
3364 if (nodep != NULL) {
3365 new_reference(search->rbtdb,
3369 bind_rdataset(search->rbtdb, node,
3372 if (foundsig != NULL)
3373 bind_rdataset(search->rbtdb,
3379 } else if (found == NULL && foundsig == NULL) {
3381 * This node is active, but has no NSEC or
3382 * RRSIG NSEC. That means it's glue or
3383 * other obscured zone data that isn't
3384 * relevant for our search. Treat the
3385 * node as if it were empty and keep looking.
3387 empty_node = ISC_TRUE;
3388 result = dns_rbtnodechain_prev(&search->chain,
3392 * We found an active node, but either the
3393 * NSEC or the RRSIG NSEC is missing. This
3396 result = DNS_R_BADDB;
3400 * This node isn't active. We've got to keep
3403 result = dns_rbtnodechain_prev(&search->chain, NULL,
3406 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3407 isc_rwlocktype_read);
3408 } while (empty_node && result == ISC_R_SUCCESS);
3410 if (result == ISC_R_NOMORE && wraps) {
3411 result = dns_rbtnodechain_last(&search->chain, tree,
3413 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3420 * If the result is ISC_R_NOMORE, then we got to the beginning of
3421 * the database and didn't find a NSEC record. This shouldn't
3424 if (result == ISC_R_NOMORE)
3425 result = DNS_R_BADDB;
/*
 * Look up 'name' / 'type' in the zone database 'db' as of 'version'.
 * When 'version' is NULL the current version is attached for the duration
 * of the call and closed again before returning.  'now' is not used for
 * the lookup (rdatasets found here are bound with a 'now' of 0).
 *
 * Outline of what the function does:
 *
 *  1. Under the tree read lock, the tree (the NSEC3 tree when
 *     DNS_DBFIND_FORCENSEC3 is set) is searched with
 *     DNS_RBTFIND_EMPTYDATA; zone_zonecut_callback() records any zone cut
 *     passed on the way down in the search block.
 *
 *  2. On DNS_R_PARTIALMATCH: a recorded zone cut is converted into a
 *     delegation by setup_delegation(); a wildcard match is attempted via
 *     find_wildcard(); failing both, activeempty() decides between an
 *     empty non-terminal and a non-existent name, and for secure zones
 *     without NSEC3, or with DNS_DBFIND_FORCENSEC / FORCENSEC3,
 *     find_closest_nsec() supplies the covering record.  The result is
 *     DNS_R_EMPTYNAME or DNS_R_NXDOMAIN.
 *
 *  3. On an exact (or wildcard) match the node's rdataset headers are
 *     scanned under the node read lock for: an NS set marking the node as
 *     a zone cut (when that is possible for this node and type), the
 *     requested type or, if permitted, a CNAME, the matching RRSIG, and
 *     NSEC / RRSIG(NSEC) for later use.  NSEC3 headers whose parameters
 *     fail matchparams() cause the node lock to be released (presumably
 *     the node is then treated as a partial match -- confirm).
 *
 *  4. If nothing suitable was found the result is a delegation (beneath a
 *     zone cut), DNS_R_NXRRSET, DNS_R_EMPTYWILD (after
 *     find_closest_nsec()), or DNS_R_BADDB when a required NSEC or its
 *     signature is missing.  Otherwise the result is DNS_R_CNAME,
 *     DNS_R_ZONECUT, DNS_R_GLUE (optionally validated with valid_glue()
 *     when DNS_DBFIND_VALIDATEGLUE is set) or ISC_R_SUCCESS, and the
 *     rdataset(s) are bound unless 'type' is ANY.
 *
 *  5. Cleanup: the tree lock is released; a zone cut reference that was
 *     taken but not handed to the caller (search.need_cleanup) is dropped
 *     with decrement_reference(); the version is closed if it was opened
 *     here; the search chain is reset.
 */
3431 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3432 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3433 dns_dbnode_t **nodep, dns_name_t *foundname,
3434 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3436 dns_rbtnode_t *node = NULL;
3437 isc_result_t result;
3438 rbtdb_search_t search;
3439 isc_boolean_t cname_ok = ISC_TRUE;
3440 isc_boolean_t close_version = ISC_FALSE;
3441 isc_boolean_t maybe_zonecut = ISC_FALSE;
3442 isc_boolean_t at_zonecut = ISC_FALSE;
3444 isc_boolean_t empty_node;
3445 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3446 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3447 rbtdb_rdatatype_t sigtype;
3448 isc_boolean_t active;
3449 dns_rbtnodechain_t chain;
3453 search.rbtdb = (dns_rbtdb_t *)db;
3455 REQUIRE(VALID_RBTDB(search.rbtdb));
3458 * We don't care about 'now'.
3463 * If the caller didn't supply a version, attach to the current
3466 if (version == NULL) {
3467 currentversion(db, &version);
3468 close_version = ISC_TRUE;
3471 search.rbtversion = version;
3472 search.serial = search.rbtversion->serial;
3473 search.options = options;
3474 search.copy_name = ISC_FALSE;
3475 search.need_cleanup = ISC_FALSE;
3476 search.wild = ISC_FALSE;
3477 search.zonecut = NULL;
3478 dns_fixedname_init(&search.zonecut_name);
3479 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3483 * 'wild' will be true iff. we've matched a wildcard.
3487 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3490 * Search down from the root of the tree. If, while going down, we
3491 * encounter a callback node, zone_zonecut_callback() will search the
3492 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3494 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3496 result = dns_rbt_findnode(tree, name, foundname, &node,
3497 &search.chain, DNS_RBTFIND_EMPTYDATA,
3498 zone_zonecut_callback, &search);
3500 if (result == DNS_R_PARTIALMATCH) {
3502 if (search.zonecut != NULL) {
3503 result = setup_delegation(&search, nodep, foundname,
3504 rdataset, sigrdataset);
3510 * At least one of the levels in the search chain
3511 * potentially has a wildcard. For each such level,
3512 * we must see if there's a matching wildcard active
3513 * in the current version.
3515 result = find_wildcard(&search, &node, name);
3516 if (result == ISC_R_SUCCESS) {
3517 result = dns_name_copy(name, foundname, NULL);
3518 if (result != ISC_R_SUCCESS)
3523 else if (result != ISC_R_NOTFOUND)
3527 chain = search.chain;
3528 active = activeempty(&search, &chain, name);
3531 * If we're here, then the name does not exist, is not
3532 * beneath a zonecut, and there's no matching wildcard.
3534 if ((search.rbtversion->secure == dns_db_secure &&
3535 !search.rbtversion->havensec3) ||
3536 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3537 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3539 result = find_closest_nsec(&search, nodep, foundname,
3540 rdataset, sigrdataset, tree,
3541 search.rbtversion->secure);
3542 if (result == ISC_R_SUCCESS)
3543 result = active ? DNS_R_EMPTYNAME :
3546 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3548 } else if (result != ISC_R_SUCCESS)
3553 * We have found a node whose name is the desired name, or we
3554 * have matched a wildcard.
3557 if (search.zonecut != NULL) {
3559 * If we're beneath a zone cut, we don't want to look for
3560 * CNAMEs because they're not legitimate zone glue.
3562 cname_ok = ISC_FALSE;
3565 * The node may be a zone cut itself. If it might be one,
3566 * make sure we check for it later.
3568 * DS records live above the zone cut in ordinary zone so
3569 * we want to ignore any referral.
3571 * Stub zones don't have anything "above" the delgation so
3572 * we always return a referral.
3574 if (node->find_callback &&
3575 ((node != search.rbtdb->origin_node &&
3576 !dns_rdatatype_atparent(type)) ||
3577 IS_STUB(search.rbtdb)))
3578 maybe_zonecut = ISC_TRUE;
3582 * Certain DNSSEC types are not subject to CNAME matching
3583 * (RFC4035, section 2.5 and RFC3007).
3585 * We don't check for RRSIG, because we don't store RRSIG records
3588 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3589 cname_ok = ISC_FALSE;
3592 * We now go looking for rdata...
3595 lock = &search.rbtdb->node_locks[node->locknum].lock;
3596 NODE_LOCK(lock, isc_rwlocktype_read);
3600 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3604 empty_node = ISC_TRUE;
3605 for (header = node->data; header != NULL; header = header_next) {
3606 header_next = header->next;
3608 * Look for an active, extant rdataset.
3611 if (header->serial <= search.serial &&
3614 * Is this a "this rdataset doesn't
3617 if (NONEXISTENT(header))
3621 header = header->down;
3622 } while (header != NULL);
3623 if (header != NULL) {
3625 * We now know that there is at least one active
3626 * rdataset at this node.
3628 empty_node = ISC_FALSE;
3631 * Do special zone cut handling, if requested.
3633 if (maybe_zonecut &&
3634 header->type == dns_rdatatype_ns) {
3636 * We increment the reference count on node to
3637 * ensure that search->zonecut_rdataset will
3638 * still be valid later.
3640 new_reference(search.rbtdb, node);
3641 search.zonecut = node;
3642 search.zonecut_rdataset = header;
3643 search.zonecut_sigrdataset = NULL;
3644 search.need_cleanup = ISC_TRUE;
3645 maybe_zonecut = ISC_FALSE;
3646 at_zonecut = ISC_TRUE;
3648 * It is not clear if KEY should still be
3649 * allowed at the parent side of the zone
3650 * cut or not. It is needed for RFC3007
3651 * validated updates.
3653 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3654 && type != dns_rdatatype_nsec
3655 && type != dns_rdatatype_key) {
3657 * Glue is not OK, but any answer we
3658 * could return would be glue. Return
3664 if (found != NULL && foundsig != NULL)
3670 * If the NSEC3 record doesn't match the chain
3671 * we are using behave as if it isn't here.
3673 if (header->type == dns_rdatatype_nsec3 &&
3674 !matchparams(header, &search)) {
3675 NODE_UNLOCK(lock, isc_rwlocktype_read);
3679 * If we found a type we were looking for,
3682 if (header->type == type ||
3683 type == dns_rdatatype_any ||
3684 (header->type == dns_rdatatype_cname &&
3687 * We've found the answer!
3690 if (header->type == dns_rdatatype_cname &&
3693 * We may be finding a CNAME instead
3694 * of the desired type.
3696 * If we've already got the CNAME RRSIG,
3697 * use it, otherwise change sigtype
3698 * so that we find it.
3700 if (cnamesig != NULL)
3701 foundsig = cnamesig;
3704 RBTDB_RDATATYPE_SIGCNAME;
3707 * If we've got all we need, end the search.
3709 if (!maybe_zonecut && foundsig != NULL)
3711 } else if (header->type == sigtype) {
3713 * We've found the RRSIG rdataset for our
3714 * target type. Remember it.
3718 * If we've got all we need, end the search.
3720 if (!maybe_zonecut && found != NULL)
3722 } else if (header->type == dns_rdatatype_nsec &&
3723 !search.rbtversion->havensec3) {
3725 * Remember a NSEC rdataset even if we're
3726 * not specifically looking for it, because
3727 * we might need it later.
3729 nsecheader = header;
3730 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3731 !search.rbtversion->havensec3) {
3733 * If we need the NSEC rdataset, we'll also
3734 * need its signature.
3737 } else if (cname_ok &&
3738 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3740 * If we get a CNAME match, we'll also need
3750 * We have an exact match for the name, but there are no
3751 * active rdatasets in the desired version. That means that
3752 * this node doesn't exist in the desired version, and that
3753 * we really have a partial match.
3756 NODE_UNLOCK(lock, isc_rwlocktype_read);
3762 * If we didn't find what we were looking for...
3764 if (found == NULL) {
3765 if (search.zonecut != NULL) {
3767 * We were trying to find glue at a node beneath a
3768 * zone cut, but didn't.
3770 * Return the delegation.
3772 NODE_UNLOCK(lock, isc_rwlocktype_read);
3773 result = setup_delegation(&search, nodep, foundname,
3774 rdataset, sigrdataset);
3778 * The desired type doesn't exist.
3780 result = DNS_R_NXRRSET;
3781 if (search.rbtversion->secure == dns_db_secure &&
3782 !search.rbtversion->havensec3 &&
3783 (nsecheader == NULL || nsecsig == NULL)) {
3785 * The zone is secure but there's no NSEC,
3786 * or the NSEC has no signature!
3789 result = DNS_R_BADDB;
3793 NODE_UNLOCK(lock, isc_rwlocktype_read);
3794 result = find_closest_nsec(&search, nodep, foundname,
3795 rdataset, sigrdataset,
3797 search.rbtversion->secure);
3798 if (result == ISC_R_SUCCESS)
3799 result = DNS_R_EMPTYWILD;
3802 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3806 * There's no NSEC record, and we were told
3809 result = DNS_R_BADDB;
3812 if (nodep != NULL) {
3813 new_reference(search.rbtdb, node);
3816 if ((search.rbtversion->secure == dns_db_secure &&
3817 !search.rbtversion->havensec3) ||
3818 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3820 bind_rdataset(search.rbtdb, node, nsecheader,
3822 if (nsecsig != NULL)
3823 bind_rdataset(search.rbtdb, node,
3824 nsecsig, 0, sigrdataset);
3827 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3832 * We found what we were looking for, or we found a CNAME.
3835 if (type != found->type &&
3836 type != dns_rdatatype_any &&
3837 found->type == dns_rdatatype_cname) {
3839 * We weren't doing an ANY query and we found a CNAME instead
3840 * of the type we were looking for, so we need to indicate
3841 * that result to the caller.
3843 result = DNS_R_CNAME;
3844 } else if (search.zonecut != NULL) {
3846 * If we're beneath a zone cut, we must indicate that the
3847 * result is glue, unless we're actually at the zone cut
3848 * and the type is NSEC or KEY.
3850 if (search.zonecut == node) {
3852 * It is not clear if KEY should still be
3853 * allowed at the parent side of the zone
3854 * cut or not. It is needed for RFC3007
3855 * validated updates.
3857 if (type == dns_rdatatype_nsec ||
3858 type == dns_rdatatype_nsec3 ||
3859 type == dns_rdatatype_key)
3860 result = ISC_R_SUCCESS;
3861 else if (type == dns_rdatatype_any)
3862 result = DNS_R_ZONECUT;
3864 result = DNS_R_GLUE;
3866 result = DNS_R_GLUE;
3868 * We might have found data that isn't glue, but was occluded
3869 * by a dynamic update. If the caller cares about this, they
3870 * will have told us to validate glue.
3872 * XXX We should cache the glue validity state!
3874 if (result == DNS_R_GLUE &&
3875 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3876 !valid_glue(&search, foundname, type, node)) {
3877 NODE_UNLOCK(lock, isc_rwlocktype_read);
3878 result = setup_delegation(&search, nodep, foundname,
3879 rdataset, sigrdataset);
3884 * An ordinary successful query!
3886 result = ISC_R_SUCCESS;
3889 if (nodep != NULL) {
3891 new_reference(search.rbtdb, node);
3893 search.need_cleanup = ISC_FALSE;
3897 if (type != dns_rdatatype_any) {
3898 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3899 if (foundsig != NULL)
3900 bind_rdataset(search.rbtdb, node, foundsig, 0,
3905 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3908 NODE_UNLOCK(lock, isc_rwlocktype_read);
3911 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3914 * If we found a zonecut but aren't going to use it, we have to
3917 if (search.need_cleanup) {
3918 node = search.zonecut;
3919 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3921 NODE_LOCK(lock, isc_rwlocktype_read);
3922 decrement_reference(search.rbtdb, node, 0,
3923 isc_rwlocktype_read, isc_rwlocktype_none,
3925 NODE_UNLOCK(lock, isc_rwlocktype_read);
3929 closeversion(db, &version, ISC_FALSE);
3931 dns_rbtnodechain_reset(&search.chain);
/*
 * zone_findzonecut: find-zonecut entry that is not expected to be
 * invoked.  No argument is consulted in the visible body: it reports a
 * fatal error through FATAL_ERROR() and, should that macro return,
 * yields ISC_R_NOTIMPLEMENTED.
 */
3937 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3938 isc_stdtime_t now, dns_dbnode_t **nodep,
3939 dns_name_t *foundname,
3940 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3949 UNUSED(sigrdataset);
3951 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3953 return (ISC_R_NOTIMPLEMENTED);
/*
 * cache_zonecut_callback: callback that cache_find() passes to
 * dns_rbt_findnode().  The arg pointer is the rbtdb_search_t of the
 * lookup in progress, and search->zonecut must still be NULL on entry.
 *
 * With the node lock held (read mode at first), every header on
 * node->data is examined:
 *   - a header whose rdh_ttl is at or before search->now is stale.  If
 *     it is also at or before search->now - RBTDB_VIRTUAL and the write
 *     lock is already held or NODE_TRYUPGRADE() succeeds, the header is
 *     unlinked and freed when dns_rbtnode_refcurrent(node) is 0, and is
 *     otherwise flagged with RDATASET_ATTR_STALE.  Once upgraded, the
 *     lock is kept in write mode for the rest of the scan;
 *   - otherwise DNAME and RRSIG(DNAME) headers are remembered.
 *
 * When a DNAME header was found and its trust is not pending (or
 * DNS_DBFIND_PENDINGOK is set), the node gains a reference, the search
 * records the node and headers as the zone cut with need_cleanup set,
 * and result becomes DNS_R_PARTIALMATCH.  Otherwise result is
 * DNS_R_CONTINUE.  The node lock is released in whichever mode it is
 * held at that point.
 *
 * NOTE(review): this listing omits some short source lines (among them
 * part of the DNAME match conditions and the final return statement);
 * confirm those details against the complete file.
 */
3957 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3958 rbtdb_search_t *search = arg;
3959 rdatasetheader_t *header, *header_prev, *header_next;
3960 rdatasetheader_t *dname_header, *sigdname_header;
3961 isc_result_t result;
3963 isc_rwlocktype_t locktype;
3967 REQUIRE(search->zonecut == NULL);
3970 * Keep compiler silent.
3974 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3975 locktype = isc_rwlocktype_read;
3976 NODE_LOCK(lock, locktype);
3979 * Look for a DNAME or RRSIG DNAME rdataset.
3981 dname_header = NULL;
3982 sigdname_header = NULL;
3984 for (header = node->data; header != NULL; header = header_next) {
3985 header_next = header->next;
3986 if (header->rdh_ttl <= search->now) {
3988 * This rdataset is stale. If no one else is
3989 * using the node, we can clean it up right
3990 * now, otherwise we mark it as stale, and
3991 * the node as dirty, so it will get cleaned
3994 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3995 (locktype == isc_rwlocktype_write ||
3996 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3998 * We update the node's status only when we
3999 * can get write access; otherwise, we leave
4000 * others to this work. Periodical cleaning
4001 * will eventually take the job as the last
4003 * We won't downgrade the lock, since other
4004 * rdatasets are probably stale, too.
4006 locktype = isc_rwlocktype_write;
4008 if (dns_rbtnode_refcurrent(node) == 0) {
4012 * header->down can be non-NULL if the
4013 * refcount has just decremented to 0
4014 * but decrement_reference() has not
4015 * performed clean_cache_node(), in
4016 * which case we need to purge the
4017 * stale headers first.
4019 mctx = search->rbtdb->common.mctx;
4020 clean_stale_headers(search->rbtdb,
4023 if (header_prev != NULL)
4027 node->data = header->next;
4028 free_rdataset(search->rbtdb, mctx,
4031 header->attributes |=
4032 RDATASET_ATTR_STALE;
4034 header_prev = header;
4037 header_prev = header;
4038 } else if (header->type == dns_rdatatype_dname &&
4040 dname_header = header;
4041 header_prev = header;
4042 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4044 sigdname_header = header;
4045 header_prev = header;
4047 header_prev = header;
4050 if (dname_header != NULL &&
4051 (!DNS_TRUST_PENDING(dname_header->trust) ||
4052 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4054 * We increment the reference count on node to ensure that
4055 * search->zonecut_rdataset will still be valid later.
4057 new_reference(search->rbtdb, node);
4058 INSIST(!ISC_LINK_LINKED(node, deadlink));
4059 search->zonecut = node;
4060 search->zonecut_rdataset = dname_header;
4061 search->zonecut_sigrdataset = sigdname_header;
4062 search->need_cleanup = ISC_TRUE;
4063 result = DNS_R_PARTIALMATCH;
4065 result = DNS_R_CONTINUE;
4067 NODE_UNLOCK(lock, locktype);
/*
 * find_deepest_zonecut: starting at node and then moving up through
 * search->chain.levels[], look for the closest node that holds an
 * extant NS rdataset.  The caller must be holding the tree lock.
 *
 * Each visited node is examined under its node lock (read mode at
 * first).  Stale headers (rdh_ttl <= search->now) are handled the same
 * way as in the other cache lookups of this file: when the write lock
 * can be had and the header is old enough it is freed if the node has
 * no references, and otherwise flagged RDATASET_ATTR_STALE.  NS and
 * RRSIG(NS) headers that pass EXISTS() are candidates for the answer.
 *
 * When an NS rdataset is found:
 *   - foundname, if supplied, is built first from the node name and the
 *     names of the enclosing chain levels, so that a name failure
 *     leaves no other output to undo;
 *   - result becomes DNS_R_DELEGATION, the node is referenced when
 *     nodep is supplied, and the NS rdataset plus its signature (when
 *     present) are bound using search->now;
 *   - if need_headerupdate() is true for either header, the node lock
 *     is released and re-acquired in write mode (when not already held
 *     for writing), need_headerupdate() is checked again, and
 *     update_header() is applied.
 *
 * result is initialised to ISC_R_NOTFOUND and is only changed on the
 * paths described above.
 *
 * NOTE(review): several short lines of this function (loop framing, the
 * assignments to found and foundsig, the decrement of i, the error exit
 * after name construction, and the final return) are absent from this
 * listing; confirm those details against the complete file.
 */
4072 static inline isc_result_t
4073 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4074 dns_dbnode_t **nodep, dns_name_t *foundname,
4075 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4078 dns_rbtnode_t *level_node;
4079 rdatasetheader_t *header, *header_prev, *header_next;
4080 rdatasetheader_t *found, *foundsig;
4081 isc_result_t result = ISC_R_NOTFOUND;
4086 isc_rwlocktype_t locktype;
4089 * Caller must be holding the tree lock.
4092 rbtdb = search->rbtdb;
4093 i = search->chain.level_matches;
4096 locktype = isc_rwlocktype_read;
4097 lock = &rbtdb->node_locks[node->locknum].lock;
4098 NODE_LOCK(lock, locktype);
4101 * Look for NS and RRSIG NS rdatasets.
4106 for (header = node->data;
4108 header = header_next) {
4109 header_next = header->next;
4110 if (header->rdh_ttl <= search->now) {
4112 * This rdataset is stale. If no one else is
4113 * using the node, we can clean it up right
4114 * now, otherwise we mark it as stale, and
4115 * the node as dirty, so it will get cleaned
4118 if ((header->rdh_ttl <= search->now -
4120 (locktype == isc_rwlocktype_write ||
4121 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4123 * We update the node's status only
4124 * when we can get write access.
4126 locktype = isc_rwlocktype_write;
4128 if (dns_rbtnode_refcurrent(node)
4132 m = search->rbtdb->common.mctx;
4133 clean_stale_headers(
4136 if (header_prev != NULL)
4142 free_rdataset(rbtdb, m,
4145 header->attributes |=
4146 RDATASET_ATTR_STALE;
4148 header_prev = header;
4151 header_prev = header;
4152 } else if (EXISTS(header)) {
4154 * We've found an extant rdataset. See if
4155 * we're interested in it.
4157 if (header->type == dns_rdatatype_ns) {
4159 if (foundsig != NULL)
4161 } else if (header->type ==
4162 RBTDB_RDATATYPE_SIGNS) {
4167 header_prev = header;
4169 header_prev = header;
4172 if (found != NULL) {
4174 * If we have to set foundname, we do it before
4175 * anything else. If we were to set foundname after
4176 * we had set nodep or bound the rdataset, then we'd
4177 * have to undo that work if dns_name_concatenate()
4178 * failed. By setting foundname first, there's
4179 * nothing to undo if we have trouble.
4181 if (foundname != NULL) {
4182 dns_name_init(&name, NULL);
4183 dns_rbt_namefromnode(node, &name);
4184 result = dns_name_copy(&name, foundname, NULL);
4185 while (result == ISC_R_SUCCESS && i > 0) {
4187 level_node = search->chain.levels[i];
4188 dns_name_init(&name, NULL);
4189 dns_rbt_namefromnode(level_node,
4192 dns_name_concatenate(foundname,
4197 if (result != ISC_R_SUCCESS) {
4202 result = DNS_R_DELEGATION;
4203 if (nodep != NULL) {
4204 new_reference(search->rbtdb, node);
4207 bind_rdataset(search->rbtdb, node, found, search->now,
4209 if (foundsig != NULL)
4210 bind_rdataset(search->rbtdb, node, foundsig,
4211 search->now, sigrdataset);
4212 if (need_headerupdate(found, search->now) ||
4213 (foundsig != NULL &&
4214 need_headerupdate(foundsig, search->now))) {
4215 if (locktype != isc_rwlocktype_write) {
4216 NODE_UNLOCK(lock, locktype);
4217 NODE_LOCK(lock, isc_rwlocktype_write);
4218 locktype = isc_rwlocktype_write;
4220 if (need_headerupdate(found, search->now))
4221 update_header(search->rbtdb, found,
4223 if (foundsig != NULL &&
4224 need_headerupdate(foundsig, search->now)) {
4225 update_header(search->rbtdb, foundsig,
4232 NODE_UNLOCK(lock, locktype);
4234 if (found == NULL && i > 0) {
4236 node = search->chain.levels[i];
/*
 * find_coveringnsec: walk backwards along search->chain looking for a
 * node that carries an NSEC rdataset.
 *
 * Each pass obtains the current name, origin and node from the chain
 * with dns_rbtnodechain_current() and scans the node headers under the
 * node lock (read mode at first).  Stale headers (rdh_ttl <= now) are
 * purged or flagged RDATASET_ATTR_STALE as in the other cache lookups
 * of this file.  Headers that are NONEXISTENT or whose base type is 0
 * are skipped; any other header marks the node as non-empty, and the
 * NSEC and RRSIG(NSEC) headers are the ones of interest.
 *
 * Outcome for a node:
 *   - NSEC found: foundname receives name concatenated with origin, the
 *     rdataset (and signature, when present) are bound, the node is
 *     referenced, and result is DNS_R_COVERINGNSEC;
 *   - non-empty node without NSEC: result is ISC_R_NOTFOUND;
 *   - empty node: dns_rbtnodechain_prev() steps to the predecessor and
 *     the loop repeats for as long as that call returns ISC_R_SUCCESS.
 *
 * NOTE(review): several short lines (the do statement, the assignments
 * to found and foundsig, the error exits, the store into *nodep and the
 * final return) are absent from this listing; confirm those details
 * against the complete file.
 */
4246 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4247 isc_stdtime_t now, dns_name_t *foundname,
4248 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4250 dns_rbtnode_t *node;
4251 rdatasetheader_t *header, *header_next, *header_prev;
4252 rdatasetheader_t *found, *foundsig;
4253 isc_boolean_t empty_node;
4254 isc_result_t result;
4255 dns_fixedname_t fname, forigin;
4256 dns_name_t *name, *origin;
4257 rbtdb_rdatatype_t matchtype, sigmatchtype;
4259 isc_rwlocktype_t locktype;
4261 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4262 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4263 dns_rdatatype_nsec);
4267 dns_fixedname_init(&fname);
4268 name = dns_fixedname_name(&fname);
4269 dns_fixedname_init(&forigin);
4270 origin = dns_fixedname_name(&forigin);
4271 result = dns_rbtnodechain_current(&search->chain, name,
4273 if (result != ISC_R_SUCCESS)
4275 locktype = isc_rwlocktype_read;
4276 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4277 NODE_LOCK(lock, locktype);
4280 empty_node = ISC_TRUE;
4282 for (header = node->data;
4284 header = header_next) {
4285 header_next = header->next;
4286 if (header->rdh_ttl <= now) {
4288 * This rdataset is stale. If no one else is
4289 * using the node, we can clean it up right
4290 * now, otherwise we mark it as stale, and the
4291 * node as dirty, so it will get cleaned up
4294 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4295 (locktype == isc_rwlocktype_write ||
4296 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4298 * We update the node's status only
4299 * when we can get write access.
4301 locktype = isc_rwlocktype_write;
4303 if (dns_rbtnode_refcurrent(node)
4307 m = search->rbtdb->common.mctx;
4308 clean_stale_headers(
4311 if (header_prev != NULL)
4315 node->data = header->next;
4316 free_rdataset(search->rbtdb, m,
4319 header->attributes |=
4320 RDATASET_ATTR_STALE;
4322 header_prev = header;
4325 header_prev = header;
4328 if (NONEXISTENT(header) ||
4329 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4330 header_prev = header;
4333 empty_node = ISC_FALSE;
4334 if (header->type == matchtype)
4336 else if (header->type == sigmatchtype)
4338 header_prev = header;
4340 if (found != NULL) {
4341 result = dns_name_concatenate(name, origin,
4343 if (result != ISC_R_SUCCESS)
4345 bind_rdataset(search->rbtdb, node, found,
4347 if (foundsig != NULL)
4348 bind_rdataset(search->rbtdb, node, foundsig,
4350 new_reference(search->rbtdb, node);
4352 result = DNS_R_COVERINGNSEC;
4353 } else if (!empty_node) {
4354 result = ISC_R_NOTFOUND;
4356 result = dns_rbtnodechain_prev(&search->chain, NULL,
4359 NODE_UNLOCK(lock, locktype);
4360 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * cache_find: lookup by name and type.  db must be a valid rbtdb and
 * version must be NULL.
 *
 * Flow, as far as the visible lines show:
 *  1. The tree read lock is taken and dns_rbt_findnode() is called with
 *     DNS_RBTFIND_EMPTYDATA and cache_zonecut_callback(), which records
 *     a DNAME zone cut in the search state if one is met on the way
 *     down.
 *  2. On DNS_R_PARTIALMATCH: find_coveringnsec() is tried when
 *     DNS_DBFIND_COVERINGNSEC is set; then setup_delegation() is used
 *     if a zone cut was recorded, and find_deepest_zonecut() otherwise.
 *  3. On an exact match the node headers are scanned under the node
 *     lock.  Stale headers (rdh_ttl <= now) are purged or flagged
 *     RDATASET_ATTR_STALE as in the other cache lookups of this file.
 *     Extant headers mark the node non-empty; the scan notes the
 *     requested type (or any positive type for an ANY query, or a CNAME
 *     while cname_ok holds), the matching RRSIG, negative cache entries
 *     for the type, NS / RRSIG(NS), and RRSIG(CNAME).  cname_ok is
 *     cleared for KEY and NSEC queries.
 *  4. If nothing usable was found (no match, or a match whose trust is
 *     additional, glue or pending without the corresponding
 *     DNS_DBFIND_*OK option), an NS rdataset at this node is returned
 *     as DNS_R_DELEGATION; without one the node lock is released to go
 *     and find the deepest zone cut.
 *  5. Otherwise result is DNS_R_NCACHENXDOMAIN or DNS_R_NCACHENXRRSET
 *     for a negative entry, DNS_R_CNAME when a CNAME stands in for the
 *     requested type, and ISC_R_SUCCESS for an ordinary hit.  The
 *     rdataset and its signature are bound unless this is a positive
 *     ANY answer.
 *  6. Headers for which need_headerupdate() is true are passed to
 *     update_header(); if the node lock is not yet held for writing it
 *     is released and re-acquired in write mode first, and
 *     need_headerupdate() is evaluated again afterwards.
 *  7. The tree lock is released; if search.need_cleanup is still set
 *     the reference taken on the zone cut node is dropped; the node
 *     chain is reset.
 *
 * NOTE(review): in the CNAME branch of the scan, the statement
 * foundsig = cnamesig appears after the if/else that either uses
 * cnamesig or switches sigtype to RBTDB_RDATATYPE_SIGCNAME.  If that
 * assignment is unconditional it makes the preceding if redundant;
 * part of the enclosing condition is not visible here, so confirm the
 * intent against the complete file.
 *
 * NOTE(review): many short lines (gotos and labels, assignments to
 * found, foundsig, nsheader, nssig, update and updatesig, stores into
 * *nodep, the final return) are absent from this listing.
 */
4365 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4366 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4367 dns_dbnode_t **nodep, dns_name_t *foundname,
4368 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4370 dns_rbtnode_t *node = NULL;
4371 isc_result_t result;
4372 rbtdb_search_t search;
4373 isc_boolean_t cname_ok = ISC_TRUE;
4374 isc_boolean_t empty_node;
4376 isc_rwlocktype_t locktype;
4377 rdatasetheader_t *header, *header_prev, *header_next;
4378 rdatasetheader_t *found, *nsheader;
4379 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4380 rdatasetheader_t *update, *updatesig;
4381 rbtdb_rdatatype_t sigtype, negtype;
4385 search.rbtdb = (dns_rbtdb_t *)db;
4387 REQUIRE(VALID_RBTDB(search.rbtdb));
4388 REQUIRE(version == NULL);
4391 isc_stdtime_get(&now);
4393 search.rbtversion = NULL;
4395 search.options = options;
4396 search.copy_name = ISC_FALSE;
4397 search.need_cleanup = ISC_FALSE;
4398 search.wild = ISC_FALSE;
4399 search.zonecut = NULL;
4400 dns_fixedname_init(&search.zonecut_name);
4401 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4406 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4409 * Search down from the root of the tree. If, while going down, we
4410 * encounter a callback node, cache_zonecut_callback() will search the
4411 * rdatasets at the zone cut for a DNAME rdataset.
4413 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4414 &search.chain, DNS_RBTFIND_EMPTYDATA,
4415 cache_zonecut_callback, &search);
4417 if (result == DNS_R_PARTIALMATCH) {
4418 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4419 result = find_coveringnsec(&search, nodep, now,
4420 foundname, rdataset,
4422 if (result == DNS_R_COVERINGNSEC)
4425 if (search.zonecut != NULL) {
4426 result = setup_delegation(&search, nodep, foundname,
4427 rdataset, sigrdataset);
4431 result = find_deepest_zonecut(&search, node, nodep,
4432 foundname, rdataset,
4436 } else if (result != ISC_R_SUCCESS)
4440 * Certain DNSSEC types are not subject to CNAME matching
4441 * (RFC4035, section 2.5 and RFC3007).
4443 * We don't check for RRSIG, because we don't store RRSIG records
4446 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4447 cname_ok = ISC_FALSE;
4450 * We now go looking for rdata...
4453 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4454 locktype = isc_rwlocktype_read;
4455 NODE_LOCK(lock, locktype);
4459 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4460 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4464 empty_node = ISC_TRUE;
4466 for (header = node->data; header != NULL; header = header_next) {
4467 header_next = header->next;
4468 if (header->rdh_ttl <= now) {
4470 * This rdataset is stale. If no one else is using the
4471 * node, we can clean it up right now, otherwise we
4472 * mark it as stale, and the node as dirty, so it will
4473 * get cleaned up later.
4475 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4476 (locktype == isc_rwlocktype_write ||
4477 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4479 * We update the node's status only when we
4480 * can get write access.
4482 locktype = isc_rwlocktype_write;
4484 if (dns_rbtnode_refcurrent(node) == 0) {
4487 mctx = search.rbtdb->common.mctx;
4488 clean_stale_headers(search.rbtdb, mctx,
4490 if (header_prev != NULL)
4494 node->data = header->next;
4495 free_rdataset(search.rbtdb, mctx,
4498 header->attributes |=
4499 RDATASET_ATTR_STALE;
4501 header_prev = header;
4504 header_prev = header;
4505 } else if (EXISTS(header)) {
4507 * We now know that there is at least one active
4508 * non-stale rdataset at this node.
4510 empty_node = ISC_FALSE;
4513 * If we found a type we were looking for, remember
4516 if (header->type == type ||
4517 (type == dns_rdatatype_any &&
4518 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4519 (cname_ok && header->type ==
4520 dns_rdatatype_cname)) {
4522 * We've found the answer.
4525 if (header->type == dns_rdatatype_cname &&
4529 * If we've already got the CNAME RRSIG,
4530 * use it, otherwise change sigtype
4531 * so that we find it.
4533 if (cnamesig != NULL)
4534 foundsig = cnamesig;
4537 RBTDB_RDATATYPE_SIGCNAME;
4538 foundsig = cnamesig;
4540 } else if (header->type == sigtype) {
4542 * We've found the RRSIG rdataset for our
4543 * target type. Remember it.
4546 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4547 header->type == negtype) {
4549 * We've found a negative cache entry.
4552 } else if (header->type == dns_rdatatype_ns) {
4554 * Remember a NS rdataset even if we're
4555 * not specifically looking for it, because
4556 * we might need it later.
4559 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4561 * If we need the NS rdataset, we'll also
4562 * need its signature.
4565 } else if (cname_ok &&
4566 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4568 * If we get a CNAME match, we'll also need
4573 header_prev = header;
4575 header_prev = header;
4580 * We have an exact match for the name, but there are no
4581 * extant rdatasets. That means that this node doesn't
4582 * meaningfully exist, and that we really have a partial match.
4584 NODE_UNLOCK(lock, locktype);
4589 * If we didn't find what we were looking for...
4591 if (found == NULL ||
4592 (DNS_TRUST_ADDITIONAL(found->trust) &&
4593 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4594 (found->trust == dns_trust_glue &&
4595 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4596 (DNS_TRUST_PENDING(found->trust) &&
4597 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4599 * If there is an NS rdataset at this node, then this is the
4602 if (nsheader != NULL) {
4603 if (nodep != NULL) {
4604 new_reference(search.rbtdb, node);
4605 INSIST(!ISC_LINK_LINKED(node, deadlink));
4608 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4610 if (need_headerupdate(nsheader, search.now))
4612 if (nssig != NULL) {
4613 bind_rdataset(search.rbtdb, node, nssig,
4614 search.now, sigrdataset);
4615 if (need_headerupdate(nssig, search.now))
4618 result = DNS_R_DELEGATION;
4623 * Go find the deepest zone cut.
4625 NODE_UNLOCK(lock, locktype);
4630 * We found what we were looking for, or we found a CNAME.
4633 if (nodep != NULL) {
4634 new_reference(search.rbtdb, node);
4635 INSIST(!ISC_LINK_LINKED(node, deadlink));
4639 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4641 * We found a negative cache entry.
4643 if (NXDOMAIN(found))
4644 result = DNS_R_NCACHENXDOMAIN;
4646 result = DNS_R_NCACHENXRRSET;
4647 } else if (type != found->type &&
4648 type != dns_rdatatype_any &&
4649 found->type == dns_rdatatype_cname) {
4651 * We weren't doing an ANY query and we found a CNAME instead
4652 * of the type we were looking for, so we need to indicate
4653 * that result to the caller.
4655 result = DNS_R_CNAME;
4658 * An ordinary successful query!
4660 result = ISC_R_SUCCESS;
4663 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4664 result == DNS_R_NCACHENXRRSET) {
4665 bind_rdataset(search.rbtdb, node, found, search.now,
4667 if (need_headerupdate(found, search.now))
4669 if (foundsig != NULL) {
4670 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4672 if (need_headerupdate(foundsig, search.now))
4673 updatesig = foundsig;
4678 if ((update != NULL || updatesig != NULL) &&
4679 locktype != isc_rwlocktype_write) {
4680 NODE_UNLOCK(lock, locktype);
4681 NODE_LOCK(lock, isc_rwlocktype_write);
4682 locktype = isc_rwlocktype_write;
4684 if (update != NULL && need_headerupdate(update, search.now))
4685 update_header(search.rbtdb, update, search.now);
4686 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4687 update_header(search.rbtdb, updatesig, search.now);
4689 NODE_UNLOCK(lock, locktype);
4692 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4695 * If we found a zonecut but aren't going to use it, we have to
4698 if (search.need_cleanup) {
4699 node = search.zonecut;
4700 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4702 NODE_LOCK(lock, isc_rwlocktype_read);
4703 decrement_reference(search.rbtdb, node, 0,
4704 isc_rwlocktype_read, isc_rwlocktype_none,
4706 NODE_UNLOCK(lock, isc_rwlocktype_read);
4709 dns_rbtnodechain_reset(&search.chain);
/*
 * cache_findzonecut: find the deepest NS rdataset at or above name.
 *
 * The tree read lock is taken and dns_rbt_findnode() is called with
 * DNS_RBTFIND_EMPTYDATA (plus DNS_RBTFIND_NOEXACT when
 * DNS_DBFIND_NOEXACT is requested) and no callback.
 *   - On DNS_R_PARTIALMATCH the work is delegated to
 *     find_deepest_zonecut().
 *   - On an exact match the node headers are scanned under the node
 *     lock.  Stale headers (rdh_ttl <= now) are purged or flagged
 *     RDATASET_ATTR_STALE as in the other cache lookups of this file;
 *     extant NS and RRSIG(NS) headers are the ones of interest.  If no
 *     NS is present the node lock is released (the continuation is not
 *     visible here).  Otherwise the node is referenced when nodep is
 *     supplied, the NS rdataset and its signature are bound, and
 *     headers for which need_headerupdate() is true are refreshed with
 *     update_header() after the lock has been re-acquired in write
 *     mode.
 *
 * On the way out the tree lock is released, search.need_cleanup is
 * asserted to be false, the node chain is reset, and a result of
 * DNS_R_DELEGATION is converted to ISC_R_SUCCESS.
 *
 * NOTE(review): short lines (gotos and labels, assignments to found and
 * foundsig, the store into *nodep, the final return) are absent from
 * this listing; confirm those details against the complete file.
 */
4715 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4716 isc_stdtime_t now, dns_dbnode_t **nodep,
4717 dns_name_t *foundname,
4718 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4720 dns_rbtnode_t *node = NULL;
4722 isc_result_t result;
4723 rbtdb_search_t search;
4724 rdatasetheader_t *header, *header_prev, *header_next;
4725 rdatasetheader_t *found, *foundsig;
4726 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4727 isc_rwlocktype_t locktype;
4729 search.rbtdb = (dns_rbtdb_t *)db;
4731 REQUIRE(VALID_RBTDB(search.rbtdb));
4734 isc_stdtime_get(&now);
4736 search.rbtversion = NULL;
4738 search.options = options;
4739 search.copy_name = ISC_FALSE;
4740 search.need_cleanup = ISC_FALSE;
4741 search.wild = ISC_FALSE;
4742 search.zonecut = NULL;
4743 dns_fixedname_init(&search.zonecut_name);
4744 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4747 if ((options & DNS_DBFIND_NOEXACT) != 0)
4748 rbtoptions |= DNS_RBTFIND_NOEXACT;
4750 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4753 * Search down from the root of the tree.
4755 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4756 &search.chain, rbtoptions, NULL, &search);
4758 if (result == DNS_R_PARTIALMATCH) {
4760 result = find_deepest_zonecut(&search, node, nodep, foundname,
4761 rdataset, sigrdataset);
4763 } else if (result != ISC_R_SUCCESS)
4767 * We now go looking for an NS rdataset at the node.
4770 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4771 locktype = isc_rwlocktype_read;
4772 NODE_LOCK(lock, locktype);
4777 for (header = node->data; header != NULL; header = header_next) {
4778 header_next = header->next;
4779 if (header->rdh_ttl <= now) {
4781 * This rdataset is stale. If no one else is using the
4782 * node, we can clean it up right now, otherwise we
4783 * mark it as stale, and the node as dirty, so it will
4784 * get cleaned up later.
4786 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4787 (locktype == isc_rwlocktype_write ||
4788 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4790 * We update the node's status only when we
4791 * can get write access.
4793 locktype = isc_rwlocktype_write;
4795 if (dns_rbtnode_refcurrent(node) == 0) {
4798 mctx = search.rbtdb->common.mctx;
4799 clean_stale_headers(search.rbtdb, mctx,
4801 if (header_prev != NULL)
4805 node->data = header->next;
4806 free_rdataset(search.rbtdb, mctx,
4809 header->attributes |=
4810 RDATASET_ATTR_STALE;
4812 header_prev = header;
4815 header_prev = header;
4816 } else if (EXISTS(header)) {
4818 * If we found a type we were looking for, remember
4821 if (header->type == dns_rdatatype_ns) {
4823 * Remember a NS rdataset even if we're
4824 * not specifically looking for it, because
4825 * we might need it later.
4828 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4830 * If we need the NS rdataset, we'll also
4831 * need its signature.
4835 header_prev = header;
4837 header_prev = header;
4840 if (found == NULL) {
4842 * No NS records here.
4844 NODE_UNLOCK(lock, locktype);
4848 if (nodep != NULL) {
4849 new_reference(search.rbtdb, node);
4850 INSIST(!ISC_LINK_LINKED(node, deadlink));
4854 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4855 if (foundsig != NULL)
4856 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4859 if (need_headerupdate(found, search.now) ||
4860 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
4861 if (locktype != isc_rwlocktype_write) {
4862 NODE_UNLOCK(lock, locktype);
4863 NODE_LOCK(lock, isc_rwlocktype_write);
4864 locktype = isc_rwlocktype_write;
4866 if (need_headerupdate(found, search.now))
4867 update_header(search.rbtdb, found, search.now);
4868 if (foundsig != NULL &&
4869 need_headerupdate(foundsig, search.now)) {
4870 update_header(search.rbtdb, foundsig, search.now);
4874 NODE_UNLOCK(lock, locktype);
4877 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4879 INSIST(!search.need_cleanup);
4881 dns_rbtnodechain_reset(&search.chain);
4883 if (result == DNS_R_DELEGATION)
4884 result = ISC_R_SUCCESS;
/*
 * attachnode: take an additional reference on the node passed as
 * source.  db must be a valid rbtdb, and targetp must be non-NULL and
 * point at NULL on entry.  The reference count is incremented with
 * dns_rbtnode_refincrement() while the strong lock of the node lock
 * bucket (selected by node->locknum) is held.
 *
 * NOTE(review): the store into *targetp is not among the lines visible
 * in this listing; presumably it receives source.
 */
4890 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4891 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4892 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4895 REQUIRE(VALID_RBTDB(rbtdb));
4896 REQUIRE(targetp != NULL && *targetp == NULL);
4898 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4899 dns_rbtnode_refincrement(node, &refs);
4901 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * detachnode: release the reference held through *targetp.  db must be
 * a valid rbtdb, and targetp must be non-NULL and point at a node.
 *
 * The reference is dropped with decrement_reference() under the node
 * lock in read mode.  If that call reports true and the lock bucket has
 * no references left while it is marked exiting, the bucket is treated
 * as inactive.  The database lock is then taken in write mode and
 * want_free is set when rbtdb->active has reached 0; in that case the
 * origin name (or the literal <UNKNOWN> when the origin is not dynamic)
 * is logged at debug level 1 and free_rbtdb() is called.
 *
 * NOTE(review): short lines (clearing *targetp, the tests on inactive
 * and want_free, and the decrement of rbtdb->active) are absent from
 * this listing; confirm those details against the complete file.
 */
4907 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4908 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4909 dns_rbtnode_t *node;
4910 isc_boolean_t want_free = ISC_FALSE;
4911 isc_boolean_t inactive = ISC_FALSE;
4912 rbtdb_nodelock_t *nodelock;
4914 REQUIRE(VALID_RBTDB(rbtdb));
4915 REQUIRE(targetp != NULL && *targetp != NULL);
4917 node = (dns_rbtnode_t *)(*targetp);
4918 nodelock = &rbtdb->node_locks[node->locknum];
4920 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4922 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4923 isc_rwlocktype_none, ISC_FALSE)) {
4924 if (isc_refcount_current(&nodelock->references) == 0 &&
4925 nodelock->exiting) {
4926 inactive = ISC_TRUE;
4930 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4935 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4937 if (rbtdb->active == 0)
4938 want_free = ISC_TRUE;
4939 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4941 char buf[DNS_NAME_FORMATSIZE];
4942 if (dns_name_dynamic(&rbtdb->common.origin))
4943 dns_name_format(&rbtdb->common.origin, buf,
4946 strcpy(buf, "<UNKNOWN>");
4947 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4948 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4949 "calling free_rbtdb(%s)", buf);
4950 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * expirenode: mark expired rdatasets at node as stale and, under memory
 * pressure, possibly force live ones to expire.  The caller must hold a
 * tree lock.  Always returns ISC_R_SUCCESS.
 *
 * When isc_mem_isovermem() reports that the memory context is over its
 * limit, a random value is drawn and force_expire is set for a node
 * with no down pointer when the value is a multiple of 4; logging at
 * debug level 2 is enabled if the log context would emit it.
 *
 * The node lock is always taken in write mode.  For each header on the
 * node:
 *   - rdh_ttl at or before now - RBTDB_VIRTUAL: the header is flagged
 *     RDATASET_ATTR_STALE (nothing is freed here because the caller
 *     holds a reference on the node);
 *   - otherwise, with force_expire set: unless RETAIN(header) holds,
 *     the TTL is set to 0 and the header is flagged stale;
 *   - otherwise, when over memory and logging, the entry is reported
 *     as saved.
 *
 * NOTE(review): short lines (the test that precedes
 * isc_stdtime_get(), the tests on log, and some braces) are absent
 * from this listing; confirm those details against the complete file.
 */
4956 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4957 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4958 dns_rbtnode_t *rbtnode = node;
4959 rdatasetheader_t *header;
4960 isc_boolean_t force_expire = ISC_FALSE;
4962 * These are the category and module used by the cache cleaner.
4964 isc_boolean_t log = ISC_FALSE;
4965 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4966 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4967 int level = ISC_LOG_DEBUG(2);
4968 char printname[DNS_NAME_FORMATSIZE];
4970 REQUIRE(VALID_RBTDB(rbtdb));
4973 * Caller must hold a tree lock.
4977 isc_stdtime_get(&now);
4979 if (isc_mem_isovermem(rbtdb->common.mctx)) {
4982 isc_random_get(&val);
4984 * XXXDCL Could stand to have a better policy, like LRU.
4986 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4989 * Note that 'log' can be true IFF overmem is also true.
4990 * overmem can currently only be true for cache
4991 * databases -- hence all of the "overmem cache" log strings.
4993 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4995 isc_log_write(dns_lctx, category, module, level,
4996 "overmem cache: %s %s",
4997 force_expire ? "FORCE" : "check",
4998 dns_rbt_formatnodename(rbtnode,
5000 sizeof(printname)));
5004 * We may not need write access, but this code path is not performance
5005 * sensitive, so it should be okay to always lock as a writer.
5007 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5008 isc_rwlocktype_write);
5010 for (header = rbtnode->data; header != NULL; header = header->next)
5011 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5013 * We don't check if refcurrent(rbtnode) == 0 and try
5014 * to free like we do in cache_find(), because
5015 * refcurrent(rbtnode) must be non-zero. This is so
5016 * because 'node' is an argument to the function.
5018 header->attributes |= RDATASET_ATTR_STALE;
5021 isc_log_write(dns_lctx, category, module,
5022 level, "overmem cache: stale %s",
5024 } else if (force_expire) {
5025 if (! RETAIN(header)) {
5026 set_ttl(rbtdb, header, 0);
5027 header->attributes |= RDATASET_ATTR_STALE;
5030 isc_log_write(dns_lctx, category, module,
5031 level, "overmem cache: "
5032 "reprieve by RETAIN() %s",
5035 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5036 isc_log_write(dns_lctx, category, module, level,
5037 "overmem cache: saved %s", printname);
5039 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5040 isc_rwlocktype_write);
5042 return (ISC_R_SUCCESS);
/*
 * overmem: over-memory notification hook taking the database and an
 * over-memory flag.  Per the comment in its body it is deliberately an
 * empty callback.
 */
5046 overmem(dns_db_t *db, isc_boolean_t overmem) {
5047 /* This is an empty callback. See adb.c:water() */
/*
 * printnode: write a human-readable dump of node to the stream out.
 * db must be a valid rbtdb.  The node lock is held in read mode for the
 * duration of the dump.
 *
 * The first line shows the node address, its current reference count
 * and its lock number.  If the node has data, every header on the
 * top-level list is visited and, for each, the chain reached through
 * the down pointers is printed with type, serial, ttl, trust and
 * attributes.  A node without data is reported as (empty).
 *
 * NOTE(review): short lines of this function (some fprintf framing and
 * arguments, the do statement) are absent from this listing.
 */
5056 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5057 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5058 dns_rbtnode_t *rbtnode = node;
5059 isc_boolean_t first;
5061 REQUIRE(VALID_RBTDB(rbtdb));
5063 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5064 isc_rwlocktype_read);
5066 fprintf(out, "node %p, %u references, locknum = %u\n",
5067 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5069 if (rbtnode->data != NULL) {
5070 rdatasetheader_t *current, *top_next;
5072 for (current = rbtnode->data; current != NULL;
5073 current = top_next) {
5074 top_next = current->next;
5076 fprintf(out, "\ttype %u", current->type);
5082 "\tserial = %lu, ttl = %u, "
5083 "trust = %u, attributes = %u, "
5085 (unsigned long)current->serial,
5088 current->attributes,
5090 current = current->down;
5091 } while (current != NULL);
5094 fprintf(out, "(empty)\n");
5096 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5097 isc_rwlocktype_read);
/*
 * createiterator: allocate and initialise a database iterator for db
 * and hand it back through iteratorp.
 *
 * The iterator is allocated from the database memory context; on
 * allocation failure ISC_R_NOMEMORY is returned.  The new iterator
 * attaches to db, starts out paused with no tree lock held and with a
 * result of ISC_R_SUCCESS, and has its name buffers, deletion table and
 * both node chains initialised.  Option bits select relative names
 * (DNS_DB_RELATIVENAMES) and the NSEC3 filters (DNS_DB_NSEC3ONLY,
 * DNS_DB_NONSEC3); with DNS_DB_NSEC3ONLY the current chain is the
 * NSEC3 chain, otherwise the main chain.  Returns ISC_R_SUCCESS.
 */
5101 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5103 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5104 rbtdb_dbiterator_t *rbtdbiter;
5106 REQUIRE(VALID_RBTDB(rbtdb));
5108 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5109 if (rbtdbiter == NULL)
5110 return (ISC_R_NOMEMORY);
5112 rbtdbiter->common.methods = &dbiterator_methods;
5113 rbtdbiter->common.db = NULL;
5114 dns_db_attach(db, &rbtdbiter->common.db);
5115 rbtdbiter->common.relative_names =
5116 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5117 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5118 rbtdbiter->common.cleaning = ISC_FALSE;
5119 rbtdbiter->paused = ISC_TRUE;
5120 rbtdbiter->tree_locked = isc_rwlocktype_none;
5121 rbtdbiter->result = ISC_R_SUCCESS;
5122 dns_fixedname_init(&rbtdbiter->name);
5123 dns_fixedname_init(&rbtdbiter->origin);
5124 rbtdbiter->node = NULL;
5125 rbtdbiter->delete = 0;
5126 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5127 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5128 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5129 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5130 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5131 if (rbtdbiter->nsec3only)
5132 rbtdbiter->current = &rbtdbiter->nsec3chain;
5134 rbtdbiter->current = &rbtdbiter->chain;
5136 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5138 return (ISC_R_SUCCESS);
/*
 * zone_findrdataset: look up one rdataset type at an already-located
 * node, as seen from a given version.  db must be a valid rbtdb and
 * type must not be dns_rdatatype_any.
 *
 * When version is NULL the current version is opened for the duration
 * of the call and closed again before returning.  Under the node lock
 * in read mode, each header on the node is followed through its down
 * chain to an entry whose serial is not newer than the version serial;
 * an entry flagged NONEXISTENT ends that search without a match.  A
 * surviving entry is compared with the requested (type, covers) pair
 * and with the RRSIG type covering the requested type.
 *
 * When the rdataset is found it is bound to rdataset, along with its
 * signature when one was found.  Returns ISC_R_SUCCESS on a hit and
 * ISC_R_NOTFOUND otherwise.
 *
 * NOTE(review): short lines (initialisation of found and foundsig, the
 * second half of the serial test, the tests guarding the RRSIG match
 * type and the final returns) are absent from this listing; confirm
 * those details against the complete file.
 */
5142 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5143 dns_rdatatype_t type, dns_rdatatype_t covers,
5144 isc_stdtime_t now, dns_rdataset_t *rdataset,
5145 dns_rdataset_t *sigrdataset)
5147 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5148 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5149 rdatasetheader_t *header, *header_next, *found, *foundsig;
5150 rbtdb_serial_t serial;
5151 rbtdb_version_t *rbtversion = version;
5152 isc_boolean_t close_version = ISC_FALSE;
5153 rbtdb_rdatatype_t matchtype, sigmatchtype;
5155 REQUIRE(VALID_RBTDB(rbtdb));
5156 REQUIRE(type != dns_rdatatype_any);
5158 if (rbtversion == NULL) {
5159 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5160 close_version = ISC_TRUE;
5162 serial = rbtversion->serial;
5165 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5166 isc_rwlocktype_read);
5170 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5172 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5176 for (header = rbtnode->data; header != NULL; header = header_next) {
5177 header_next = header->next;
5179 if (header->serial <= serial &&
5182 * Is this a "this rdataset doesn't
5185 if (NONEXISTENT(header))
5189 header = header->down;
5190 } while (header != NULL);
5191 if (header != NULL) {
5193 * We have an active, extant rdataset. If it's a
5194 * type we're looking for, remember it.
5196 if (header->type == matchtype) {
5198 if (foundsig != NULL)
5200 } else if (header->type == sigmatchtype) {
5207 if (found != NULL) {
5208 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5209 if (foundsig != NULL)
5210 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5214 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5215 isc_rwlocktype_read);
5218 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5222 return (ISC_R_NOTFOUND);
5224 return (ISC_R_SUCCESS);
/*
 * Cache-database lookup of a single rdataset at a node.
 *
 * The header list of the node is scanned under the node lock.  Headers
 * with rdh_ttl <= now never reach the matching branch; those with
 * rdh_ttl <= now - RBTDB_VIRTUAL are additionally flagged
 * RDATASET_ATTR_STALE, but only when the lock is already held for writing
 * or NODE_TRYUPGRADE() succeeds.  Among the remaining headers that pass
 * EXISTS(), the code looks for the (type, covers) key, for a negative entry
 * (RBTDB_RDATATYPE_NCACHEANY or negtype) and for the RRSIG covering type.
 *
 * Returns ISC_R_NOTFOUND, or result, which starts as ISC_R_SUCCESS and is
 * changed to DNS_R_NCACHENXDOMAIN or DNS_R_NCACHENXRRSET when the header
 * found has base type 0 (a negative cache entry).  The final return
 * statement is not visible in this listing.
 */
5228 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5229 dns_rdatatype_t type, dns_rdatatype_t covers,
5230 isc_stdtime_t now, dns_rdataset_t *rdataset,
5231 dns_rdataset_t *sigrdataset)
5233 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5234 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5235 rdatasetheader_t *header, *header_next, *found, *foundsig;
5236 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5237 isc_result_t result;
5239 isc_rwlocktype_t locktype;
5241 REQUIRE(VALID_RBTDB(rbtdb));
5242 REQUIRE(type != dns_rdatatype_any);
5246 result = ISC_R_SUCCESS;
/*
 * NOTE(review): presumably guarded by a test on now that is not visible
 * in this listing -- confirm.
 */
5249 isc_stdtime_get(&now);
/* Start with a read lock; locktype tracks a later upgrade to write. */
5251 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5252 locktype = isc_rwlocktype_read;
5253 NODE_LOCK(lock, locktype);
5257 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
/* Key of a negative cache entry for type: base type 0, extension type. */
5258 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5260 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5264 for (header = rbtnode->data; header != NULL; header = header_next) {
5265 header_next = header->next;
5266 if (header->rdh_ttl <= now) {
5267 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5268 (locktype == isc_rwlocktype_write ||
5269 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5271 * We update the node's status only when we
5272 * can get write access.
5274 locktype = isc_rwlocktype_write;
5277 * We don't check if refcurrent(rbtnode) == 0
5278 * and try to free like we do in cache_find(),
5279 * because refcurrent(rbtnode) must be
5280 * non-zero. This is so because 'node' is an
5281 * argument to the function.
5283 header->attributes |= RDATASET_ATTR_STALE;
5286 } else if (EXISTS(header)) {
5287 if (header->type == matchtype)
5289 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5290 header->type == negtype)
5292 else if (header->type == sigmatchtype)
5296 if (found != NULL) {
5297 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5298 if (foundsig != NULL)
5299 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5303 NODE_UNLOCK(lock, locktype);
5306 return (ISC_R_NOTFOUND);
5308 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5310 * We found a negative cache entry.
5312 if (NXDOMAIN(found))
5313 result = DNS_R_NCACHENXDOMAIN;
5315 result = DNS_R_NCACHENXRRSET;
/*
 * Create an rdataset iterator for a node and hand it back in *iteratorp.
 *
 * The iterator is allocated from the memory context of the db; a failed
 * allocation yields ISC_R_NOMEMORY.  For a db without DNS_DBATTR_CACHE a
 * version is arranged: the rbtversion == NULL case passes &rbtversion to a
 * call whose name is not visible here, and a reference count on the version
 * is incremented (the branch structure between the two is not fully visible
 * in this listing).  isc_stdtime_get(&now) is also called; its guard, if
 * any, is not visible.  A reference on the node is taken under the node
 * strong lock and the iterator starts out with current == NULL.
 *
 * Returns ISC_R_SUCCESS once *iteratorp has been set.
 */
5322 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5323 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5325 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5326 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5327 rbtdb_version_t *rbtversion = version;
5328 rbtdb_rdatasetiter_t *iterator;
5331 REQUIRE(VALID_RBTDB(rbtdb));
5333 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5334 if (iterator == NULL)
5335 return (ISC_R_NOMEMORY);
/* Only a non-cache db deals with versions here. */
5337 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5339 if (rbtversion == NULL)
5341 (dns_dbversion_t **) (void *)(&rbtversion));
5345 isc_refcount_increment(&rbtversion->references,
5351 isc_stdtime_get(&now);
/* Fill in the generic part of the iterator. */
5355 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5356 iterator->common.methods = &rdatasetiter_methods;
5357 iterator->common.db = db;
5358 iterator->common.node = node;
5359 iterator->common.version = (dns_dbversion_t *)rbtversion;
5360 iterator->common.now = now;
/* The iterator keeps its own reference on the node. */
5362 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5364 dns_rbtnode_refincrement(rbtnode, &refs);
5367 iterator->current = NULL;
5369 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5371 *iteratorp = (dns_rdatasetiter_t *)iterator;
5373 return (ISC_R_SUCCESS);
/*
 * Scan the header list of node, as of serial, for an active CNAME rdataset
 * and for active rdatasets of any other kind.  For each list element the
 * down chain is followed while looking for a header with
 * header->serial <= serial, and NONEXISTENT() headers are tested for.
 * Base types KEY, SIG, NSEC and RRSIG are not counted as other data.
 *
 * The return statements are not visible in this listing; presumably
 * ISC_TRUE is returned as soon as both cname and other_data are set.
 */
5376 static isc_boolean_t
5377 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5378 rdatasetheader_t *header, *header_next;
5379 isc_boolean_t cname, other_data;
5380 dns_rdatatype_t rdtype;
5383 * The caller must hold the node lock.
5387 * Look for CNAME and "other data" rdatasets active in our version.
5390 other_data = ISC_FALSE;
5391 for (header = node->data; header != NULL; header = header_next) {
5392 header_next = header->next;
5393 if (header->type == dns_rdatatype_cname) {
5395 * Look for an active extant CNAME.
5398 if (header->serial <= serial &&
5401 * Is this a "this rdataset doesn't
5404 if (NONEXISTENT(header))
5408 header = header->down;
5409 } while (header != NULL);
5414 * Look for active extant "other data".
5416 * "Other data" is any rdataset whose type is not
5417 * KEY, NSEC, SIG or RRSIG.
5419 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5420 if (rdtype != dns_rdatatype_key &&
5421 rdtype != dns_rdatatype_sig &&
5422 rdtype != dns_rdatatype_nsec &&
5423 rdtype != dns_rdatatype_rrsig) {
5425 * Is it active and extant?
5428 if (header->serial <= serial &&
5431 * Is this a "this rdataset
5432 * doesn't exist" record?
5434 if (NONEXISTENT(header))
5438 header = header->down;
5439 } while (header != NULL);
5441 other_data = ISC_TRUE;
5446 if (cname && other_data)
/*
 * Put newheader on the heap rbtdb->heaps[idx].
 *
 * Asserts that the db is not a cache, that newheader->heap_index is 0
 * (presumably meaning it is not on a heap yet) and that newheader is not
 * linked on a list.  The outcome of isc_heap_insert() is stored in result;
 * presumably it is returned to the caller (the return statement is not
 * visible in this listing).
 */
5453 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5454 isc_result_t result;
5456 INSIST(!IS_CACHE(rbtdb));
5457 INSIST(newheader->heap_index == 0);
5458 INSIST(!ISC_LINK_LINKED(newheader, link));
5460 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * Link newheader into the rdataset headers of rbtnode.
 *
 * Parameters, as used by the visible code:
 *   rbtversion     version being written; the rbtversion == NULL tests
 *                  below select the cache-specific handling (negative
 *                  cache entries, trust comparison, TTL expiry).
 *   newheader      header to add.  The visible early-return paths release
 *                  it with free_rdataset() (some argument lines are not
 *                  visible in this listing); otherwise it is linked into
 *                  the node, or replaced by the merged slab.
 *   options        DNS_DBADD_MERGE (requires a version), DNS_DBADD_FORCE
 *                  (trust becomes dns_trust_ultimate), DNS_DBADD_EXACT and
 *                  DNS_DBADD_EXACTTTL (merge flags / DNS_R_NOTEXACT).
 *   loading        when set, no changed record is created and a replaced
 *                  header is freed immediately instead of being kept on
 *                  the down chain.
 *   addedrdataset  if not NULL, bound to the header that ends up in effect.
 *   now            compared against rdh_ttl for cache data.
 *
 * Result codes visible below: ISC_R_NOMEMORY (no changed record),
 * DNS_R_UNCHANGED, DNS_R_NOTEXACT (merge path), DNS_R_CNAMEANDOTHER and
 * ISC_R_SUCCESS.  Note that DNS_R_CNAMEANDOTHER is returned after
 * newheader has already been linked in.
 *
 * NOTE(review): both isc_heap_insert() calls made for a cache db discard
 * the result; the first carries an XXXMLG remark about that, the second
 * does not.
 */
5465 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5466 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5467 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5469 rbtdb_changed_t *changed = NULL;
5470 rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5471 unsigned char *merged;
5472 isc_result_t result;
5473 isc_boolean_t header_nx;
5474 isc_boolean_t newheader_nx;
5475 isc_boolean_t merge;
5476 dns_rdatatype_t rdtype, covers;
5477 rbtdb_rdatatype_t negtype, sigtype;
5482 * Add an rdatasetheader_t to a node.
5486 * Caller must be holding the node lock.
/* Merging is only possible into a version. */
5489 if ((options & DNS_DBADD_MERGE) != 0) {
5490 REQUIRE(rbtversion != NULL);
/* DNS_DBADD_FORCE overrides the trust used in the comparisons further down. */
5495 if ((options & DNS_DBADD_FORCE) != 0)
5496 trust = dns_trust_ultimate;
5498 trust = newheader->trust;
5500 if (rbtversion != NULL && !loading) {
5502 * We always add a changed record, even if no changes end up
5503 * being made to this node, because it's harmless and
5504 * simplifies the code.
5506 changed = add_changed(rbtdb, rbtversion, rbtnode);
5507 if (changed == NULL) {
5508 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5509 return (ISC_R_NOMEMORY);
5513 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5514 topheader_prev = NULL;
5517 if (rbtversion == NULL && !newheader_nx) {
5518 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5521 * We're adding a negative cache entry.
5523 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5524 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
5526 for (topheader = rbtnode->data;
5528 topheader = topheader->next) {
5530 * If we're adding an negative cache entry
5531 * which covers all types (NXDOMAIN,
5532 * NODATA(QTYPE=ANY)).
5534 * We make all other data stale so that the
5535 * only rdataset that can be found at this
5536 * node is the negative cache entry.
5538 * Otherwise look for any RRSIGs of the
5539 * given type so they can be marked stale
5542 if (covers == dns_rdatatype_any) {
5543 set_ttl(rbtdb, topheader, 0);
5544 topheader->attributes |=
5545 RDATASET_ATTR_STALE;
5547 } else if (topheader->type == sigtype)
5548 sigheader = topheader;
5550 if (covers == dns_rdatatype_any)
5552 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5555 * We're adding something that isn't a
5556 * negative cache entry. Look for an extant
5557 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5560 for (topheader = rbtnode->data;
5562 topheader = topheader->next) {
5563 if (topheader->type ==
5564 RBTDB_RDATATYPE_NCACHEANY)
5567 if (topheader != NULL && EXISTS(topheader) &&
5568 topheader->rdh_ttl > now) {
5572 if (trust < topheader->trust) {
5574 * The NXDOMAIN/NODATA(QTYPE=ANY)
5577 free_rdataset(rbtdb,
5580 if (addedrdataset != NULL)
5581 bind_rdataset(rbtdb, rbtnode,
5584 return (DNS_R_UNCHANGED);
5587 * The new rdataset is better. Expire the
5588 * NXDOMAIN/NODATA(QTYPE=ANY).
5590 set_ttl(rbtdb, topheader, 0);
5591 topheader->attributes |= RDATASET_ATTR_STALE;
5596 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5600 for (topheader = rbtnode->data;
5602 topheader = topheader->next) {
5603 if (topheader->type == newheader->type ||
5604 topheader->type == negtype)
5606 topheader_prev = topheader;
5611 * If header isn't NULL, we've found the right type. There may be
5612 * IGNORE rdatasets between the top of the chain and the first real
5613 * data. We skip over them.
5616 while (header != NULL && IGNORE(header))
5617 header = header->down;
5618 if (header != NULL) {
5619 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5622 * Deleting an already non-existent rdataset has no effect.
5624 if (header_nx && newheader_nx) {
5625 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5626 return (DNS_R_UNCHANGED);
5630 * Trying to add an rdataset with lower trust to a cache DB
5631 * has no effect, provided that the cache data isn't stale.
5633 if (rbtversion == NULL && trust < header->trust &&
5634 (header->rdh_ttl > now || header_nx)) {
5635 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5636 if (addedrdataset != NULL)
5637 bind_rdataset(rbtdb, rbtnode, header, now,
5639 return (DNS_R_UNCHANGED);
5643 * Don't merge if a nonexistent rdataset is involved.
5645 if (merge && (header_nx || newheader_nx))
5649 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5650 * that is the union of 'newheader' and 'header'.
5653 unsigned int flags = 0;
5654 INSIST(rbtversion->serial >= header->serial);
5656 result = ISC_R_SUCCESS;
5658 if ((options & DNS_DBADD_EXACT) != 0)
5659 flags |= DNS_RDATASLAB_EXACT;
5660 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5661 newheader->rdh_ttl != header->rdh_ttl)
5662 result = DNS_R_NOTEXACT;
5663 else if (newheader->rdh_ttl != header->rdh_ttl)
5664 flags |= DNS_RDATASLAB_FORCE;
5665 if (result == ISC_R_SUCCESS)
5666 result = dns_rdataslab_merge(
5667 (unsigned char *)header,
5668 (unsigned char *)newheader,
5669 (unsigned int)(sizeof(*newheader)),
5671 rbtdb->common.rdclass,
5672 (dns_rdatatype_t)header->type,
5674 if (result == ISC_R_SUCCESS) {
5676 * If 'header' has the same serial number as
5677 * we do, we could clean it up now if we knew
5678 * that our caller had no references to it.
5679 * We don't know this, however, so we leave it
5680 * alone. It will get cleaned up when
5681 * clean_zone_node() runs.
5683 free_rdataset(rbtdb, rbtdb->common.mctx,
5685 newheader = (rdatasetheader_t *)merged;
5686 if (loading && RESIGN(newheader) &&
5688 header->resign < newheader->resign)
5689 newheader->resign = header->resign;
5691 free_rdataset(rbtdb, rbtdb->common.mctx,
5697 * Don't replace existing NS, A and AAAA RRsets
5698 * in the cache if they are already exist. This
5699 * prevents named being locked to old servers.
5700 * Don't lower trust of existing record if the
5703 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5704 header->type == dns_rdatatype_ns &&
5705 !header_nx && !newheader_nx &&
5706 header->trust >= newheader->trust &&
5707 dns_rdataslab_equalx((unsigned char *)header,
5708 (unsigned char *)newheader,
5709 (unsigned int)(sizeof(*newheader)),
5710 rbtdb->common.rdclass,
5711 (dns_rdatatype_t)header->type)) {
5713 * Honour the new ttl if it is less than the
5716 if (header->rdh_ttl > newheader->rdh_ttl)
5717 set_ttl(rbtdb, header, newheader->rdh_ttl);
5718 if (header->noqname == NULL &&
5719 newheader->noqname != NULL) {
5720 header->noqname = newheader->noqname;
5721 newheader->noqname = NULL;
5723 if (header->closest == NULL &&
5724 newheader->closest != NULL) {
5725 header->closest = newheader->closest;
5726 newheader->closest = NULL;
5728 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5729 if (addedrdataset != NULL)
5730 bind_rdataset(rbtdb, rbtnode, header, now,
5732 return (ISC_R_SUCCESS);
5734 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5735 (header->type == dns_rdatatype_a ||
5736 header->type == dns_rdatatype_aaaa) &&
5737 !header_nx && !newheader_nx &&
5738 header->trust >= newheader->trust &&
5739 dns_rdataslab_equal((unsigned char *)header,
5740 (unsigned char *)newheader,
5741 (unsigned int)(sizeof(*newheader)))) {
5743 * Honour the new ttl if it is less than the
5746 if (header->rdh_ttl > newheader->rdh_ttl)
5747 set_ttl(rbtdb, header, newheader->rdh_ttl);
5748 if (header->noqname == NULL &&
5749 newheader->noqname != NULL) {
5750 header->noqname = newheader->noqname;
5751 newheader->noqname = NULL;
5753 if (header->closest == NULL &&
5754 newheader->closest != NULL) {
5755 header->closest = newheader->closest;
5756 newheader->closest = NULL;
5758 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5759 if (addedrdataset != NULL)
5760 bind_rdataset(rbtdb, rbtnode, header, now,
5762 return (ISC_R_SUCCESS);
5764 INSIST(rbtversion == NULL ||
5765 rbtversion->serial >= topheader->serial);
5766 if (topheader_prev != NULL)
5767 topheader_prev->next = newheader;
5769 rbtnode->data = newheader;
5770 newheader->next = topheader->next;
5773 * There are no other references to 'header' when
5774 * loading, so we MAY clean up 'header' now.
5775 * Since we don't generate changed records when
5776 * loading, we MUST clean up 'header' now.
5778 newheader->down = NULL;
5779 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5781 newheader->down = topheader;
5782 topheader->next = newheader;
5784 if (changed != NULL)
5785 changed->dirty = ISC_TRUE;
5786 if (rbtversion == NULL) {
5787 set_ttl(rbtdb, header, 0);
5788 header->attributes |= RDATASET_ATTR_STALE;
5789 if (sigheader != NULL) {
5790 set_ttl(rbtdb, sigheader, 0);
5791 sigheader->attributes |=
5792 RDATASET_ATTR_STALE;
5795 idx = newheader->node->locknum;
5796 if (IS_CACHE(rbtdb)) {
5797 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5800 * XXXMLG We don't check the return value
5801 * here. If it fails, we will not do TTL
5802 * based expiry on this node. However, we
5803 * will do it on the LRU side, so memory
5804 * will not leak... for long.
5806 isc_heap_insert(rbtdb->heaps[idx], newheader);
5807 } else if (RESIGN(newheader))
5808 resign_insert(rbtdb, idx, newheader);
5812 * No non-IGNORED rdatasets of the given type exist at
5817 * If we're trying to delete the type, don't bother.
5820 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5821 return (DNS_R_UNCHANGED);
5824 if (topheader != NULL) {
5826 * We have an list of rdatasets of the given type,
5827 * but they're all marked IGNORE. We simply insert
5828 * the new rdataset at the head of the list.
5830 * Ignored rdatasets cannot occur during loading, so
5834 INSIST(rbtversion == NULL ||
5835 rbtversion->serial >= topheader->serial);
5836 if (topheader_prev != NULL)
5837 topheader_prev->next = newheader;
5839 rbtnode->data = newheader;
5840 newheader->next = topheader->next;
5841 newheader->down = topheader;
5842 topheader->next = newheader;
5844 if (changed != NULL)
5845 changed->dirty = ISC_TRUE;
5848 * No rdatasets of the given type exist at the node.
5850 newheader->next = rbtnode->data;
5851 newheader->down = NULL;
5852 rbtnode->data = newheader;
5854 idx = newheader->node->locknum;
5855 if (IS_CACHE(rbtdb)) {
5856 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5858 isc_heap_insert(rbtdb->heaps[idx], newheader);
5859 } else if (RESIGN(newheader)) {
5860 resign_insert(rbtdb, idx, newheader);
5865 * Check if the node now contains CNAME and other data.
5867 if (rbtversion != NULL &&
5868 cname_and_other_data(rbtnode, rbtversion->serial))
5869 return (DNS_R_CNAMEANDOTHER);
5871 if (addedrdataset != NULL)
5872 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5874 return (ISC_R_SUCCESS);
/*
 * Classify type as delegating or not for this db.
 *
 * For a cache db only DNAME is tested for.  Otherwise the test is DNAME,
 * or NS at any node other than rbtdb->origin_node, or NS anywhere when the
 * db is a stub (IS_STUB).  The return statements are not visible in this
 * listing; presumably ISC_TRUE is returned when the test matches and
 * ISC_FALSE otherwise.
 */
5877 static inline isc_boolean_t
5878 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5879 rbtdb_rdatatype_t type)
5881 if (IS_CACHE(rbtdb)) {
5882 if (type == dns_rdatatype_dname)
5886 } else if (type == dns_rdatatype_dname ||
5887 (type == dns_rdatatype_ns &&
5888 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * Copy the noqname data attached to rdataset into a newly allocated
 * struct noqname and hang it on newheader->noqname.
 *
 * dns_rdataset_getnoqname() supplies a name and the neg / negsig rdatasets;
 * it must succeed (RUNTIME_CHECK).  The name is duplicated with
 * dns_name_dup() and both rdatasets are converted to slabs from the memory
 * context of the db.  Returns ISC_R_SUCCESS after attaching the result.
 *
 * The lines after the successful return release neg, negsig and the
 * partly built structure; presumably they sit behind a cleanup label
 * reached on failure and are followed by a return of result (neither is
 * visible in this listing).
 *
 * NOTE(review): when isc_mem_get() fails noqname is NULL, and the cleanup
 * code passes &noqname to free_noqname() -- confirm that free_noqname()
 * tolerates a NULL pointer there.
 */
5893 static inline isc_result_t
5894 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5895 dns_rdataset_t *rdataset)
5897 struct noqname *noqname;
5898 isc_mem_t *mctx = rbtdb->common.mctx;
5900 dns_rdataset_t neg, negsig;
5901 isc_result_t result;
5904 dns_name_init(&name, NULL);
5905 dns_rdataset_init(&neg);
5906 dns_rdataset_init(&negsig);
/* Fetch the name and the two rdatasets that make up the proof. */
5908 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5909 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5911 noqname = isc_mem_get(mctx, sizeof(*noqname));
5912 if (noqname == NULL) {
5913 result = ISC_R_NOMEMORY;
/* Start from an empty structure so that partial results can be told apart. */
5916 dns_name_init(&noqname->name, NULL);
5917 noqname->neg = NULL;
5918 noqname->negsig = NULL;
5919 noqname->type = neg.type;
5920 result = dns_name_dup(&name, mctx, &noqname->name);
5921 if (result != ISC_R_SUCCESS)
5923 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5924 if (result != ISC_R_SUCCESS)
5926 noqname->neg = r.base;
5927 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5928 if (result != ISC_R_SUCCESS)
5930 noqname->negsig = r.base;
/* Success: drop the temporary rdatasets and attach the copy. */
5931 dns_rdataset_disassociate(&neg);
5932 dns_rdataset_disassociate(&negsig);
5933 newheader->noqname = noqname;
5934 return (ISC_R_SUCCESS);
5937 dns_rdataset_disassociate(&neg);
5938 dns_rdataset_disassociate(&negsig);
5939 free_noqname(mctx, &noqname);
/*
 * Copy the closest data attached to rdataset into a newly allocated
 * struct noqname and hang it on newheader->closest.
 *
 * dns_rdataset_getclosest() supplies a name and the neg / negsig rdatasets;
 * it must succeed (RUNTIME_CHECK).  The name is duplicated with
 * dns_name_dup() and both rdatasets are converted to slabs from the memory
 * context of the db.  Returns ISC_R_SUCCESS after attaching the result.
 *
 * The lines after the successful return release neg, negsig and the
 * partly built structure; presumably they sit behind a cleanup label
 * reached on failure and are followed by a return of result (neither is
 * visible in this listing).
 *
 * NOTE(review): when isc_mem_get() fails closest is NULL, and the cleanup
 * code passes &closest to free_noqname() -- confirm that free_noqname()
 * tolerates a NULL pointer there.
 */
5943 static inline isc_result_t
5944 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5945 dns_rdataset_t *rdataset)
5947 struct noqname *closest;
5948 isc_mem_t *mctx = rbtdb->common.mctx;
5950 dns_rdataset_t neg, negsig;
5951 isc_result_t result;
5954 dns_name_init(&name, NULL);
5955 dns_rdataset_init(&neg);
5956 dns_rdataset_init(&negsig);
/* Fetch the name and the two rdatasets that make up the proof. */
5958 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5959 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5961 closest = isc_mem_get(mctx, sizeof(*closest));
5962 if (closest == NULL) {
5963 result = ISC_R_NOMEMORY;
/* Start from an empty structure so that partial results can be told apart. */
5966 dns_name_init(&closest->name, NULL);
5967 closest->neg = NULL;
5968 closest->negsig = NULL;
5969 closest->type = neg.type;
5970 result = dns_name_dup(&name, mctx, &closest->name);
5971 if (result != ISC_R_SUCCESS)
5973 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5974 if (result != ISC_R_SUCCESS)
5976 closest->neg = r.base;
5977 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5978 if (result != ISC_R_SUCCESS)
5980 closest->negsig = r.base;
/* Success: drop the temporary rdatasets and attach the copy. */
5981 dns_rdataset_disassociate(&neg);
5982 dns_rdataset_disassociate(&negsig);
5983 newheader->closest = closest;
5984 return (ISC_R_SUCCESS);
5987 dns_rdataset_disassociate(&neg);
5988 dns_rdataset_disassociate(&negsig);
5989 free_noqname(mctx, &closest);
5993 static dns_dbmethods_t zone_methods;
/*
 * Add rdataset to node in version (or, with version == NULL, outside any
 * version).
 *
 * A slab is built from rdataset with room for an rdatasetheader_t, the
 * header fields are filled in, and the header is passed to add() under the
 * node write lock.  The header TTL is set to rdataset->ttl + now.  With a
 * version the serial comes from the version and the RESIGN attribute and
 * resign time are carried over; otherwise serial is 1 and the NXDOMAIN,
 * OPTOUT, NOQNAME and CLOSEST attributes of rdataset are honoured.
 *
 * The tree write lock is taken when the type is a delegating type or when
 * a cache db is over its memory limit; in the latter case
 * overmem_purge() runs before the node lock is taken.  For a cache db,
 * dead nodes are cleaned up and the first heap element is expired when its
 * TTL is at or below now - RBTDB_VIRTUAL.  When add() succeeds for a
 * delegating type, rbtnode->find_callback is set.
 *
 * The return statements are not visible in this listing; presumably the
 * result of add(), or of an earlier failing step, is returned.
 *
 * NOTE(review): the final iszonesecure() call passes version, which is
 * NULL on that path by the preceding test, whereas subtractrdataset() and
 * deleterdataset() pass rbtdb->current_version in the same situation --
 * confirm which is intended.
 */
5996 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5997 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5998 dns_rdataset_t *addedrdataset)
6000 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6001 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6002 rbtdb_version_t *rbtversion = version;
6003 isc_region_t region;
6004 rdatasetheader_t *newheader;
6005 rdatasetheader_t *header;
6006 isc_result_t result;
6007 isc_boolean_t delegating;
6008 isc_boolean_t tree_locked = ISC_FALSE;
6009 isc_boolean_t cache_is_overmem = ISC_FALSE;
6011 REQUIRE(VALID_RBTDB(rbtdb));
/*
 * Zone db only: NSEC3 data (type or covered type) is checked against the
 * nsec3 flag of the node.  One line of the condition is not visible here.
 */
6013 if (rbtdb->common.methods == &zone_methods)
6014 REQUIRE(((rbtnode->nsec3 &&
6015 (rdataset->type == dns_rdatatype_nsec3 ||
6016 rdataset->covers == dns_rdatatype_nsec3)) ||
6018 rdataset->type != dns_rdatatype_nsec3 &&
6019 rdataset->covers != dns_rdatatype_nsec3)));
/*
 * NOTE(review): the clock read below is presumably also guarded by a test
 * on now that is not visible in this listing -- confirm.
 */
6021 if (rbtversion == NULL) {
6023 isc_stdtime_get(&now);
/* sizeof(rdatasetheader_t) is passed so the header can live at region.base. */
6027 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6029 sizeof(rdatasetheader_t));
6030 if (result != ISC_R_SUCCESS)
6033 newheader = (rdatasetheader_t *)region.base;
6034 init_rdataset(rbtdb, newheader);
6035 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6036 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6038 newheader->attributes = 0;
6039 newheader->noqname = NULL;
6040 newheader->closest = NULL;
6041 newheader->count = init_count++;
6042 newheader->trust = rdataset->trust;
6043 newheader->additional_auth = NULL;
6044 newheader->additional_glue = NULL;
6045 newheader->last_used = now;
6046 newheader->node = rbtnode;
6047 if (rbtversion != NULL) {
6048 newheader->serial = rbtversion->serial;
6051 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6052 newheader->attributes |= RDATASET_ATTR_RESIGN;
6053 newheader->resign = rdataset->resign;
6055 newheader->resign = 0;
6057 newheader->serial = 1;
6058 newheader->resign = 0;
6059 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6060 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6061 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6062 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6063 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6064 result = addnoqname(rbtdb, newheader, rdataset);
6065 if (result != ISC_R_SUCCESS) {
6066 free_rdataset(rbtdb, rbtdb->common.mctx,
6071 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6072 result = addclosest(rbtdb, newheader, rdataset);
6073 if (result != ISC_R_SUCCESS) {
6074 free_rdataset(rbtdb, rbtdb->common.mctx,
6082 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6083 * just DNAME for the cache), then we need to set the callback bit
6086 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6087 delegating = ISC_TRUE;
6089 delegating = ISC_FALSE;
6092 * If we're adding a delegation type or the DB is a cache in an overmem
6093 * state, hold an exclusive lock on the tree. In the latter case
6094 * the lock does not necessarily have to be acquired but it will help
6095 * purge stale entries more effectively.
6097 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6098 cache_is_overmem = ISC_TRUE;
6099 if (delegating || cache_is_overmem) {
6100 tree_locked = ISC_TRUE;
6101 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6104 if (cache_is_overmem)
6105 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6107 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6108 isc_rwlocktype_write);
6110 if (rbtdb->rrsetstats != NULL) {
6111 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6112 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6115 if (IS_CACHE(rbtdb)) {
6117 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6119 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6120 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6121 expire_header(rbtdb, header, tree_locked);
6124 * If we've been holding a write lock on the tree just for
6125 * cleaning, we can release it now. However, we still need the
6128 if (tree_locked && !delegating) {
6129 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6130 tree_locked = ISC_FALSE;
6134 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6135 addedrdataset, now);
6136 if (result == ISC_R_SUCCESS && delegating)
6137 rbtnode->find_callback = 1;
6139 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6140 isc_rwlocktype_write);
6143 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6146 * Update the zone's secure status. If version is non-NULL
6147 * this is deferred until closeversion() is called.
6149 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6150 iszonesecure(db, version, rbtdb->origin_node);
/*
 * Remove the rdata in rdataset from the rdataset of the same type at node,
 * within version.
 *
 * A slab is built from rdataset and, under the node write lock, subtracted
 * from the first non-IGNORE header of the matching type with
 * dns_rdataslab_subtract().  On success the resulting slab becomes the new
 * header; when the subtraction would leave nothing (DNS_R_NXRRSET) a
 * header flagged RDATASET_ATTR_NONEXISTENT is created instead.  Either one
 * is linked in front of the existing chain and the changed record is
 * marked dirty.  With DNS_DBSUB_EXACT a TTL mismatch gives DNS_R_NOTEXACT.
 * When no existing rdataset is found the result is DNS_R_NOTEXACT (with
 * DNS_DBSUB_EXACT) or DNS_R_UNCHANGED.  If newrdataset is not NULL it is
 * bound to the new header on success.
 *
 * ISC_R_NOMEMORY is returned when no changed record can be added; the
 * other return statements are not visible in this listing (presumably
 * result is returned).
 *
 * NOTE(review): rbtversion->serial is read unconditionally below, so the
 * version == NULL test before the final iszonesecure() call can only be
 * false if that read did not already fault -- confirm whether a NULL
 * version is ever legal here.
 */
6156 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6157 dns_rdataset_t *rdataset, unsigned int options,
6158 dns_rdataset_t *newrdataset)
6160 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6161 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6162 rbtdb_version_t *rbtversion = version;
6163 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6164 unsigned char *subresult;
6165 isc_region_t region;
6166 isc_result_t result;
6167 rbtdb_changed_t *changed;
6169 REQUIRE(VALID_RBTDB(rbtdb));
/*
 * Zone db only: NSEC3 data (type or covered type) is checked against the
 * nsec3 flag of the node.  One line of the condition is not visible here.
 */
6171 if (rbtdb->common.methods == &zone_methods)
6172 REQUIRE(((rbtnode->nsec3 &&
6173 (rdataset->type == dns_rdatatype_nsec3 ||
6174 rdataset->covers == dns_rdatatype_nsec3)) ||
6176 rdataset->type != dns_rdatatype_nsec3 &&
6177 rdataset->covers != dns_rdatatype_nsec3)));
/* sizeof(rdatasetheader_t) is passed so the header can live at region.base. */
6179 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6181 sizeof(rdatasetheader_t));
6182 if (result != ISC_R_SUCCESS)
6184 newheader = (rdatasetheader_t *)region.base;
6185 init_rdataset(rbtdb, newheader);
6186 set_ttl(rbtdb, newheader, rdataset->ttl);
6187 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6189 newheader->attributes = 0;
/* NOTE(review): dereferences rbtversion without a NULL test. */
6190 newheader->serial = rbtversion->serial;
6191 newheader->trust = 0;
6192 newheader->noqname = NULL;
6193 newheader->closest = NULL;
6194 newheader->count = init_count++;
6195 newheader->additional_auth = NULL;
6196 newheader->additional_glue = NULL;
6197 newheader->last_used = 0;
6198 newheader->node = rbtnode;
6199 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6200 newheader->attributes |= RDATASET_ATTR_RESIGN;
6201 newheader->resign = rdataset->resign;
6203 newheader->resign = 0;
6205 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6206 isc_rwlocktype_write);
/* Without a changed record nothing is modified; undo and bail out. */
6208 changed = add_changed(rbtdb, rbtversion, rbtnode);
6209 if (changed == NULL) {
6210 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6211 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6212 isc_rwlocktype_write);
6213 return (ISC_R_NOMEMORY);
/* Find the list element of the same type, remembering its predecessor. */
6216 topheader_prev = NULL;
6217 for (topheader = rbtnode->data;
6219 topheader = topheader->next) {
6220 if (topheader->type == newheader->type)
6222 topheader_prev = topheader;
6225 * If header isn't NULL, we've found the right type. There may be
6226 * IGNORE rdatasets between the top of the chain and the first real
6227 * data. We skip over them.
6230 while (header != NULL && IGNORE(header))
6231 header = header->down;
6232 if (header != NULL && EXISTS(header)) {
6233 unsigned int flags = 0;
6235 result = ISC_R_SUCCESS;
6236 if ((options & DNS_DBSUB_EXACT) != 0) {
6237 flags |= DNS_RDATASLAB_EXACT;
6238 if (newheader->rdh_ttl != header->rdh_ttl)
6239 result = DNS_R_NOTEXACT;
6241 if (result == ISC_R_SUCCESS)
6242 result = dns_rdataslab_subtract(
6243 (unsigned char *)header,
6244 (unsigned char *)newheader,
6245 (unsigned int)(sizeof(*newheader)),
6247 rbtdb->common.rdclass,
6248 (dns_rdatatype_t)header->type,
6250 if (result == ISC_R_SUCCESS) {
6251 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6252 newheader = (rdatasetheader_t *)subresult;
6253 init_rdataset(rbtdb, newheader);
6255 * We have to set the serial since the rdataslab
6256 * subtraction routine copies the reserved portion of
6257 * header, not newheader.
6259 newheader->serial = rbtversion->serial;
6261 * XXXJT: dns_rdataslab_subtract() copied the pointers
6262 * to additional info. We need to clear these fields
6263 * to avoid having duplicated references.
6265 newheader->additional_auth = NULL;
6266 newheader->additional_glue = NULL;
6267 } else if (result == DNS_R_NXRRSET) {
6269 * This subtraction would remove all of the rdata;
6270 * add a nonexistent header instead.
6272 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6273 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6274 if (newheader == NULL) {
6275 result = ISC_R_NOMEMORY;
6278 set_ttl(rbtdb, newheader, 0);
6279 newheader->type = topheader->type;
6280 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6281 newheader->trust = 0;
6282 newheader->serial = rbtversion->serial;
6283 newheader->noqname = NULL;
6284 newheader->closest = NULL;
6285 newheader->count = 0;
6286 newheader->additional_auth = NULL;
6287 newheader->additional_glue = NULL;
6288 newheader->node = rbtnode;
6289 newheader->resign = 0;
6290 newheader->last_used = 0;
6292 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6297 * If we're here, we want to link newheader in front of
6300 INSIST(rbtversion->serial >= topheader->serial);
6301 if (topheader_prev != NULL)
6302 topheader_prev->next = newheader;
6304 rbtnode->data = newheader;
6305 newheader->next = topheader->next;
6306 newheader->down = topheader;
6307 topheader->next = newheader;
6309 changed->dirty = ISC_TRUE;
6312 * The rdataset doesn't exist, so we don't need to do anything
6313 * to satisfy the deletion request.
6315 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6316 if ((options & DNS_DBSUB_EXACT) != 0)
6317 result = DNS_R_NOTEXACT;
6319 result = DNS_R_UNCHANGED;
6322 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6323 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6326 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6327 isc_rwlocktype_write);
6330 * Update the zone's secure status. If version is non-NULL
6331 * this is deferred until closeversion() is called.
6333 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6334 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * Delete the rdataset of (type, covers) at node.
 *
 * The deletion is recorded by building a header flagged
 * RDATASET_ATTR_NONEXISTENT and passing it to add() with DNS_DBADD_FORCE
 * under the node write lock.  The header serial is the serial of version
 * when one is given and 0 otherwise.
 *
 * Returns ISC_R_NOTIMPLEMENTED for type ANY and for RRSIG with covers 0,
 * and ISC_R_NOMEMORY when no header can be allocated.  The final return
 * statement is not visible in this listing; presumably the result of add()
 * is returned.
 */
6340 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6341 dns_rdatatype_t type, dns_rdatatype_t covers)
6343 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6344 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6345 rbtdb_version_t *rbtversion = version;
6346 isc_result_t result;
6347 rdatasetheader_t *newheader;
6349 REQUIRE(VALID_RBTDB(rbtdb));
/* Requests that name no single rdataset are refused. */
6351 if (type == dns_rdatatype_any)
6352 return (ISC_R_NOTIMPLEMENTED);
6353 if (type == dns_rdatatype_rrsig && covers == 0)
6354 return (ISC_R_NOTIMPLEMENTED);
/* Build the header that marks the type as nonexistent. */
6356 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6357 if (newheader == NULL)
6358 return (ISC_R_NOMEMORY);
6359 set_ttl(rbtdb, newheader, 0);
6360 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6361 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6362 newheader->trust = 0;
6363 newheader->noqname = NULL;
6364 newheader->closest = NULL;
6365 newheader->additional_auth = NULL;
6366 newheader->additional_glue = NULL;
6367 if (rbtversion != NULL)
6368 newheader->serial = rbtversion->serial;
6370 newheader->serial = 0;
6371 newheader->count = 0;
6372 newheader->last_used = 0;
6373 newheader->node = rbtnode;
6375 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6376 isc_rwlocktype_write);
/* add() is called with loading false, no addedrdataset and now == 0. */
6378 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6379 ISC_FALSE, NULL, 0);
6381 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6382 isc_rwlocktype_write);
6385 * Update the zone's secure status. If version is non-NULL
6386 * this is deferred until closeversion() is called.
6388 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6389 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * Add one rdataset at name while a db is being loaded.  beginload() hands
 * this function out through its addp argument; arg is the rbtdb_load_t
 * created there.
 *
 * Checks made before anything is added:
 *   - SOA in a non-cache db at a name other than the origin gives
 *     DNS_R_NOTZONETOP;
 *   - at a wildcard name, NS gives DNS_R_INVALIDNS and NSEC3 gives
 *     DNS_R_INVALIDNSEC3; otherwise add_wildcard_magic() is called.
 * add_empty_wildcards() is called for everything that is not NSEC3 data;
 * its result, if it has one, is not examined.
 *
 * NSEC3 data (type or covered type) goes into rbtdb->nsec3, everything
 * else into rbtdb->tree.  When the tree reports something other than
 * ISC_R_EXISTS, the lock number of the node is computed.  The slab is
 * then built and passed to add() with rbtdb->current_version,
 * DNS_DBADD_MERGE and loading set.  On success for a delegating type
 * node->find_callback is set; DNS_R_UNCHANGED from add() is turned into
 * ISC_R_SUCCESS.
 *
 * Several return statements are not visible in this listing; presumably
 * failing steps return their result and the function ends by returning
 * result.
 */
6395 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6396 rbtdb_load_t *loadctx = arg;
6397 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6398 dns_rbtnode_t *node;
6399 isc_result_t result;
6400 isc_region_t region;
6401 rdatasetheader_t *newheader;
6404 * This routine does no node locking. See comments in
6405 * 'load' below for more information on loading and
6411 * SOA records are only allowed at top of zone.
6413 if (rdataset->type == dns_rdatatype_soa &&
6414 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6415 return (DNS_R_NOTZONETOP);
6417 if (rdataset->type != dns_rdatatype_nsec3 &&
6418 rdataset->covers != dns_rdatatype_nsec3)
6419 add_empty_wildcards(rbtdb, name);
6421 if (dns_name_iswildcard(name)) {
6423 * NS record owners cannot legally be wild cards.
6425 if (rdataset->type == dns_rdatatype_ns)
6426 return (DNS_R_INVALIDNS);
6428 * NSEC3 record owners cannot legally be wild cards.
6430 if (rdataset->type == dns_rdatatype_nsec3)
6431 return (DNS_R_INVALIDNSEC3);
6432 result = add_wildcard_magic(rbtdb, name);
6433 if (result != ISC_R_SUCCESS)
6438 if (rdataset->type == dns_rdatatype_nsec3 ||
6439 rdataset->covers == dns_rdatatype_nsec3) {
6440 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6441 if (result == ISC_R_SUCCESS)
6444 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6445 if (result == ISC_R_SUCCESS)
6448 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6450 if (result != ISC_R_EXISTS) {
6451 dns_name_t foundname;
6452 dns_name_init(&foundname, NULL);
6453 dns_rbt_namefromnode(node, &foundname);
6454 #ifdef DNS_RBT_USEHASH
6455 node->locknum = node->hashval % rbtdb->node_lock_count;
6457 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6458 rbtdb->node_lock_count;
6462 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6464 sizeof(rdatasetheader_t));
6465 if (result != ISC_R_SUCCESS)
6467 newheader = (rdatasetheader_t *)region.base;
6468 init_rdataset(rbtdb, newheader);
6469 set_ttl(rbtdb, newheader,
6470 rdataset->ttl + loadctx->now); /* XXX overflow check */
6471 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6473 newheader->attributes = 0;
6474 newheader->trust = rdataset->trust;
6475 newheader->serial = 1;
6476 newheader->noqname = NULL;
6477 newheader->closest = NULL;
6478 newheader->count = init_count++;
6479 newheader->additional_auth = NULL;
6480 newheader->additional_glue = NULL;
6481 newheader->last_used = 0;
6482 newheader->node = node;
6483 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6484 newheader->attributes |= RDATASET_ATTR_RESIGN;
6485 newheader->resign = rdataset->resign;
6487 newheader->resign = 0;
6489 result = add(rbtdb, node, rbtdb->current_version, newheader,
6490 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6491 if (result == ISC_R_SUCCESS &&
6492 delegating_type(rbtdb, node, rdataset->type))
6493 node->find_callback = 1;
6494 else if (result == DNS_R_UNCHANGED)
6495 result = ISC_R_SUCCESS;
6501 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6502 rbtdb_load_t *loadctx;
6505 rbtdb = (dns_rbtdb_t *)db;
6507 REQUIRE(VALID_RBTDB(rbtdb));
6509 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6510 if (loadctx == NULL)
6511 return (ISC_R_NOMEMORY);
6513 loadctx->rbtdb = rbtdb;
6514 if (IS_CACHE(rbtdb))
6515 isc_stdtime_get(&loadctx->now);
6519 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6521 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6523 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6525 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6527 *addp = loading_addrdataset;
6530 return (ISC_R_SUCCESS);
6534 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6535 rbtdb_load_t *loadctx;
6536 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6538 REQUIRE(VALID_RBTDB(rbtdb));
6539 REQUIRE(dbloadp != NULL);
6541 REQUIRE(loadctx->rbtdb == rbtdb);
6543 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6545 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6546 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6548 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6549 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6551 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6554 * If there's a KEY rdataset at the zone origin containing a
6555 * zone key, we consider the zone secure.
6557 if (! IS_CACHE(rbtdb))
6558 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6562 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6564 return (ISC_R_SUCCESS);
6568 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6569 dns_masterformat_t masterformat) {
6572 rbtdb = (dns_rbtdb_t *)db;
6574 REQUIRE(VALID_RBTDB(rbtdb));
6576 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6577 &dns_master_style_default,
6578 filename, masterformat));
6582 delete_callback(void *data, void *arg) {
6583 dns_rbtdb_t *rbtdb = arg;
6584 rdatasetheader_t *current, *next;
6585 unsigned int locknum;
6588 locknum = current->node->locknum;
6589 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6590 while (current != NULL) {
6591 next = current->next;
6592 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6595 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6598 static isc_boolean_t
6599 issecure(dns_db_t *db) {
6601 isc_boolean_t secure;
6603 rbtdb = (dns_rbtdb_t *)db;
6605 REQUIRE(VALID_RBTDB(rbtdb));
6607 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6608 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6609 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6614 static isc_boolean_t
6615 isdnssec(dns_db_t *db) {
6617 isc_boolean_t dnssec;
6619 rbtdb = (dns_rbtdb_t *)db;
6621 REQUIRE(VALID_RBTDB(rbtdb));
6623 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6624 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6625 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6631 nodecount(dns_db_t *db) {
6635 rbtdb = (dns_rbtdb_t *)db;
6637 REQUIRE(VALID_RBTDB(rbtdb));
6639 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6640 count = dns_rbt_nodecount(rbtdb->tree);
6641 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6647 settask(dns_db_t *db, isc_task_t *task) {
6650 rbtdb = (dns_rbtdb_t *)db;
6652 REQUIRE(VALID_RBTDB(rbtdb));
6654 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6655 if (rbtdb->task != NULL)
6656 isc_task_detach(&rbtdb->task);
6658 isc_task_attach(task, &rbtdb->task);
6659 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6662 static isc_boolean_t
6663 ispersistent(dns_db_t *db) {
6669 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6670 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6671 dns_rbtnode_t *onode;
6672 isc_result_t result = ISC_R_SUCCESS;
6674 REQUIRE(VALID_RBTDB(rbtdb));
6675 REQUIRE(nodep != NULL && *nodep == NULL);
6677 /* Note that the access to origin_node doesn't require a DB lock */
6678 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6679 if (onode != NULL) {
6680 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6681 new_reference(rbtdb, onode);
6682 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6684 *nodep = rbtdb->origin_node;
6686 INSIST(IS_CACHE(rbtdb));
6687 result = ISC_R_NOTFOUND;
6694 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6695 isc_uint8_t *flags, isc_uint16_t *iterations,
6696 unsigned char *salt, size_t *salt_length)
6699 isc_result_t result = ISC_R_NOTFOUND;
6700 rbtdb_version_t *rbtversion = version;
6702 rbtdb = (dns_rbtdb_t *)db;
6704 REQUIRE(VALID_RBTDB(rbtdb));
6706 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6708 if (rbtversion == NULL)
6709 rbtversion = rbtdb->current_version;
6711 if (rbtversion->havensec3) {
6713 *hash = rbtversion->hash;
6714 if (salt != NULL && salt_length != NULL) {
6715 REQUIRE(*salt_length >= rbtversion->salt_length);
6716 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6718 if (salt_length != NULL)
6719 *salt_length = rbtversion->salt_length;
6720 if (iterations != NULL)
6721 *iterations = rbtversion->iterations;
6723 *flags = rbtversion->flags;
6724 result = ISC_R_SUCCESS;
6726 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6732 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6733 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6734 isc_stdtime_t oldresign;
6735 isc_result_t result = ISC_R_SUCCESS;
6736 rdatasetheader_t *header;
6738 REQUIRE(VALID_RBTDB(rbtdb));
6739 REQUIRE(!IS_CACHE(rbtdb));
6740 REQUIRE(rdataset != NULL);
6742 header = rdataset->private3;
6745 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6746 isc_rwlocktype_write);
6748 oldresign = header->resign;
6749 header->resign = resign;
6750 if (header->heap_index != 0) {
6751 INSIST(RESIGN(header));
6753 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6754 header->heap_index);
6755 header->heap_index = 0;
6756 } else if (resign < oldresign)
6757 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6758 header->heap_index);
6760 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6761 header->heap_index);
6762 } else if (resign && header->heap_index == 0) {
6763 header->attributes |= RDATASET_ATTR_RESIGN;
6764 result = resign_insert(rbtdb, header->node->locknum, header);
6766 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6767 isc_rwlocktype_write);
6772 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6773 dns_name_t *foundname)
6775 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6776 rdatasetheader_t *header = NULL, *this;
6778 isc_result_t result = ISC_R_NOTFOUND;
6779 unsigned int locknum;
6781 REQUIRE(VALID_RBTDB(rbtdb));
6783 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6785 for (i = 0; i < rbtdb->node_lock_count; i++) {
6786 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6787 this = isc_heap_element(rbtdb->heaps[i], 1);
6789 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6790 isc_rwlocktype_read);
6795 else if (isc_serial_lt(this->resign, header->resign)) {
6796 locknum = header->node->locknum;
6797 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6798 isc_rwlocktype_read);
6801 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6802 isc_rwlocktype_read);
6808 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6810 if (foundname != NULL)
6811 dns_rbt_fullnamefromnode(header->node, foundname);
6813 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6814 isc_rwlocktype_read);
6816 result = ISC_R_SUCCESS;
6819 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6825 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6827 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6828 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6829 dns_rbtnode_t *node;
6830 rdatasetheader_t *header;
6832 REQUIRE(VALID_RBTDB(rbtdb));
6833 REQUIRE(rdataset != NULL);
6834 REQUIRE(rbtdb->future_version == rbtversion);
6835 REQUIRE(rbtversion->writer);
6837 node = rdataset->private2;
6838 header = rdataset->private3;
6841 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6842 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6843 isc_rwlocktype_write);
6845 * Delete from heap and save to re-signed list so that it can
6846 * be restored if we backout of this change.
6848 new_reference(rbtdb, node);
6849 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6850 header->heap_index = 0;
6851 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6853 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6854 isc_rwlocktype_write);
6855 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6858 static dns_stats_t *
6859 getrrsetstats(dns_db_t *db) {
6860 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6862 REQUIRE(VALID_RBTDB(rbtdb));
6863 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6865 return (rbtdb->rrsetstats);
6868 static dns_dbmethods_t zone_methods = {
6907 static dns_dbmethods_t cache_methods = {
6947 #ifdef DNS_RBTDB_VERSION64
6952 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6953 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6954 void *driverarg, dns_db_t **dbp)
6957 isc_result_t result;
6960 isc_boolean_t (*sooner)(void *, void *);
6962 /* Keep the compiler happy. */
6967 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6969 return (ISC_R_NOMEMORY);
6971 memset(rbtdb, '\0', sizeof(*rbtdb));
6972 dns_name_init(&rbtdb->common.origin, NULL);
6973 rbtdb->common.attributes = 0;
6974 if (type == dns_dbtype_cache) {
6975 rbtdb->common.methods = &cache_methods;
6976 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6977 } else if (type == dns_dbtype_stub) {
6978 rbtdb->common.methods = &zone_methods;
6979 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6981 rbtdb->common.methods = &zone_methods;
6982 rbtdb->common.rdclass = rdclass;
6983 rbtdb->common.mctx = NULL;
6985 result = RBTDB_INITLOCK(&rbtdb->lock);
6986 if (result != ISC_R_SUCCESS)
6989 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6990 if (result != ISC_R_SUCCESS)
6994 * Initialize node_lock_count in a generic way to support future
6995 * extension which allows the user to specify this value on creation.
6996 * Note that when specified for a cache DB it must be larger than 1
6997 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6999 if (rbtdb->node_lock_count == 0) {
7000 if (IS_CACHE(rbtdb))
7001 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7003 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7004 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7005 result = ISC_R_RANGE;
7006 goto cleanup_tree_lock;
7008 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7009 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7010 sizeof(rbtdb_nodelock_t));
7011 if (rbtdb->node_locks == NULL) {
7012 result = ISC_R_NOMEMORY;
7013 goto cleanup_tree_lock;
7016 rbtdb->rrsetstats = NULL;
7017 if (IS_CACHE(rbtdb)) {
7018 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7019 if (result != ISC_R_SUCCESS)
7020 goto cleanup_node_locks;
7021 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7022 sizeof(rdatasetheaderlist_t));
7023 if (rbtdb->rdatasets == NULL) {
7024 result = ISC_R_NOMEMORY;
7025 goto cleanup_rrsetstats;
7027 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7028 ISC_LIST_INIT(rbtdb->rdatasets[i]);
7030 rbtdb->rdatasets = NULL;
7035 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
7036 sizeof(isc_heap_t *));
7037 if (rbtdb->heaps == NULL) {
7038 result = ISC_R_NOMEMORY;
7039 goto cleanup_rdatasets;
7041 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7042 rbtdb->heaps[i] = NULL;
7043 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7044 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7045 result = isc_heap_create(mctx, sooner, set_index, 0,
7047 if (result != ISC_R_SUCCESS)
7052 * Create deadnode lists.
7054 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7055 sizeof(rbtnodelist_t));
7056 if (rbtdb->deadnodes == NULL) {
7057 result = ISC_R_NOMEMORY;
7060 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7061 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7063 rbtdb->active = rbtdb->node_lock_count;
7065 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7066 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7067 if (result == ISC_R_SUCCESS) {
7068 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7069 if (result != ISC_R_SUCCESS)
7070 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7072 if (result != ISC_R_SUCCESS) {
7074 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7075 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7076 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7078 goto cleanup_deadnodes;
7080 rbtdb->node_locks[i].exiting = ISC_FALSE;
7084 * Attach to the mctx. The database will persist so long as there
7085 * are references to it, and attaching to the mctx ensures that our
7086 * mctx won't disappear out from under us.
7088 isc_mem_attach(mctx, &rbtdb->common.mctx);
7091 * Must be initialized before free_rbtdb() is called.
7093 isc_ondestroy_init(&rbtdb->common.ondest);
7096 * Make a copy of the origin name.
7098 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7099 if (result != ISC_R_SUCCESS) {
7100 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7105 * Make the Red-Black Trees.
7107 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7108 if (result != ISC_R_SUCCESS) {
7109 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7113 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7114 if (result != ISC_R_SUCCESS) {
7115 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7120 * In order to set the node callback bit correctly in zone databases,
7121 * we need to know if the node has the origin name of the zone.
7122 * In loading_addrdataset() we could simply compare the new name
7123 * to the origin name, but this is expensive. Also, we don't know the
7124 * node name in addrdataset(), so we need another way of knowing the
7127 * We now explicitly create a node for the zone's origin, and then
7128 * we simply remember the node's address. This is safe, because
7129 * the top-of-zone node can never be deleted, nor can its address
7132 if (!IS_CACHE(rbtdb)) {
7133 dns_rbtnode_t *nsec3node;
7135 rbtdb->origin_node = NULL;
7136 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7137 &rbtdb->origin_node);
7138 if (result != ISC_R_SUCCESS) {
7139 INSIST(result != ISC_R_EXISTS);
7140 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7143 rbtdb->origin_node->nsec3 = 0;
7145 * We need to give the origin node the right locknum.
7147 dns_name_init(&name, NULL);
7148 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7149 #ifdef DNS_RBT_USEHASH
7150 rbtdb->origin_node->locknum =
7151 rbtdb->origin_node->hashval %
7152 rbtdb->node_lock_count;
7154 rbtdb->origin_node->locknum =
7155 dns_name_hash(&name, ISC_TRUE) %
7156 rbtdb->node_lock_count;
7159 * Add an apex node to the NSEC3 tree so that NSEC3 searches
7160 * return partial matches when there is only a single NSEC3
7161 * record in the tree.
7164 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7166 if (result != ISC_R_SUCCESS) {
7167 INSIST(result != ISC_R_EXISTS);
7168 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7171 nsec3node->nsec3 = 1;
7173 * We need to give the nsec3 origin node the right locknum.
7175 dns_name_init(&name, NULL);
7176 dns_rbt_namefromnode(nsec3node, &name);
7177 #ifdef DNS_RBT_USEHASH
7178 nsec3node->locknum = nsec3node->hashval %
7179 rbtdb->node_lock_count;
7181 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7182 rbtdb->node_lock_count;
7187 * Misc. Initialization.
7189 result = isc_refcount_init(&rbtdb->references, 1);
7190 if (result != ISC_R_SUCCESS) {
7191 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7194 rbtdb->attributes = 0;
7198 * Version Initialization.
7200 rbtdb->current_serial = 1;
7201 rbtdb->least_serial = 1;
7202 rbtdb->next_serial = 2;
7203 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7204 if (rbtdb->current_version == NULL) {
7205 isc_refcount_decrement(&rbtdb->references, NULL);
7206 isc_refcount_destroy(&rbtdb->references);
7207 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7208 return (ISC_R_NOMEMORY);
7210 rbtdb->current_version->secure = dns_db_insecure;
7211 rbtdb->current_version->havensec3 = ISC_FALSE;
7212 rbtdb->current_version->flags = 0;
7213 rbtdb->current_version->iterations = 0;
7214 rbtdb->current_version->hash = 0;
7215 rbtdb->current_version->salt_length = 0;
7216 memset(rbtdb->current_version->salt, 0,
7217 sizeof(rbtdb->current_version->salt));
7218 rbtdb->future_version = NULL;
7219 ISC_LIST_INIT(rbtdb->open_versions);
7221 * Keep the current version in the open list so that list operation
7222 * won't happen in normal lookup operations.
7224 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7226 rbtdb->common.magic = DNS_DB_MAGIC;
7227 rbtdb->common.impmagic = RBTDB_MAGIC;
7229 *dbp = (dns_db_t *)rbtdb;
7231 return (ISC_R_SUCCESS);
7234 isc_mem_put(mctx, rbtdb->deadnodes,
7235 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7238 if (rbtdb->heaps != NULL) {
7239 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7240 if (rbtdb->heaps[i] != NULL)
7241 isc_heap_destroy(&rbtdb->heaps[i]);
7242 isc_mem_put(mctx, rbtdb->heaps,
7243 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7247 if (rbtdb->rdatasets != NULL)
7248 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7249 sizeof(rdatasetheaderlist_t));
7251 if (rbtdb->rrsetstats != NULL)
7252 dns_stats_detach(&rbtdb->rrsetstats);
7255 isc_mem_put(mctx, rbtdb->node_locks,
7256 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7259 isc_rwlock_destroy(&rbtdb->tree_lock);
7262 RBTDB_DESTROYLOCK(&rbtdb->lock);
7265 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
/*
 * Slabbed Rdataset Methods
 */
7275 rdataset_disassociate(dns_rdataset_t *rdataset) {
7276 dns_db_t *db = rdataset->private1;
7277 dns_dbnode_t *node = rdataset->private2;
7279 detachnode(db, &node);
7283 rdataset_first(dns_rdataset_t *rdataset) {
7284 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7287 count = raw[0] * 256 + raw[1];
7289 rdataset->private5 = NULL;
7290 return (ISC_R_NOMORE);
7293 #if DNS_RDATASET_FIXED
7294 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7295 raw += 2 + (4 * count);
7301 * The privateuint4 field is the number of rdata beyond the
7302 * cursor position, so we decrement the total count by one
7303 * before storing it.
7305 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7306 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7307 * to the first entry in the offset table.
7310 rdataset->privateuint4 = count;
7311 rdataset->private5 = raw;
7313 return (ISC_R_SUCCESS);
7317 rdataset_next(dns_rdataset_t *rdataset) {
7319 unsigned int length;
7320 unsigned char *raw; /* RDATASLAB */
7322 count = rdataset->privateuint4;
7324 return (ISC_R_NOMORE);
7326 rdataset->privateuint4 = count;
7329 * Skip forward one record (length + 4) or one offset (4).
7331 raw = rdataset->private5;
7332 #if DNS_RDATASET_FIXED
7333 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7335 length = raw[0] * 256 + raw[1];
7337 #if DNS_RDATASET_FIXED
7339 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7341 rdataset->private5 = raw + 2; /* length(2) */
7344 return (ISC_R_SUCCESS);
7348 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7349 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7350 #if DNS_RDATASET_FIXED
7351 unsigned int offset;
7353 unsigned int length;
7355 unsigned int flags = 0;
7357 REQUIRE(raw != NULL);
7360 * Find the start of the record if not already in private5
7361 * then skip the length and order fields.
7363 #if DNS_RDATASET_FIXED
7364 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7365 offset = (raw[0] << 24) + (raw[1] << 16) +
7366 (raw[2] << 8) + raw[3];
7367 raw = rdataset->private3;
7371 length = raw[0] * 256 + raw[1];
7372 #if DNS_RDATASET_FIXED
7377 if (rdataset->type == dns_rdatatype_rrsig) {
7378 if (*raw & DNS_RDATASLAB_OFFLINE)
7379 flags |= DNS_RDATA_OFFLINE;
7385 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7386 rdata->flags |= flags;
7390 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7391 dns_db_t *db = source->private1;
7392 dns_dbnode_t *node = source->private2;
7393 dns_dbnode_t *cloned_node = NULL;
7395 attachnode(db, node, &cloned_node);
7399 * Reset iterator state.
7401 target->privateuint4 = 0;
7402 target->private5 = NULL;
7406 rdataset_count(dns_rdataset_t *rdataset) {
7407 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7410 count = raw[0] * 256 + raw[1];
7416 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7417 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7419 dns_db_t *db = rdataset->private1;
7420 dns_dbnode_t *node = rdataset->private2;
7421 dns_dbnode_t *cloned_node;
7422 struct noqname *noqname = rdataset->private6;
7425 attachnode(db, node, &cloned_node);
7426 nsec->methods = &rdataset_methods;
7427 nsec->rdclass = db->rdclass;
7428 nsec->type = noqname->type;
7430 nsec->ttl = rdataset->ttl;
7431 nsec->trust = rdataset->trust;
7432 nsec->private1 = rdataset->private1;
7433 nsec->private2 = rdataset->private2;
7434 nsec->private3 = noqname->neg;
7435 nsec->privateuint4 = 0;
7436 nsec->private5 = NULL;
7437 nsec->private6 = NULL;
7438 nsec->private7 = NULL;
7441 attachnode(db, node, &cloned_node);
7442 nsecsig->methods = &rdataset_methods;
7443 nsecsig->rdclass = db->rdclass;
7444 nsecsig->type = dns_rdatatype_rrsig;
7445 nsecsig->covers = noqname->type;
7446 nsecsig->ttl = rdataset->ttl;
7447 nsecsig->trust = rdataset->trust;
7448 nsecsig->private1 = rdataset->private1;
7449 nsecsig->private2 = rdataset->private2;
7450 nsecsig->private3 = noqname->negsig;
7451 nsecsig->privateuint4 = 0;
7452 nsecsig->private5 = NULL;
7453 nsec->private6 = NULL;
7454 nsec->private7 = NULL;
7456 dns_name_clone(&noqname->name, name);
7458 return (ISC_R_SUCCESS);
7462 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7463 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7465 dns_db_t *db = rdataset->private1;
7466 dns_dbnode_t *node = rdataset->private2;
7467 dns_dbnode_t *cloned_node;
7468 struct noqname *closest = rdataset->private7;
7471 attachnode(db, node, &cloned_node);
7472 nsec->methods = &rdataset_methods;
7473 nsec->rdclass = db->rdclass;
7474 nsec->type = closest->type;
7476 nsec->ttl = rdataset->ttl;
7477 nsec->trust = rdataset->trust;
7478 nsec->private1 = rdataset->private1;
7479 nsec->private2 = rdataset->private2;
7480 nsec->private3 = closest->neg;
7481 nsec->privateuint4 = 0;
7482 nsec->private5 = NULL;
7483 nsec->private6 = NULL;
7484 nsec->private7 = NULL;
7487 attachnode(db, node, &cloned_node);
7488 nsecsig->methods = &rdataset_methods;
7489 nsecsig->rdclass = db->rdclass;
7490 nsecsig->type = dns_rdatatype_rrsig;
7491 nsecsig->covers = closest->type;
7492 nsecsig->ttl = rdataset->ttl;
7493 nsecsig->trust = rdataset->trust;
7494 nsecsig->private1 = rdataset->private1;
7495 nsecsig->private2 = rdataset->private2;
7496 nsecsig->private3 = closest->negsig;
7497 nsecsig->privateuint4 = 0;
7498 nsecsig->private5 = NULL;
7499 nsec->private6 = NULL;
7500 nsec->private7 = NULL;
7502 dns_name_clone(&closest->name, name);
7504 return (ISC_R_SUCCESS);
7508 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7509 dns_rbtdb_t *rbtdb = rdataset->private1;
7510 dns_rbtnode_t *rbtnode = rdataset->private2;
7511 rdatasetheader_t *header = rdataset->private3;
7514 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7515 isc_rwlocktype_write);
7516 header->trust = rdataset->trust = trust;
7517 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7518 isc_rwlocktype_write);
7522 rdataset_expire(dns_rdataset_t *rdataset) {
7523 dns_rbtdb_t *rbtdb = rdataset->private1;
7524 dns_rbtnode_t *rbtnode = rdataset->private2;
7525 rdatasetheader_t *header = rdataset->private3;
7528 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7529 isc_rwlocktype_write);
7530 expire_header(rbtdb, header, ISC_FALSE);
7531 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7532 isc_rwlocktype_write);
/*
 * Rdataset Iterator Methods
 */
7540 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7541 rbtdb_rdatasetiter_t *rbtiterator;
7543 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7545 if (rbtiterator->common.version != NULL)
7546 closeversion(rbtiterator->common.db,
7547 &rbtiterator->common.version, ISC_FALSE);
7548 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7549 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7550 sizeof(*rbtiterator));
7556 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7557 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7558 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7559 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7560 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7561 rdatasetheader_t *header, *top_next;
7562 rbtdb_serial_t serial;
7565 if (IS_CACHE(rbtdb)) {
7567 now = rbtiterator->common.now;
7569 serial = rbtversion->serial;
7573 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7574 isc_rwlocktype_read);
7576 for (header = rbtnode->data; header != NULL; header = top_next) {
7577 top_next = header->next;
7579 if (header->serial <= serial && !IGNORE(header)) {
7581 * Is this a "this rdataset doesn't exist"
7582 * record? Or is it too old in the cache?
7584 * Note: unlike everywhere else, we
7585 * check for now > header->rdh_ttl instead
7586 * of now >= header->rdh_ttl. This allows
7587 * ANY and RRSIG queries for 0 TTL
7588 * rdatasets to work.
7590 if (NONEXISTENT(header) ||
7591 (now != 0 && now > header->rdh_ttl))
7595 header = header->down;
7596 } while (header != NULL);
7601 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7602 isc_rwlocktype_read);
7604 rbtiterator->current = header;
7607 return (ISC_R_NOMORE);
7609 return (ISC_R_SUCCESS);
7613 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7614 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7615 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7616 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7617 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7618 rdatasetheader_t *header, *top_next;
7619 rbtdb_serial_t serial;
7621 rbtdb_rdatatype_t type, negtype;
7622 dns_rdatatype_t rdtype, covers;
7624 header = rbtiterator->current;
7626 return (ISC_R_NOMORE);
7628 if (IS_CACHE(rbtdb)) {
7630 now = rbtiterator->common.now;
7632 serial = rbtversion->serial;
7636 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7637 isc_rwlocktype_read);
7639 type = header->type;
7640 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7642 covers = RBTDB_RDATATYPE_EXT(header->type);
7643 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7645 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7646 for (header = header->next; header != NULL; header = top_next) {
7647 top_next = header->next;
7649 * If not walking back up the down list.
7651 if (header->type != type && header->type != negtype) {
7653 if (header->serial <= serial &&
7656 * Is this a "this rdataset doesn't
7659 * Note: unlike everywhere else, we
7660 * check for now > header->ttl instead
7661 * of now >= header->ttl. This allows
7662 * ANY and RRSIG queries for 0 TTL
7663 * rdatasets to work.
7665 if ((header->attributes &
7666 RDATASET_ATTR_NONEXISTENT) != 0 ||
7667 (now != 0 && now > header->rdh_ttl))
7671 header = header->down;
7672 } while (header != NULL);
7678 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7679 isc_rwlocktype_read);
7681 rbtiterator->current = header;
7684 return (ISC_R_NOMORE);
7686 return (ISC_R_SUCCESS);
7690 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7691 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7692 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7693 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7694 rdatasetheader_t *header;
7696 header = rbtiterator->current;
7697 REQUIRE(header != NULL);
7699 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7700 isc_rwlocktype_read);
7702 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7705 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7706 isc_rwlocktype_read);
/*
 * Database Iterator Methods
 */
7715 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7716 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7717 dns_rbtnode_t *node = rbtdbiter->node;
7722 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7723 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7727 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7728 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7729 dns_rbtnode_t *node = rbtdbiter->node;
7735 lock = &rbtdb->node_locks[node->locknum].lock;
7736 NODE_LOCK(lock, isc_rwlocktype_read);
7737 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7738 rbtdbiter->tree_locked, ISC_FALSE);
7739 NODE_UNLOCK(lock, isc_rwlocktype_read);
7741 rbtdbiter->node = NULL;
7745 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7746 dns_rbtnode_t *node;
7747 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7748 isc_boolean_t was_read_locked = ISC_FALSE;
7752 if (rbtdbiter->delete != 0) {
7754 * Note that "%d node of %d in tree" can report things like
7755 * "flush_deletions: 59 nodes of 41 in tree". This means
7756 * That some nodes appear on the deletions list more than
7757 * once. Only the last occurence will actually be deleted.
7759 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7760 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7761 "flush_deletions: %d nodes of %d in tree",
7763 dns_rbt_nodecount(rbtdb->tree));
7765 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7766 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7767 was_read_locked = ISC_TRUE;
7769 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7770 rbtdbiter->tree_locked = isc_rwlocktype_write;
7772 for (i = 0; i < rbtdbiter->delete; i++) {
7773 node = rbtdbiter->deletions[i];
7774 lock = &rbtdb->node_locks[node->locknum].lock;
7776 NODE_LOCK(lock, isc_rwlocktype_read);
7777 decrement_reference(rbtdb, node, 0,
7778 isc_rwlocktype_read,
7779 rbtdbiter->tree_locked, ISC_FALSE);
7780 NODE_UNLOCK(lock, isc_rwlocktype_read);
7783 rbtdbiter->delete = 0;
7785 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7786 if (was_read_locked) {
7787 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7788 rbtdbiter->tree_locked = isc_rwlocktype_read;
7791 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * Resume a paused iterator: take the tree lock in read mode, record that
 * in rbtdbiter->tree_locked and clear the paused flag.
 *
 * The iterator must be paused and must not be holding the tree lock; both
 * conditions are enforced with REQUIRE.
 */
7797 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7798 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7800 REQUIRE(rbtdbiter->paused);
7801 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7803 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7804 rbtdbiter->tree_locked = isc_rwlocktype_read;
7806 rbtdbiter->paused = ISC_FALSE;
/*
 * Destroy the iterator pointed to by *iteratorp.
 *
 * A read-mode tree lock is released if held (afterwards the iterator must
 * hold no tree lock), the current node is dereferenced, queued deletions
 * are flushed, the iterator's database reference is dropped, both node
 * chains are reset and the iterator memory is returned to the database's
 * memory context.
 */
7810 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7811 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7812 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7813 dns_db_t *db = NULL;
7815 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7816 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7817 rbtdbiter->tree_locked = isc_rwlocktype_none;
7819 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7821 dereference_iter_node(rbtdbiter);
7823 flush_deletions(rbtdbiter);
/*
 * Hold a separate database reference in 'db' so that db->mctx can still
 * be used for isc_mem_put() after common.db has been detached.
 */
7825 dns_db_attach(rbtdbiter->common.db, &db);
7826 dns_db_detach(&rbtdbiter->common.db);
7828 dns_rbtnodechain_reset(&rbtdbiter->chain);
7829 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7830 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * Position the iterator on the first node.
 *
 * If the stored iterator result is anything other than ISC_R_SUCCESS or
 * ISC_R_NOMORE it is returned unchanged.  Otherwise a paused iterator is
 * resumed, the current node is released and both node chains are reset
 * before searching.
 *
 * With nsec3only set the search uses rbtdb->nsec3 through the nsec3 chain.
 * The main tree (rbtdb->tree) is searched through the main chain, and when
 * that yields ISC_R_NOTFOUND and nonsec3 is not set, the nsec3 chain is
 * tried as well.
 *
 * On ISC_R_SUCCESS or DNS_R_NEWORIGIN the node under the chain is fetched
 * and referenced and new_origin is set.  ISC_R_NOTFOUND (empty tree) is
 * converted to ISC_R_NOMORE.  The outcome is stored in rbtdbiter->result.
 */
7837 dbiterator_first(dns_dbiterator_t *iterator) {
7838 isc_result_t result;
7839 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7840 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7841 dns_name_t *name, *origin;
7843 if (rbtdbiter->result != ISC_R_SUCCESS &&
7844 rbtdbiter->result != ISC_R_NOMORE)
7845 return (rbtdbiter->result);
7847 if (rbtdbiter->paused)
7848 resume_iteration(rbtdbiter);
7850 dereference_iter_node(rbtdbiter);
7852 name = dns_fixedname_name(&rbtdbiter->name);
7853 origin = dns_fixedname_name(&rbtdbiter->origin);
7854 dns_rbtnodechain_reset(&rbtdbiter->chain);
7855 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7857 if (rbtdbiter->nsec3only) {
7858 rbtdbiter->current = &rbtdbiter->nsec3chain;
7859 result = dns_rbtnodechain_first(rbtdbiter->current,
7860 rbtdb->nsec3, name, origin);
7862 rbtdbiter->current = &rbtdbiter->chain;
7863 result = dns_rbtnodechain_first(rbtdbiter->current,
7864 rbtdb->tree, name, origin);
7865 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7866 rbtdbiter->current = &rbtdbiter->nsec3chain;
7867 result = dns_rbtnodechain_first(rbtdbiter->current,
7872 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7873 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7874 NULL, &rbtdbiter->node);
7875 if (result == ISC_R_SUCCESS) {
7876 rbtdbiter->new_origin = ISC_TRUE;
7877 reference_iter_node(rbtdbiter);
7880 INSIST(result == ISC_R_NOTFOUND);
7881 result = ISC_R_NOMORE; /* The tree is empty. */
7884 rbtdbiter->result = result;
/*
 * Position the iterator on the last node.
 *
 * If the stored iterator result is anything other than ISC_R_SUCCESS or
 * ISC_R_NOMORE it is returned unchanged.  Otherwise a paused iterator is
 * resumed, the current node is released and both node chains are reset.
 *
 * The nsec3 tree is searched only when nsec3only is set and nonsec3 is
 * not; the main tree is searched when nsec3only is not set and no node has
 * been found yet (result still ISC_R_NOTFOUND).
 *
 * On ISC_R_SUCCESS or DNS_R_NEWORIGIN the node under the chain is fetched
 * and referenced and new_origin is set.  ISC_R_NOTFOUND (empty tree) is
 * converted to ISC_R_NOMORE.  The outcome is stored in rbtdbiter->result.
 */
7890 dbiterator_last(dns_dbiterator_t *iterator) {
7891 isc_result_t result;
7892 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7893 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7894 dns_name_t *name, *origin;
7896 if (rbtdbiter->result != ISC_R_SUCCESS &&
7897 rbtdbiter->result != ISC_R_NOMORE)
7898 return (rbtdbiter->result);
7900 if (rbtdbiter->paused)
7901 resume_iteration(rbtdbiter);
7903 dereference_iter_node(rbtdbiter);
7905 name = dns_fixedname_name(&rbtdbiter->name);
7906 origin = dns_fixedname_name(&rbtdbiter->origin);
7907 dns_rbtnodechain_reset(&rbtdbiter->chain);
7908 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/* Start from "not found" so the main-tree search below runs by default. */
7910 result = ISC_R_NOTFOUND;
7911 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7912 rbtdbiter->current = &rbtdbiter->nsec3chain;
7913 result = dns_rbtnodechain_last(rbtdbiter->current,
7914 rbtdb->nsec3, name, origin);
7916 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7917 rbtdbiter->current = &rbtdbiter->chain;
7918 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7921 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7922 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7923 NULL, &rbtdbiter->node);
7924 if (result == ISC_R_SUCCESS) {
7925 rbtdbiter->new_origin = ISC_TRUE;
7926 reference_iter_node(rbtdbiter);
7929 INSIST(result == ISC_R_NOTFOUND);
7930 result = ISC_R_NOMORE; /* The tree is empty. */
7933 rbtdbiter->result = result;
/*
 * Position the iterator on the node for 'name'.
 *
 * If the stored iterator result is anything other than ISC_R_SUCCESS,
 * ISC_R_NOTFOUND or ISC_R_NOMORE it is returned unchanged.  Otherwise a
 * paused iterator is resumed, the current node is released and both node
 * chains are reset before the lookup.
 *
 * Lookups use dns_rbt_findnode() with DNS_RBTFIND_EMPTYDATA.  With
 * nsec3only set only rbtdb->nsec3 is searched; with nonsec3 set only
 * rbtdb->tree is searched.  With neither flag the main tree is searched
 * first and, on DNS_R_PARTIALMATCH, the nsec3 tree is tried; the iterator
 * switches to the nsec3 chain only if that second lookup succeeds.
 *
 * NOTE(review): two alternative result-handling sequences are visible at
 * the end of this function -- one maps DNS_R_PARTIALMATCH to
 * ISC_R_NOTFOUND, the other stores it as ISC_R_SUCCESS.  Presumably a
 * preprocessor conditional selects one of them; confirm which is active.
 */
7939 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7940 isc_result_t result;
7941 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7942 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7943 dns_name_t *iname, *origin;
7945 if (rbtdbiter->result != ISC_R_SUCCESS &&
7946 rbtdbiter->result != ISC_R_NOTFOUND &&
7947 rbtdbiter->result != ISC_R_NOMORE)
7948 return (rbtdbiter->result);
7950 if (rbtdbiter->paused)
7951 resume_iteration(rbtdbiter);
7953 dereference_iter_node(rbtdbiter);
7955 iname = dns_fixedname_name(&rbtdbiter->name);
7956 origin = dns_fixedname_name(&rbtdbiter->origin);
7957 dns_rbtnodechain_reset(&rbtdbiter->chain);
7958 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7960 if (rbtdbiter->nsec3only) {
7961 rbtdbiter->current = &rbtdbiter->nsec3chain;
7962 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7965 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7966 } else if (rbtdbiter->nonsec3) {
7967 rbtdbiter->current = &rbtdbiter->chain;
7968 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7971 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7974 * Stay on main chain if not found on either chain.
7976 rbtdbiter->current = &rbtdbiter->chain;
7977 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7980 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7981 if (result == DNS_R_PARTIALMATCH) {
7982 dns_rbtnode_t *node = NULL;
7983 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7984 &node, &rbtdbiter->nsec3chain,
7985 DNS_RBTFIND_EMPTYDATA,
7987 if (result == ISC_R_SUCCESS) {
7988 rbtdbiter->node = node;
7989 rbtdbiter->current = &rbtdbiter->nsec3chain;
7995 if (result == ISC_R_SUCCESS) {
7996 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7998 if (result == ISC_R_SUCCESS) {
7999 rbtdbiter->new_origin = ISC_TRUE;
8000 reference_iter_node(rbtdbiter);
8002 } else if (result == DNS_R_PARTIALMATCH) {
8003 result = ISC_R_NOTFOUND;
8004 rbtdbiter->node = NULL;
8007 rbtdbiter->result = result;
8009 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8010 isc_result_t tresult;
8011 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8013 if (tresult == ISC_R_SUCCESS) {
8014 rbtdbiter->new_origin = ISC_TRUE;
8015 reference_iter_node(rbtdbiter);
8018 rbtdbiter->node = NULL;
8021 rbtdbiter->node = NULL;
8023 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8024 ISC_R_SUCCESS : result;
/*
 * Move the iterator to the previous node.
 *
 * Requires a current node.  If the stored iterator result is not
 * ISC_R_SUCCESS it is returned unchanged.  A paused iterator is resumed.
 *
 * When stepping back on the nsec3 chain runs out (ISC_R_NOMORE) and
 * neither nsec3only nor nonsec3 is set, iteration continues from the last
 * node of the main tree; an empty main tree (ISC_R_NOTFOUND) is reported
 * as ISC_R_NOMORE.
 *
 * The old node is released, and on ISC_R_SUCCESS or DNS_R_NEWORIGIN the
 * new node is fetched from the chain (new_origin records whether the
 * origin changed) and referenced.  The outcome is stored in
 * rbtdbiter->result.
 */
8031 dbiterator_prev(dns_dbiterator_t *iterator) {
8032 isc_result_t result;
8033 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8034 dns_name_t *name, *origin;
8035 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8037 REQUIRE(rbtdbiter->node != NULL);
8039 if (rbtdbiter->result != ISC_R_SUCCESS)
8040 return (rbtdbiter->result);
8042 if (rbtdbiter->paused)
8043 resume_iteration(rbtdbiter);
8045 name = dns_fixedname_name(&rbtdbiter->name);
8046 origin = dns_fixedname_name(&rbtdbiter->origin);
8047 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8048 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8049 !rbtdbiter->nonsec3 &&
8050 &rbtdbiter->nsec3chain == rbtdbiter->current) {
8051 rbtdbiter->current = &rbtdbiter->chain;
8052 dns_rbtnodechain_reset(rbtdbiter->current);
8053 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8055 if (result == ISC_R_NOTFOUND)
8056 result = ISC_R_NOMORE;
8059 dereference_iter_node(rbtdbiter);
8061 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8062 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8063 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8064 NULL, &rbtdbiter->node);
8067 if (result == ISC_R_SUCCESS)
8068 reference_iter_node(rbtdbiter);
8070 rbtdbiter->result = result;
/*
 * Move the iterator to the next node.
 *
 * Requires a current node.  If the stored iterator result is not
 * ISC_R_SUCCESS it is returned unchanged.  A paused iterator is resumed.
 *
 * When stepping forward on the main chain runs out (ISC_R_NOMORE) and
 * neither nsec3only nor nonsec3 is set, iteration continues from the first
 * node of the nsec3 tree; an empty nsec3 tree (ISC_R_NOTFOUND) is reported
 * as ISC_R_NOMORE.
 *
 * The old node is released, and on ISC_R_SUCCESS or DNS_R_NEWORIGIN the
 * new node is fetched from the chain (new_origin records whether the
 * origin changed) and referenced.  The outcome is stored in
 * rbtdbiter->result.
 */
8076 dbiterator_next(dns_dbiterator_t *iterator) {
8077 isc_result_t result;
8078 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8079 dns_name_t *name, *origin;
8080 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8082 REQUIRE(rbtdbiter->node != NULL);
8084 if (rbtdbiter->result != ISC_R_SUCCESS)
8085 return (rbtdbiter->result);
8087 if (rbtdbiter->paused)
8088 resume_iteration(rbtdbiter);
8090 name = dns_fixedname_name(&rbtdbiter->name);
8091 origin = dns_fixedname_name(&rbtdbiter->origin);
8092 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8093 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8094 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8095 rbtdbiter->current = &rbtdbiter->nsec3chain;
8096 dns_rbtnodechain_reset(rbtdbiter->current);
8097 result = dns_rbtnodechain_first(rbtdbiter->current,
8098 rbtdb->nsec3, name, origin);
8099 if (result == ISC_R_NOTFOUND)
8100 result = ISC_R_NOMORE;
8103 dereference_iter_node(rbtdbiter);
8105 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8106 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8107 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8108 NULL, &rbtdbiter->node);
8110 if (result == ISC_R_SUCCESS)
8111 reference_iter_node(rbtdbiter);
8113 rbtdbiter->result = result;
/*
 * Return the iterator's current node through *nodep, taking a new
 * reference on it under the node's lock.
 *
 * Requires that the stored iterator result is ISC_R_SUCCESS and that a
 * current node exists.  A paused iterator is resumed first.
 *
 * The result is DNS_R_NEWORIGIN when the iterator uses relative names and
 * the origin has changed, otherwise ISC_R_SUCCESS (a failing
 * dns_name_concatenate() result is also checked).
 *
 * When the iterator is in cleaning mode and the result is ISC_R_SUCCESS,
 * the node is expired via expirenode(); if that succeeds and the node has
 * no 'down' pointer, the node is queued in rbtdbiter->deletions[] with an
 * extra reference.  A full queue is flushed before the node is expired.
 */
8119 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8122 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8123 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8124 dns_rbtnode_t *node = rbtdbiter->node;
8125 isc_result_t result;
8126 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8127 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8129 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8130 REQUIRE(rbtdbiter->node != NULL);
8132 if (rbtdbiter->paused)
8133 resume_iteration(rbtdbiter);
8136 if (rbtdbiter->common.relative_names)
8138 result = dns_name_concatenate(nodename, origin, name, NULL);
8139 if (result != ISC_R_SUCCESS)
8141 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8142 result = DNS_R_NEWORIGIN;
8144 result = ISC_R_SUCCESS;
8146 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8147 new_reference(rbtdb, node);
8148 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8150 *nodep = rbtdbiter->node;
8152 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8153 isc_result_t expire_result;
8156 * If the deletion array is full, flush it before trying
8157 * to expire the current node. The current node can't
8158 * fully deleted while the iteration cursor is still on it.
8160 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8161 flush_deletions(rbtdbiter);
8163 expire_result = expirenode(iterator->db, *nodep, 0);
8166 * expirenode() currently always returns success.
8168 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8171 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8172 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8173 dns_rbtnode_refincrement(node, &refs);
8175 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the iterator so that it no longer holds the tree lock.
 *
 * If the stored iterator result is anything other than ISC_R_SUCCESS or
 * ISC_R_NOMORE it is returned unchanged.  Pausing an already paused
 * iterator is a no-op that returns ISC_R_SUCCESS.  Otherwise the paused
 * flag is set, a held tree lock (which must be a read lock) is released,
 * and queued deletions are flushed.
 */
8183 dbiterator_pause(dns_dbiterator_t *iterator) {
8184 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8185 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8187 if (rbtdbiter->result != ISC_R_SUCCESS &&
8188 rbtdbiter->result != ISC_R_NOMORE)
8189 return (rbtdbiter->result);
8191 if (rbtdbiter->paused)
8192 return (ISC_R_SUCCESS);
8194 rbtdbiter->paused = ISC_TRUE;
8196 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8197 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8198 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8199 rbtdbiter->tree_locked = isc_rwlocktype_none;
8202 flush_deletions(rbtdbiter);
8204 return (ISC_R_SUCCESS);
/*
 * Copy the iterator's current origin into 'name'.
 *
 * If the stored iterator result is not ISC_R_SUCCESS it is returned
 * unchanged; otherwise the result of dns_name_copy() is returned.
 */
8208 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8209 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8210 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8212 if (rbtdbiter->result != ISC_R_SUCCESS)
8213 return (rbtdbiter->result);
8215 return (dns_name_copy(origin, name, NULL));
8219 * Additional cache routines.
/*
 * Look up the additional-section cache (acache) entry attached to the
 * rdata the rdataset iterator currently points at, and fetch its contents
 * with dns_acache_getentry().
 *
 * The rdataset header is located immediately before the raw slab, and the
 * slot index is derived from the slab's 16-bit record count (raw[0..1])
 * and the rdataset's remaining count (privateuint4).
 *
 * Returns ISC_R_NOTFOUND when the per-type array or the slot is empty; a
 * query miss is counted in that case (for an empty array, only when 'type'
 * is not dns_rdatasetadditional_fromcache).  The array is inspected under
 * the node lock in read mode; the entry is attached before the lock is
 * released and detached after the fetch.
 */
8222 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8223 dns_rdatatype_t qtype, dns_acache_t *acache,
8224 dns_zone_t **zonep, dns_db_t **dbp,
8225 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8226 dns_name_t *fname, dns_message_t *msg,
8229 dns_rbtdb_t *rbtdb = rdataset->private1;
8230 dns_rbtnode_t *rbtnode = rdataset->private2;
8231 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8232 unsigned int current_count = rdataset->privateuint4;
8234 rdatasetheader_t *header;
8235 nodelock_t *nodelock;
8236 unsigned int total_count;
8237 acachectl_t *acarray;
8238 dns_acacheentry_t *entry;
8239 isc_result_t result;
8241 UNUSED(qtype); /* we do not use this value at least for now */
8244 header = (struct rdatasetheader *)(raw - sizeof(*header));
8246 total_count = raw[0] * 256 + raw[1];
8247 INSIST(total_count > current_count);
8248 count = total_count - current_count - 1;
8252 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8253 NODE_LOCK(nodelock, isc_rwlocktype_read);
8256 case dns_rdatasetadditional_fromauth:
8257 acarray = header->additional_auth;
8259 case dns_rdatasetadditional_fromcache:
8262 case dns_rdatasetadditional_fromglue:
8263 acarray = header->additional_glue;
8269 if (acarray == NULL) {
8270 if (type != dns_rdatasetadditional_fromcache)
8271 dns_acache_countquerymiss(acache);
8272 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8273 return (ISC_R_NOTFOUND);
8276 if (acarray[count].entry == NULL) {
8277 dns_acache_countquerymiss(acache);
8278 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8279 return (ISC_R_NOTFOUND);
8283 dns_acache_attachentry(acarray[count].entry, &entry);
8285 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8287 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8288 nodep, fname, msg, now);
8290 dns_acache_detachentry(&entry);
/*
 * Callback registered with the acache for entries created by
 * rdataset_setadditional().
 *
 * Under the node lock in write mode, the per-type array slot recorded in
 * the callback argument is cleared if it still refers to 'entry', and the
 * callback argument is freed.  The entry is detached, and after the node
 * lock is released the node and database references taken from the
 * callback argument are dropped.
 */
8296 acache_callback(dns_acacheentry_t *entry, void **arg) {
8298 dns_rbtnode_t *rbtnode;
8299 nodelock_t *nodelock;
8300 acachectl_t *acarray = NULL;
8301 acache_cbarg_t *cbarg;
8304 REQUIRE(arg != NULL);
8308 * The caller must hold the entry lock.
8311 rbtdb = (dns_rbtdb_t *)cbarg->db;
8312 rbtnode = (dns_rbtnode_t *)cbarg->node;
8314 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8315 NODE_LOCK(nodelock, isc_rwlocktype_write);
8317 switch (cbarg->type) {
8318 case dns_rdatasetadditional_fromauth:
8319 acarray = cbarg->header->additional_auth;
8321 case dns_rdatasetadditional_fromglue:
8322 acarray = cbarg->header->additional_glue;
8328 count = cbarg->count;
8329 if (acarray != NULL && acarray[count].entry == entry) {
8330 acarray[count].entry = NULL;
8331 INSIST(acarray[count].cbarg == cbarg);
8332 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8333 acarray[count].cbarg = NULL;
8335 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8337 dns_acache_detachentry(&entry);
8339 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* rbtdb and rbtnode were copied from cbarg above; cbarg is already freed. */
8341 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8342 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an acache entry and release the resources held by its callback
 * argument: the node and database references stored in the cbarg are
 * dropped and the cbarg memory is returned to 'mctx'.
 *
 * 'mctx', 'entry', 'cbargp' and '*cbargp' must all be non-NULL.
 */
8348 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8349 acache_cbarg_t **cbargp)
8351 acache_cbarg_t *cbarg;
8353 REQUIRE(mctx != NULL);
8354 REQUIRE(entry != NULL);
8355 REQUIRE(cbargp != NULL && *cbargp != NULL);
8359 dns_acache_cancelentry(entry);
8360 dns_db_detachnode(cbarg->db, &cbarg->node);
8361 dns_db_detach(&cbarg->db);
8363 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Create an acache entry for the rdata the rdataset iterator currently
 * points at and install it in the header's per-type array.
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache; ISC_R_SUCCESS is
 * returned immediately.  Otherwise a callback argument is allocated
 * (ISC_R_NOMEMORY on failure) holding the type, slot index, header and
 * new references to the database and node, and an entry is created with
 * acache_callback() and filled in with dns_acache_setentry().
 *
 * Under the node lock in write mode, the per-type array is allocated on
 * first use (one cleared slot per record in the slab) and the new entry
 * and callback argument are stored in the slot.  An entry already in the
 * slot is remembered and cancelled only after the node lock is released.
 *
 * The trailing cleanup code cancels and detaches the new entry when one
 * exists, and releases the node/database references and the callback
 * argument memory.  NOTE(review): presumably these are alternatives
 * reached only on failure -- confirm against the full function.
 */
8369 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8370 dns_rdatatype_t qtype, dns_acache_t *acache,
8371 dns_zone_t *zone, dns_db_t *db,
8372 dns_dbversion_t *version, dns_dbnode_t *node,
8375 dns_rbtdb_t *rbtdb = rdataset->private1;
8376 dns_rbtnode_t *rbtnode = rdataset->private2;
8377 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8378 unsigned int current_count = rdataset->privateuint4;
8379 rdatasetheader_t *header;
8380 unsigned int total_count, count;
8381 nodelock_t *nodelock;
8382 isc_result_t result;
8383 acachectl_t *acarray;
8384 dns_acacheentry_t *newentry, *oldentry = NULL;
8385 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8389 if (type == dns_rdatasetadditional_fromcache)
8390 return (ISC_R_SUCCESS);
8392 header = (struct rdatasetheader *)(raw - sizeof(*header));
8394 total_count = raw[0] * 256 + raw[1];
8395 INSIST(total_count > current_count);
8396 count = total_count - current_count - 1; /* should be private data */
8398 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8399 if (newcbarg == NULL)
8400 return (ISC_R_NOMEMORY);
8401 newcbarg->type = type;
8402 newcbarg->count = count;
8403 newcbarg->header = header;
8404 newcbarg->db = NULL;
8405 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8406 newcbarg->node = NULL;
8407 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8410 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8411 acache_callback, newcbarg, &newentry);
8412 if (result != ISC_R_SUCCESS)
8414 /* Set cache data in the new entry. */
8415 result = dns_acache_setentry(acache, newentry, zone, db,
8416 version, node, fname);
8417 if (result != ISC_R_SUCCESS)
8420 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8421 NODE_LOCK(nodelock, isc_rwlocktype_write);
8425 case dns_rdatasetadditional_fromauth:
8426 acarray = header->additional_auth;
8428 case dns_rdatasetadditional_fromglue:
8429 acarray = header->additional_glue;
8435 if (acarray == NULL) {
8438 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8439 sizeof(acachectl_t));
8441 if (acarray == NULL) {
8442 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8446 for (i = 0; i < total_count; i++) {
8447 acarray[i].entry = NULL;
8448 acarray[i].cbarg = NULL;
8452 case dns_rdatasetadditional_fromauth:
8453 header->additional_auth = acarray;
8455 case dns_rdatasetadditional_fromglue:
8456 header->additional_glue = acarray;
8462 if (acarray[count].entry != NULL) {
8464 * Swap the entry. Delay cleaning-up the old entry since
8465 * it would require a node lock.
8467 oldentry = acarray[count].entry;
8468 INSIST(acarray[count].cbarg != NULL);
8469 oldcbarg = acarray[count].cbarg;
8471 acarray[count].entry = newentry;
8472 acarray[count].cbarg = newcbarg;
8474 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8476 if (oldentry != NULL) {
8477 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8478 dns_acache_detachentry(&oldentry);
8481 return (ISC_R_SUCCESS);
8484 if (newcbarg != NULL) {
8485 if (newentry != NULL) {
8486 acache_cancelentry(rbtdb->common.mctx, newentry,
8488 dns_acache_detachentry(&newentry);
8490 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8491 dns_db_detach(&newcbarg->db);
8492 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Remove the acache entry attached to the rdata the rdataset iterator
 * currently points at.
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache; ISC_R_SUCCESS is
 * returned immediately.  Otherwise, under the node lock in write mode, the
 * slot in the header's per-type array is cleared; ISC_R_NOTFOUND is
 * returned when the array or the slot is empty.  The removed entry is
 * cancelled and detached after the node lock has been released.
 */
8501 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8502 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8504 dns_rbtdb_t *rbtdb = rdataset->private1;
8505 dns_rbtnode_t *rbtnode = rdataset->private2;
8506 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8507 unsigned int current_count = rdataset->privateuint4;
8508 rdatasetheader_t *header;
8509 nodelock_t *nodelock;
8510 unsigned int total_count, count;
8511 acachectl_t *acarray;
8512 dns_acacheentry_t *entry;
8513 acache_cbarg_t *cbarg;
8515 UNUSED(qtype); /* we do not use this value at least for now */
8518 if (type == dns_rdatasetadditional_fromcache)
8519 return (ISC_R_SUCCESS);
8521 header = (struct rdatasetheader *)(raw - sizeof(*header));
8523 total_count = raw[0] * 256 + raw[1];
8524 INSIST(total_count > current_count);
8525 count = total_count - current_count - 1;
8530 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8531 NODE_LOCK(nodelock, isc_rwlocktype_write);
8534 case dns_rdatasetadditional_fromauth:
8535 acarray = header->additional_auth;
8537 case dns_rdatasetadditional_fromglue:
8538 acarray = header->additional_glue;
8544 if (acarray == NULL) {
8545 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8546 return (ISC_R_NOTFOUND);
8549 entry = acarray[count].entry;
8550 if (entry == NULL) {
8551 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8552 return (ISC_R_NOTFOUND);
8555 acarray[count].entry = NULL;
8556 cbarg = acarray[count].cbarg;
8557 acarray[count].cbarg = NULL;
8559 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8561 if (entry != NULL) {
8563 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8564 dns_acache_detachentry(&entry);
8567 return (ISC_R_SUCCESS);
8571 * Routines for LRU-based cache management.
8575 * See if a given cache entry that is being reused needs to be updated
8576 * in the LRU-list. From the LRU management point of view, this function is
8577 * expected to return true for almost all cases. When used with threads,
8578 * however, this may cause a non-negligible performance penalty because a
8579 * writer lock will have to be acquired before updating the list.
8580 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8581 * function returns true if the entry has not been updated for some period of
8582 * time. We differentiate the NS or glue address case and the others since
8583 * experiments have shown that the former tends to be accessed relatively
8584 * infrequently and the cost of a cache miss is higher (e.g., a missing NS record
8585 * may cause external queries at a higher level zone, involving more
8588 * Caller must hold the node (read or write) lock.
/*
 * Decide whether 'header' should have its LRU position refreshed at time
 * 'now'.  Headers flagged NONEXISTENT or STALE are tested for first.
 *
 * When DNS_RBTDB_LIMITLRUUPDATE is non-zero, the decision is based on
 * header->last_used: NS rdatasets and glue-trust A/AAAA rdatasets qualify
 * once 60 seconds have passed, all others once 300 seconds have passed.
 */
8590 static inline isc_boolean_t
8591 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8592 if ((header->attributes &
8593 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8596 #if DNS_RBTDB_LIMITLRUUPDATE
8597 if (header->type == dns_rdatatype_ns ||
8598 (header->trust == dns_trust_glue &&
8599 (header->type == dns_rdatatype_a ||
8600 header->type == dns_rdatatype_aaaa))) {
8602 * Glue records are updated if at least 60 seconds have passed
8603 * since the previous update time.
8605 return (header->last_used + 60 <= now);
8608 /* Other records are updated if 5 minutes have passed. */
8609 return (header->last_used + 300 <= now);
8618 * Update the timestamp of a given cache entry and move it to the head
8619 * of the corresponding LRU list.
8621 * Caller must hold the node (write) lock.
8623 * Note that we do NOT touch the heap here, as the TTL has not changed.
/*
 * Record 'now' as the header's last-used time and move the header to the
 * front of the rdatasets list for its node's lock bucket.
 *
 * Only valid for cache databases, and the header must already be linked
 * into that list; both are enforced with INSIST.
 */
8626 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8629 INSIST(IS_CACHE(rbtdb));
8631 /* To be checked: can we really assume this? XXXMLG */
8632 INSIST(ISC_LINK_LINKED(header, link));
8634 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8635 header->last_used = now;
8636 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8640 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8641 * under an overmem condition. To recover from this condition quickly, up to
8642 * 2 entries will be purged. This process is triggered while adding a new
8643 * entry, and we specifically avoid purging entries in the same LRU bucket as
8644 * the one to which the new entry will belong. Otherwise, we might purge
8645 * entries of the same name of different RR types while adding RRsets from a
8646 * single response (consider the case where we're adding A and AAAA glue records
8647 * of the same NS name).
/*
 * Expire cache entries from lock buckets other than 'locknum_start'.
 *
 * Buckets are visited in order starting at locknum_start + 1 and wrapping
 * around; the walk stops on returning to 'locknum_start' or when
 * 'purgecount' reaches zero.  Each bucket is processed under its node lock
 * in write mode: the top element of the bucket's heap is expired if its
 * TTL is at or before now - RBTDB_VIRTUAL, and then headers are unlinked
 * from the tail of the bucket's rdatasets list and expired while
 * 'purgecount' remains positive.
 *
 * 'tree_locked' is passed through to expire_header().
 */
8650 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8651 isc_stdtime_t now, isc_boolean_t tree_locked)
8653 rdatasetheader_t *header, *header_prev;
8654 unsigned int locknum;
8657 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8658 locknum != locknum_start && purgecount > 0;
8659 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8660 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8661 isc_rwlocktype_write);
8663 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8664 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8665 expire_header(rbtdb, header, tree_locked);
8669 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8670 header != NULL && purgecount > 0;
8671 header = header_prev) {
8672 header_prev = ISC_LIST_PREV(header, link);
8674 * Unlink the entry at this point to avoid checking it
8675 * again even if it's currently used someone else and
8676 * cannot be purged at this moment. This entry won't be
8677 * referenced any more (so unlinking is safe) since the
8678 * TTL was reset to 0.
8680 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8682 expire_header(rbtdb, header, tree_locked);
8686 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8687 isc_rwlocktype_write);
/*
 * Expire a single rdataset header: its TTL is set to 0, it is flagged
 * RDATASET_ATTR_STALE and its node is marked dirty.
 *
 * If the node's reference count is currently zero, a reference is taken
 * and immediately dropped through decrement_reference(), passing a
 * write-mode node lock type and either a write-mode or no tree lock type
 * depending on 'tree_locked'.
 */
8692 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8693 isc_boolean_t tree_locked)
8695 set_ttl(rbtdb, header, 0);
8696 header->attributes |= RDATASET_ATTR_STALE;
8697 header->node->dirty = 1;
8700 * Caller must hold the node (write) lock.
8703 if (dns_rbtnode_refcurrent(header->node) == 0) {
8705 * If no one else is using the node, we can clean it up now.
8706 * We first need to gain a new reference to the node to meet a
8707 * requirement of decrement_reference().
8709 new_reference(rbtdb, header->node);
8710 decrement_reference(rbtdb, header->node, 0,
8711 isc_rwlocktype_write,
8712 tree_locked ? isc_rwlocktype_write :
8713 isc_rwlocktype_none, ISC_FALSE);