2 * Copyright (C) 2004-2012 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
65 #include <dns/zonekey.h>
67 #ifdef DNS_RBTDB_VERSION64
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
80 * Note that "impmagic" is not the first four bytes of the struct, so
81 * ISC_MAGIC_VALID cannot be used.
83 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
84 (rbtdb)->common.impmagic == RBTDB_MAGIC)
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t rbtdb_serial_t;
89 * Make casting easier in symbolic debuggers by using different names
90 * for the 64 bit version.
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
96 typedef isc_uint32_t rbtdb_serial_t;
99 typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * An rbtdb_rdatatype_t is a 32-bit value holding two 16-bit RR types:
 * the "base" type in the low 16 bits and the "extended" type in the
 * high 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)   extracts the low half.
 * RBTDB_RDATATYPE_EXT(type)    extracts the high half.
 * RBTDB_RDATATYPE_VALUE(b, e)  packs base 'b' and extended type 'e'.
 *
 * 'e' is converted to rbtdb_rdatatype_t *before* it is shifted.  A 16-bit
 * operand would otherwise be promoted to (signed) int, and shifting a value
 * >= 0x8000 left by 16 bits overflows int, which is undefined behavior.
 * 'b' is converted as well so the whole expression is evaluated in the
 * unsigned 32-bit type.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
	(((rbtdb_rdatatype_t)(e) << 16) | (rbtdb_rdatatype_t)(b))
105 #define RBTDB_RDATATYPE_SIGNSEC \
106 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_SIGDDS \
116 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
117 #define RBTDB_RDATATYPE_NCACHEANY \
118 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
121 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
122 * Using rwlock is effective with regard to lookup performance only when
123 * it is implemented in an efficient way.
124 * Otherwise, it is generally wise to stick to the simple locking since rwlock
125 * would require more memory or can even make lookups slower due to its own
126 * overhead (when it internally calls mutex locks).
128 #ifdef ISC_RWLOCK_USEATOMIC
129 #define DNS_RBTDB_USERWLOCK 1
131 #define DNS_RBTDB_USERWLOCK 0
134 #if DNS_RBTDB_USERWLOCK
135 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
136 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
137 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
138 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
140 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
141 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
142 #define RBTDB_LOCK(l, t) LOCK(l)
143 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
147 * Since node locking is sensitive to both performance and memory footprint,
148 * we need some trick here. If we have both high-performance rwlock and
149 * high performance and small-memory reference counters, we use rwlock for
150 * node lock and isc_refcount for node references. In this case, we don't have
151 * to protect the access to the counters by locks.
152 * Otherwise, we simply use ordinary mutex lock for node locking, and use
153 * simple integers as reference counters which is protected by the lock.
154 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
155 * NODE_UNLOCK. In some other cases, however, we need to protect reference
156 * counters first and then protect other parts of a node as read-only data.
157 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
158 * provided for these special cases. When we can use the efficient backend
159 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
160 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
161 * section including the access to the reference counter.
162 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
163 * section is also protected by NODE_STRONGLOCK().
165 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
166 typedef isc_rwlock_t nodelock_t;
168 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
169 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
170 #define NODE_LOCK(l, t) RWLOCK((l), (t))
171 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
172 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
174 #define NODE_STRONGLOCK(l) ((void)0)
175 #define NODE_STRONGUNLOCK(l) ((void)0)
176 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
177 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
178 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
180 typedef isc_mutex_t nodelock_t;
182 #define NODE_INITLOCK(l) isc_mutex_init(l)
183 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
184 #define NODE_LOCK(l, t) LOCK(l)
185 #define NODE_UNLOCK(l, t) UNLOCK(l)
186 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
188 #define NODE_STRONGLOCK(l) LOCK(l)
189 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
190 #define NODE_WEAKLOCK(l, t) ((void)0)
191 #define NODE_WEAKUNLOCK(l, t) ((void)0)
192 #define NODE_WEAKDOWNGRADE(l) ((void)0)
196 * Whether to rate-limit updating the LRU to avoid possible thread contention.
197 * Our performance measurement has shown the cost is marginal, so it's defined
198 * to be 0 by default either with or without threads.
200 #ifndef DNS_RBTDB_LIMITLRUUPDATE
201 #define DNS_RBTDB_LIMITLRUUPDATE 0
205 * Allow clients with a virtual time of up to 5 minutes in the past to see
206 * records that would otherwise have expired.
208 #define RBTDB_VIRTUAL 300
214 dns_rdatatype_t type;
217 typedef struct acachectl acachectl_t;
219 typedef struct rdatasetheader {
221 * Locked by the owning node's lock.
223 rbtdb_serial_t serial;
225 rbtdb_rdatatype_t type;
226 isc_uint16_t attributes;
228 struct noqname *noqname;
229 struct noqname *closest;
231 * We don't use the LIST macros, because the LIST structure has
232 * both head and tail pointers, and is doubly linked.
235 struct rdatasetheader *next;
237 * If this is the top header for an rdataset, 'next' points
238 * to the top header for the next rdataset (i.e., the next type).
239 * Otherwise, it points up to the header whose down pointer points
243 struct rdatasetheader *down;
245 * Points to the header for the next older version of
251 * Monotonically increased every time this rdataset is bound so that
252 * it is used as the base of the starting point in DNS responses
253 * when the "cyclic" rrset-order is required. Since the ordering
254 * should not be so crucial, no lock is set for the counter for
255 * performance reasons.
258 acachectl_t *additional_auth;
259 acachectl_t *additional_glue;
262 isc_stdtime_t last_used;
263 ISC_LINK(struct rdatasetheader) link;
265 unsigned int heap_index;
267 * Used for TTL-based cache cleaning.
269 isc_stdtime_t resign;
272 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
273 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
/*
 * Bit flags kept in the 16-bit 'attributes' member of rdatasetheader_t.
 * Every flag occupies its own bit, so any combination can be OR-ed
 * together and tested independently with the predicate macros
 * (EXISTS(), IGNORE(), RETAIN(), ...).
 */
275 #define RDATASET_ATTR_NONEXISTENT 0x0001
276 #define RDATASET_ATTR_STALE 0x0002
277 #define RDATASET_ATTR_IGNORE 0x0004
278 #define RDATASET_ATTR_RETAIN 0x0008
279 #define RDATASET_ATTR_NXDOMAIN 0x0010
280 #define RDATASET_ATTR_RESIGN 0x0020
281 #define RDATASET_ATTR_STATCOUNT 0x0040
282 #define RDATASET_ATTR_OPTOUT 0x0080
283 #define RDATASET_ATTR_NEGATIVE 0x0100
285 typedef struct acache_cbarg {
286 dns_rdatasetadditional_t type;
290 rdatasetheader_t *header;
294 dns_acacheentry_t *entry;
295 acache_cbarg_t *cbarg;
300 * When the cache will pre-expire data (due to memory low or other
301 * situations) before the rdataset's TTL has expired, it MUST
302 * respect the RETAIN bit and not expire the data until its TTL is
/*
 * Predicates over (header)->attributes.  Each one tests a single
 * RDATASET_ATTR_* bit and yields a C truth value; 'header' is any
 * expression that can be dereferenced with '->' to reach 'attributes'.
 * IGNORE is #undef'd first because a platform header may already define it.
 */
306 #undef IGNORE /* WIN32 winbase.h defines this. */
/* EXISTS() is true when the NONEXISTENT bit is clear ... */
308 #define EXISTS(header) \
309 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
/* ... and NONEXISTENT() is its exact negation. */
310 #define NONEXISTENT(header) \
311 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
312 #define IGNORE(header) \
313 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
314 #define RETAIN(header) \
315 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
316 #define NXDOMAIN(header) \
317 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
318 #define RESIGN(header) \
319 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
320 #define OPTOUT(header) \
321 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
322 #define NEGATIVE(header) \
323 (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
325 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
328 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
329 * There is a tradeoff issue about configuring this value: if this is too
330 * small, it may cause heavier contention between threads; if this is too large,
331 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
332 * The default value should work well for most environments, but this can
333 * also be configurable at compilation time via the
334 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
335 * 1 due to the assumption of overmem_purge().
337 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
338 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
339 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
341 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
344 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
345 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
349 /* Protected in the refcount routines. */
350 isc_refcount_t references;
351 /* Locked by lock. */
352 isc_boolean_t exiting;
355 typedef struct rbtdb_changed {
356 dns_rbtnode_t * node;
358 ISC_LINK(struct rbtdb_changed) link;
361 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
369 typedef struct dns_rbtdb dns_rbtdb_t;
371 typedef struct rbtdb_version {
373 rbtdb_serial_t serial;
376 * Protected in the refcount routines.
377 * XXXJT: should we change the lock policy based on the refcount
380 isc_refcount_t references;
381 /* Locked by database lock. */
382 isc_boolean_t writer;
383 isc_boolean_t commit_ok;
384 rbtdb_changedlist_t changed_list;
385 rdatasetheaderlist_t resigned_list;
386 ISC_LINK(struct rbtdb_version) link;
387 dns_db_secure_t secure;
388 isc_boolean_t havensec3;
389 /* NSEC3 parameters */
392 isc_uint16_t iterations;
393 isc_uint8_t salt_length;
394 unsigned char salt[DNS_NSEC3_SALTSIZE];
397 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
402 /* Locks the data in this struct */
403 #if DNS_RBTDB_USERWLOCK
408 /* Locks the tree structure (prevents nodes appearing/disappearing) */
409 isc_rwlock_t tree_lock;
410 /* Locks for individual tree nodes */
411 unsigned int node_lock_count;
412 rbtdb_nodelock_t * node_locks;
413 dns_rbtnode_t * origin_node;
414 dns_stats_t * rrsetstats; /* cache DB only */
415 /* Locked by lock. */
417 isc_refcount_t references;
418 unsigned int attributes;
419 rbtdb_serial_t current_serial;
420 rbtdb_serial_t least_serial;
421 rbtdb_serial_t next_serial;
422 rbtdb_version_t * current_version;
423 rbtdb_version_t * future_version;
424 rbtdb_versionlist_t open_versions;
426 dns_dbnode_t *soanode;
427 dns_dbnode_t *nsnode;
430 * This is a linked list used to implement the LRU cache. There will
431 * be node_lock_count linked lists here. Nodes in bucket 1 will be
432 * placed on the linked list rdatasets[1].
434 rdatasetheaderlist_t *rdatasets;
437 * Temporary storage for stale cache nodes and dynamically deleted
438 * nodes that await being cleaned up.
440 rbtnodelist_t *deadnodes;
443 * Heaps. These are used for TTL based expiry in a cache,
444 * or for zone resigning in a zone DB. hmctx is the memory
445 * context to use for the heap (which differs from the main
446 * database memory context in the case of a cache).
451 /* Locked by tree_lock. */
456 unsigned int quantum;
459 #define RBTDB_ATTR_LOADED 0x01
460 #define RBTDB_ATTR_LOADING 0x02
467 rbtdb_version_t * rbtversion;
468 rbtdb_serial_t serial;
469 unsigned int options;
470 dns_rbtnodechain_t chain;
471 isc_boolean_t copy_name;
472 isc_boolean_t need_cleanup;
474 dns_rbtnode_t * zonecut;
475 rdatasetheader_t * zonecut_rdataset;
476 rdatasetheader_t * zonecut_sigrdataset;
477 dns_fixedname_t zonecut_name;
489 static void rdataset_disassociate(dns_rdataset_t *rdataset);
490 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
491 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
492 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
493 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
494 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
495 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
498 dns_rdataset_t *negsig);
499 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
502 dns_rdataset_t *negsig);
503 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
504 dns_rdatasetadditional_t type,
505 dns_rdatatype_t qtype,
506 dns_acache_t *acache,
509 dns_dbversion_t **versionp,
510 dns_dbnode_t **nodep,
514 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
515 dns_rdatasetadditional_t type,
516 dns_rdatatype_t qtype,
517 dns_acache_t *acache,
520 dns_dbversion_t *version,
523 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
524 dns_rdataset_t *rdataset,
525 dns_rdatasetadditional_t type,
526 dns_rdatatype_t qtype);
527 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
529 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
531 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
532 isc_boolean_t tree_locked);
533 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
534 isc_stdtime_t now, isc_boolean_t tree_locked);
535 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
536 rdatasetheader_t *newheader);
537 static void prune_tree(isc_task_t *task, isc_event_t *event);
538 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
539 static void rdataset_expire(dns_rdataset_t *rdataset);
541 static dns_rdatasetmethods_t rdataset_methods = {
542 rdataset_disassociate,
552 rdataset_getadditional,
553 rdataset_setadditional,
554 rdataset_putadditional,
559 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
560 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
561 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
562 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
563 dns_rdataset_t *rdataset);
565 static dns_rdatasetitermethods_t rdatasetiter_methods = {
566 rdatasetiter_destroy,
572 typedef struct rbtdb_rdatasetiter {
573 dns_rdatasetiter_t common;
574 rdatasetheader_t * current;
575 } rbtdb_rdatasetiter_t;
577 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
578 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
579 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
580 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
582 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
583 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
584 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
585 dns_dbnode_t **nodep,
587 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
588 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
591 static dns_dbiteratormethods_t dbiterator_methods = {
603 #define DELETION_BATCH_MAX 64
606 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
608 typedef struct rbtdb_dbiterator {
609 dns_dbiterator_t common;
610 isc_boolean_t paused;
611 isc_boolean_t new_origin;
612 isc_rwlocktype_t tree_locked;
614 dns_fixedname_t name;
615 dns_fixedname_t origin;
616 dns_rbtnodechain_t chain;
617 dns_rbtnodechain_t nsec3chain;
618 dns_rbtnodechain_t *current;
620 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
622 isc_boolean_t nsec3only;
623 isc_boolean_t nonsec3;
624 } rbtdb_dbiterator_t;
/*
 * Database-kind predicates: test the DNS_DBATTR_STUB / DNS_DBATTR_CACHE
 * bit in the generic (rbtdb)->common.attributes word.
 */
627 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
628 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
630 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
632 static void overmem(dns_db_t *db, isc_boolean_t overmem);
633 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
634 isc_boolean_t *nsec3createflag);
637 * 'init_count' is used to initialize 'newheader->count' which in turn
638 * is used to determine where in the cycle rrset-order cyclic starts.
639 * We don't lock this as we don't care about simultaneous updates.
642 * Both init_count and header->count can be ISC_UINT32_MAX.
643 * The count on the returned rdataset however can't be as
644 * that indicates that the database does not implement cyclic
647 static unsigned int init_count;
652 * If a routine is going to lock more than one lock in this module, then
653 * the locking must be done in the following order:
657 * Node Lock (Only one from the set may be locked at one time by
662 * Failure to follow this hierarchy can result in deadlock.
668 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * attach: take one more reference on the database 'source'.
 *
 * 'source' is viewed as the rbtdb implementation type, validated with
 * VALID_RBTDB(), and its 'references' counter is incremented.
 *
 * NOTE(review): this extract does not show every line of the function
 * (the return-type line and the store through 'targetp' are not visible);
 * confirm the remainder against the complete file.
 */
677 attach(dns_db_t *source, dns_db_t **targetp) {
678 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
680 REQUIRE(VALID_RBTDB(rbtdb));
/* NULL second argument: no count is requested back from the increment. */
682 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * free_rbtdb_callback: task event handler that continues tearing down a
 * database.
 *
 * The database pointer travels in event->ev_arg.  The event itself is
 * handed on to free_rbtdb(), which either re-sends it to the task or
 * frees it.  'task' is not used by the lines visible here.
 */
688 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
689 dns_rbtdb_t *rbtdb = event->ev_arg;
693 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * update_rrsetstats: bump the per-type rrset statistics counter that
 * corresponds to 'header' up or down.
 *
 *   rbtdb      database owning the counters; must be a cache (INSIST).
 *   header     rdataset header whose type/attributes select the counter.
 *   increment  selects dns_rdatasetstats_increment() versus
 *              dns_rdatasetstats_decrement().
 *
 * The counter key is DNS_RDATASTATSTYPE_VALUE(base, statattributes).
 * For negative headers the attribute is NXDOMAIN or NXRRSET and the
 * extended half of header->type can supply 'base'; for positive headers
 * the base half of header->type is used.
 *
 * NOTE(review): the else/brace lines are not visible in this extract, so
 * confirm exactly which negative branch assigns 'base' from
 * RBTDB_RDATATYPE_EXT(), and that 'increment' is the condition choosing
 * between the final two calls.
 */
697 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
698 isc_boolean_t increment)
700 dns_rdatastatstype_t statattributes = 0;
701 dns_rdatastatstype_t base = 0;
702 dns_rdatastatstype_t type;
704 /* At the moment we count statistics only for cache DB */
705 INSIST(IS_CACHE(rbtdb));
707 if (NEGATIVE(header)) {
708 if (NXDOMAIN(header))
709 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
711 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
712 base = RBTDB_RDATATYPE_EXT(header->type);
715 base = RBTDB_RDATATYPE_BASE(header->type);
717 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
719 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
721 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * set_ttl: store a new TTL in 'header' and, for a cache database, keep the
 * TTL heap consistent with the change.
 *
 *   rbtdb    database that owns 'header'.
 *   header   rdataset header to update; header->rdh_ttl is overwritten.
 *   newttl   TTL value to store.
 *
 * The previous TTL is remembered so that the heap is only touched when the
 * value really changed.  Nothing more is done when the database is not a
 * cache, when the header is not in a heap (heap_index == 0), or when no
 * heap exists for the header's lock bucket.
 *
 * NOTE(review): the lines that follow each guard (presumably 'return') and
 * the comparison that chooses between isc_heap_increased() and
 * isc_heap_decreased() are not visible in this extract.
 */
725 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
730 oldttl = header->rdh_ttl;
731 header->rdh_ttl = newttl;
733 if (!IS_CACHE(rbtdb))
737 * It's possible the rbtdb is not a cache. If this is the case,
738 * we will not have a heap, and we move on. If we do, though,
739 * we might need to adjust things.
741 if (header->heap_index == 0 || newttl == oldttl)
/* Heaps are kept per node-lock bucket; pick the one for this header. */
743 idx = header->node->locknum;
744 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
746 heap = rbtdb->heaps[idx];
749 isc_heap_increased(heap, header->heap_index);
751 isc_heap_decreased(heap, header->heap_index);
755 * These functions allow the heap code to rank the priority of each
756 * element. It returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * ttl_sooner: heap ordering callback for TTL-based expiry.
 *
 * 'v1' and 'v2' are rdatasetheader_t pointers passed as void *.  The two
 * headers are compared on rdh_ttl; the smaller TTL ranks first.
 * NOTE(review): the return statements are not visible in this extract.
 */
759 ttl_sooner(void *v1, void *v2) {
760 rdatasetheader_t *h1 = v1;
761 rdatasetheader_t *h2 = v2;
763 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * resign_sooner: heap ordering callback for zone re-signing.
 *
 * 'v1' and 'v2' are rdatasetheader_t pointers passed as void *.  The two
 * headers are compared on their 'resign' time; the smaller value ranks
 * first.
 * NOTE(review): the return statements are not visible in this extract.
 */
769 resign_sooner(void *v1, void *v2) {
770 rdatasetheader_t *h1 = v1;
771 rdatasetheader_t *h2 = v2;
773 if (h1->resign < h2->resign)
779 * This function sets the heap index into the header.
/*
 * set_index: heap callback that records an element's position.
 *
 * 'what' is an rdatasetheader_t passed as void *; 'index' is written to
 * its heap_index member.  Other code in this file treats heap_index == 0
 * as "not in a heap".
 */
782 set_index(void *what, unsigned int index) {
783 rdatasetheader_t *h = what;
785 h->heap_index = index;
789 * Work out how many nodes can be deleted in the time between two
790 * requests to the nameserver. Smooth the resulting number and use it
791 * as an estimate for the number of nodes to be deleted in the next
/*
 * adjust_quantum: derive the node-deletion batch size for the next pass.
 *
 *   old    batch size used for the pass that just finished.
 *   start  time at which that pass began.
 *
 * The time available between two requests is estimated as
 * 1000000 / pps microseconds, where pps is the global dns_pps.  The batch
 * size is scaled by (interval / elapsed microseconds) and then smoothed as
 * a 1:3 weighted average of the new and old values.  The result is logged
 * at debug level 1.
 *
 * NOTE(review): several lines (local declarations, clamping of 'pps' and
 * 'new', the elapsed == 0 branch and the return) are not visible in this
 * extract.  Two things to confirm against the full file:
 *   - 'usecs' is narrowed with an (unsigned int) cast before the division;
 *     if it is a wider type, a value whose low bits are all zero would
 *     make the divisor 0.
 *   - the log format uses "%d"; if 'new' is unsigned, "%u" is the matching
 *     conversion.
 */
795 adjust_quantum(unsigned int old, isc_time_t *start) {
796 unsigned int pps = dns_pps; /* packets per second */
797 unsigned int interval;
806 interval = 1000000 / pps; /* interval in usec */
809 usecs = isc_time_microdiff(&end, start);
812 * We were unable to measure the amount of time taken.
813 * Double the nodes deleted next time.
820 new = old * interval;
821 new /= (unsigned int)usecs;
/* Smooth: one part new estimate, three parts previous value. */
828 new = (new + old * 3) / 4;
830 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
831 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * free_rbtdb: release every resource owned by the database.
 *
 *   rbtdb  database to destroy.
 *   log    NOTE(review): presumably gates the "done free_rbtdb" debug
 *          message; the guarding line is not visible in this extract.
 *   event  event to reuse when the work has to be continued from the
 *          database's task, or NULL.
 *
 * Order of work, as far as the visible lines show:
 *   1. drop the current version (no future version may exist);
 *   2. unlink all nodes from the dead-node lists;
 *   3. destroy the main tree and then the NSEC3 tree with
 *      dns_rbt_destroy2().  When that returns ISC_R_QUOTA the quantum is
 *      re-tuned with adjust_quantum() and a DNS_EVENT_FREESTORAGE event is
 *      sent to rbtdb->task so that destruction can continue later;
 *   4. free the origin name, node locks, LRU lists, dead-node lists, heaps
 *      and statistics;
 *   5. destroy the locks, clear the magic numbers, detach the memory
 *      contexts and notify the on-destroy listeners.
 *
 * NOTE(review): this extract omits lines throughout (declarations, the
 * conditions guarding event allocation/free, and the early return after
 * isc_task_send()); confirm those against the complete file.
 */
837 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
839 isc_ondestroy_t ondest;
841 char buf[DNS_NAME_FORMATSIZE];
/* NOTE(review): likely debug-only code whose #if guard is not shown. */
844 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
845 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
847 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
848 REQUIRE(rbtdb->future_version == NULL);
/* Step 1: release the reference held on the current version and free it. */
850 if (rbtdb->current_version != NULL) {
853 isc_refcount_decrement(&rbtdb->current_version->references,
856 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
857 isc_refcount_destroy(&rbtdb->current_version->references);
858 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
859 sizeof(rbtdb_version_t));
863 * We assume the number of remaining dead nodes is reasonably small;
864 * the overhead of unlinking all nodes here should be negligible.
/* Step 2: empty every dead-node bucket. */
866 for (i = 0; i < rbtdb->node_lock_count; i++) {
869 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
870 while (node != NULL) {
871 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
872 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* A quantum of 0 is used when no task exists to continue the work later. */
877 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
/* Step 3a: destroy the main tree, possibly in several passes. */
879 if (rbtdb->tree != NULL) {
880 isc_time_now(&start);
881 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
882 if (result == ISC_R_QUOTA) {
883 INSIST(rbtdb->task != NULL);
884 if (rbtdb->quantum != 0)
885 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
888 event = isc_event_allocate(rbtdb->common.mctx,
890 DNS_EVENT_FREESTORAGE,
893 sizeof(isc_event_t));
896 isc_task_send(rbtdb->task, &event);
899 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
/* Step 3b: same procedure for the NSEC3 tree. */
902 if (rbtdb->nsec3 != NULL) {
903 isc_time_now(&start);
904 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
905 if (result == ISC_R_QUOTA) {
906 INSIST(rbtdb->task != NULL);
907 if (rbtdb->quantum != 0)
908 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
911 event = isc_event_allocate(rbtdb->common.mctx,
913 DNS_EVENT_FREESTORAGE,
916 sizeof(isc_event_t));
919 isc_task_send(rbtdb->task, &event);
922 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
/* Both trees are gone; the continuation event is no longer needed. */
926 isc_event_free(&event);
/* "<UNKNOWN>" is used when the origin name is not dynamically allocated. */
928 if (dns_name_dynamic(&rbtdb->common.origin))
929 dns_name_format(&rbtdb->common.origin, buf,
932 strcpy(buf, "<UNKNOWN>");
933 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
934 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
935 "done free_rbtdb(%s)", buf);
/* Step 4: origin name and per-bucket node locks. */
937 if (dns_name_dynamic(&rbtdb->common.origin))
938 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
939 for (i = 0; i < rbtdb->node_lock_count; i++) {
940 isc_refcount_destroy(&rbtdb->node_locks[i].references);
941 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
945 * Clean up LRU / re-signing order lists.
947 if (rbtdb->rdatasets != NULL) {
948 for (i = 0; i < rbtdb->node_lock_count; i++)
949 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
950 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
951 rbtdb->node_lock_count *
952 sizeof(rdatasetheaderlist_t));
955 * Clean up dead node buckets.
957 if (rbtdb->deadnodes != NULL) {
958 for (i = 0; i < rbtdb->node_lock_count; i++)
959 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
960 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
961 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
964 * Clean up heap objects.
/* The heap array is returned to hmctx, not to the main memory context. */
966 if (rbtdb->heaps != NULL) {
967 for (i = 0; i < rbtdb->node_lock_count; i++)
968 isc_heap_destroy(&rbtdb->heaps[i]);
969 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
970 rbtdb->node_lock_count * sizeof(isc_heap_t *));
973 if (rbtdb->rrsetstats != NULL)
974 dns_stats_detach(&rbtdb->rrsetstats);
976 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
977 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
978 isc_rwlock_destroy(&rbtdb->tree_lock);
979 isc_refcount_destroy(&rbtdb->references);
980 if (rbtdb->task != NULL)
981 isc_task_detach(&rbtdb->task);
/* Step 5: invalidate the object, then give back its own storage. */
983 RBTDB_DESTROYLOCK(&rbtdb->lock);
984 rbtdb->common.magic = 0;
985 rbtdb->common.impmagic = 0;
/* Copy 'ondest' out first: 'rbtdb' is freed before the notification. */
986 ondest = rbtdb->common.ondest;
987 isc_mem_detach(&rbtdb->hmctx);
988 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
989 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * maybe_free_rbtdb: start shutting the database down and free it at once
 * if nothing is using its nodes any more.
 *
 * The cached SOA and NS nodes are detached, every node-lock bucket is
 * flagged 'exiting' under its own lock, and buckets found to be inactive
 * are counted.  Under the database lock that count is subtracted from
 * rbtdb->active; when 'active' reaches zero the database is freed via
 * free_rbtdb().
 *
 * NOTE(review): the comparison applied to isc_refcount_current(), the
 * increment of 'inactive' and the 'want_free' guard around the final
 * logging/free block are not visible in this extract.
 */
993 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
994 isc_boolean_t want_free = ISC_FALSE;
996 unsigned int inactive = 0;
998 /* XXX check for open versions here */
1000 if (rbtdb->soanode != NULL)
1001 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1002 if (rbtdb->nsnode != NULL)
1003 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1006 * Even though there are no external direct references, there still
1007 * may be nodes in use.
1009 for (i = 0; i < rbtdb->node_lock_count; i++) {
1010 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1011 rbtdb->node_locks[i].exiting = ISC_TRUE;
1012 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1013 if (isc_refcount_current(&rbtdb->node_locks[i].references)
/* 'active' is protected by the database lock. */
1019 if (inactive != 0) {
1020 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1021 rbtdb->active -= inactive;
1022 if (rbtdb->active == 0)
1023 want_free = ISC_TRUE;
1024 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1026 char buf[DNS_NAME_FORMATSIZE];
1027 if (dns_name_dynamic(&rbtdb->common.origin))
1028 dns_name_format(&rbtdb->common.origin, buf,
1031 strcpy(buf, "<UNKNOWN>");
1032 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1033 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1034 "calling free_rbtdb(%s)", buf);
1035 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * detach: give up one reference on the database '*dbp'.
 *
 * The database is validated and its 'references' counter decremented,
 * with the resulting count delivered in 'refs'.  maybe_free_rbtdb() is
 * called to begin destruction.
 *
 * NOTE(review): the declaration of 'refs', the condition guarding
 * maybe_free_rbtdb() (presumably refs == 0) and any clearing of '*dbp'
 * are not visible in this extract.
 */
1041 detach(dns_db_t **dbp) {
1042 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1045 REQUIRE(VALID_RBTDB(rbtdb));
1047 isc_refcount_decrement(&rbtdb->references, &refs);
1050 maybe_free_rbtdb(rbtdb);
/*
 * currentversion: return a referenced handle to the database's current
 * version.
 *
 *   db        database to query.
 *   versionp  receives the current version, with its reference count
 *             already incremented on behalf of the caller.
 *
 * The pointer is read and the reference taken while holding the database
 * lock for reading, so the pair of operations is consistent with respect
 * to writers of 'current_version'.
 */
1056 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1057 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1058 rbtdb_version_t *version;
1061 REQUIRE(VALID_RBTDB(rbtdb));
1063 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1064 version = rbtdb->current_version;
1065 isc_refcount_increment(&version->references, &refs);
1066 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1068 *versionp = (dns_dbversion_t *)version;
/*
 * allocate_version: allocate and initialize a version object.
 *
 *   mctx        memory context to allocate from.
 *   serial      serial number stored in the new version.
 *   references  initial value of the version's reference counter.
 *   writer      stored in the version's 'writer' flag.
 *
 * The new version starts with commit_ok == ISC_FALSE, empty 'changed' and
 * 'resigned' lists, and an unlinked 'link'.  If the reference counter
 * cannot be initialized the memory is returned to 'mctx'.
 *
 * NOTE(review): the return statements (presumably NULL on either failure
 * and 'version' on success) are not visible in this extract.
 */
1071 static inline rbtdb_version_t *
1072 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1073 unsigned int references, isc_boolean_t writer)
1075 isc_result_t result;
1076 rbtdb_version_t *version;
1078 version = isc_mem_get(mctx, sizeof(*version));
1079 if (version == NULL)
1081 version->serial = serial;
1082 result = isc_refcount_init(&version->references, references);
1083 if (result != ISC_R_SUCCESS) {
1084 isc_mem_put(mctx, version, sizeof(*version));
1087 version->writer = writer;
1088 version->commit_ok = ISC_FALSE;
1089 ISC_LIST_INIT(version->changed_list);
1090 ISC_LIST_INIT(version->resigned_list);
1091 ISC_LINK_INIT(version, link);
/*
 * newversion: open a new version of the database for writing.
 *
 *   db        database to open a version on; must not already have a
 *             future version.
 *   versionp  must point to NULL on entry; receives the new version.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY if the version could not be
 * allocated.
 *
 * Under the database write lock a version is allocated with serial
 * 'next_serial' and one reference.  It inherits the 'secure' and
 * 'havensec3' state of the current version, and the NSEC3 parameters
 * when 'havensec3' is set (otherwise they are zeroed).  'next_serial' is
 * then advanced and the version installed as 'future_version'.
 *
 * NOTE(review): the final argument of allocate_version() and a few
 * assignments in the non-NSEC3 branch are not visible in this extract.
 */
1097 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1098 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1099 rbtdb_version_t *version;
1101 REQUIRE(VALID_RBTDB(rbtdb));
1102 REQUIRE(versionp != NULL && *versionp == NULL);
1103 REQUIRE(rbtdb->future_version == NULL);
1105 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1106 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1107 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1109 if (version != NULL) {
1110 version->rbtdb = rbtdb;
1111 version->commit_ok = ISC_TRUE;
1112 version->secure = rbtdb->current_version->secure;
1113 version->havensec3 = rbtdb->current_version->havensec3;
1114 if (version->havensec3) {
1115 version->flags = rbtdb->current_version->flags;
1116 version->iterations =
1117 rbtdb->current_version->iterations;
1118 version->hash = rbtdb->current_version->hash;
1119 version->salt_length =
1120 rbtdb->current_version->salt_length;
1121 memcpy(version->salt, rbtdb->current_version->salt,
1122 version->salt_length);
1125 version->iterations = 0;
1127 version->salt_length = 0;
1128 memset(version->salt, 0, sizeof(version->salt));
1130 rbtdb->next_serial++;
1131 rbtdb->future_version = version;
1133 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/* Allocation failure is reported only after the lock has been released. */
1135 if (version == NULL)
1136 return (ISC_R_NOMEMORY);
1138 *versionp = version;
1140 return (ISC_R_SUCCESS);
/*
 * attachversion: take an additional reference on an existing version.
 *
 *   db       database the version belongs to.
 *   source   version to reference; must be non-NULL and must belong to
 *            'db' (checked with INSIST).
 *   targetp  receives the same version pointer.
 */
1144 attachversion(dns_db_t *db, dns_dbversion_t *source,
1145 dns_dbversion_t **targetp)
1147 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1148 rbtdb_version_t *rbtversion = source;
1151 REQUIRE(VALID_RBTDB(rbtdb));
1152 INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
1154 isc_refcount_increment(&rbtversion->references, &refs);
1157 *targetp = rbtversion;
/*
 * add_changed: record that 'node' has been modified in writer version
 * 'version'.
 *
 *   rbtdb    database owning the version and node.
 *   version  version being written; must have 'writer' set.
 *   node     node that changed; gains one reference while it is on the
 *            version's changed list.
 *
 * The record is allocated before the database lock is taken.  With the
 * write lock held, a successfully allocated record is filled in (node,
 * dirty == ISC_FALSE) and appended to version->changed_list.
 *
 * NOTE(review): the line between the append and the
 * 'commit_ok = ISC_FALSE' assignment is not visible in this extract;
 * presumably that assignment belongs to the allocation-failure branch, so
 * that the version cannot be committed.  The return statement is also not
 * shown.
 */
1160 static rbtdb_changed_t *
1161 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1162 dns_rbtnode_t *node)
1164 rbtdb_changed_t *changed;
1168 * Caller must be holding the node lock if its reference must be
1169 * protected by the lock.
1172 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1174 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1176 REQUIRE(version->writer);
1178 if (changed != NULL) {
1179 dns_rbtnode_refincrement(node, &refs);
1181 changed->node = node;
1182 changed->dirty = ISC_FALSE;
1183 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1185 version->commit_ok = ISC_FALSE;
1187 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * free_acachearray: free an additional-cache control array attached to
 * 'header'.
 *
 *   mctx    memory context the array was allocated from.
 *   header  rdataset header whose slab data sizes the array.
 *   array   the array to free (final parameter; its declaration line is
 *           not visible in this extract).
 *
 * The element count is the 16-bit big-endian value in the first two
 * bytes that follow the header structure.  Every element is required to
 * have no entry and no callback argument before the array is released.
 *
 * NOTE(review): any early exit for a NULL 'array' is not visible here.
 */
1193 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1198 unsigned char *raw; /* RDATASLAB */
1201 * The caller must be holding the corresponding node lock.
/* The slab data starts immediately after the header structure. */
1207 raw = (unsigned char *)header + sizeof(*header);
1208 count = raw[0] * 256 + raw[1];
1211 * Sanity check: since an additional cache entry has a reference to
1212 * the original DB node (in the callback arg), there should be no
1213 * acache entries when the node can be freed.
1215 for (i = 0; i < count; i++)
1216 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1218 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * free_noqname: release a 'struct noqname' and everything it owns.
 *
 *   mctx     memory context the structure and its members came from.
 *   noqname  address of the pointer to the structure to free.
 *
 * The name is freed if it is dynamically allocated.  The 'neg' and
 * 'negsig' slabs are freed, if present, with their sizes computed by
 * dns_rdataslab_size(..., 0).  The structure itself is freed last.
 *
 * NOTE(review): whether '*noqname' is reset to NULL afterwards is not
 * visible in this extract.
 */
1222 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1224 if (dns_name_dynamic(&(*noqname)->name))
1225 dns_name_free(&(*noqname)->name, mctx);
1226 if ((*noqname)->neg != NULL)
1227 isc_mem_put(mctx, (*noqname)->neg,
1228 dns_rdataslab_size((*noqname)->neg, 0));
1229 if ((*noqname)->negsig != NULL)
1230 isc_mem_put(mctx, (*noqname)->negsig,
1231 dns_rdataslab_size((*noqname)->negsig, 0));
1232 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * init_rdataset: put a freshly obtained rdataset header into its initial
 * state.
 *
 * The header's list link is initialized as unlinked.  For class-IN cache
 * databases a trace line with the header address is written to stderr.
 *
 * NOTE(review): the trace output is presumably compiled only in a
 * debugging configuration, and further member initialization may exist;
 * neither is visible in this extract.
 */
1237 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1239 ISC_LINK_INIT(h, link);
1243 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1244 fprintf(stderr, "initialized header: %p\n", h);
/*
 * new_rdataset: allocate a bare rdataset header from 'mctx' and
 * initialize it with init_rdataset().
 *
 * For class-IN cache databases a trace line with the header address is
 * written to stderr.
 *
 * NOTE(review): the NULL check after isc_mem_get(), the guard around the
 * trace output and the return statement are not visible in this extract.
 */
1250 static inline rdatasetheader_t *
1251 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1253 rdatasetheader_t *h;
1255 h = isc_mem_get(mctx, sizeof(*h));
1260 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1261 fprintf(stderr, "allocated header: %p\n", h);
1263 init_rdataset(rbtdb, h);
/*
 * Destroy the rdataset header 'rdataset' and return its memory to 'mctx'.
 *
 * Before the memory is released the header is detached from the
 * structures that may still refer to it:
 *  - update_rrsetstats(..., ISC_FALSE) is called when the header exists
 *    and carries RDATASET_ATTR_STATCOUNT;
 *  - it is unlinked from the per-bucket rdatasets list (a linked header
 *    is only legal in a cache database);
 *  - it is removed from the per-bucket heap when heap_index is non-zero;
 *  - its 'noqname' and 'closest' structures and both additional-cache
 *    arrays are freed.
 * The bucket index used for the list and the heap is the lock number of
 * the owning node.
 */
1268 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1273 if (EXISTS(rdataset) &&
1274 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1275 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1278 idx = rdataset->node->locknum;
1279 if (ISC_LINK_LINKED(rdataset, link)) {
1280 INSIST(IS_CACHE(rbtdb));
1281 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1283 if (rdataset->heap_index != 0)
1284 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1285 rdataset->heap_index = 0;
1287 if (rdataset->noqname != NULL)
1288 free_noqname(mctx, &rdataset->noqname);
1289 if (rdataset->closest != NULL)
1290 free_noqname(mctx, &rdataset->closest);
1292 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1293 free_acachearray(mctx, rdataset, rdataset->additional_glue);
/*
 * A NONEXISTENT header is sized as the bare header structure; the other
 * size computation goes through dns_rdataslab_size().
 */
1295 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1296 size = sizeof(*rdataset);
1298 size = dns_rdataslab_size((unsigned char *)rdataset,
1300 isc_mem_put(mctx, rdataset, size);
/*
 * Flag every rdataset header at 'node' whose serial equals 'serial'
 * with RDATASET_ATTR_IGNORE.  Both the top-level headers (linked through
 * 'next') and the headers on each 'down' chain are examined.
 * make_dirty records whether at least one header was flagged.
 */
1304 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1305 rdatasetheader_t *header, *dcurrent;
1306 isc_boolean_t make_dirty = ISC_FALSE;
1309 * Caller must hold the node lock.
1313 * We set the IGNORE attribute on rdatasets with serial number
1314 * 'serial'. When the reference count goes to zero, these rdatasets
1315 * will be cleaned up; until that time, they will be ignored.
1317 for (header = node->data; header != NULL; header = header->next) {
1318 if (header->serial == serial) {
1319 header->attributes |= RDATASET_ATTR_IGNORE;
1320 make_dirty = ISC_TRUE;
1322 for (dcurrent = header->down;
1324 dcurrent = dcurrent->down) {
1325 if (dcurrent->serial == serial) {
1326 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1327 make_dirty = ISC_TRUE;
/*
 * Free every header on the 'down' chain of 'top' with free_rdataset().
 * The successor pointer is saved before each header is freed.
 */
1336 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1338 rdatasetheader_t *d, *down_next;
1340 for (d = top->down; d != NULL; d = down_next) {
1341 down_next = d->down;
1342 free_rdataset(rbtdb, mctx, d);
/*
 * Cache cleanup of 'node'.  For each top-level header the whole 'down'
 * chain is released through clean_stale_headers(); the header itself is
 * then unlinked from the node and freed when it is flagged NONEXISTENT
 * or STALE.  Unlinking patches either the previous top-level header or
 * node->data, depending on whether a previous header exists.
 */
1348 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1349 rdatasetheader_t *current, *top_prev, *top_next;
1350 isc_mem_t *mctx = rbtdb->common.mctx;
1353 * Caller must be holding the node lock.
1357 for (current = node->data; current != NULL; current = top_next) {
1358 top_next = current->next;
1359 clean_stale_headers(rbtdb, mctx, current);
1361 * If current is nonexistent or stale, we can clean it up.
1363 if ((current->attributes &
1364 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1365 if (top_prev != NULL)
1366 top_prev->next = current->next;
1368 node->data = current->next;
1369 free_rdataset(rbtdb, mctx, current);
/*
 * Zone cleanup of 'node' relative to 'least_serial' (must be non-zero).
 *
 * For each top-level header the visible steps are:
 *  1. walk the 'down' chain and free entries whose serial equals the
 *     serial of their parent (the second half of that test is not
 *     visible in this listing), re-linking parent and successor;
 *  2. if the top header itself is IGNOREd, free it; when it has a 'down'
 *     entry, that entry is promoted to the top level in its place;
 *  3. find the first 'down' entry with a serial below 'least_serial' and
 *     free it together with everything below it;
 *  4. set still_dirty when the surviving top header still has a 'down'
 *     chain, and free a top header that is NONEXISTENT.
 * NOTE(review): step 4 presumably frees a NONEXISTENT header only when
 * it has no 'down' chain, and still_dirty presumably feeds the dirty
 * flag of the node -- neither is visible here; confirm.
 */
1377 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1378 rbtdb_serial_t least_serial)
1380 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1381 rdatasetheader_t *top_prev, *top_next;
1382 isc_mem_t *mctx = rbtdb->common.mctx;
1383 isc_boolean_t still_dirty = ISC_FALSE;
1386 * Caller must be holding the node lock.
1388 REQUIRE(least_serial != 0);
1391 for (current = node->data; current != NULL; current = top_next) {
1392 top_next = current->next;
1395 * First, we clean up any instances of multiple rdatasets
1396 * with the same serial number, or that have the IGNORE
1400 for (dcurrent = current->down;
1402 dcurrent = down_next) {
1403 down_next = dcurrent->down;
1404 INSIST(dcurrent->serial <= dparent->serial);
1405 if (dcurrent->serial == dparent->serial ||
1407 if (down_next != NULL)
1408 down_next->next = dparent;
1409 dparent->down = down_next;
1410 free_rdataset(rbtdb, mctx, dcurrent);
1416 * We've now eliminated all IGNORE datasets with the possible
1417 * exception of current, which we now check.
1419 if (IGNORE(current)) {
1420 down_next = current->down;
1421 if (down_next == NULL) {
1422 if (top_prev != NULL)
1423 top_prev->next = current->next;
1425 node->data = current->next;
1426 free_rdataset(rbtdb, mctx, current);
1428 * current no longer exists, so we can
1429 * just continue with the loop.
1434 * Pull up current->down, making it the new
1437 if (top_prev != NULL)
1438 top_prev->next = down_next;
1440 node->data = down_next;
1441 down_next->next = top_next;
1442 free_rdataset(rbtdb, mctx, current);
1443 current = down_next;
1448 * We now try to find the first down node less than the
1452 for (dcurrent = current->down;
1454 dcurrent = down_next) {
1455 down_next = dcurrent->down;
1456 if (dcurrent->serial < least_serial)
1462 * If there is a such an rdataset, delete it and any older
1465 if (dcurrent != NULL) {
1467 down_next = dcurrent->down;
1468 INSIST(dcurrent->serial <= least_serial);
1469 free_rdataset(rbtdb, mctx, dcurrent);
1470 dcurrent = down_next;
1471 } while (dcurrent != NULL);
1472 dparent->down = NULL;
1476 * Note. The serial number of 'current' might be less than
1477 * least_serial too, but we cannot delete it because it is
1478 * the most recent version, unless it is a NONEXISTENT
1481 if (current->down != NULL) {
1482 still_dirty = ISC_TRUE;
1486 * If this is a NONEXISTENT rdataset, we can delete it.
1488 if (NONEXISTENT(current)) {
1489 if (top_prev != NULL)
1490 top_prev->next = current->next;
1492 node->data = current->next;
1493 free_rdataset(rbtdb, mctx, current);
1503 * Clean up dead nodes. These are nodes which have no references, and
1504 * have no data. They are dead but we could not or chose not to delete
1505 * them when we deleted all the data at that node because we did not want
1506 * to wait for the tree write lock.
1508 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * Take nodes off the dead-node list of lock bucket 'bucketnum' and
 * delete them with dns_rbt_deletenode().  The loop runs while the list
 * is non-empty and 'count' (initially 10) is positive.  Each node must
 * have a zero reference count and no data.  A failed deletion is logged
 * as a warning; the loop then continues with the new list head.
 * NOTE(review): the deletion targets either rbtdb->nsec3 or rbtdb->tree;
 * the selecting condition and the decrement of 'count' are not visible
 * in this listing; confirm.
 */
1511 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1512 dns_rbtnode_t *node;
1513 isc_result_t result;
1514 int count = 10; /* XXXJT: should be adjustable */
1516 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1517 while (node != NULL && count > 0) {
1518 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1521 * Since we're holding a tree write lock, it should be
1522 * impossible for this node to be referenced by others.
1524 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1525 node->data == NULL);
1527 INSIST(!ISC_LINK_LINKED(node, deadlink));
1529 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1532 result = dns_rbt_deletenode(rbtdb->tree, node,
1534 if (result != ISC_R_SUCCESS)
1535 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1536 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1537 "cleanup_dead_nodes: "
1538 "dns_rbt_deletenode: %s",
1539 isc_result_totext(result));
1540 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1546 * Caller must be holding the node lock.
/*
 * Take a reference on 'node'.  When the node count becomes 1, i.e. this
 * is the first reference, the reference counter of the lock bucket the
 * node belongs to is incremented as well.  The node must not be linked
 * on a dead-node list.
 */
1549 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1550 unsigned int lockrefs, noderefs;
1551 isc_refcount_t *lockref;
1553 INSIST(!ISC_LINK_LINKED(node, deadlink));
1554 dns_rbtnode_refincrement0(node, &noderefs);
1555 if (noderefs == 1) { /* this is the first reference to the node */
1556 lockref = &rbtdb->node_locks[node->locknum].references;
1557 isc_refcount_increment0(lockref, &lockrefs);
1558 INSIST(lockrefs != 0);
1560 INSIST(noderefs != 0);
1564 * This function is assumed to be called when a node is newly referenced
1565 * and can be in the deadnode list. In that case the node must be retrieved
1566 * from the list because it is going to be used. In addition, if the caller
1567 * happens to hold a write lock on the tree, it's a good chance to purge dead
1569 * Note: while a new reference is gained in multiple places, there are only very
1570 * few cases where the node can be in the deadnode list (only empty nodes can
1571 * have been added to the list).
/*
 * Take a new reference on 'node', first removing it from the dead-node
 * list of its lock bucket if it is linked there.
 *
 * 'treelocktype' is the type of tree lock held by the caller.  The node
 * lock is first taken for reading.  It is released and re-acquired for
 * writing only when the node is linked on the dead-node list, or when
 * the caller holds the tree write lock and the bucket has dead nodes
 * (maybe_cleanup).  The link test is repeated after the write lock is
 * obtained because the read lock was dropped in between.
 * NOTE(review): cleanup_dead_nodes() is presumably called only when
 * maybe_cleanup is set -- the guarding line is not visible here; confirm.
 */
1574 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1575 isc_rwlocktype_t treelocktype)
1577 isc_rwlocktype_t locktype = isc_rwlocktype_read;
1578 nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
1579 isc_boolean_t maybe_cleanup = ISC_FALSE;
1583 NODE_STRONGLOCK(nodelock);
1584 NODE_WEAKLOCK(nodelock, locktype);
1587 * Check if we can possibly cleanup the dead node. If so, upgrade
1588 * the node lock below to perform the cleanup.
1590 if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1591 treelocktype == isc_rwlocktype_write) {
1592 maybe_cleanup = ISC_TRUE;
1595 if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
1597 * Upgrade the lock and test if we still need to unlink.
1599 NODE_WEAKUNLOCK(nodelock, locktype);
1600 locktype = isc_rwlocktype_write;
1602 NODE_WEAKLOCK(nodelock, locktype);
1603 if (ISC_LINK_LINKED(node, deadlink))
1604 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1607 cleanup_dead_nodes(rbtdb, node->locknum);
/* The reference is taken while the node lock is still held. */
1610 new_reference(rbtdb, node);
1612 NODE_WEAKUNLOCK(nodelock, locktype);
1613 NODE_STRONGUNLOCK(nodelock);
1617 * Caller must be holding the node lock; either the "strong", read or write
1618 * lock. Note that the lock must be held even when node references are
1619 * atomically modified; in that case the decrement operation itself does not
1620 * have to be protected, but we must avoid a race condition where multiple
1621 * threads are decreasing the reference to zero simultaneously and at least
1622 * one of them is going to free the node.
1623 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * Release one reference on 'node' and, when possible, clean up and
 * delete the node.
 *
 * Parameters, as used below:
 *  least_serial  0 means the caller does not know it; for a zone
 *                database it is then read from rbtdb->least_serial
 *                under the database read lock.
 *  nlock         type of node lock the caller holds; a read lock is
 *                swapped for a write lock on the slow path and
 *                downgraded again before returning.
 *  tlock         type of tree lock the caller holds; unless it is
 *                already a write lock, a non-blocking upgrade (from
 *                read) or trylock (from none) is attempted.
 *  pruning       when true, no further pruning event is dispatched.
 *
 * Fast path: a node that is not dirty and still has data or a subtree
 * only has its counters decremented.
 *
 * Slow path: the node data is cleaned (clean_cache_node() or
 * clean_zone_node()).  If the node ends up with neither data nor a
 * subtree, it is deleted from the tree, handed to a pruning event, or
 * queued on the dead-node list of its bucket.
 *
 * The value returned at the end is no_reference, which is cleared when
 * a pruning event takes over a new reference to the node.
 */
1626 static isc_boolean_t
1627 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1628 rbtdb_serial_t least_serial,
1629 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1630 isc_boolean_t pruning)
1632 isc_result_t result;
1633 isc_boolean_t write_locked;
1634 rbtdb_nodelock_t *nodelock;
1635 unsigned int refs, nrefs;
1636 int bucket = node->locknum;
1637 isc_boolean_t no_reference = ISC_TRUE;
1639 nodelock = &rbtdb->node_locks[bucket];
1641 /* Handle easy and typical case first. */
1642 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1643 dns_rbtnode_refdecrement(node, &nrefs);
1644 INSIST((int)nrefs >= 0);
1646 isc_refcount_decrement(&nodelock->references, &refs);
1647 INSIST((int)refs >= 0);
1649 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1652 /* Upgrade the lock? */
1653 if (nlock == isc_rwlocktype_read) {
1654 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1655 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1658 dns_rbtnode_refdecrement(node, &nrefs);
1659 INSIST((int)nrefs >= 0);
1661 /* Restore the lock? */
1662 if (nlock == isc_rwlocktype_read)
1663 NODE_WEAKDOWNGRADE(&nodelock->lock);
1668 if (IS_CACHE(rbtdb))
1669 clean_cache_node(rbtdb, node);
1671 if (least_serial == 0) {
1673 * Caller doesn't know the least serial.
1676 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1677 least_serial = rbtdb->least_serial;
1678 RBTDB_UNLOCK(&rbtdb->lock,
1679 isc_rwlocktype_read);
1681 clean_zone_node(rbtdb, node, least_serial);
1686 * Attempt to switch to a write lock on the tree. If this fails,
1687 * we will add this node to a linked list of nodes in this locking
1688 * bucket which we will free later.
1690 if (tlock != isc_rwlocktype_write) {
1692 * Locking hierarchy notwithstanding, we don't need to free
1693 * the node lock before acquiring the tree write lock because
1694 * we only do a trylock.
1696 if (tlock == isc_rwlocktype_read)
1697 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1699 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1700 isc_rwlocktype_write);
1701 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1702 result == ISC_R_LOCKBUSY);
1704 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1706 write_locked = ISC_TRUE;
1708 isc_refcount_decrement(&nodelock->references, &refs);
1709 INSIST((int)refs >= 0);
1712 * XXXDCL should this only be done for cache zones?
1714 if (node->data != NULL || node->down != NULL)
1719 * We can now delete the node.
1723 * If this node is the only one in the level it's in, deleting
1724 * this node may recursively make its parent the only node in
1725 * the parent level; if so, and if no one is currently using
1726 * the parent node, this is almost the only opportunity to
1727 * clean it up. But the recursive cleanup is not that trivial
1728 * since the child and parent may be in different lock buckets,
1729 * which would cause a lock order reversal problem. To avoid
1730 * the trouble, we'll dispatch a separate event for batch
1731 * cleaning. We need to check whether we're deleting the node
1732 * as a result of pruning to avoid infinite dispatching.
1733 * Note: pruning happens only when a task has been set for the
1734 * rbtdb. If the user of the rbtdb chooses not to set a task,
1735 * it's their responsibility to purge stale leaves (e.g. by
1736 * periodic walk-through).
1738 if (!pruning && node->parent != NULL &&
1739 node->parent->down == node && node->left == NULL &&
1740 node->right == NULL && rbtdb->task != NULL) {
1744 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1747 sizeof(isc_event_t));
1749 new_reference(rbtdb, node);
1751 attach((dns_db_t *)rbtdb, &db);
1753 isc_task_send(rbtdb->task, &ev);
1754 no_reference = ISC_FALSE;
1757 * XXX: this is a weird situation. We could
1758 * ignore this error case, but then the stale
1759 * node will unlikely be purged except via a
1760 * rare condition such as manual cleanup. So
1761 * we queue it in the deadnodes list, hoping
1762 * the memory shortage is temporary and the node
1763 * will be deleted later.
1765 isc_log_write(dns_lctx,
1766 DNS_LOGCATEGORY_DATABASE,
1767 DNS_LOGMODULE_CACHE,
1769 "decrement_reference: failed to "
1770 "allocate pruning event");
1771 INSIST(node->data == NULL);
1772 INSIST(!ISC_LINK_LINKED(node, deadlink));
1773 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1777 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1778 char printname[DNS_NAME_FORMATSIZE];
1780 isc_log_write(dns_lctx,
1781 DNS_LOGCATEGORY_DATABASE,
1782 DNS_LOGMODULE_CACHE,
1784 "decrement_reference: "
1785 "delete from rbt: %p %s",
1787 dns_rbt_formatnodename(node,
1789 sizeof(printname)));
1792 INSIST(!ISC_LINK_LINKED(node, deadlink));
1794 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1797 result = dns_rbt_deletenode(rbtdb->tree, node,
1799 if (result != ISC_R_SUCCESS) {
1800 isc_log_write(dns_lctx,
1801 DNS_LOGCATEGORY_DATABASE,
1802 DNS_LOGMODULE_CACHE,
1804 "decrement_reference: "
1805 "dns_rbt_deletenode: %s",
1806 isc_result_totext(result));
1810 INSIST(node->data == NULL);
1811 INSIST(!ISC_LINK_LINKED(node, deadlink));
1812 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1816 /* Restore the lock? */
1817 if (nlock == isc_rwlocktype_read)
1818 NODE_WEAKDOWNGRADE(&nodelock->lock);
1821 * Relock a read lock, or unlock the write lock if no lock was held.
1823 if (tlock == isc_rwlocktype_none)
1825 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1827 if (tlock == isc_rwlocktype_read)
1829 isc_rwlock_downgrade(&rbtdb->tree_lock);
1831 return (no_reference);
1835 * Prune the tree by recursively cleaning-up single leaves. In the worst
1836 * case, the number of iterations is the number of tree levels, which is at
1837 * most the maximum number of domain name labels, i.e., 127. In practice, this
1838 * should be much smaller (only a few times), and even the worst case would be
1839 * acceptable for a single event.
/*
 * Task event handler for pruning.  The database is taken from
 * event->ev_sender and the starting node from event->ev_arg; the event
 * is freed immediately.
 *
 * With the tree write lock and the write lock of the current bucket
 * held, the reference on the node is released through
 * decrement_reference() with pruning set to ISC_TRUE.  When the parent
 * is left without 'down' children, the bucket lock is switched if the
 * parent lives in a different bucket, the parent is taken off the
 * dead-node list if needed, and a reference is taken on it so that the
 * loop can release it in turn.  detach() is called on the database at
 * the end.
 */
1842 prune_tree(isc_task_t *task, isc_event_t *event) {
1843 dns_rbtdb_t *rbtdb = event->ev_sender;
1844 dns_rbtnode_t *node = event->ev_arg;
1845 dns_rbtnode_t *parent;
1846 unsigned int locknum;
1850 isc_event_free(&event);
1852 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1853 locknum = node->locknum;
1854 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/* The parent pointer is read before the node may be deleted below. */
1856 parent = node->parent;
1857 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1858 isc_rwlocktype_write, ISC_TRUE);
1860 if (parent != NULL && parent->down == NULL) {
1862 * node was the only down child of the parent and has
1863 * just been removed. We'll then need to examine the
1864 * parent. Keep the lock if possible; otherwise,
1865 * release the old lock and acquire one for the parent.
1867 if (parent->locknum != locknum) {
1868 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1869 isc_rwlocktype_write);
1870 locknum = parent->locknum;
1871 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1872 isc_rwlocktype_write);
1876 * We need to gain a reference to the node before
1877 * decrementing it in the next iteration. In addition,
1878 * if the node is in the dead-nodes list, extract it
1879 * from the list beforehand as we do in
1880 * reactivate_node().
1882 if (ISC_LINK_LINKED(parent, deadlink))
1883 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1885 new_reference(rbtdb, parent);
1890 } while (node != NULL);
1891 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1892 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1894 detach((dns_db_t **)&rbtdb);
/*
 * Make 'version' the least open version: its serial becomes
 * rbtdb->least_serial, and its changed_list is handed to the caller
 * through '*cleanup_list', leaving the list of the version empty.
 */
1898 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1899 rbtdb_changedlist_t *cleanup_list)
1902 * Caller must be holding the database lock.
1905 rbtdb->least_serial = version->serial;
1906 *cleanup_list = version->changed_list;
1907 ISC_LIST_INIT(version->changed_list);
/*
 * Move every change record that is not dirty from
 * version->changed_list to '*cleanup_list'.  Dirty records stay on the
 * version.  The successor is fetched before a record is unlinked so
 * that the walk survives the move.
 */
1911 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1912 rbtdb_changed_t *changed, *next_changed;
1915 * If the changed record is dirty, then
1916 * an update created multiple versions of
1917 * a given rdataset. We keep this list
1918 * until we're the least open version, at
1919 * which point it's safe to get rid of any
1922 * If the changed record isn't dirty, then
1923 * we don't need it anymore since we're
1924 * committing and not rolling back.
1926 * The caller must be holding the database lock.
1928 for (changed = HEAD(version->changed_list);
1930 changed = next_changed) {
1931 next_changed = NEXT(changed, link);
1932 if (!changed->dirty) {
1933 UNLINK(version->changed_list,
1935 APPEND(*cleanup_list,
/*
 * Recompute version->secure from the records found at 'origin' in
 * 'version' of 'db'.
 *
 * Visible steps:
 *  - the DNSKEY rdataset is searched for a zone key (haszonekey);
 *  - the NSEC rdataset and its signatures are looked up; when the
 *    signature set is associated, the first NSEC record is tested with
 *    dns_nsec_typepresent() to obtain hasoptbit;
 *  - setnsec3parameters() is called, which sets version->havensec3 and
 *    may set nsec3createflag;
 *  - the result is dns_db_secure when havensec3 is set or hasnsec is set
 *    without hasoptbit, dns_db_partial when hasoptbit or
 *    nsec3createflag is set, and dns_db_insecure otherwise.
 * NOTE(review): the early assignment of dns_db_insecure after the DNSKEY
 * scan is presumably taken only when no zone key was found, and hasnsec
 * is presumably set when signed NSEC data exists -- those lines are not
 * visible in this listing; confirm.
 */
1942 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1943 dns_rdataset_t keyset;
1944 dns_rdataset_t nsecset, signsecset;
1945 dns_rdata_t rdata = DNS_RDATA_INIT;
1946 isc_boolean_t haszonekey = ISC_FALSE;
1947 isc_boolean_t hasnsec = ISC_FALSE;
1948 isc_boolean_t hasoptbit = ISC_FALSE;
1949 isc_boolean_t nsec3createflag = ISC_FALSE;
1950 isc_result_t result;
1952 dns_rdataset_init(&keyset);
1953 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1954 0, 0, &keyset, NULL);
1955 if (result == ISC_R_SUCCESS) {
1956 result = dns_rdataset_first(&keyset);
1957 while (result == ISC_R_SUCCESS) {
1958 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1959 dns_rdataset_current(&keyset, &keyrdata);
1960 if (dns_zonekey_iszonekey(&keyrdata)) {
1961 haszonekey = ISC_TRUE;
1964 result = dns_rdataset_next(&keyset);
1966 dns_rdataset_disassociate(&keyset);
1969 version->secure = dns_db_insecure;
1970 version->havensec3 = ISC_FALSE;
1974 dns_rdataset_init(&nsecset);
1975 dns_rdataset_init(&signsecset);
1976 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1977 0, 0, &nsecset, &signsecset);
1978 if (result == ISC_R_SUCCESS) {
1979 if (dns_rdataset_isassociated(&signsecset)) {
1981 result = dns_rdataset_first(&nsecset);
1982 if (result == ISC_R_SUCCESS) {
1983 dns_rdataset_current(&nsecset, &rdata);
1984 hasoptbit = dns_nsec_typepresent(&rdata,
1987 dns_rdataset_disassociate(&signsecset);
1989 dns_rdataset_disassociate(&nsecset);
1992 setnsec3parameters(db, version, &nsec3createflag);
1995 * Do we have a valid NSEC/NSEC3 chain?
1997 if (version->havensec3 || (hasnsec && !hasoptbit))
1998 version->secure = dns_db_secure;
2000 * Do we have a NSEC/NSEC3 chain under creation?
2002 else if (hasoptbit || nsec3createflag)
2003 version->secure = dns_db_partial;
2005 version->secure = dns_db_insecure;
2009 * Walk the origin node looking for NSEC3PARAM records.
2010 * Cache the nsec3 parameters.
/*
 * Scan the rdataset headers at the zone origin node for NSEC3PARAM
 * data visible in 'version'.
 *
 * version->havensec3 is cleared first.  The tree read lock and the read
 * lock of the origin node are held for the whole scan.  For each
 * top-level header the 'down' chain is followed using the serial test
 * against version->serial; a header of type NSEC3PARAM then has each of
 * its records converted with dns_rdata_fromregion() and
 * dns_rdata_tostruct().
 *
 * '*nsec3createflag' is set to ISC_TRUE when a record with
 * DNS_NSEC3FLAG_CREATE is seen.  For an accepted record the salt, hash,
 * salt length, iterations and flags are copied into 'version' and
 * version->havensec3 is set.
 * NOTE(review): records with an unsupported hash or with flags other
 * than OPTOUT are presumably skipped -- the statements following those
 * tests are not visible in this listing; confirm.
 */
2013 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
2014 isc_boolean_t *nsec3createflag)
2016 dns_rbtnode_t *node;
2017 dns_rdata_nsec3param_t nsec3param;
2018 dns_rdata_t rdata = DNS_RDATA_INIT;
2019 isc_region_t region;
2020 isc_result_t result;
2021 rdatasetheader_t *header, *header_next;
2022 unsigned char *raw; /* RDATASLAB */
2023 unsigned int count, length;
2024 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2026 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2027 version->havensec3 = ISC_FALSE;
2028 node = rbtdb->origin_node;
2029 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2030 isc_rwlocktype_read);
2031 for (header = node->data;
2033 header = header_next) {
2034 header_next = header->next;
2036 if (header->serial <= version->serial &&
2038 if (NONEXISTENT(header))
2042 header = header->down;
2043 } while (header != NULL);
2045 if (header != NULL &&
2046 header->type == dns_rdatatype_nsec3param) {
2048 * Find A NSEC3PARAM with a supported algorithm.
2050 raw = (unsigned char *)header + sizeof(*header);
2051 count = raw[0] * 256 + raw[1]; /* count */
2052 #if DNS_RDATASET_FIXED
2053 raw += count * 4 + 2;
2057 while (count-- > 0U) {
2058 length = raw[0] * 256 + raw[1];
2059 #if DNS_RDATASET_FIXED
2065 region.length = length;
2067 dns_rdata_fromregion(&rdata,
2068 rbtdb->common.rdclass,
2069 dns_rdatatype_nsec3param,
2071 result = dns_rdata_tostruct(&rdata,
2074 INSIST(result == ISC_R_SUCCESS);
2075 dns_rdata_reset(&rdata);
2077 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2078 !dns_nsec3_supportedhash(nsec3param.hash))
2081 #ifdef RFC5155_STRICT
2082 if (nsec3param.flags != 0)
2085 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2087 *nsec3createflag = ISC_TRUE;
2088 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2093 memcpy(version->salt, nsec3param.salt,
2094 nsec3param.salt_length);
2095 version->hash = nsec3param.hash;
2096 version->salt_length = nsec3param.salt_length;
2097 version->iterations = nsec3param.iterations;
2098 version->flags = nsec3param.flags;
2099 version->havensec3 = ISC_TRUE;
2101 * Look for a better algorithm than the
2102 * unknown test algorithm.
2104 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2110 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2111 isc_rwlocktype_read);
2112 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Task event handler; the database is taken from event->ev_arg.
 *
 * Under the tree write lock, each lock bucket in turn is write-locked
 * and passed to cleanup_dead_nodes(), and the dead-node list of the
 * bucket is then checked for remaining entries.  Afterwards the event
 * is either sent to 'task' again or freed, in which case the database
 * reference count is decremented.
 * NOTE(review): re-sending presumably happens when some bucket still
 * had dead nodes ('again'), and maybe_free_rbtdb() is presumably called
 * only when the last reference was dropped -- the deciding lines are not
 * visible in this listing; confirm.
 */
2116 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2117 dns_rbtdb_t *rbtdb = event->ev_arg;
2118 isc_boolean_t again = ISC_FALSE;
2119 unsigned int locknum;
2122 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2123 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2124 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2125 isc_rwlocktype_write);
2126 cleanup_dead_nodes(rbtdb, locknum);
2127 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2129 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2130 isc_rwlocktype_write);
2132 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2134 isc_task_send(task, &event);
2136 isc_event_free(&event);
2137 isc_refcount_decrement(&rbtdb->references, &refs);
2139 maybe_free_rbtdb(rbtdb);
/*
 * Close the version '*versionp' of 'db'.
 *
 * One reference on the version is dropped.  If references remain,
 * nothing else is done beyond checking that the version is not the
 * writer.  Otherwise, under the database write lock:
 *  - a writer version becomes the current version of the database: the
 *    previous current version loses the reference held by the database
 *    and the new version gains one and is prepended to the list of open
 *    versions.  The other writer path is a rollback: the changed and
 *    resigned lists are taken over for cleanup and the version itself
 *    is scheduled to be freed;
 *  - a non-writer version other than the current one is scheduled to be
 *    freed; its pending change records are either taken over (when it
 *    was the least open version) or appended to the version with the
 *    least greater serial.
 * After the lock is released:
 *  - the secure status of a zone is refreshed after a writer commit;
 *  - the version scheduled for cleanup is freed;
 *  - each header on the resigned list has its node reference released;
 *  - each change record on the cleanup list has its node reference
 *    released (after rollback_node() on the rollback path) and is freed.
 *    When a task is set, a DNS_EVENT_RBTDEADNODES event is allocated;
 *    if no event is available the tree write lock is taken for the
 *    duration of the loop instead.
 * NOTE(review): 'commit' presumably selects between the commit and
 * rollback writer paths -- the test is not visible in this listing;
 * confirm.
 */
2144 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2145 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2146 rbtdb_version_t *version, *cleanup_version, *least_greater;
2147 isc_boolean_t rollback = ISC_FALSE;
2148 rbtdb_changedlist_t cleanup_list;
2149 rdatasetheaderlist_t resigned_list;
2150 rbtdb_changed_t *changed, *next_changed;
2151 rbtdb_serial_t serial, least_serial;
2152 dns_rbtnode_t *rbtnode;
2154 rdatasetheader_t *header;
2155 isc_boolean_t writer;
2157 REQUIRE(VALID_RBTDB(rbtdb));
2158 version = (rbtdb_version_t *)*versionp;
2159 INSIST(version->rbtdb == rbtdb);
2161 cleanup_version = NULL;
2162 ISC_LIST_INIT(cleanup_list);
2163 ISC_LIST_INIT(resigned_list);
2165 isc_refcount_decrement(&version->references, &refs);
2166 if (refs > 0) { /* typical and easy case first */
2168 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2169 INSIST(!version->writer);
2170 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
/* From here on this was the last reference to the version. */
2175 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2176 serial = version->serial;
2177 writer = version->writer;
2178 if (version->writer) {
2181 rbtdb_version_t *cur_version;
2183 INSIST(version->commit_ok);
2184 INSIST(version == rbtdb->future_version);
2186 * The current version is going to be replaced.
2187 * Release the (likely last) reference to it from the
2188 * DB itself and unlink it from the open list.
2190 cur_version = rbtdb->current_version;
2191 isc_refcount_decrement(&cur_version->references,
2194 if (cur_version->serial == rbtdb->least_serial)
2195 INSIST(EMPTY(cur_version->changed_list));
2196 UNLINK(rbtdb->open_versions,
2199 if (EMPTY(rbtdb->open_versions)) {
2201 * We're going to become the least open
2204 make_least_version(rbtdb, version,
2208 * Some other open version is the
2209 * least version. We can't cleanup
2210 * records that were changed in this
2211 * version because the older versions
2212 * may still be in use by an open
2215 * We can, however, discard the
2216 * changed records for things that
2217 * we've added that didn't exist in
2220 cleanup_nondirty(version, &cleanup_list);
2223 * If the (soon to be former) current version
2224 * isn't being used by anyone, we can clean
2228 cleanup_version = cur_version;
2229 APPENDLIST(version->changed_list,
2230 cleanup_version->changed_list,
2234 * Become the current version.
2236 version->writer = ISC_FALSE;
2237 rbtdb->current_version = version;
2238 rbtdb->current_serial = version->serial;
2239 rbtdb->future_version = NULL;
2242 * Keep the current version in the open list, and
2243 * gain a reference for the DB itself (see the DB
2244 * creation function below). This must be the only
2245 * case where we need to increment the counter from
2246 * zero and need to use isc_refcount_increment0().
2248 isc_refcount_increment0(&version->references,
2250 INSIST(cur_ref == 1);
2251 PREPEND(rbtdb->open_versions,
2252 rbtdb->current_version, link);
2253 resigned_list = version->resigned_list;
2254 ISC_LIST_INIT(version->resigned_list);
2257 * We're rolling back this transaction.
2259 cleanup_list = version->changed_list;
2260 ISC_LIST_INIT(version->changed_list);
2261 resigned_list = version->resigned_list;
2262 ISC_LIST_INIT(version->resigned_list);
2263 rollback = ISC_TRUE;
2264 cleanup_version = version;
2265 rbtdb->future_version = NULL;
2268 if (version != rbtdb->current_version) {
2270 * There are no external or internal references
2271 * to this version and it can be cleaned up.
2273 cleanup_version = version;
2276 * Find the version with the least serial
2277 * number greater than ours.
2279 least_greater = PREV(version, link);
2280 if (least_greater == NULL)
2281 least_greater = rbtdb->current_version;
2283 INSIST(version->serial < least_greater->serial);
2285 * Is this the least open version?
2287 if (version->serial == rbtdb->least_serial) {
2289 * Yes. Install the new least open
2292 make_least_version(rbtdb,
2297 * Add any unexecuted cleanups to
2298 * those of the least greater version.
2300 APPENDLIST(least_greater->changed_list,
2301 version->changed_list,
2304 } else if (version->serial == rbtdb->least_serial)
2305 INSIST(EMPTY(version->changed_list));
2306 UNLINK(rbtdb->open_versions, version, link);
/* least_serial is sampled before the database lock is released. */
2308 least_serial = rbtdb->least_serial;
2309 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2312 * Update the zone's secure status.
2314 if (writer && commit && !IS_CACHE(rbtdb))
2315 iszonesecure(db, version, rbtdb->origin_node);
2317 if (cleanup_version != NULL) {
2318 INSIST(EMPTY(cleanup_version->changed_list));
2319 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2320 sizeof(*cleanup_version));
2324 * Commit/rollback re-signed headers.
2326 for (header = HEAD(resigned_list);
2328 header = HEAD(resigned_list)) {
2331 ISC_LIST_UNLINK(resigned_list, header, link);
2333 lock = &rbtdb->node_locks[header->node->locknum].lock;
2334 NODE_LOCK(lock, isc_rwlocktype_write);
2336 resign_insert(rbtdb, header->node->locknum, header);
2337 decrement_reference(rbtdb, header->node, least_serial,
2338 isc_rwlocktype_write, isc_rwlocktype_none,
2340 NODE_UNLOCK(lock, isc_rwlocktype_write);
2343 if (!EMPTY(cleanup_list)) {
2344 isc_event_t *event = NULL;
2345 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2347 if (rbtdb->task != NULL)
2348 event = isc_event_allocate(rbtdb->common.mctx, NULL,
2349 DNS_EVENT_RBTDEADNODES,
2350 cleanup_dead_nodes_callback,
2351 rbtdb, sizeof(isc_event_t));
2352 if (event == NULL) {
2354 * We acquire a tree write lock here in order to make
2355 * sure that stale nodes will be removed in
2356 * decrement_reference(). If we didn't have the lock,
2357 * those nodes could miss the chance to be removed
2358 * until the server stops. The write lock is
2359 * expensive, but this event should be rare enough
2360 * to justify the cost.
2362 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2363 tlock = isc_rwlocktype_write;
2366 for (changed = HEAD(cleanup_list);
2368 changed = next_changed) {
2371 next_changed = NEXT(changed, link);
2372 rbtnode = changed->node;
2373 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2375 NODE_LOCK(lock, isc_rwlocktype_write);
2377 * This is a good opportunity to purge any dead nodes,
2381 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2384 rollback_node(rbtnode, serial);
2385 decrement_reference(rbtdb, rbtnode, least_serial,
2386 isc_rwlocktype_write, tlock,
2389 NODE_UNLOCK(lock, isc_rwlocktype_write);
2391 isc_mem_put(rbtdb->common.mctx, changed,
2394 if (event != NULL) {
2395 isc_refcount_increment(&rbtdb->references, NULL);
2396 isc_task_send(rbtdb->task, &event);
2398 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2406 * Add the necessary magic for the wildcard name 'name'
2407 * to be found in 'rbtdb'.
2409 * In order for wildcard matching to work correctly in
2410 * zone_find(), we must ensure that a node for the wildcarding
2411 * level exists in the database, and has its 'find_callback'
2412 * and 'wild' bits set.
2414 * E.g. if the wildcard name is "*.sub.example." then we
2415 * must ensure that "sub.example." exists and is marked as
/*
 * Implementation: the name of the wildcarding level is obtained by
 * taking the label sequence of 'name' starting at label 1, i.e. by
 * dropping the leftmost label.  That name is added to the main tree;
 * both ISC_R_SUCCESS and ISC_R_EXISTS from dns_rbt_addnode() are
 * accepted.  The node is then flagged with find_callback and
 * ISC_R_SUCCESS is returned.
 * NOTE(review): any other dns_rbt_addnode() result is presumably
 * returned to the caller, and the 'wild' bit is presumably set next to
 * find_callback -- those lines are not visible in this listing; confirm.
 */
2419 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2420 isc_result_t result;
2421 dns_name_t foundname;
2422 dns_offsets_t offsets;
2424 dns_rbtnode_t *node = NULL;
2426 dns_name_init(&foundname, offsets);
2427 n = dns_name_countlabels(name);
2430 dns_name_getlabelsequence(name, 1, n, &foundname);
2431 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2432 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2435 node->find_callback = 1;
2437 return (ISC_R_SUCCESS);
/*
 * Examine suffixes of 'name' (the last 'i' labels of its 'n' labels).
 * For each suffix that is itself a wildcard name, add_wildcard_magic()
 * is run and the suffix is added to the main tree; an already existing
 * node is accepted.  'l' is the label count of the database origin.
 * Returns ISC_R_SUCCESS when the walk completes.
 * NOTE(review): the loop bounds over 'i' and the error returns are not
 * visible in this listing; presumably only suffixes below the origin
 * are visited and failures are returned to the caller -- confirm.
 */
2441 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2442 isc_result_t result;
2443 dns_name_t foundname;
2444 dns_offsets_t offsets;
2445 unsigned int n, l, i;
2447 dns_name_init(&foundname, offsets);
2448 n = dns_name_countlabels(name);
2449 l = dns_name_countlabels(&rbtdb->common.origin);
2452 dns_rbtnode_t *node = NULL; /* dummy */
2453 dns_name_getlabelsequence(name, n - i, i, &foundname);
2454 if (dns_name_iswildcard(&foundname)) {
2455 result = add_wildcard_magic(rbtdb, &foundname);
2456 if (result != ISC_R_SUCCESS)
2458 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2460 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2466 return (ISC_R_SUCCESS);
/*
 * Find the node for 'name' in 'tree', which must be either the main
 * tree or the NSEC3 tree of 'rbtdb', and return it through '*nodep'
 * with a new reference taken by reactivate_node().
 *
 * The lookup runs under the tree read lock and accepts nodes without
 * data (DNS_RBTFIND_EMPTYDATA).  When the lookup does not succeed the
 * read lock is released; a DNS_R_PARTIALMATCH result is mapped to
 * ISC_R_NOTFOUND.  The visible code then takes the tree write lock and
 * adds the node.  A newly added node gets its lock bucket from a hash
 * of its name modulo the number of node locks; for the main tree the
 * wildcard helpers are run as well.
 * NOTE(review): the add path is presumably taken only when 'create' is
 * true, and new nodes in the NSEC3 tree are presumably flagged as such
 * -- those lines are not visible in this listing; confirm.
 */
2470 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
2471 isc_boolean_t create, dns_dbnode_t **nodep)
2473 dns_rbtnode_t *node = NULL;
2474 dns_name_t nodename;
2475 isc_result_t result;
2476 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2478 INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
2480 dns_name_init(&nodename, NULL);
2481 RWLOCK(&rbtdb->tree_lock, locktype);
2482 result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
2483 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2484 if (result != ISC_R_SUCCESS) {
2485 RWUNLOCK(&rbtdb->tree_lock, locktype);
2487 if (result == DNS_R_PARTIALMATCH)
2488 result = ISC_R_NOTFOUND;
2492 * It would be nice to try to upgrade the lock instead of
2493 * unlocking then relocking.
2495 locktype = isc_rwlocktype_write;
2496 RWLOCK(&rbtdb->tree_lock, locktype);
2498 result = dns_rbt_addnode(tree, name, &node);
2499 if (result == ISC_R_SUCCESS) {
2500 dns_rbt_namefromnode(node, &nodename);
2501 #ifdef DNS_RBT_USEHASH
2502 node->locknum = node->hashval % rbtdb->node_lock_count;
2504 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2505 rbtdb->node_lock_count;
2507 if (tree == rbtdb->tree) {
2509 add_empty_wildcards(rbtdb, name);
2511 if (dns_name_iswildcard(name)) {
2512 result = add_wildcard_magic(rbtdb,
2514 if (result != ISC_R_SUCCESS) {
2515 RWUNLOCK(&rbtdb->tree_lock,
2521 if (tree == rbtdb->nsec3)
2523 } else if (result != ISC_R_EXISTS) {
2524 RWUNLOCK(&rbtdb->tree_lock, locktype);
2529 if (tree == rbtdb->nsec3)
2530 INSIST(node->nsec3 == 1);
/* The reference is taken while the tree lock is still held. */
2532 reactivate_node(rbtdb, node, locktype);
2533 RWUNLOCK(&rbtdb->tree_lock, locktype);
2535 *nodep = (dns_dbnode_t *)node;
2537 return (ISC_R_SUCCESS);
/*
 * findnode() -- find (or, per 'create', add) the node for 'name' in the
 * main tree.
 *
 * Casts 'db' to dns_rbtdb_t, validates it with VALID_RBTDB, and forwards to
 * findnodeintree() using rbtdb->tree.  The result of findnodeintree() is
 * returned unchanged.
 */
2541 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2542 dns_dbnode_t **nodep)
2544 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2546 REQUIRE(VALID_RBTDB(rbtdb));
2548 return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
/*
 * findnsec3node() -- same as findnode(), but operating on the NSEC3 tree.
 *
 * Casts 'db' to dns_rbtdb_t, validates it with VALID_RBTDB, and forwards to
 * findnodeintree() using rbtdb->nsec3.  The result of findnodeintree() is
 * returned unchanged.
 */
2552 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2553 dns_dbnode_t **nodep)
2555 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2557 REQUIRE(VALID_RBTDB(rbtdb));
2559 return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
/*
 * zone_zonecut_callback() -- node callback passed to dns_rbt_findnode() by
 * zone_find(); 'arg' is the rbtdb_search_t describing the lookup.
 *
 * If search->zonecut is already set the function returns DNS_R_CONTINUE
 * immediately.  Otherwise, under the node's read lock, it walks the
 * node's rdataset headers looking for NS, DNAME and RRSIG(DNAME) entries
 * whose serial is not newer than search->serial, remembering them in
 * ns_header / dname_header / sigdname_header.
 *
 * Selection of 'found':
 *   - in a database that is neither cache nor stub, an NS header wins;
 *   - otherwise a DNAME header (with its signature) is preferred;
 *   - otherwise an NS header is used.
 *
 * When something is found the node gains a reference, the search block
 * records the cut (zonecut, zonecut_rdataset, need_cleanup) and
 * search->wild is cleared.  If DNS_DBFIND_GLUEOK is not set the result
 * becomes DNS_R_PARTIALMATCH; the other visible path copies 'name' into
 * search->zonecut_name and sets search->copy_name.
 *
 * When nothing is found, search->wild is set if the node's wild bit is set
 * and DNS_DBFIND_NOWILD was not requested.
 *
 * NOTE(review): several statements (the initialisation of ns_header and
 * found, the 'do' loop head, the final return) are not visible in this
 * listing -- confirm against the full file.
 */
2563 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2564 rbtdb_search_t *search = arg;
2565 rdatasetheader_t *header, *header_next;
2566 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2567 rdatasetheader_t *found;
2568 isc_result_t result;
2569 dns_rbtnode_t *onode;
2572 * We only want to remember the topmost zone cut, since it's the one
2573 * that counts, so we'll just continue if we've already found a
2576 if (search->zonecut != NULL)
2577 return (DNS_R_CONTINUE);
/* Default result unless a zone cut is chosen further down. */
2580 result = DNS_R_CONTINUE;
2581 onode = search->rbtdb->origin_node;
2583 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2584 isc_rwlocktype_read);
2587 * Look for an NS or DNAME rdataset active in our version.
2590 dname_header = NULL;
2591 sigdname_header = NULL;
2592 for (header = node->data; header != NULL; header = header_next) {
2593 header_next = header->next;
2594 if (header->type == dns_rdatatype_ns ||
2595 header->type == dns_rdatatype_dname ||
2596 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2598 if (header->serial <= search->serial &&
2601 * Is this a "this rdataset doesn't
2604 if (NONEXISTENT(header))
2608 header = header->down;
2609 } while (header != NULL);
2610 if (header != NULL) {
2611 if (header->type == dns_rdatatype_dname)
2612 dname_header = header;
2613 else if (header->type ==
2614 RBTDB_RDATATYPE_SIGDNAME)
2615 sigdname_header = header;
2616 else if (node != onode ||
2617 IS_STUB(search->rbtdb)) {
2619 * We've found an NS rdataset that
2620 * isn't at the origin node. We check
2621 * that they're not at the origin node,
2622 * because otherwise we'd erroneously
2623 * treat the zone top as if it were
2633 * Did we find anything?
2635 if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2636 ns_header != NULL) {
2638 * Note that NS has precedence over DNAME if both exist
2639 * in a zone. Otherwise DNAME take precedence over NS.
2642 search->zonecut_sigrdataset = NULL;
2643 } else if (dname_header != NULL) {
2644 found = dname_header;
2645 search->zonecut_sigrdataset = sigdname_header;
2646 } else if (ns_header != NULL) {
2648 search->zonecut_sigrdataset = NULL;
2651 if (found != NULL) {
2653 * We increment the reference count on node to ensure that
2654 * search->zonecut_rdataset will still be valid later.
2656 new_reference(search->rbtdb, node);
2657 search->zonecut = node;
2658 search->zonecut_rdataset = found;
2659 search->need_cleanup = ISC_TRUE;
2661 * Since we've found a zonecut, anything beneath it is
2662 * glue and is not subject to wildcard matching, so we
2663 * may clear search->wild.
2665 search->wild = ISC_FALSE;
2666 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2668 * If the caller does not want to find glue, then
2669 * this is the best answer and the search should
2672 result = DNS_R_PARTIALMATCH;
2677 * The search will continue beneath the zone cut.
2678 * This may or may not be the best match. In case it
2679 * is, we need to remember the node name.
2681 zcname = dns_fixedname_name(&search->zonecut_name);
2682 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2684 search->copy_name = ISC_TRUE;
2688 * There is no zonecut at this node which is active in this
2691 * If this is a "wild" node and the caller hasn't disabled
2692 * wildcard matching, remember that we've seen a wild node
2693 * in case we need to go searching for wildcard matches
2696 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2697 search->wild = ISC_TRUE;
2700 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2701 isc_rwlocktype_read);
/*
 * bind_rdataset() -- associate the caller's 'rdataset' with 'header'.
 *
 * Takes a new reference on 'node' and fills in 'rdataset':
 *   - methods, class, and the type / covers pair split out of header->type;
 *   - ttl computed as header->rdh_ttl - now, and the trust level;
 *   - attribute bits for negative, NXDOMAIN, opt-out, noqname, closest and
 *     resign information;
 *   - private1 = rbtdb, private2 = node, private3 = the bytes that
 *     immediately follow the header structure;
 *   - count taken from header->count, which is post-incremented;
 *   - iterator state (privateuint4, private5) reset.
 *
 * 'rdataset' must be disassociated on entry (methods == NULL, enforced by
 * INSIST).
 * NOTE(review): the action taken for a NULL 'rdataset' and the condition
 * guarding the OPTOUT attribute are not visible in this listing.
 */
2707 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2708 rdatasetheader_t *header, isc_stdtime_t now,
2709 dns_rdataset_t *rdataset)
2711 unsigned char *raw; /* RDATASLAB */
2714 * Caller must be holding the node reader lock.
2715 * XXXJT: technically, we need a writer lock, since we'll increment
2716 * the header count below. However, since the actual counter value
2717 * doesn't matter, we prioritize performance here. (We may want to
2718 * use atomic increment when available).
2721 if (rdataset == NULL)
2724 new_reference(rbtdb, node);
2726 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
/* Copy identity, timing and trust information out of the header. */
2728 rdataset->methods = &rdataset_methods;
2729 rdataset->rdclass = rbtdb->common.rdclass;
2730 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2731 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2732 rdataset->ttl = header->rdh_ttl - now;
2733 rdataset->trust = header->trust;
2734 if (NEGATIVE(header))
2735 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2736 if (NXDOMAIN(header))
2737 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2739 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2740 rdataset->private1 = rbtdb;
2741 rdataset->private2 = node;
2742 raw = (unsigned char *)header + sizeof(*header);
2743 rdataset->private3 = raw;
/* Post-increment: rdataset->count receives the value before the bump. */
2744 rdataset->count = header->count++;
2745 if (rdataset->count == ISC_UINT32_MAX)
2746 rdataset->count = 0;
2749 * Reset iterator state.
2751 rdataset->privateuint4 = 0;
2752 rdataset->private5 = NULL;
2755 * Add noqname proof.
2757 rdataset->private6 = header->noqname;
2758 if (rdataset->private6 != NULL)
2759 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2760 rdataset->private7 = header->closest;
2761 if (rdataset->private7 != NULL)
2762 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2765 * Copy out re-signing information.
2767 if (RESIGN(header)) {
2768 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2769 rdataset->resign = header->resign;
2771 rdataset->resign = 0;
/*
 * setup_delegation() -- report the zone cut recorded in 'search' to the
 * caller.
 *
 * Steps, in order:
 *   1. If 'foundname' is supplied and search->copy_name is set, copy the
 *      saved zone cut name into it.
 *   2. If 'nodep' is supplied, the reference already held by the search
 *      block is reused and search->need_cleanup is cleared.
 *   3. If 'rdataset' is supplied, bind the zone cut rdataset (and its
 *      signature, when both 'sigrdataset' and a saved signature exist)
 *      under the node's read lock, using search->now.
 *
 * Returns DNS_R_DNAME when the zone cut rdataset is a DNAME, otherwise
 * DNS_R_DELEGATION.
 * NOTE(review): the handling of a failed dns_name_copy() and the
 * assignment to '*nodep' are not visible in this listing.
 */
2774 static inline isc_result_t
2775 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2776 dns_name_t *foundname, dns_rdataset_t *rdataset,
2777 dns_rdataset_t *sigrdataset)
2779 isc_result_t result;
2781 rbtdb_rdatatype_t type;
2782 dns_rbtnode_t *node;
2785 * The caller MUST NOT be holding any node locks.
2788 node = search->zonecut;
2789 type = search->zonecut_rdataset->type;
2792 * If we have to set foundname, we do it before anything else.
2793 * If we were to set foundname after we had set nodep or bound the
2794 * rdataset, then we'd have to undo that work if dns_name_copy()
2795 * failed. By setting foundname first, there's nothing to undo if
2798 if (foundname != NULL && search->copy_name) {
2799 zcname = dns_fixedname_name(&search->zonecut_name);
2800 result = dns_name_copy(zcname, foundname, NULL);
2801 if (result != ISC_R_SUCCESS)
2804 if (nodep != NULL) {
2806 * Note that we don't have to increment the node's reference
2807 * count here because we're going to use the reference we
2808 * already have in the search block.
2811 search->need_cleanup = ISC_FALSE;
2813 if (rdataset != NULL) {
2814 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2815 isc_rwlocktype_read);
2816 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2817 search->now, rdataset);
2818 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2819 bind_rdataset(search->rbtdb, node,
2820 search->zonecut_sigrdataset,
2821 search->now, sigrdataset);
2822 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2823 isc_rwlocktype_read);
/* 'type' was sampled from the zone cut rdataset at the top. */
2826 if (type == dns_rdatatype_dname)
2827 return (DNS_R_DNAME);
2828 return (DNS_R_DELEGATION);
/*
 * valid_glue() -- decide whether data of 'type' at 'node' / 'name' is
 * acceptable glue for the zone cut recorded in 'search'.
 *
 * Type filter: NS is examined relative to search->zonecut (see the
 * comparison with 'node' below); any other type must be A, AAAA or A6.
 *
 * The function then reads the zone cut rdataset directly in slab form:
 * a 16-bit big-endian record count, followed by records that each start
 * with a 16-bit big-endian length.  Each record is turned into a name
 * ('ns_name') and compared with 'name' using dns_name_compare().
 *
 * 'valid' starts out ISC_FALSE.
 * NOTE(review): the loop over records, the statements that set 'valid',
 * and the return are not visible in this listing -- confirm against the
 * full file.  Under DNS_RDATASET_FIXED the slab carries extra per-record
 * data that is skipped (raw += 2 + 4 * count).
 */
2831 static inline isc_boolean_t
2832 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2833 dns_rbtnode_t *node)
2835 unsigned char *raw; /* RDATASLAB */
2836 unsigned int count, size;
2838 isc_boolean_t valid = ISC_FALSE;
2839 dns_offsets_t offsets;
2840 isc_region_t region;
2841 rdatasetheader_t *header;
2844 * No additional locking is required.
2848 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2849 * if it occurs at a zone cut, but is not valid below it.
2851 if (type == dns_rdatatype_ns) {
2852 if (node != search->zonecut) {
2855 } else if (type != dns_rdatatype_a &&
2856 type != dns_rdatatype_aaaa &&
2857 type != dns_rdatatype_a6) {
2861 header = search->zonecut_rdataset;
2862 raw = (unsigned char *)header + sizeof(*header);
/* Two-byte big-endian record count at the start of the slab. */
2863 count = raw[0] * 256 + raw[1];
2864 #if DNS_RDATASET_FIXED
2865 raw += 2 + (4 * count);
2872 size = raw[0] * 256 + raw[1];
2873 #if DNS_RDATASET_FIXED
2879 region.length = size;
2882 * XXX Until we have rdata structures, we have no choice but
2883 * to directly access the rdata format.
2885 dns_name_init(&ns_name, offsets);
2886 dns_name_fromregion(&ns_name, ®ion);
2887 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * activeempty() -- helper used when 'name' was not found, to tell whether
 * it is an empty non-terminal (zone_find() maps a true answer to
 * DNS_R_EMPTYNAME).
 *
 * Advances 'chain' with dns_rbtnodechain_next() and, for each node
 * reached, scans the node's headers under the node read lock for one that
 * is visible at search->serial, is not ignored and exists.  Afterwards the
 * current prefix and origin are concatenated into 'next', and 'next' is
 * tested with dns_name_issubdomain() against 'name'.
 *
 * 'chain' is modified; callers that need the original position pass a
 * copy (zone_find() does so).
 * NOTE(review): the third parameter, the loop exits, the assignment to
 * 'answer' and the return are not visible in this listing.
 */
2896 static inline isc_boolean_t
2897 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2900 dns_fixedname_t fnext;
2901 dns_fixedname_t forigin;
2906 dns_rbtnode_t *node;
2907 isc_result_t result;
2908 isc_boolean_t answer = ISC_FALSE;
2909 rdatasetheader_t *header;
2911 rbtdb = search->rbtdb;
2913 dns_name_init(&prefix, NULL);
2914 dns_fixedname_init(&fnext);
2915 next = dns_fixedname_name(&fnext);
2916 dns_fixedname_init(&forigin);
2917 origin = dns_fixedname_name(&forigin);
/* Step forward from the chain's current position. */
2919 result = dns_rbtnodechain_next(chain, NULL, NULL);
2920 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2922 result = dns_rbtnodechain_current(chain, &prefix,
2924 if (result != ISC_R_SUCCESS)
2926 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2927 isc_rwlocktype_read);
2928 for (header = node->data;
2930 header = header->next) {
2931 if (header->serial <= search->serial &&
2932 !IGNORE(header) && EXISTS(header))
2935 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2936 isc_rwlocktype_read);
2939 result = dns_rbtnodechain_next(chain, NULL, NULL);
2941 if (result == ISC_R_SUCCESS)
2942 result = dns_name_concatenate(&prefix, origin, next, NULL);
2943 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode() -- used by find_wildcard() to veto a wildcard match
 * (find_wildcard() returns ISC_R_NOTFOUND when this is true).
 *
 * Works on a private copy of search->chain:
 *   - walks backwards with dns_rbtnodechain_prev() looking for a node with
 *     a header visible at search->serial, and builds its full name in
 *     'prev'; on failure check_prev is cleared;
 *   - walks forwards with dns_rbtnodechain_next() in the same way to build
 *     'next'; on failure check_next is cleared.
 *
 * It then starts from 'qname' and repeatedly removes the left hand label,
 * checking whether 'prev' or 'next' is a subdomain of the remaining name,
 * until the remaining name equals 'wname' without its wildcard label.
 *
 * NOTE(review): the loop heads, the statements that set 'answer' and the
 * return are not visible in this listing -- confirm against the full file.
 * The spelling of the function name is kept as-is because find_wildcard()
 * calls it by this name.
 */
2948 static inline isc_boolean_t
2949 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2950 dns_fixedname_t fnext;
2951 dns_fixedname_t forigin;
2952 dns_fixedname_t fprev;
2960 dns_rbtnode_t *node;
2961 dns_rbtnodechain_t chain;
2962 isc_boolean_t check_next = ISC_TRUE;
2963 isc_boolean_t check_prev = ISC_TRUE;
2964 isc_boolean_t answer = ISC_FALSE;
2965 isc_result_t result;
2966 rdatasetheader_t *header;
2969 rbtdb = search->rbtdb;
2971 dns_name_init(&name, NULL);
2972 dns_name_init(&tname, NULL);
2973 dns_name_init(&rname, NULL);
2974 dns_fixedname_init(&fnext);
2975 next = dns_fixedname_name(&fnext);
2976 dns_fixedname_init(&fprev);
2977 prev = dns_fixedname_name(&fprev);
2978 dns_fixedname_init(&forigin);
2979 origin = dns_fixedname_name(&forigin);
2982 * Find if qname is at or below a empty node.
2983 * Use our own copy of the chain.
2986 chain = search->chain;
2989 result = dns_rbtnodechain_current(&chain, &name,
2991 if (result != ISC_R_SUCCESS)
2993 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2994 isc_rwlocktype_read);
2995 for (header = node->data;
2997 header = header->next) {
2998 if (header->serial <= search->serial &&
2999 !IGNORE(header) && EXISTS(header))
3002 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3003 isc_rwlocktype_read);
3006 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3007 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3008 if (result == ISC_R_SUCCESS)
3009 result = dns_name_concatenate(&name, origin, prev, NULL);
3010 if (result != ISC_R_SUCCESS)
3011 check_prev = ISC_FALSE;
/* Same scan in the forward direction to locate 'next'. */
3013 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3014 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3016 result = dns_rbtnodechain_current(&chain, &name,
3018 if (result != ISC_R_SUCCESS)
3020 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3021 isc_rwlocktype_read);
3022 for (header = node->data;
3024 header = header->next) {
3025 if (header->serial <= search->serial &&
3026 !IGNORE(header) && EXISTS(header))
3029 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3030 isc_rwlocktype_read);
3033 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3035 if (result == ISC_R_SUCCESS)
3036 result = dns_name_concatenate(&name, origin, next, NULL);
3037 if (result != ISC_R_SUCCESS)
3038 check_next = ISC_FALSE;
/* 'rname' is a working view of qname that is shortened label by label. */
3040 dns_name_clone(qname, &rname);
3043 * Remove the wildcard label to find the terminal name.
3045 n = dns_name_countlabels(wname);
3046 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3049 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3050 (check_next && dns_name_issubdomain(next, &rname))) {
3055 * Remove the left hand label.
3057 n = dns_name_countlabels(&rname);
3058 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3059 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard() -- search the ancestor levels recorded in search->chain
 * for a wildcard that matches the query name.
 *
 * For a level node, under its read lock, the headers are scanned to see
 * whether the node is active at search->serial.  The wildcard name for the
 * level is then built by concatenating dns_wildcardname with the node's
 * name and the names of the chain levels, and looked up in rbtdb->tree
 * with DNS_RBTFIND_EMPTYDATA.
 *
 * If the wildcard node is found and is active (a visible header exists, or
 * activeempty() is true for it), activeemtpynode() gets a final say: when
 * it is true the function returns ISC_R_NOTFOUND.  A lookup result other
 * than ISC_R_NOTFOUND / DNS_R_PARTIALMATCH is treated as an error.  If the
 * level node itself is active the result is ISC_R_NOTFOUND.
 *
 * 'result' starts as ISC_R_NOTFOUND.
 * NOTE(review): the remaining parameters, the outer loop, the assignment
 * to '*nodep' and the final return are not visible in this listing --
 * confirm against the full file.
 */
3063 static inline isc_result_t
3064 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3068 dns_rbtnode_t *node, *level_node, *wnode;
3069 rdatasetheader_t *header;
3070 isc_result_t result = ISC_R_NOTFOUND;
3073 dns_fixedname_t fwname;
3075 isc_boolean_t done, wild, active;
3076 dns_rbtnodechain_t wchain;
3079 * Caller must be holding the tree lock and MUST NOT be holding
3084 * Examine each ancestor level. If the level's wild bit
3085 * is set, then construct the corresponding wildcard name and
3086 * search for it. If the wildcard node exists, and is active in
3087 * this version, we're done. If not, then we next check to see
3088 * if the ancestor is active in this version. If so, then there
3089 * can be no possible wildcard match and again we're done. If not,
3090 * continue the search.
3093 rbtdb = search->rbtdb;
3094 i = search->chain.level_matches;
3098 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3099 isc_rwlocktype_read);
3102 * First we try to figure out if this node is active in
3103 * the search's version. We do this now, even though we
3104 * may not need the information, because it simplifies the
3105 * locking and code flow.
3107 for (header = node->data;
3109 header = header->next) {
3110 if (header->serial <= search->serial &&
3111 !IGNORE(header) && EXISTS(header))
3124 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3125 isc_rwlocktype_read);
3129 * Construct the wildcard name for this level.
3131 dns_name_init(&name, NULL);
3132 dns_rbt_namefromnode(node, &name);
3133 dns_fixedname_init(&fwname);
3134 wname = dns_fixedname_name(&fwname);
3135 result = dns_name_concatenate(dns_wildcardname, &name,
3138 while (result == ISC_R_SUCCESS && j != 0) {
3140 level_node = search->chain.levels[j];
3141 dns_name_init(&name, NULL);
3142 dns_rbt_namefromnode(level_node, &name);
3143 result = dns_name_concatenate(wname,
3148 if (result != ISC_R_SUCCESS)
3152 dns_rbtnodechain_init(&wchain, NULL);
3153 result = dns_rbt_findnode(rbtdb->tree, wname,
3154 NULL, &wnode, &wchain,
3155 DNS_RBTFIND_EMPTYDATA,
3157 if (result == ISC_R_SUCCESS) {
3161 * We have found the wildcard node. If it
3162 * is active in the search's version, we're
3165 lock = &rbtdb->node_locks[wnode->locknum].lock;
3166 NODE_LOCK(lock, isc_rwlocktype_read);
3167 for (header = wnode->data;
3169 header = header->next) {
3170 if (header->serial <= search->serial &&
3171 !IGNORE(header) && EXISTS(header))
3174 NODE_UNLOCK(lock, isc_rwlocktype_read);
3175 if (header != NULL ||
3176 activeempty(search, &wchain, wname)) {
3177 if (activeemtpynode(search, qname,
3179 return (ISC_R_NOTFOUND);
3182 * The wildcard node is active!
3184 * Note: result is still ISC_R_SUCCESS
3185 * so we don't have to set it.
3190 } else if (result != ISC_R_NOTFOUND &&
3191 result != DNS_R_PARTIALMATCH) {
3193 * An error has occurred. Bail out.
3201 * The level node is active. Any wildcarding
3202 * present at higher levels has no
3203 * effect and we're done.
3205 result = ISC_R_NOTFOUND;
3211 node = search->chain.levels[i];
/*
 * matchparams() -- check an NSEC3 rdataset against the NSEC3 parameters of
 * the version being searched.
 *
 * 'header' must be of type NSEC3 (REQUIRE).  The rdataset is read in slab
 * form: a 16-bit big-endian record count, then records that each begin
 * with a 16-bit big-endian length.  Every record is converted to a
 * dns_rdata_nsec3_t and its hash, iterations, salt length and salt bytes
 * are compared with those stored in search->rbtversion.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the function is true as soon as one record matches and false
 * otherwise -- confirm against the full file.
 */
3219 static isc_boolean_t
3220 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3222 dns_rdata_t rdata = DNS_RDATA_INIT;
3223 dns_rdata_nsec3_t nsec3;
3224 unsigned char *raw; /* RDATASLAB */
3225 unsigned int rdlen, count;
3226 isc_region_t region;
3227 isc_result_t result;
3229 REQUIRE(header->type == dns_rdatatype_nsec3);
3231 raw = (unsigned char *)header + sizeof(*header);
3232 count = raw[0] * 256 + raw[1]; /* count */
3233 #if DNS_RDATASET_FIXED
3234 raw += count * 4 + 2;
3238 while (count-- > 0) {
3239 rdlen = raw[0] * 256 + raw[1];
3240 #if DNS_RDATASET_FIXED
3246 region.length = rdlen;
3247 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3248 dns_rdatatype_nsec3, ®ion);
3250 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3251 INSIST(result == ISC_R_SUCCESS);
/* All four parameters must agree with the version's NSEC3 chain. */
3252 if (nsec3.hash == search->rbtversion->hash &&
3253 nsec3.iterations == search->rbtversion->iterations &&
3254 nsec3.salt_length == search->rbtversion->salt_length &&
3255 memcmp(nsec3.salt, search->rbtversion->salt,
3256 nsec3.salt_length) == 0)
3258 dns_rdata_reset(&rdata);
3264 * Find node of the NSEC/NSEC3 record that is 'name'.
/*
 * find_closest_nsec() -- starting from the current position of
 * search->chain, walk backwards through 'tree' until a node carrying a
 * usable NSEC (or NSEC3, when 'tree' is the NSEC3 tree) record is found.
 *
 * Record types looked for:
 *   - tree == rbtdb->nsec3 : NSEC3 and RRSIG(NSEC3);
 *   - otherwise            : NSEC and RRSIG(NSEC).
 * A signature is required only when 'secure' equals dns_db_secure.
 *
 * Per node, under the node read lock:
 *   - an NSEC3 record whose parameters do not match the version
 *     (matchparams()) makes the node count as empty and the walk continues;
 *   - a record with its signature (or without, when none is needed) is the
 *     answer: the full name is written to 'foundname', a reference is
 *     taken for 'nodep' if supplied, and the rdatasets are bound;
 *   - an active node with neither record is skipped as if empty;
 *   - an active node with only one of the two yields DNS_R_BADDB;
 *   - an inactive node is skipped.
 *
 * If the walk runs off the start (ISC_R_NOMORE) and 'wraps' is set, it
 * restarts from the last node of the tree.  A final ISC_R_NOMORE is
 * converted to DNS_R_BADDB.
 *
 * NOTE(review): the initialisation of 'wraps', 'found' and 'foundsig', the
 * loop heads and the final return are not visible in this listing --
 * confirm against the full file.
 */
3266 static inline isc_result_t
3267 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3268 dns_name_t *foundname, dns_rdataset_t *rdataset,
3269 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3270 dns_db_secure_t secure)
3272 dns_rbtnode_t *node;
3273 rdatasetheader_t *header, *header_next, *found, *foundsig;
3274 isc_boolean_t empty_node;
3275 isc_result_t result;
3276 dns_fixedname_t fname, forigin;
3277 dns_name_t *name, *origin;
3278 dns_rdatatype_t type;
3279 rbtdb_rdatatype_t sigtype;
3280 isc_boolean_t wraps;
3281 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3283 if (tree == search->rbtdb->nsec3) {
3284 type = dns_rdatatype_nsec3;
3285 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3288 type = dns_rdatatype_nsec;
3289 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3296 dns_fixedname_init(&fname);
3297 name = dns_fixedname_name(&fname);
3298 dns_fixedname_init(&forigin);
3299 origin = dns_fixedname_name(&forigin);
3300 result = dns_rbtnodechain_current(&search->chain, name,
3302 if (result != ISC_R_SUCCESS)
3304 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3305 isc_rwlocktype_read);
3308 empty_node = ISC_TRUE;
3309 for (header = node->data;
3311 header = header_next) {
3312 header_next = header->next;
3314 * Look for an active, extant NSEC or RRSIG NSEC.
3317 if (header->serial <= search->serial &&
3320 * Is this a "this rdataset doesn't
3323 if (NONEXISTENT(header))
3327 header = header->down;
3328 } while (header != NULL);
3329 if (header != NULL) {
3331 * We now know that there is at least one
3332 * active rdataset at this node.
3334 empty_node = ISC_FALSE;
3335 if (header->type == type) {
3337 if (foundsig != NULL)
3339 } else if (header->type == sigtype) {
3347 if (found != NULL && search->rbtversion->havensec3 &&
3348 found->type == dns_rdatatype_nsec3 &&
3349 !matchparams(found, search)) {
3350 empty_node = ISC_TRUE;
3353 result = dns_rbtnodechain_prev(&search->chain,
3355 } else if (found != NULL &&
3356 (foundsig != NULL || !need_sig))
3359 * We've found the right NSEC/NSEC3 record.
3361 * Note: for this to really be the right
3362 * NSEC record, it's essential that the NSEC
3363 * records of any nodes obscured by a zone
3364 * cut have been removed; we assume this is
3367 result = dns_name_concatenate(name, origin,
3369 if (result == ISC_R_SUCCESS) {
3370 if (nodep != NULL) {
3371 new_reference(search->rbtdb,
3375 bind_rdataset(search->rbtdb, node,
3378 if (foundsig != NULL)
3379 bind_rdataset(search->rbtdb,
3385 } else if (found == NULL && foundsig == NULL) {
3387 * This node is active, but has no NSEC or
3388 * RRSIG NSEC. That means it's glue or
3389 * other obscured zone data that isn't
3390 * relevant for our search. Treat the
3391 * node as if it were empty and keep looking.
3393 empty_node = ISC_TRUE;
3394 result = dns_rbtnodechain_prev(&search->chain,
3398 * We found an active node, but either the
3399 * NSEC or the RRSIG NSEC is missing. This
3402 result = DNS_R_BADDB;
3406 * This node isn't active. We've got to keep
3409 result = dns_rbtnodechain_prev(&search->chain, NULL,
3412 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3413 isc_rwlocktype_read);
3414 } while (empty_node && result == ISC_R_SUCCESS);
3416 if (result == ISC_R_NOMORE && wraps) {
3417 result = dns_rbtnodechain_last(&search->chain, tree,
3419 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3426 * If the result is ISC_R_NOMORE, then we got to the beginning of
3427 * the database and didn't find a NSEC record. This shouldn't
3430 if (result == ISC_R_NOMORE)
3431 result = DNS_R_BADDB;
/*
 * zone_find() -- look up 'name' / 'type' in a zone database at a given
 * version.
 *
 * Parameters (as used in the visible code):
 *   db          the database; must satisfy VALID_RBTDB.
 *   name        the name to look up.
 *   version     version to search; when NULL the current version is
 *               attached via currentversion() and close_version is set.
 *   type        requested rdata type (dns_rdatatype_any is handled
 *               specially).
 *   options     DNS_DBFIND_* flags; the ones tested here are GLUEOK,
 *               VALIDATEGLUE, FORCENSEC and FORCENSEC3.
 *   now         not used for zone data (see the comment in the body).
 *   nodep       optional; receives a referenced node.
 *   foundname   receives the name found by the tree lookup.
 *   rdataset, sigrdataset
 *               bound to the answer (or to an NSEC / delegation rdataset)
 *               through bind_rdataset().
 *
 * Outline:
 *   1. Fill in the rbtdb_search_t and take the tree read lock.
 *   2. dns_rbt_findnode() is run on the main tree, or on the NSEC3 tree
 *      when DNS_DBFIND_FORCENSEC3 is set, with zone_zonecut_callback() as
 *      the node callback.
 *   3. On DNS_R_PARTIALMATCH: a recorded zone cut produces a delegation
 *      (setup_delegation()); otherwise find_wildcard() is tried; failing
 *      that the answer is DNS_R_EMPTYNAME or DNS_R_NXDOMAIN depending on
 *      activeempty(), with find_closest_nsec() supplying the proof when
 *      the version is NSEC-signed or an NSEC lookup was forced.
 *   4. On an exact (or wildcard) match the node's headers are scanned
 *      under the node read lock for the requested type, its RRSIG, a
 *      CNAME (when permitted), NSEC data, and an NS rdataset that would
 *      make the node a zone cut.
 *   5. If nothing was found: delegation, DNS_R_NXRRSET, DNS_R_BADDB or
 *      DNS_R_EMPTYWILD, as selected by the visible tests.
 *   6. If something was found the result is DNS_R_CNAME, DNS_R_ZONECUT,
 *      DNS_R_GLUE or ISC_R_SUCCESS; glue is optionally re-checked with
 *      valid_glue() and replaced by the delegation when invalid.
 *   7. Cleanup: release the tree lock, drop an unused zone cut reference
 *      (search.need_cleanup), close the version, reset the chain.
 *
 * NOTE(review): goto labels, several returns and the initialisation of
 * found / foundsig / nsecheader / nsecsig / cnamesig are not visible in
 * this listing; the outline above is limited to what the visible lines
 * show -- confirm details against the full file.
 */
3437 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3438 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3439 dns_dbnode_t **nodep, dns_name_t *foundname,
3440 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3442 dns_rbtnode_t *node = NULL;
3443 isc_result_t result;
3444 rbtdb_search_t search;
3445 isc_boolean_t cname_ok = ISC_TRUE;
3446 isc_boolean_t close_version = ISC_FALSE;
3447 isc_boolean_t maybe_zonecut = ISC_FALSE;
3448 isc_boolean_t at_zonecut = ISC_FALSE;
3450 isc_boolean_t empty_node;
3451 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3452 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3453 rbtdb_rdatatype_t sigtype;
3454 isc_boolean_t active;
3455 dns_rbtnodechain_t chain;
3459 search.rbtdb = (dns_rbtdb_t *)db;
3461 REQUIRE(VALID_RBTDB(search.rbtdb));
3462 INSIST(version == NULL ||
3463 ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
3466 * We don't care about 'now'.
3471 * If the caller didn't supply a version, attach to the current
3474 if (version == NULL) {
3475 currentversion(db, &version);
3476 close_version = ISC_TRUE;
/* Initialise the search block shared with the callback and helpers. */
3479 search.rbtversion = version;
3480 search.serial = search.rbtversion->serial;
3481 search.options = options;
3482 search.copy_name = ISC_FALSE;
3483 search.need_cleanup = ISC_FALSE;
3484 search.wild = ISC_FALSE;
3485 search.zonecut = NULL;
3486 dns_fixedname_init(&search.zonecut_name);
3487 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3491 * 'wild' will be true iff. we've matched a wildcard.
3495 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3498 * Search down from the root of the tree. If, while going down, we
3499 * encounter a callback node, zone_zonecut_callback() will search the
3500 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3502 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3504 result = dns_rbt_findnode(tree, name, foundname, &node,
3505 &search.chain, DNS_RBTFIND_EMPTYDATA,
3506 zone_zonecut_callback, &search);
3508 if (result == DNS_R_PARTIALMATCH) {
3510 if (search.zonecut != NULL) {
3511 result = setup_delegation(&search, nodep, foundname,
3512 rdataset, sigrdataset);
3518 * At least one of the levels in the search chain
3519 * potentially has a wildcard. For each such level,
3520 * we must see if there's a matching wildcard active
3521 * in the current version.
3523 result = find_wildcard(&search, &node, name);
3524 if (result == ISC_R_SUCCESS) {
3525 result = dns_name_copy(name, foundname, NULL);
3526 if (result != ISC_R_SUCCESS)
3531 else if (result != ISC_R_NOTFOUND)
/* activeempty() advances the chain it is given, so hand it a copy. */
3535 chain = search.chain;
3536 active = activeempty(&search, &chain, name);
3539 * If we're here, then the name does not exist, is not
3540 * beneath a zonecut, and there's no matching wildcard.
3542 if ((search.rbtversion->secure == dns_db_secure &&
3543 !search.rbtversion->havensec3) ||
3544 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3545 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3547 result = find_closest_nsec(&search, nodep, foundname,
3548 rdataset, sigrdataset, tree,
3549 search.rbtversion->secure);
3550 if (result == ISC_R_SUCCESS)
3551 result = active ? DNS_R_EMPTYNAME :
3554 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3556 } else if (result != ISC_R_SUCCESS)
3561 * We have found a node whose name is the desired name, or we
3562 * have matched a wildcard.
3565 if (search.zonecut != NULL) {
3567 * If we're beneath a zone cut, we don't want to look for
3568 * CNAMEs because they're not legitimate zone glue.
3570 cname_ok = ISC_FALSE;
3573 * The node may be a zone cut itself. If it might be one,
3574 * make sure we check for it later.
3576 * DS records live above the zone cut in ordinary zone so
3577 * we want to ignore any referral.
3579 * Stub zones don't have anything "above" the delgation so
3580 * we always return a referral.
3582 if (node->find_callback &&
3583 ((node != search.rbtdb->origin_node &&
3584 !dns_rdatatype_atparent(type)) ||
3585 IS_STUB(search.rbtdb)))
3586 maybe_zonecut = ISC_TRUE;
3590 * Certain DNSSEC types are not subject to CNAME matching
3591 * (RFC4035, section 2.5 and RFC3007).
3593 * We don't check for RRSIG, because we don't store RRSIG records
3596 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3597 cname_ok = ISC_FALSE;
3600 * We now go looking for rdata...
3603 lock = &search.rbtdb->node_locks[node->locknum].lock;
3604 NODE_LOCK(lock, isc_rwlocktype_read);
3608 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3612 empty_node = ISC_TRUE;
3613 for (header = node->data; header != NULL; header = header_next) {
3614 header_next = header->next;
3616 * Look for an active, extant rdataset.
3619 if (header->serial <= search.serial &&
3622 * Is this a "this rdataset doesn't
3625 if (NONEXISTENT(header))
3629 header = header->down;
3630 } while (header != NULL);
3631 if (header != NULL) {
3633 * We now know that there is at least one active
3634 * rdataset at this node.
3636 empty_node = ISC_FALSE;
3639 * Do special zone cut handling, if requested.
3641 if (maybe_zonecut &&
3642 header->type == dns_rdatatype_ns) {
3644 * We increment the reference count on node to
3645 * ensure that search->zonecut_rdataset will
3646 * still be valid later.
3648 new_reference(search.rbtdb, node);
3649 search.zonecut = node;
3650 search.zonecut_rdataset = header;
3651 search.zonecut_sigrdataset = NULL;
3652 search.need_cleanup = ISC_TRUE;
3653 maybe_zonecut = ISC_FALSE;
3654 at_zonecut = ISC_TRUE;
3656 * It is not clear if KEY should still be
3657 * allowed at the parent side of the zone
3658 * cut or not. It is needed for RFC3007
3659 * validated updates.
3661 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3662 && type != dns_rdatatype_nsec
3663 && type != dns_rdatatype_key) {
3665 * Glue is not OK, but any answer we
3666 * could return would be glue. Return
3672 if (found != NULL && foundsig != NULL)
3678 * If the NSEC3 record doesn't match the chain
3679 * we are using behave as if it isn't here.
3681 if (header->type == dns_rdatatype_nsec3 &&
3682 !matchparams(header, &search)) {
3683 NODE_UNLOCK(lock, isc_rwlocktype_read);
3687 * If we found a type we were looking for,
3690 if (header->type == type ||
3691 type == dns_rdatatype_any ||
3692 (header->type == dns_rdatatype_cname &&
3695 * We've found the answer!
3698 if (header->type == dns_rdatatype_cname &&
3701 * We may be finding a CNAME instead
3702 * of the desired type.
3704 * If we've already got the CNAME RRSIG,
3705 * use it, otherwise change sigtype
3706 * so that we find it.
3708 if (cnamesig != NULL)
3709 foundsig = cnamesig;
3712 RBTDB_RDATATYPE_SIGCNAME;
3715 * If we've got all we need, end the search.
3717 if (!maybe_zonecut && foundsig != NULL)
3719 } else if (header->type == sigtype) {
3721 * We've found the RRSIG rdataset for our
3722 * target type. Remember it.
3726 * If we've got all we need, end the search.
3728 if (!maybe_zonecut && found != NULL)
3730 } else if (header->type == dns_rdatatype_nsec &&
3731 !search.rbtversion->havensec3) {
3733 * Remember a NSEC rdataset even if we're
3734 * not specifically looking for it, because
3735 * we might need it later.
3737 nsecheader = header;
3738 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3739 !search.rbtversion->havensec3) {
3741 * If we need the NSEC rdataset, we'll also
3742 * need its signature.
3745 } else if (cname_ok &&
3746 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3748 * If we get a CNAME match, we'll also need
3758 * We have an exact match for the name, but there are no
3759 * active rdatasets in the desired version. That means that
3760 * this node doesn't exist in the desired version, and that
3761 * we really have a partial match.
3764 NODE_UNLOCK(lock, isc_rwlocktype_read);
3770 * If we didn't find what we were looking for...
3772 if (found == NULL) {
3773 if (search.zonecut != NULL) {
3775 * We were trying to find glue at a node beneath a
3776 * zone cut, but didn't.
3778 * Return the delegation.
3780 NODE_UNLOCK(lock, isc_rwlocktype_read);
3781 result = setup_delegation(&search, nodep, foundname,
3782 rdataset, sigrdataset);
3786 * The desired type doesn't exist.
3788 result = DNS_R_NXRRSET;
3789 if (search.rbtversion->secure == dns_db_secure &&
3790 !search.rbtversion->havensec3 &&
3791 (nsecheader == NULL || nsecsig == NULL)) {
3793 * The zone is secure but there's no NSEC,
3794 * or the NSEC has no signature!
3797 result = DNS_R_BADDB;
3801 NODE_UNLOCK(lock, isc_rwlocktype_read);
3802 result = find_closest_nsec(&search, nodep, foundname,
3803 rdataset, sigrdataset,
3805 search.rbtversion->secure);
3806 if (result == ISC_R_SUCCESS)
3807 result = DNS_R_EMPTYWILD;
3810 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3814 * There's no NSEC record, and we were told
3817 result = DNS_R_BADDB;
3820 if (nodep != NULL) {
3821 new_reference(search.rbtdb, node);
3824 if ((search.rbtversion->secure == dns_db_secure &&
3825 !search.rbtversion->havensec3) ||
3826 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3828 bind_rdataset(search.rbtdb, node, nsecheader,
3830 if (nsecsig != NULL)
3831 bind_rdataset(search.rbtdb, node,
3832 nsecsig, 0, sigrdataset);
3835 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3840 * We found what we were looking for, or we found a CNAME.
3843 if (type != found->type &&
3844 type != dns_rdatatype_any &&
3845 found->type == dns_rdatatype_cname) {
3847 * We weren't doing an ANY query and we found a CNAME instead
3848 * of the type we were looking for, so we need to indicate
3849 * that result to the caller.
3851 result = DNS_R_CNAME;
3852 } else if (search.zonecut != NULL) {
3854 * If we're beneath a zone cut, we must indicate that the
3855 * result is glue, unless we're actually at the zone cut
3856 * and the type is NSEC or KEY.
3858 if (search.zonecut == node) {
3860 * It is not clear if KEY should still be
3861 * allowed at the parent side of the zone
3862 * cut or not. It is needed for RFC3007
3863 * validated updates.
3865 if (type == dns_rdatatype_nsec ||
3866 type == dns_rdatatype_nsec3 ||
3867 type == dns_rdatatype_key)
3868 result = ISC_R_SUCCESS;
3869 else if (type == dns_rdatatype_any)
3870 result = DNS_R_ZONECUT;
3872 result = DNS_R_GLUE;
3874 result = DNS_R_GLUE;
3876 * We might have found data that isn't glue, but was occluded
3877 * by a dynamic update. If the caller cares about this, they
3878 * will have told us to validate glue.
3880 * XXX We should cache the glue validity state!
3882 if (result == DNS_R_GLUE &&
3883 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3884 !valid_glue(&search, foundname, type, node)) {
3885 NODE_UNLOCK(lock, isc_rwlocktype_read);
3886 result = setup_delegation(&search, nodep, foundname,
3887 rdataset, sigrdataset);
3892 * An ordinary successful query!
3894 result = ISC_R_SUCCESS;
3897 if (nodep != NULL) {
3899 new_reference(search.rbtdb, node);
3901 search.need_cleanup = ISC_FALSE;
3905 if (type != dns_rdatatype_any) {
3906 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3907 if (foundsig != NULL)
3908 bind_rdataset(search.rbtdb, node, foundsig, 0,
3913 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3916 NODE_UNLOCK(lock, isc_rwlocktype_read);
/* Done with the tree: release the read lock taken before the lookup. */
3919 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3922 * If we found a zonecut but aren't going to use it, we have to
3925 if (search.need_cleanup) {
3926 node = search.zonecut;
3927 INSIST(node != NULL);
3928 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3930 NODE_LOCK(lock, isc_rwlocktype_read);
3931 decrement_reference(search.rbtdb, node, 0,
3932 isc_rwlocktype_read, isc_rwlocktype_none,
3934 NODE_UNLOCK(lock, isc_rwlocktype_read);
3938 closeversion(db, &version, ISC_FALSE);
3940 dns_rbtnodechain_reset(&search.chain);
/*
 * zone_findzonecut: zone-cut lookup hook that is not expected to be
 * called.  The visible body raises FATAL_ERROR() and then returns
 * ISC_R_NOTIMPLEMENTED.  sigrdataset is marked UNUSED; presumably the
 * remaining parameters are too -- TODO confirm, those lines are not
 * visible in this excerpt.
 */
3946 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3947 isc_stdtime_t now, dns_dbnode_t **nodep,
3948 dns_name_t *foundname,
3949 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3958 UNUSED(sigrdataset);
3960 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3962 return (ISC_R_NOTIMPLEMENTED);
/*
 * cache_zonecut_callback: per-node callback that cache_find() hands to
 * dns_rbt_findnode().  arg is the rbtdb_search_t of the lookup in
 * progress; the search must not have recorded a zonecut yet (REQUIRE).
 *
 * With the node lock held, every header hanging off node->data is
 * visited.  A header whose TTL is at or before search->now is stale.  It
 * is only touched when its TTL is also at or before
 * search->now - RBTDB_VIRTUAL and a write lock is held or could be
 * obtained with NODE_TRYUPGRADE(); then it is unlinked and freed if
 * dns_rbtnode_refcurrent(node) is 0, and otherwise appears to be flagged
 * RDATASET_ATTR_STALE (the else lines are not visible in this excerpt).
 * Among the other headers the DNAME and RRSIG DNAME ones are remembered.
 *
 * If a DNAME header was found and its trust is not pending (or
 * DNS_DBFIND_PENDINGOK is set), the node is referenced and stored in the
 * search as the zonecut together with both headers, need_cleanup is set
 * and result becomes DNS_R_PARTIALMATCH.  Otherwise result becomes
 * DNS_R_CONTINUE.
 */
3966 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3967 rbtdb_search_t *search = arg;
3968 rdatasetheader_t *header, *header_prev, *header_next;
3969 rdatasetheader_t *dname_header, *sigdname_header;
3970 isc_result_t result;
3972 isc_rwlocktype_t locktype;
3976 REQUIRE(search->zonecut == NULL);
3979 * Keep compiler silent.
/* The lock bucket is selected by node->locknum; start out as a reader. */
3983 lock = &(search->rbtdb->node_locks[node->locknum].lock);
3984 locktype = isc_rwlocktype_read;
3985 NODE_LOCK(lock, locktype);
3988 * Look for a DNAME or RRSIG DNAME rdataset.
3990 dname_header = NULL;
3991 sigdname_header = NULL;
3993 for (header = node->data; header != NULL; header = header_next) {
3994 header_next = header->next;
3995 if (header->rdh_ttl <= search->now) {
3997 * This rdataset is stale. If no one else is
3998 * using the node, we can clean it up right
3999 * now, otherwise we mark it as stale, and
4000 * the node as dirty, so it will get cleaned
4003 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4004 (locktype == isc_rwlocktype_write ||
4005 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4007 * We update the node's status only when we
4008 * can get write access; otherwise, we leave
4009 * others to this work. Periodical cleaning
4010 * will eventually take the job as the last
4012 * We won't downgrade the lock, since other
4013 * rdatasets are probably stale, too.
4015 locktype = isc_rwlocktype_write;
4017 if (dns_rbtnode_refcurrent(node) == 0) {
4021 * header->down can be non-NULL if the
4022 * refcount has just decremented to 0
4023 * but decrement_reference() has not
4024 * performed clean_cache_node(), in
4025 * which case we need to purge the
4026 * stale headers first.
4028 mctx = search->rbtdb->common.mctx;
4029 clean_stale_headers(search->rbtdb,
4032 if (header_prev != NULL)
4036 node->data = header->next;
4037 free_rdataset(search->rbtdb, mctx,
4040 header->attributes |=
4041 RDATASET_ATTR_STALE;
4043 header_prev = header;
4046 header_prev = header;
4047 } else if (header->type == dns_rdatatype_dname &&
4049 dname_header = header;
4050 header_prev = header;
4051 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4053 sigdname_header = header;
4054 header_prev = header;
4056 header_prev = header;
/*
 * Accept the DNAME only when its trust is not pending, or when the
 * caller explicitly allowed pending data.
 */
4059 if (dname_header != NULL &&
4060 (!DNS_TRUST_PENDING(dname_header->trust) ||
4061 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4063 * We increment the reference count on node to ensure that
4064 * search->zonecut_rdataset will still be valid later.
4066 new_reference(search->rbtdb, node);
4067 INSIST(!ISC_LINK_LINKED(node, deadlink));
4068 search->zonecut = node;
4069 search->zonecut_rdataset = dname_header;
4070 search->zonecut_sigrdataset = sigdname_header;
4071 search->need_cleanup = ISC_TRUE;
4072 result = DNS_R_PARTIALMATCH;
4074 result = DNS_R_CONTINUE;
/* locktype may have been upgraded above; unlock in the mode now held. */
4076 NODE_UNLOCK(lock, locktype);
/*
 * find_deepest_zonecut: starting at node, look for an NS rdataset (and
 * its RRSIG) and, while none is found and i is still positive, move on to
 * search->chain.levels[i].  The caller must hold the tree lock.
 *
 * result starts out as ISC_R_NOTFOUND.  When an NS header is found,
 * foundname (if not NULL) is built from the node name and the names of
 * the chain levels; a failure there is handled in a branch whose body is
 * not visible in this excerpt.  Otherwise result becomes
 * DNS_R_DELEGATION, the node is referenced when nodep is not NULL, and
 * the NS and RRSIG headers are bound to rdataset and sigrdataset.
 * Headers for which need_headerupdate() is true are passed to
 * update_header() under a write lock.
 *
 * Stale headers (TTL at or before search->now) get the same treatment as
 * in the other cache lookups in this file: cleanup needs a write lock and
 * depends on dns_rbtnode_refcurrent(node).
 */
4081 static inline isc_result_t
4082 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4083 dns_dbnode_t **nodep, dns_name_t *foundname,
4084 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4087 dns_rbtnode_t *level_node;
4088 rdatasetheader_t *header, *header_prev, *header_next;
4089 rdatasetheader_t *found, *foundsig;
4090 isc_result_t result = ISC_R_NOTFOUND;
4095 isc_rwlocktype_t locktype;
4098 * Caller must be holding the tree lock.
4101 rbtdb = search->rbtdb;
4102 i = search->chain.level_matches;
/*
 * Start as a reader; the lock is upgraded only if stale headers have to
 * be cleaned or header updates are needed.
 */
4105 locktype = isc_rwlocktype_read;
4106 lock = &rbtdb->node_locks[node->locknum].lock;
4107 NODE_LOCK(lock, locktype);
4110 * Look for NS and RRSIG NS rdatasets.
4115 for (header = node->data;
4117 header = header_next) {
4118 header_next = header->next;
4119 if (header->rdh_ttl <= search->now) {
4121 * This rdataset is stale. If no one else is
4122 * using the node, we can clean it up right
4123 * now, otherwise we mark it as stale, and
4124 * the node as dirty, so it will get cleaned
4127 if ((header->rdh_ttl <= search->now -
4129 (locktype == isc_rwlocktype_write ||
4130 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4132 * We update the node's status only
4133 * when we can get write access.
4135 locktype = isc_rwlocktype_write;
4137 if (dns_rbtnode_refcurrent(node)
4141 m = search->rbtdb->common.mctx;
4142 clean_stale_headers(
4145 if (header_prev != NULL)
4151 free_rdataset(rbtdb, m,
4154 header->attributes |=
4155 RDATASET_ATTR_STALE;
4157 header_prev = header;
4160 header_prev = header;
4161 } else if (EXISTS(header)) {
4163 * We've found an extant rdataset. See if
4164 * we're interested in it.
4166 if (header->type == dns_rdatatype_ns) {
4168 if (foundsig != NULL)
4170 } else if (header->type ==
4171 RBTDB_RDATATYPE_SIGNS) {
4176 header_prev = header;
4178 header_prev = header;
4181 if (found != NULL) {
4183 * If we have to set foundname, we do it before
4184 * anything else. If we were to set foundname after
4185 * we had set nodep or bound the rdataset, then we'd
4186 * have to undo that work if dns_name_concatenate()
4187 * failed. By setting foundname first, there's
4188 * nothing to undo if we have trouble.
4190 if (foundname != NULL) {
4191 dns_name_init(&name, NULL);
4192 dns_rbt_namefromnode(node, &name);
4193 result = dns_name_copy(&name, foundname, NULL);
4194 while (result == ISC_R_SUCCESS && i > 0) {
4196 level_node = search->chain.levels[i];
4197 dns_name_init(&name, NULL);
4198 dns_rbt_namefromnode(level_node,
4201 dns_name_concatenate(foundname,
4206 if (result != ISC_R_SUCCESS) {
4211 result = DNS_R_DELEGATION;
4212 if (nodep != NULL) {
4213 new_reference(search->rbtdb, node);
4216 bind_rdataset(search->rbtdb, node, found, search->now,
4218 if (foundsig != NULL)
4219 bind_rdataset(search->rbtdb, node, foundsig,
4220 search->now, sigrdataset);
4221 if (need_headerupdate(found, search->now) ||
4222 (foundsig != NULL &&
4223 need_headerupdate(foundsig, search->now))) {
4224 if (locktype != isc_rwlocktype_write) {
4225 NODE_UNLOCK(lock, locktype);
4226 NODE_LOCK(lock, isc_rwlocktype_write);
4227 locktype = isc_rwlocktype_write;
4230 if (need_headerupdate(found, search->now))
4231 update_header(search->rbtdb, found,
4233 if (foundsig != NULL &&
4234 need_headerupdate(foundsig, search->now)) {
4235 update_header(search->rbtdb, foundsig,
4242 NODE_UNLOCK(lock, locktype);
4244 if (found == NULL && i > 0) {
4246 node = search->chain.levels[i];
/*
 * find_coveringnsec: walk backwards through search->chain looking for a
 * node that carries an NSEC rdataset.
 *
 * Each pass fetches the current chain node with
 * dns_rbtnodechain_current(), locks it and scans its headers, applying
 * stale-header handling against now.  Headers that are NONEXISTENT or
 * whose base type is 0 do not count as data.  If an NSEC header is
 * found, foundname is set by concatenating name and origin, the NSEC
 * (and RRSIG NSEC, if present) are bound, the node is referenced and
 * result becomes DNS_R_COVERINGNSEC.  A node that has data but no NSEC
 * yields ISC_R_NOTFOUND; an empty node makes the walk step back with
 * dns_rbtnodechain_prev().  The loop repeats while the node was empty
 * and the last result was ISC_R_SUCCESS.
 */
4256 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4257 isc_stdtime_t now, dns_name_t *foundname,
4258 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4260 dns_rbtnode_t *node;
4261 rdatasetheader_t *header, *header_next, *header_prev;
4262 rdatasetheader_t *found, *foundsig;
4263 isc_boolean_t empty_node;
4264 isc_result_t result;
4265 dns_fixedname_t fname, forigin;
4266 dns_name_t *name, *origin;
4267 rbtdb_rdatatype_t matchtype, sigmatchtype;
4269 isc_rwlocktype_t locktype;
/* The two header types of interest: NSEC and the RRSIG covering NSEC. */
4271 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4272 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4273 dns_rdatatype_nsec);
4277 dns_fixedname_init(&fname);
4278 name = dns_fixedname_name(&fname);
4279 dns_fixedname_init(&forigin);
4280 origin = dns_fixedname_name(&forigin);
4281 result = dns_rbtnodechain_current(&search->chain, name,
4283 if (result != ISC_R_SUCCESS)
/* Start as a reader on the lock bucket of the current chain node. */
4285 locktype = isc_rwlocktype_read;
4286 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4287 NODE_LOCK(lock, locktype);
4290 empty_node = ISC_TRUE;
4292 for (header = node->data;
4294 header = header_next) {
4295 header_next = header->next;
4296 if (header->rdh_ttl <= now) {
4298 * This rdataset is stale. If no one else is
4299 * using the node, we can clean it up right
4300 * now, otherwise we mark it as stale, and the
4301 * node as dirty, so it will get cleaned up
4304 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4305 (locktype == isc_rwlocktype_write ||
4306 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4308 * We update the node's status only
4309 * when we can get write access.
4311 locktype = isc_rwlocktype_write;
4313 if (dns_rbtnode_refcurrent(node)
4317 m = search->rbtdb->common.mctx;
4318 clean_stale_headers(
4321 if (header_prev != NULL)
4325 node->data = header->next;
4326 free_rdataset(search->rbtdb, m,
4329 header->attributes |=
4330 RDATASET_ATTR_STALE;
4332 header_prev = header;
4335 header_prev = header;
4338 if (NONEXISTENT(header) ||
4339 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4340 header_prev = header;
4343 empty_node = ISC_FALSE;
4344 if (header->type == matchtype)
4346 else if (header->type == sigmatchtype)
4348 header_prev = header;
4350 if (found != NULL) {
4351 result = dns_name_concatenate(name, origin,
4353 if (result != ISC_R_SUCCESS)
4355 bind_rdataset(search->rbtdb, node, found,
4357 if (foundsig != NULL)
4358 bind_rdataset(search->rbtdb, node, foundsig,
4360 new_reference(search->rbtdb, node);
4362 result = DNS_R_COVERINGNSEC;
4363 } else if (!empty_node) {
4364 result = ISC_R_NOTFOUND;
4366 result = dns_rbtnodechain_prev(&search->chain, NULL,
4369 NODE_UNLOCK(lock, locktype);
4370 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * cache_find: look up name and type in the cache tree.
 *
 * Requirements visible here: db must be a valid rbtdb and version must
 * be NULL.
 *
 * Flow, as far as this excerpt shows:
 *   - The tree is read-locked and dns_rbt_findnode() is run with
 *     cache_zonecut_callback(), so that a DNAME met on the way down can
 *     be recorded in the search.
 *   - On DNS_R_PARTIALMATCH the function tries a covering NSEC (only
 *     with DNS_DBFIND_COVERINGNSEC), then the zonecut recorded by the
 *     callback via setup_delegation(), then find_deepest_zonecut().
 *   - On an exact match the node is locked and its headers are scanned,
 *     with stale-header handling against now.  The scan looks at the
 *     requested type, its RRSIG type, negative cache entries, NS and
 *     RRSIG NS, and, unless type is KEY or NSEC, CNAME and RRSIG CNAME.
 *   - Without a usable hit, an NS header at the node gives
 *     DNS_R_DELEGATION; otherwise the node lock is dropped and, per the
 *     comment at that point, the deepest zone cut is looked for.
 *   - With a hit, result is DNS_R_NCACHENXDOMAIN or DNS_R_NCACHENXRRSET
 *     for a negative entry, DNS_R_CNAME when a CNAME stood in for
 *     another type, and ISC_R_SUCCESS otherwise.
 *   - Headers for which need_headerupdate() is true get update_header()
 *     under a write lock, and a zonecut reference that was taken but
 *     not used is released at the end.
 */
4375 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4376 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4377 dns_dbnode_t **nodep, dns_name_t *foundname,
4378 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4380 dns_rbtnode_t *node = NULL;
4381 isc_result_t result;
4382 rbtdb_search_t search;
4383 isc_boolean_t cname_ok = ISC_TRUE;
4384 isc_boolean_t empty_node;
4386 isc_rwlocktype_t locktype;
4387 rdatasetheader_t *header, *header_prev, *header_next;
4388 rdatasetheader_t *found, *nsheader;
4389 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4390 rdatasetheader_t *update, *updatesig;
4391 rbtdb_rdatatype_t sigtype, negtype;
4395 search.rbtdb = (dns_rbtdb_t *)db;
4397 REQUIRE(VALID_RBTDB(search.rbtdb));
4398 REQUIRE(version == NULL);
/*
 * NOTE(review): presumably only executed when the caller passed
 * now == 0; the guarding line is not visible here -- TODO confirm.
 */
4401 isc_stdtime_get(&now);
4403 search.rbtversion = NULL;
4405 search.options = options;
4406 search.copy_name = ISC_FALSE;
4407 search.need_cleanup = ISC_FALSE;
4408 search.wild = ISC_FALSE;
4409 search.zonecut = NULL;
4410 dns_fixedname_init(&search.zonecut_name);
4411 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4416 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4419 * Search down from the root of the tree. If, while going down, we
4420 * encounter a callback node, cache_zonecut_callback() will search the
4421 * rdatasets at the zone cut for a DNAME rdataset.
4423 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4424 &search.chain, DNS_RBTFIND_EMPTYDATA,
4425 cache_zonecut_callback, &search);
4427 if (result == DNS_R_PARTIALMATCH) {
4428 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4429 result = find_coveringnsec(&search, nodep, now,
4430 foundname, rdataset,
4432 if (result == DNS_R_COVERINGNSEC)
4435 if (search.zonecut != NULL) {
4436 result = setup_delegation(&search, nodep, foundname,
4437 rdataset, sigrdataset);
4441 result = find_deepest_zonecut(&search, node, nodep,
4442 foundname, rdataset,
4446 } else if (result != ISC_R_SUCCESS)
4450 * Certain DNSSEC types are not subject to CNAME matching
4451 * (RFC4035, section 2.5 and RFC3007).
4453 * We don't check for RRSIG, because we don't store RRSIG records
4456 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4457 cname_ok = ISC_FALSE;
4460 * We now go looking for rdata...
4463 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4464 locktype = isc_rwlocktype_read;
4465 NODE_LOCK(lock, locktype);
4469 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4470 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4474 empty_node = ISC_TRUE;
4476 for (header = node->data; header != NULL; header = header_next) {
4477 header_next = header->next;
4478 if (header->rdh_ttl <= now) {
4480 * This rdataset is stale. If no one else is using the
4481 * node, we can clean it up right now, otherwise we
4482 * mark it as stale, and the node as dirty, so it will
4483 * get cleaned up later.
4485 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4486 (locktype == isc_rwlocktype_write ||
4487 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4489 * We update the node's status only when we
4490 * can get write access.
4492 locktype = isc_rwlocktype_write;
4494 if (dns_rbtnode_refcurrent(node) == 0) {
4497 mctx = search.rbtdb->common.mctx;
4498 clean_stale_headers(search.rbtdb, mctx,
4500 if (header_prev != NULL)
4504 node->data = header->next;
4505 free_rdataset(search.rbtdb, mctx,
4508 header->attributes |=
4509 RDATASET_ATTR_STALE;
4511 header_prev = header;
4514 header_prev = header;
4515 } else if (EXISTS(header)) {
4517 * We now know that there is at least one active
4518 * non-stale rdataset at this node.
4520 empty_node = ISC_FALSE;
4523 * If we found a type we were looking for, remember
4526 if (header->type == type ||
4527 (type == dns_rdatatype_any &&
4528 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4529 (cname_ok && header->type ==
4530 dns_rdatatype_cname)) {
4532 * We've found the answer.
4535 if (header->type == dns_rdatatype_cname &&
4539 * If we've already got the
4540 * CNAME RRSIG, use it.
4542 foundsig = cnamesig;
4544 } else if (header->type == sigtype) {
4546 * We've found the RRSIG rdataset for our
4547 * target type. Remember it.
4550 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4551 header->type == negtype) {
4553 * We've found a negative cache entry.
4556 } else if (header->type == dns_rdatatype_ns) {
4558 * Remember a NS rdataset even if we're
4559 * not specifically looking for it, because
4560 * we might need it later.
4563 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4565 * If we need the NS rdataset, we'll also
4566 * need its signature.
4569 } else if (cname_ok &&
4570 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4572 * If we get a CNAME match, we'll also need
4577 header_prev = header;
4579 header_prev = header;
4584 * We have an exact match for the name, but there are no
4585 * extant rdatasets. That means that this node doesn't
4586 * meaningfully exist, and that we really have a partial match.
4588 NODE_UNLOCK(lock, locktype);
4593 * If we didn't find what we were looking for...
/*
 * A hit is also rejected when its trust level is additional, glue or
 * pending and the caller did not pass DNS_DBFIND_ADDITIONALOK,
 * DNS_DBFIND_GLUEOK or DNS_DBFIND_PENDINGOK respectively.
 */
4595 if (found == NULL ||
4596 (DNS_TRUST_ADDITIONAL(found->trust) &&
4597 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4598 (found->trust == dns_trust_glue &&
4599 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4600 (DNS_TRUST_PENDING(found->trust) &&
4601 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4603 * If there is an NS rdataset at this node, then this is the
4606 if (nsheader != NULL) {
4607 if (nodep != NULL) {
4608 new_reference(search.rbtdb, node);
4609 INSIST(!ISC_LINK_LINKED(node, deadlink));
4612 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4614 if (need_headerupdate(nsheader, search.now))
4616 if (nssig != NULL) {
4617 bind_rdataset(search.rbtdb, node, nssig,
4618 search.now, sigrdataset);
4619 if (need_headerupdate(nssig, search.now))
4622 result = DNS_R_DELEGATION;
4627 * Go find the deepest zone cut.
4629 NODE_UNLOCK(lock, locktype);
4634 * We found what we were looking for, or we found a CNAME.
4637 if (nodep != NULL) {
4638 new_reference(search.rbtdb, node);
4639 INSIST(!ISC_LINK_LINKED(node, deadlink));
4643 if (NEGATIVE(found)) {
4645 * We found a negative cache entry.
4647 if (NXDOMAIN(found))
4648 result = DNS_R_NCACHENXDOMAIN;
4650 result = DNS_R_NCACHENXRRSET;
4651 } else if (type != found->type &&
4652 type != dns_rdatatype_any &&
4653 found->type == dns_rdatatype_cname) {
4655 * We weren't doing an ANY query and we found a CNAME instead
4656 * of the type we were looking for, so we need to indicate
4657 * that result to the caller.
4659 result = DNS_R_CNAME;
4662 * An ordinary successful query!
4664 result = ISC_R_SUCCESS;
4667 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4668 result == DNS_R_NCACHENXRRSET) {
4669 bind_rdataset(search.rbtdb, node, found, search.now,
4671 if (need_headerupdate(found, search.now))
4673 if (!NEGATIVE(found) && foundsig != NULL) {
4674 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4676 if (need_headerupdate(foundsig, search.now))
4677 updatesig = foundsig;
4682 if ((update != NULL || updatesig != NULL) &&
4683 locktype != isc_rwlocktype_write) {
4684 NODE_UNLOCK(lock, locktype);
4685 NODE_LOCK(lock, isc_rwlocktype_write);
4686 locktype = isc_rwlocktype_write;
4689 if (update != NULL && need_headerupdate(update, search.now))
4690 update_header(search.rbtdb, update, search.now);
4691 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4692 update_header(search.rbtdb, updatesig, search.now);
4694 NODE_UNLOCK(lock, locktype);
4697 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4700 * If we found a zonecut but aren't going to use it, we have to
4703 if (search.need_cleanup) {
4704 node = search.zonecut;
4705 INSIST(node != NULL);
4706 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4708 NODE_LOCK(lock, isc_rwlocktype_read);
/* Give back the reference cache_zonecut_callback() took on the zonecut. */
4709 decrement_reference(search.rbtdb, node, 0,
4710 isc_rwlocktype_read, isc_rwlocktype_none,
4712 NODE_UNLOCK(lock, isc_rwlocktype_read);
4715 dns_rbtnodechain_reset(&search.chain);
/*
 * cache_findzonecut: find the NS rdataset that applies to name in the
 * cache tree.
 *
 * DNS_DBFIND_NOEXACT in options is mapped to DNS_RBTFIND_NOEXACT for the
 * tree search, which runs without a node callback.  On
 * DNS_R_PARTIALMATCH the work is handed to find_deepest_zonecut().  On
 * an exact match the node is locked and its headers are scanned (with
 * stale-header handling against now) for NS and RRSIG NS.  A found NS is
 * bound to rdataset, its signature to sigrdataset when present, and
 * headers that need_headerupdate() flags are refreshed with
 * update_header() under a write lock.
 *
 * When no NS is found at the node the lock is dropped; the follow-up is
 * not visible in this excerpt (presumably find_deepest_zonecut() is
 * tried -- TODO confirm).
 *
 * The search must not be left needing cleanup (INSIST), and a result of
 * DNS_R_DELEGATION is turned into ISC_R_SUCCESS at the end.
 */
4721 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4722 isc_stdtime_t now, dns_dbnode_t **nodep,
4723 dns_name_t *foundname,
4724 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4726 dns_rbtnode_t *node = NULL;
4728 isc_result_t result;
4729 rbtdb_search_t search;
4730 rdatasetheader_t *header, *header_prev, *header_next;
4731 rdatasetheader_t *found, *foundsig;
4732 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4733 isc_rwlocktype_t locktype;
4735 search.rbtdb = (dns_rbtdb_t *)db;
4737 REQUIRE(VALID_RBTDB(search.rbtdb));
4740 isc_stdtime_get(&now);
4742 search.rbtversion = NULL;
4744 search.options = options;
4745 search.copy_name = ISC_FALSE;
4746 search.need_cleanup = ISC_FALSE;
4747 search.wild = ISC_FALSE;
4748 search.zonecut = NULL;
4749 dns_fixedname_init(&search.zonecut_name);
4750 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
/* Translate the db-level NOEXACT option into its rbt counterpart. */
4753 if ((options & DNS_DBFIND_NOEXACT) != 0)
4754 rbtoptions |= DNS_RBTFIND_NOEXACT;
4756 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4759 * Search down from the root of the tree.
4761 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4762 &search.chain, rbtoptions, NULL, &search);
4764 if (result == DNS_R_PARTIALMATCH) {
4766 result = find_deepest_zonecut(&search, node, nodep, foundname,
4767 rdataset, sigrdataset);
4769 } else if (result != ISC_R_SUCCESS)
4773 * We now go looking for an NS rdataset at the node.
4776 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4777 locktype = isc_rwlocktype_read;
4778 NODE_LOCK(lock, locktype);
4783 for (header = node->data; header != NULL; header = header_next) {
4784 header_next = header->next;
4785 if (header->rdh_ttl <= now) {
4787 * This rdataset is stale. If no one else is using the
4788 * node, we can clean it up right now, otherwise we
4789 * mark it as stale, and the node as dirty, so it will
4790 * get cleaned up later.
4792 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4793 (locktype == isc_rwlocktype_write ||
4794 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4796 * We update the node's status only when we
4797 * can get write access.
4799 locktype = isc_rwlocktype_write;
4801 if (dns_rbtnode_refcurrent(node) == 0) {
4804 mctx = search.rbtdb->common.mctx;
4805 clean_stale_headers(search.rbtdb, mctx,
4807 if (header_prev != NULL)
4811 node->data = header->next;
4812 free_rdataset(search.rbtdb, mctx,
4815 header->attributes |=
4816 RDATASET_ATTR_STALE;
4818 header_prev = header;
4821 header_prev = header;
4822 } else if (EXISTS(header)) {
4824 * If we found a type we were looking for, remember
4827 if (header->type == dns_rdatatype_ns) {
4829 * Remember a NS rdataset even if we're
4830 * not specifically looking for it, because
4831 * we might need it later.
4834 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4836 * If we need the NS rdataset, we'll also
4837 * need its signature.
4841 header_prev = header;
4843 header_prev = header;
4846 if (found == NULL) {
4848 * No NS records here.
4850 NODE_UNLOCK(lock, locktype);
4854 if (nodep != NULL) {
4855 new_reference(search.rbtdb, node);
4856 INSIST(!ISC_LINK_LINKED(node, deadlink));
4860 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4861 if (foundsig != NULL)
4862 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4865 if (need_headerupdate(found, search.now) ||
4866 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
4867 if (locktype != isc_rwlocktype_write) {
4868 NODE_UNLOCK(lock, locktype);
4869 NODE_LOCK(lock, isc_rwlocktype_write);
4870 locktype = isc_rwlocktype_write;
4873 if (need_headerupdate(found, search.now))
4874 update_header(search.rbtdb, found, search.now);
4875 if (foundsig != NULL &&
4876 need_headerupdate(foundsig, search.now)) {
4877 update_header(search.rbtdb, foundsig, search.now);
4881 NODE_UNLOCK(lock, locktype);
4884 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4886 INSIST(!search.need_cleanup);
4888 dns_rbtnodechain_reset(&search.chain);
4890 if (result == DNS_R_DELEGATION)
4891 result = ISC_R_SUCCESS;
/*
 * attachnode: add a reference to source.
 *
 * db must be a valid rbtdb, and targetp must be non-NULL and point at a
 * NULL pointer.  The reference count is bumped with
 * dns_rbtnode_refincrement() between NODE_STRONGLOCK() and
 * NODE_STRONGUNLOCK() on the lock bucket selected by node->locknum.
 * The store into *targetp is not visible in this excerpt -- TODO confirm.
 */
4897 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4898 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4899 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4902 REQUIRE(VALID_RBTDB(rbtdb));
4903 REQUIRE(targetp != NULL && *targetp == NULL);
4905 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4906 dns_rbtnode_refincrement(node, &refs);
4908 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * detachnode: drop the reference held through *targetp.
 *
 * db must be a valid rbtdb, and targetp must be non-NULL and point at a
 * non-NULL node.  decrement_reference() is called under the read lock of
 * the node lock bucket.  If it returns true and the bucket has no
 * references left while it is marked exiting, inactive is set.
 *
 * The later part takes the database write lock, sets want_free when
 * rbtdb->active is 0, and then logs the origin name and calls
 * free_rbtdb().  The conditions guarding that later part (presumably
 * inactive and want_free) are on lines not visible here -- TODO confirm.
 */
4914 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4915 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4916 dns_rbtnode_t *node;
4917 isc_boolean_t want_free = ISC_FALSE;
4918 isc_boolean_t inactive = ISC_FALSE;
4919 rbtdb_nodelock_t *nodelock;
4921 REQUIRE(VALID_RBTDB(rbtdb));
4922 REQUIRE(targetp != NULL && *targetp != NULL);
4924 node = (dns_rbtnode_t *)(*targetp);
4925 nodelock = &rbtdb->node_locks[node->locknum];
4927 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4929 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4930 isc_rwlocktype_none, ISC_FALSE)) {
4931 if (isc_refcount_current(&nodelock->references) == 0 &&
4932 nodelock->exiting) {
4933 inactive = ISC_TRUE;
4937 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4942 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4944 if (rbtdb->active == 0)
4945 want_free = ISC_TRUE;
4946 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4948 char buf[DNS_NAME_FORMATSIZE];
/*
 * Use the origin name in the log line when it is dynamic, otherwise a
 * fixed placeholder.
 */
4949 if (dns_name_dynamic(&rbtdb->common.origin))
4950 dns_name_format(&rbtdb->common.origin, buf,
4953 strcpy(buf, "<UNKNOWN>");
4954 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4955 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4956 "calling free_rbtdb(%s)", buf);
4957 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * expirenode: flag expired rdatasets on node as stale and, when the
 * memory context is over its limit, forcibly expire some nodes.
 *
 * When isc_mem_isovermem() is true, a random value decides whether the
 * node is force-expired (only if rbtnode->down is NULL and
 * val % 4 == 0), and log is set from isc_log_wouldlog() at debug
 * level 2.  Under the node write lock every header is visited:
 *   - TTL at or before now - RBTDB_VIRTUAL: flagged RDATASET_ATTR_STALE;
 *   - otherwise, with force_expire: TTL set to 0 and flagged stale
 *     unless RETAIN(header) holds, in which case a reprieve message may
 *     be logged;
 *   - otherwise, while over memory and logging: a saved message is
 *     logged.
 * The visible return value is ISC_R_SUCCESS.
 */
4963 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4964 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4965 dns_rbtnode_t *rbtnode = node;
4966 rdatasetheader_t *header;
4967 isc_boolean_t force_expire = ISC_FALSE;
4969 * These are the category and module used by the cache cleaner.
4971 isc_boolean_t log = ISC_FALSE;
4972 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4973 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4974 int level = ISC_LOG_DEBUG(2);
4975 char printname[DNS_NAME_FORMATSIZE];
4977 REQUIRE(VALID_RBTDB(rbtdb));
4980 * Caller must hold a tree lock.
4984 isc_stdtime_get(&now);
/*
 * Under memory pressure, pick nodes for forced expiry at random: the
 * node qualifies when it has no down pointer and val % 4 is 0.
 */
4986 if (isc_mem_isovermem(rbtdb->common.mctx)) {
4989 isc_random_get(&val);
4991 * XXXDCL Could stand to have a better policy, like LRU.
4993 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4996 * Note that 'log' can be true IFF overmem is also true.
4997 * overmem can currently only be true for cache
4998 * databases -- hence all of the "overmem cache" log strings.
5000 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5002 isc_log_write(dns_lctx, category, module, level,
5003 "overmem cache: %s %s",
5004 force_expire ? "FORCE" : "check",
5005 dns_rbt_formatnodename(rbtnode,
5007 sizeof(printname)));
5011 * We may not need write access, but this code path is not performance
5012 * sensitive, so it should be okay to always lock as a writer.
5014 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5015 isc_rwlocktype_write);
5017 for (header = rbtnode->data; header != NULL; header = header->next)
5018 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5020 * We don't check if refcurrent(rbtnode) == 0 and try
5021 * to free like we do in cache_find(), because
5022 * refcurrent(rbtnode) must be non-zero. This is so
5023 * because 'node' is an argument to the function.
5025 header->attributes |= RDATASET_ATTR_STALE;
5028 isc_log_write(dns_lctx, category, module,
5029 level, "overmem cache: stale %s",
5031 } else if (force_expire) {
5032 if (! RETAIN(header)) {
5033 set_ttl(rbtdb, header, 0);
5034 header->attributes |= RDATASET_ATTR_STALE;
5037 isc_log_write(dns_lctx, category, module,
5038 level, "overmem cache: "
5039 "reprieve by RETAIN() %s",
5042 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5043 isc_log_write(dns_lctx, category, module, level,
5044 "overmem cache: saved %s", printname);
5046 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5047 isc_rwlocktype_write);
5049 return (ISC_R_SUCCESS);
/*
 * overmem: over-memory notification hook for the database.  The only
 * visible content of the body is the comment below, which states that
 * the callback is intentionally empty.
 */
5053 overmem(dns_db_t *db, isc_boolean_t overmem) {
5054 /* This is an empty callback. See adb.c:water() */
/*
 * printnode: debugging dump of node to out.
 *
 * Under the node read lock, prints the node address, its current
 * reference count and (per the format string) its lock number, followed
 * by the headers reachable from rbtnode->data.  The outer loop follows
 * the next pointers; the inner loop follows the down pointers.  For each
 * header the format strings show type, serial, ttl, trust and attributes
 * being printed.  A node without data prints (empty).
 */
5063 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5064 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5065 dns_rbtnode_t *rbtnode = node;
5066 isc_boolean_t first;
5068 REQUIRE(VALID_RBTDB(rbtdb));
5070 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5071 isc_rwlocktype_read);
5073 fprintf(out, "node %p, %u references, locknum = %u\n",
5074 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5076 if (rbtnode->data != NULL) {
5077 rdatasetheader_t *current, *top_next;
/* top_next is saved first because current is advanced along down below. */
5079 for (current = rbtnode->data; current != NULL;
5080 current = top_next) {
5081 top_next = current->next;
5083 fprintf(out, "\ttype %u", current->type);
5089 "\tserial = %lu, ttl = %u, "
5090 "trust = %u, attributes = %u, "
5092 (unsigned long)current->serial,
5095 current->attributes,
5097 current = current->down;
5098 } while (current != NULL);
5101 fprintf(out, "(empty)\n");
5103 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5104 isc_rwlocktype_read);
/*
 * createiterator: allocate and initialise a database iterator for db.
 *
 * The iterator is allocated from rbtdb->common.mctx; ISC_R_NOMEMORY is
 * returned if that fails.  It attaches to db, starts out paused with no
 * tree lock held, a result of ISC_R_SUCCESS and no current node, and has
 * both node chains initialised.  options supplies the relative-names
 * setting (DNS_DB_RELATIVENAMES) and the DNS_DB_NSEC3ONLY and
 * DNS_DB_NONSEC3 flags, which are recorded in the iterator.  With
 * nsec3only set the current chain is nsec3chain, otherwise chain.  On
 * success the iterator is stored in *iteratorp and ISC_R_SUCCESS is
 * returned.
 */
5108 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5110 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5111 rbtdb_dbiterator_t *rbtdbiter;
5113 REQUIRE(VALID_RBTDB(rbtdb));
5115 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5116 if (rbtdbiter == NULL)
5117 return (ISC_R_NOMEMORY);
5119 rbtdbiter->common.methods = &dbiterator_methods;
/* common.db must be NULL before dns_db_attach() fills it in. */
5120 rbtdbiter->common.db = NULL;
5121 dns_db_attach(db, &rbtdbiter->common.db);
5122 rbtdbiter->common.relative_names =
5123 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5124 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5125 rbtdbiter->common.cleaning = ISC_FALSE;
5126 rbtdbiter->paused = ISC_TRUE;
5127 rbtdbiter->tree_locked = isc_rwlocktype_none;
5128 rbtdbiter->result = ISC_R_SUCCESS;
5129 dns_fixedname_init(&rbtdbiter->name);
5130 dns_fixedname_init(&rbtdbiter->origin);
5131 rbtdbiter->node = NULL;
5132 rbtdbiter->delete = 0;
5133 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5134 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5135 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5136 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5137 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
/* Pick the chain the iterator starts out on. */
5138 if (rbtdbiter->nsec3only)
5139 rbtdbiter->current = &rbtdbiter->nsec3chain;
5141 rbtdbiter->current = &rbtdbiter->chain;
5143 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5145 return (ISC_R_SUCCESS);
/*
 * zone_findrdataset: look for the rdataset of the given type and covers
 * on node, as seen by version.
 *
 * type must not be dns_rdatatype_any, and a supplied version must belong
 * to this database (INSIST).  If version is NULL the current version is
 * opened here, close_version is set and closeversion() is called later.
 * Under the node read lock each header is examined by following its down
 * pointers, comparing header->serial against the version serial and
 * checking NONEXISTENT().  A surviving header is compared with matchtype
 * and with the RRSIG type built from type; the assignments that remember
 * the matches are not visible in this excerpt.  A found header is bound
 * to rdataset and foundsig (if any) to sigrdataset.  The visible return
 * values are ISC_R_NOTFOUND and ISC_R_SUCCESS.
 */
5149 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5150 dns_rdatatype_t type, dns_rdatatype_t covers,
5151 isc_stdtime_t now, dns_rdataset_t *rdataset,
5152 dns_rdataset_t *sigrdataset)
5154 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5155 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5156 rdatasetheader_t *header, *header_next, *found, *foundsig;
5157 rbtdb_serial_t serial;
5158 rbtdb_version_t *rbtversion = version;
5159 isc_boolean_t close_version = ISC_FALSE;
5160 rbtdb_rdatatype_t matchtype, sigmatchtype;
5162 REQUIRE(VALID_RBTDB(rbtdb));
5163 REQUIRE(type != dns_rdatatype_any);
5164 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
/* No version supplied: open the current one and remember to close it. */
5166 if (rbtversion == NULL) {
5167 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5168 close_version = ISC_TRUE;
5170 serial = rbtversion->serial;
5173 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5174 isc_rwlocktype_read);
5178 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5180 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5184 for (header = rbtnode->data; header != NULL; header = header_next) {
5185 header_next = header->next;
5187 if (header->serial <= serial &&
5190 * Is this a "this rdataset doesn't
5193 if (NONEXISTENT(header))
5197 header = header->down;
5198 } while (header != NULL);
5199 if (header != NULL) {
5201 * We have an active, extant rdataset. If it's a
5202 * type we're looking for, remember it.
5204 if (header->type == matchtype) {
5206 if (foundsig != NULL)
5208 } else if (header->type == sigmatchtype) {
5215 if (found != NULL) {
5216 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5217 if (foundsig != NULL)
5218 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5222 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5223 isc_rwlocktype_read);
5226 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5230 return (ISC_R_NOTFOUND);
5232 return (ISC_R_SUCCESS);
/*
 * cache_findrdataset: look for the rdataset of the given type and covers
 * on a cache node.
 *
 * type must not be dns_rdatatype_any.  result starts as ISC_R_SUCCESS.
 * With the node lock held (read at first, upgraded with
 * NODE_TRYUPGRADE() when possible), every header is visited.  Headers
 * whose TTL is at or before now are stale; they are flagged
 * RDATASET_ATTR_STALE when the TTL is also at or before
 * now - RBTDB_VIRTUAL and write access was obtained.  They are never
 * freed here, for the reason given in the comment inside the loop.
 * Existing headers are compared with matchtype, with the negative cache
 * types (RBTDB_RDATATYPE_NCACHEANY and negtype) and with the RRSIG type.
 *
 * A found header is bound to rdataset; foundsig is bound to sigrdataset
 * only when the hit is not negative.  ISC_R_NOTFOUND is returned on a
 * path whose condition is not visible here (presumably found == NULL).
 * For a negative hit result becomes DNS_R_NCACHENXDOMAIN or
 * DNS_R_NCACHENXRRSET.
 */
5236 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5237 dns_rdatatype_t type, dns_rdatatype_t covers,
5238 isc_stdtime_t now, dns_rdataset_t *rdataset,
5239 dns_rdataset_t *sigrdataset)
5241 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5242 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5243 rdatasetheader_t *header, *header_next, *found, *foundsig;
5244 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5245 isc_result_t result;
5247 isc_rwlocktype_t locktype;
5249 REQUIRE(VALID_RBTDB(rbtdb));
5250 REQUIRE(type != dns_rdatatype_any);
5254 result = ISC_R_SUCCESS;
/*
 * NOTE(review): presumably only executed when the caller passed
 * now == 0; the guarding line is not visible here -- TODO confirm.
 */
5257 isc_stdtime_get(&now);
5259 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5260 locktype = isc_rwlocktype_read;
5261 NODE_LOCK(lock, locktype);
5265 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5266 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5268 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5272 for (header = rbtnode->data; header != NULL; header = header_next) {
5273 header_next = header->next;
5274 if (header->rdh_ttl <= now) {
5275 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5276 (locktype == isc_rwlocktype_write ||
5277 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5279 * We update the node's status only when we
5280 * can get write access.
5282 locktype = isc_rwlocktype_write;
5285 * We don't check if refcurrent(rbtnode) == 0
5286 * and try to free like we do in cache_find(),
5287 * because refcurrent(rbtnode) must be
5288 * non-zero. This is so because 'node' is an
5289 * argument to the function.
5291 header->attributes |= RDATASET_ATTR_STALE;
5294 } else if (EXISTS(header)) {
5295 if (header->type == matchtype)
5297 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5298 header->type == negtype)
5300 else if (header->type == sigmatchtype)
5304 if (found != NULL) {
5305 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5306 if (!NEGATIVE(found) && foundsig != NULL)
5307 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5311 NODE_UNLOCK(lock, locktype);
5314 return (ISC_R_NOTFOUND);
5316 if (NEGATIVE(found)) {
5318 * We found a negative cache entry.
5320 if (NXDOMAIN(found))
5321 result = DNS_R_NCACHENXDOMAIN;
5323 result = DNS_R_NCACHENXRRSET;
/*
 * allrdatasets(): allocate an rbtdb_rdatasetiter_t from the database's
 * memory context, fill in its common fields (magic, methods, db, node,
 * version, now), take a reference on the node, and hand the iterator
 * back through '*iteratorp'.
 *
 * Returns ISC_R_NOMEMORY when the allocation fails, ISC_R_SUCCESS
 * otherwise.
 *
 * NOTE(review): lines are missing from this section (return type,
 * braces, the declaration of 'refs', part of the version handling);
 * only the visible statements are described.
 */
5330 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5331 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5333 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5334 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5335 rbtdb_version_t *rbtversion = version;
5336 rbtdb_rdatasetiter_t *iterator;
5339 REQUIRE(VALID_RBTDB(rbtdb));
5341 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5342 if (iterator == NULL)
5343 return (ISC_R_NOMEMORY);
/*
 * Database without the DNS_DBATTR_CACHE attribute: a version is needed.
 * The version must belong to this database and its reference count is
 * incremented.
 */
5345 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5347 if (rbtversion == NULL)
5349 (dns_dbversion_t **) (void *)(&rbtversion));
5353 INSIST(rbtversion->rbtdb == rbtdb);
5355 isc_refcount_increment(&rbtversion->references,
5361 isc_stdtime_get(&now);
5365 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5366 iterator->common.methods = &rdatasetiter_methods;
5367 iterator->common.db = db;
5368 iterator->common.node = node;
5369 iterator->common.version = (dns_dbversion_t *)rbtversion;
5370 iterator->common.now = now;
/* The node reference is taken while holding the node's strong lock. */
5372 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5374 dns_rbtnode_refincrement(rbtnode, &refs);
5377 iterator->current = NULL;
5379 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5381 *iteratorp = (dns_rdatasetiter_t *)iterator;
5383 return (ISC_R_SUCCESS);
/*
 * cname_and_other_data(): scan the header list of 'node' for a CNAME
 * rdataset and for "other data" visible at 'serial'.
 *
 * For each top-level header the 'down' chain is walked looking for a
 * header with header->serial <= serial; NONEXISTENT headers are checked
 * for in both branches.  Types KEY, SIG, NSEC and RRSIG are not counted
 * as "other data".
 *
 * NOTE(review): lines are missing from this section (braces, the
 * assignments to 'cname', the return statements); only the visible
 * statements are described.
 */
5386 static isc_boolean_t
5387 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5388 rdatasetheader_t *header, *header_next;
5389 isc_boolean_t cname, other_data;
5390 dns_rdatatype_t rdtype;
5393 * The caller must hold the node lock.
5397 * Look for CNAME and "other data" rdatasets active in our version.
5400 other_data = ISC_FALSE;
/*
 * 'header_next' is saved first because 'header' is advanced down the
 * version chain inside the loop body.
 */
5401 for (header = node->data; header != NULL; header = header_next) {
5402 header_next = header->next;
5403 if (header->type == dns_rdatatype_cname) {
5405 * Look for an active extant CNAME.
5408 if (header->serial <= serial &&
5411 * Is this a "this rdataset doesn't
5414 if (NONEXISTENT(header))
5418 header = header->down;
5419 } while (header != NULL);
5424 * Look for active extant "other data".
5426 * "Other data" is any rdataset whose type is not
5427 * KEY, NSEC, SIG or RRSIG.
5429 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5430 if (rdtype != dns_rdatatype_key &&
5431 rdtype != dns_rdatatype_sig &&
5432 rdtype != dns_rdatatype_nsec &&
5433 rdtype != dns_rdatatype_rrsig) {
5435 * Is it active and extant?
5438 if (header->serial <= serial &&
5441 * Is this a "this rdataset
5442 * doesn't exist" record?
5444 if (NONEXISTENT(header))
5448 header = header->down;
5449 } while (header != NULL);
5451 other_data = ISC_TRUE;
5456 if (cname && other_data)
/*
 * resign_insert(): insert 'newheader' into rbtdb->heaps[idx].
 *
 * Insists that the database is not a cache, that the header's
 * heap_index is 0, and that the header is not linked on its 'link'
 * list.
 *
 * NOTE(review): the return type, closing brace and return statement
 * are not visible in this section.
 */
5463 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5464 isc_result_t result;
5466 INSIST(!IS_CACHE(rbtdb));
5467 INSIST(newheader->heap_index == 0);
5468 INSIST(!ISC_LINK_LINKED(newheader, link));
5470 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * add(): link 'newheader' into the header list of 'rbtnode'.
 *
 * Parameters, as used by the visible statements:
 *   rbtversion    - version being modified; NULL is the case the
 *                   comments below describe as a cache database add.
 *   newheader     - header to add; it is freed with free_rdataset() on
 *                   the early-return paths visible below.
 *   options       - DNS_DBADD_MERGE requires a non-NULL 'rbtversion';
 *                   DNS_DBADD_FORCE raises the trust used for
 *                   comparisons to dns_trust_ultimate;
 *                   DNS_DBADD_EXACT / DNS_DBADD_EXACTTTL affect merging.
 *   loading       - when false and 'rbtversion' is non-NULL a changed
 *                   record is created with add_changed().
 *   addedrdataset - if non-NULL, bound to the header that ends up
 *                   representing the data.
 *   now           - time compared against header->rdh_ttl.
 *
 * Visible result codes: ISC_R_SUCCESS, ISC_R_NOMEMORY,
 * DNS_R_UNCHANGED and DNS_R_CNAMEANDOTHER.
 *
 * NOTE(review): many lines are missing from this section (return type,
 * braces, else branches, break statements, several declarations such
 * as 'trust' and 'idx').  The comments added below describe only the
 * statements that are visible; control-flow nesting cannot be confirmed
 * from here.
 */
5475 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5476 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5477 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5479 rbtdb_changed_t *changed = NULL;
5480 rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5481 unsigned char *merged;
5482 isc_result_t result;
5483 isc_boolean_t header_nx;
5484 isc_boolean_t newheader_nx;
5485 isc_boolean_t merge;
5486 dns_rdatatype_t rdtype, covers;
5487 rbtdb_rdatatype_t negtype, sigtype;
5492 * Add an rdatasetheader_t to a node.
5496 * Caller must be holding the node lock.
5499 if ((options & DNS_DBADD_MERGE) != 0) {
5500 REQUIRE(rbtversion != NULL);
5505 if ((options & DNS_DBADD_FORCE) != 0)
5506 trust = dns_trust_ultimate;
5508 trust = newheader->trust;
5510 if (rbtversion != NULL && !loading) {
5512 * We always add a changed record, even if no changes end up
5513 * being made to this node, because it's harmless and
5514 * simplifies the code.
5516 changed = add_changed(rbtdb, rbtversion, rbtnode);
5517 if (changed == NULL) {
5518 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5519 return (ISC_R_NOMEMORY);
5523 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5524 topheader_prev = NULL;
/*
 * No version and the new header is not a NONEXISTENT marker: split
 * the new header's encoded type into base type and covered type, and
 * handle interaction with negative cache entries.
 */
5527 if (rbtversion == NULL && !newheader_nx) {
5528 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5529 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5530 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
5531 if (NEGATIVE(newheader)) {
5533 * We're adding a negative cache entry.
5535 for (topheader = rbtnode->data;
5537 topheader = topheader->next) {
5539 * If we're adding an negative cache entry
5540 * which covers all types (NXDOMAIN,
5541 * NODATA(QTYPE=ANY)).
5543 * We make all other data stale so that the
5544 * only rdataset that can be found at this
5545 * node is the negative cache entry.
5547 * Otherwise look for any RRSIGs of the
5548 * given type so they can be marked stale
5551 if (covers == dns_rdatatype_any) {
5552 set_ttl(rbtdb, topheader, 0);
5553 topheader->attributes |=
5554 RDATASET_ATTR_STALE;
5556 } else if (topheader->type == sigtype)
5557 sigheader = topheader;
5559 if (covers == dns_rdatatype_any)
5561 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5564 * We're adding something that isn't a
5565 * negative cache entry. Look for an extant
5566 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5567 * cache entry. If we're adding an RRSIG, also
5568 * check for an extant non-stale NODATA ncache
5569 * entry which covers the same type as the RRSIG.
5571 for (topheader = rbtnode->data;
5573 topheader = topheader->next) {
5574 if ((topheader->type ==
5575 RBTDB_RDATATYPE_NCACHEANY) ||
5576 (newheader->type == sigtype &&
5578 RBTDB_RDATATYPE_VALUE(0, covers))) {
5582 if (topheader != NULL && EXISTS(topheader) &&
5583 topheader->rdh_ttl > now) {
5587 if (trust < topheader->trust) {
5589 * The NXDOMAIN/NODATA(QTYPE=ANY)
5592 free_rdataset(rbtdb,
5595 if (addedrdataset != NULL)
5596 bind_rdataset(rbtdb, rbtnode,
5599 return (DNS_R_UNCHANGED);
5602 * The new rdataset is better. Expire the
5605 set_ttl(rbtdb, topheader, 0);
5606 topheader->attributes |= RDATASET_ATTR_STALE;
5611 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
/*
 * Find the top-level header whose type equals the new header's type
 * or 'negtype', remembering its predecessor for relinking.
 */
5615 for (topheader = rbtnode->data;
5617 topheader = topheader->next) {
5618 if (topheader->type == newheader->type ||
5619 topheader->type == negtype)
5621 topheader_prev = topheader;
5626 * If header isn't NULL, we've found the right type. There may be
5627 * IGNORE rdatasets between the top of the chain and the first real
5628 * data. We skip over them.
5631 while (header != NULL && IGNORE(header))
5632 header = header->down;
5633 if (header != NULL) {
5634 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5637 * Deleting an already non-existent rdataset has no effect.
5639 if (header_nx && newheader_nx) {
5640 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5641 return (DNS_R_UNCHANGED);
5645 * Trying to add an rdataset with lower trust to a cache DB
5646 * has no effect, provided that the cache data isn't stale.
5648 if (rbtversion == NULL && trust < header->trust &&
5649 (header->rdh_ttl > now || header_nx)) {
5650 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5651 if (addedrdataset != NULL)
5652 bind_rdataset(rbtdb, rbtnode, header, now,
5654 return (DNS_R_UNCHANGED);
5658 * Don't merge if a nonexistent rdataset is involved.
5660 if (merge && (header_nx || newheader_nx))
5664 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5665 * that is the union of 'newheader' and 'header'.
5668 unsigned int flags = 0;
5669 INSIST(rbtversion->serial >= header->serial);
5671 result = ISC_R_SUCCESS;
5673 if ((options & DNS_DBADD_EXACT) != 0)
5674 flags |= DNS_RDATASLAB_EXACT;
5675 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5676 newheader->rdh_ttl != header->rdh_ttl)
5677 result = DNS_R_NOTEXACT;
5678 else if (newheader->rdh_ttl != header->rdh_ttl)
5679 flags |= DNS_RDATASLAB_FORCE;
5680 if (result == ISC_R_SUCCESS)
5681 result = dns_rdataslab_merge(
5682 (unsigned char *)header,
5683 (unsigned char *)newheader,
5684 (unsigned int)(sizeof(*newheader)),
5686 rbtdb->common.rdclass,
5687 (dns_rdatatype_t)header->type,
5689 if (result == ISC_R_SUCCESS) {
5691 * If 'header' has the same serial number as
5692 * we do, we could clean it up now if we knew
5693 * that our caller had no references to it.
5694 * We don't know this, however, so we leave it
5695 * alone. It will get cleaned up when
5696 * clean_zone_node() runs.
5698 free_rdataset(rbtdb, rbtdb->common.mctx,
5700 newheader = (rdatasetheader_t *)merged;
5701 if (loading && RESIGN(newheader) &&
5703 header->resign < newheader->resign)
5704 newheader->resign = header->resign;
5706 free_rdataset(rbtdb, rbtdb->common.mctx,
5712 * Don't replace existing NS, A and AAAA RRsets
5713 * in the cache if they are already exist. This
5714 * prevents named being locked to old servers.
5715 * Don't lower trust of existing record if the
5718 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5719 header->type == dns_rdatatype_ns &&
5720 !header_nx && !newheader_nx &&
5721 header->trust >= newheader->trust &&
5722 dns_rdataslab_equalx((unsigned char *)header,
5723 (unsigned char *)newheader,
5724 (unsigned int)(sizeof(*newheader)),
5725 rbtdb->common.rdclass,
5726 (dns_rdatatype_t)header->type)) {
5728 * Honour the new ttl if it is less than the
5731 if (header->rdh_ttl > newheader->rdh_ttl)
5732 set_ttl(rbtdb, header, newheader->rdh_ttl);
5733 if (header->noqname == NULL &&
5734 newheader->noqname != NULL) {
5735 header->noqname = newheader->noqname;
5736 newheader->noqname = NULL;
5738 if (header->closest == NULL &&
5739 newheader->closest != NULL) {
5740 header->closest = newheader->closest;
5741 newheader->closest = NULL;
5743 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5744 if (addedrdataset != NULL)
5745 bind_rdataset(rbtdb, rbtnode, header, now,
5747 return (ISC_R_SUCCESS);
5750 * If we have will be replacing a NS RRset force its TTL
5751 * to be no more than the current NS RRset's TTL. This
5752 * ensures the delegations that are withdrawn are honoured.
5754 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5755 header->type == dns_rdatatype_ns &&
5756 !header_nx && !newheader_nx &&
5757 header->trust <= newheader->trust) {
5758 if (newheader->rdh_ttl > header->rdh_ttl) {
5759 newheader->rdh_ttl = header->rdh_ttl;
/*
 * Same keep-the-existing-header treatment as the NS case above, applied
 * to unexpired A, AAAA, DS and RBTDB_RDATATYPE_SIGDDS headers whose
 * slab compares equal to the new one.
 */
5762 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5763 (header->type == dns_rdatatype_a ||
5764 header->type == dns_rdatatype_aaaa ||
5765 header->type == dns_rdatatype_ds ||
5766 header->type == RBTDB_RDATATYPE_SIGDDS) &&
5767 !header_nx && !newheader_nx &&
5768 header->trust >= newheader->trust &&
5769 dns_rdataslab_equal((unsigned char *)header,
5770 (unsigned char *)newheader,
5771 (unsigned int)(sizeof(*newheader)))) {
5773 * Honour the new ttl if it is less than the
5776 if (header->rdh_ttl > newheader->rdh_ttl)
5777 set_ttl(rbtdb, header, newheader->rdh_ttl);
5778 if (header->noqname == NULL &&
5779 newheader->noqname != NULL) {
5780 header->noqname = newheader->noqname;
5781 newheader->noqname = NULL;
5783 if (header->closest == NULL &&
5784 newheader->closest != NULL) {
5785 header->closest = newheader->closest;
5786 newheader->closest = NULL;
5788 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5789 if (addedrdataset != NULL)
5790 bind_rdataset(rbtdb, rbtnode, header, now,
5792 return (ISC_R_SUCCESS);
/*
 * Splice 'newheader' into the node's top-level list at topheader's
 * position (after 'topheader_prev', or at the head of rbtnode->data).
 */
5794 INSIST(rbtversion == NULL ||
5795 rbtversion->serial >= topheader->serial);
5796 if (topheader_prev != NULL)
5797 topheader_prev->next = newheader;
5799 rbtnode->data = newheader;
5800 newheader->next = topheader->next;
5803 * There are no other references to 'header' when
5804 * loading, so we MAY clean up 'header' now.
5805 * Since we don't generate changed records when
5806 * loading, we MUST clean up 'header' now.
5808 newheader->down = NULL;
5809 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5811 newheader->down = topheader;
5812 topheader->next = newheader;
5814 if (changed != NULL)
5815 changed->dirty = ISC_TRUE;
/*
 * Without a version the superseded header (and any RRSIG header
 * remembered in 'sigheader') gets TTL 0 and is marked stale.
 */
5816 if (rbtversion == NULL) {
5817 set_ttl(rbtdb, header, 0);
5818 header->attributes |= RDATASET_ATTR_STALE;
5819 if (sigheader != NULL) {
5820 set_ttl(rbtdb, sigheader, 0);
5821 sigheader->attributes |=
5822 RDATASET_ATTR_STALE;
/*
 * Register the new header under its node's lock bucket index: a cache
 * puts it on rbtdb->rdatasets[idx] and rbtdb->heaps[idx]; otherwise a
 * RESIGN header is handed to resign_insert().
 */
5825 idx = newheader->node->locknum;
5826 if (IS_CACHE(rbtdb)) {
5827 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5830 * XXXMLG We don't check the return value
5831 * here. If it fails, we will not do TTL
5832 * based expiry on this node. However, we
5833 * will do it on the LRU side, so memory
5834 * will not leak... for long.
5836 isc_heap_insert(rbtdb->heaps[idx], newheader);
5837 } else if (RESIGN(newheader))
5838 resign_insert(rbtdb, idx, newheader);
5842 * No non-IGNORED rdatasets of the given type exist at
5847 * If we're trying to delete the type, don't bother.
5850 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5851 return (DNS_R_UNCHANGED);
5854 if (topheader != NULL) {
5856 * We have an list of rdatasets of the given type,
5857 * but they're all marked IGNORE. We simply insert
5858 * the new rdataset at the head of the list.
5860 * Ignored rdatasets cannot occur during loading, so
5864 INSIST(rbtversion == NULL ||
5865 rbtversion->serial >= topheader->serial);
5866 if (topheader_prev != NULL)
5867 topheader_prev->next = newheader;
5869 rbtnode->data = newheader;
5870 newheader->next = topheader->next;
5871 newheader->down = topheader;
5872 topheader->next = newheader;
5874 if (changed != NULL)
5875 changed->dirty = ISC_TRUE;
5878 * No rdatasets of the given type exist at the node.
5880 newheader->next = rbtnode->data;
5881 newheader->down = NULL;
5882 rbtnode->data = newheader;
/*
 * Same registration as above; note the isc_heap_insert() result is
 * not checked here either.
 */
5884 idx = newheader->node->locknum;
5885 if (IS_CACHE(rbtdb)) {
5886 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5888 isc_heap_insert(rbtdb->heaps[idx], newheader);
5889 } else if (RESIGN(newheader)) {
5890 resign_insert(rbtdb, idx, newheader);
5895 * Check if the node now contains CNAME and other data.
5897 if (rbtversion != NULL &&
5898 cname_and_other_data(rbtnode, rbtversion->serial))
5899 return (DNS_R_CNAMEANDOTHER);
5901 if (addedrdataset != NULL)
5902 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5904 return (ISC_R_SUCCESS);
/*
 * delegating_type(): classify 'type' as a delegating type for this
 * database.
 *
 * For a cache only DNAME is tested.  Otherwise the test is DNAME, or
 * NS when 'node' is not the database's origin node or the database is
 * a stub.
 *
 * NOTE(review): the braces and return statements are not visible in
 * this section, so the returned values are not shown here.
 */
5907 static inline isc_boolean_t
5908 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5909 rbtdb_rdatatype_t type)
5911 if (IS_CACHE(rbtdb)) {
5912 if (type == dns_rdatatype_dname)
5916 } else if (type == dns_rdatatype_dname ||
5917 (type == dns_rdatatype_ns &&
5918 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * addnoqname(): copy the data returned by dns_rdataset_getnoqname()
 * for 'rdataset' into a newly allocated struct noqname and attach it
 * to newheader->noqname.
 *
 * The name is duplicated with dns_name_dup() and the two rdatasets
 * ('neg', 'negsig') are converted with dns_rdataslab_fromrdataset().
 * dns_rdataset_getnoqname() failing is treated as fatal
 * (RUNTIME_CHECK).
 *
 * Returns ISC_R_SUCCESS once the structure is attached; ISC_R_NOMEMORY
 * is the visible failure value for the allocation.
 *
 * NOTE(review): lines are missing from this section (braces, the
 * declarations of 'name' and 'r', the statements following each failed
 * check, and the final return); the trailing disassociate /
 * free_noqname() calls look like the shared failure path - confirm
 * against the full source.
 */
5923 static inline isc_result_t
5924 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5925 dns_rdataset_t *rdataset)
5927 struct noqname *noqname;
5928 isc_mem_t *mctx = rbtdb->common.mctx;
5930 dns_rdataset_t neg, negsig;
5931 isc_result_t result;
5934 dns_name_init(&name, NULL);
5935 dns_rdataset_init(&neg);
5936 dns_rdataset_init(&negsig);
5938 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5939 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5941 noqname = isc_mem_get(mctx, sizeof(*noqname));
5942 if (noqname == NULL) {
5943 result = ISC_R_NOMEMORY;
/* Start from an empty structure before filling in each member. */
5946 dns_name_init(&noqname->name, NULL);
5947 noqname->neg = NULL;
5948 noqname->negsig = NULL;
5949 noqname->type = neg.type;
5950 result = dns_name_dup(&name, mctx, &noqname->name);
5951 if (result != ISC_R_SUCCESS)
5953 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5954 if (result != ISC_R_SUCCESS)
5956 noqname->neg = r.base;
5957 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5958 if (result != ISC_R_SUCCESS)
5960 noqname->negsig = r.base;
/* Success: release the temporary rdatasets and attach the copy. */
5961 dns_rdataset_disassociate(&neg);
5962 dns_rdataset_disassociate(&negsig);
5963 newheader->noqname = noqname;
5964 return (ISC_R_SUCCESS);
5967 dns_rdataset_disassociate(&neg);
5968 dns_rdataset_disassociate(&negsig);
5969 free_noqname(mctx, &noqname);
/*
 * addclosest(): copy the data returned by dns_rdataset_getclosest()
 * for 'rdataset' into a newly allocated struct noqname and attach it
 * to newheader->closest.
 *
 * The visible statements mirror addnoqname(): the name is duplicated
 * with dns_name_dup() and 'neg' / 'negsig' are converted with
 * dns_rdataslab_fromrdataset().  dns_rdataset_getclosest() failing is
 * treated as fatal (RUNTIME_CHECK).
 *
 * Returns ISC_R_SUCCESS once the structure is attached; ISC_R_NOMEMORY
 * is the visible failure value for the allocation.
 *
 * NOTE(review): lines are missing from this section (braces, the
 * declarations of 'name' and 'r', the statements following each failed
 * check, and the final return); the trailing disassociate /
 * free_noqname() calls look like the shared failure path - confirm
 * against the full source.
 */
5973 static inline isc_result_t
5974 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5975 dns_rdataset_t *rdataset)
5977 struct noqname *closest;
5978 isc_mem_t *mctx = rbtdb->common.mctx;
5980 dns_rdataset_t neg, negsig;
5981 isc_result_t result;
5984 dns_name_init(&name, NULL);
5985 dns_rdataset_init(&neg);
5986 dns_rdataset_init(&negsig);
5988 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5989 RUNTIME_CHECK(result == ISC_R_SUCCESS);
5991 closest = isc_mem_get(mctx, sizeof(*closest));
5992 if (closest == NULL) {
5993 result = ISC_R_NOMEMORY;
/* Start from an empty structure before filling in each member. */
5996 dns_name_init(&closest->name, NULL);
5997 closest->neg = NULL;
5998 closest->negsig = NULL;
5999 closest->type = neg.type;
6000 result = dns_name_dup(&name, mctx, &closest->name);
6001 if (result != ISC_R_SUCCESS)
6003 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6004 if (result != ISC_R_SUCCESS)
6006 closest->neg = r.base;
6007 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6008 if (result != ISC_R_SUCCESS)
6010 closest->negsig = r.base;
/* Success: release the temporary rdatasets and attach the copy. */
6011 dns_rdataset_disassociate(&neg);
6012 dns_rdataset_disassociate(&negsig);
6013 newheader->closest = closest;
6014 return (ISC_R_SUCCESS);
6017 dns_rdataset_disassociate(&neg);
6018 dns_rdataset_disassociate(&negsig);
6019 free_noqname(mctx, &closest);
/*
 * Declared ahead of the functions below, which compare
 * rbtdb->common.methods against its address.  The initializer is not
 * visible in this section.
 */
6023 static dns_dbmethods_t zone_methods;
/*
 * addrdataset(): convert 'rdataset' into a slab with an
 * rdatasetheader_t in front, initialise the header, and add it to
 * 'node' through add().
 *
 * Visible steps:
 *   - sanity checks, including the NSEC3 node/type consistency check
 *     when the database uses zone_methods;
 *   - dns_rdataslab_fromrdataset() reserving sizeof(rdatasetheader_t);
 *   - header fields filled from 'rdataset' (type, trust, resign,
 *     NEGATIVE / NXDOMAIN / OPTOUT attributes, noqname / closest data);
 *   - tree write lock taken when adding a delegating type or when the
 *     cache is over its memory limit, then the node write lock;
 *   - for a cache: dead-node cleanup and expiry of the first heap
 *     element when its TTL is old enough;
 *   - add(), setting rbtnode->find_callback when it succeeds for a
 *     delegating type;
 *   - iszonesecure() when the add succeeded without a version on a
 *     non-cache database.
 *
 * NOTE(review): lines are missing from this section (return type,
 * braces, else branches, the return statements); only the visible
 * statements are described.
 */
6026 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6027 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6028 dns_rdataset_t *addedrdataset)
6030 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6031 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6032 rbtdb_version_t *rbtversion = version;
6033 isc_region_t region;
6034 rdatasetheader_t *newheader;
6035 rdatasetheader_t *header;
6036 isc_result_t result;
6037 isc_boolean_t delegating;
6038 isc_boolean_t tree_locked = ISC_FALSE;
6039 isc_boolean_t cache_is_overmem = ISC_FALSE;
6041 REQUIRE(VALID_RBTDB(rbtdb));
6042 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
/*
 * With zone_methods, NSEC3 data (type or covered type) is accepted on
 * nodes flagged nsec3; the second half of the condition requires
 * non-NSEC3 data (its leading term is not visible here).
 */
6044 if (rbtdb->common.methods == &zone_methods)
6045 REQUIRE(((rbtnode->nsec3 &&
6046 (rdataset->type == dns_rdatatype_nsec3 ||
6047 rdataset->covers == dns_rdatatype_nsec3)) ||
6049 rdataset->type != dns_rdatatype_nsec3 &&
6050 rdataset->covers != dns_rdatatype_nsec3)));
6052 if (rbtversion == NULL) {
6054 isc_stdtime_get(&now);
/*
 * NOTE(review): the second argument line below contains a mis-encoded
 * character sequence; it looks like the address of 'region' was
 * intended - confirm against the full source.
 */
6058 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6059 ®ion, sizeof(rdatasetheader_t));
6060 if (result != ISC_R_SUCCESS)
/* The header lives at the start of the freshly built slab. */
6063 newheader = (rdatasetheader_t *)region.base;
6064 init_rdataset(rbtdb, newheader);
6065 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6066 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6068 newheader->attributes = 0;
6069 newheader->noqname = NULL;
6070 newheader->closest = NULL;
6071 newheader->count = init_count++;
6072 newheader->trust = rdataset->trust;
6073 newheader->additional_auth = NULL;
6074 newheader->additional_glue = NULL;
6075 newheader->last_used = now;
6076 newheader->node = rbtnode;
6077 if (rbtversion != NULL) {
6078 newheader->serial = rbtversion->serial;
6081 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6082 newheader->attributes |= RDATASET_ATTR_RESIGN;
6083 newheader->resign = rdataset->resign;
6085 newheader->resign = 0;
6087 newheader->serial = 1;
6088 newheader->resign = 0;
6089 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6090 newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6091 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6092 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6093 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6094 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6095 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6096 result = addnoqname(rbtdb, newheader, rdataset);
6097 if (result != ISC_R_SUCCESS) {
6098 free_rdataset(rbtdb, rbtdb->common.mctx,
6103 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6104 result = addclosest(rbtdb, newheader, rdataset);
6105 if (result != ISC_R_SUCCESS) {
6106 free_rdataset(rbtdb, rbtdb->common.mctx,
6114 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6115 * just DNAME for the cache), then we need to set the callback bit
6118 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6119 delegating = ISC_TRUE;
6121 delegating = ISC_FALSE;
6124 * If we're adding a delegation type or the DB is a cache in an overmem
6125 * state, hold an exclusive lock on the tree. In the latter case
6126 * the lock does not necessarily have to be acquired but it will help
6127 * purge stale entries more effectively.
6129 if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6130 cache_is_overmem = ISC_TRUE;
6131 if (delegating || cache_is_overmem) {
6132 tree_locked = ISC_TRUE;
6133 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6136 if (cache_is_overmem)
6137 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6139 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6140 isc_rwlocktype_write);
/* Count the new header in the rrset statistics when they are enabled. */
6142 if (rbtdb->rrsetstats != NULL) {
6143 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6144 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
/*
 * Cache housekeeping in this lock bucket: clean up dead nodes and
 * expire the first heap element if its TTL is at least RBTDB_VIRTUAL
 * in the past.
 */
6147 if (IS_CACHE(rbtdb)) {
6149 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6151 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6152 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6153 expire_header(rbtdb, header, tree_locked);
6156 * If we've been holding a write lock on the tree just for
6157 * cleaning, we can release it now. However, we still need the
6160 if (tree_locked && !delegating) {
6161 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6162 tree_locked = ISC_FALSE;
6166 result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6167 addedrdataset, now);
6168 if (result == ISC_R_SUCCESS && delegating)
6169 rbtnode->find_callback = 1;
6171 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6172 isc_rwlocktype_write);
6175 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6178 * Update the zone's secure status. If version is non-NULL
6179 * this is deferred until closeversion() is called.
6181 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6182 iszonesecure(db, version, rbtdb->origin_node);
/*
 * subtractrdataset(): remove the rdata in 'rdataset' from the matching
 * rdataset at 'node' in 'version'.
 *
 * Visible steps:
 *   - requires a valid rbtdb and a non-NULL version belonging to it;
 *   - builds a temporary header/slab from 'rdataset';
 *   - under the node write lock, records the change with
 *     add_changed() and locates the header of the same type, skipping
 *     IGNORE headers;
 *   - if an existing header is found, dns_rdataslab_subtract()
 *     produces the replacement slab, or, on DNS_R_NXRRSET, a
 *     NONEXISTENT header is created instead; the replacement is linked
 *     in front of 'topheader' and the change marked dirty;
 *   - otherwise the result is DNS_R_NOTEXACT when DNS_DBSUB_EXACT is
 *     set, else DNS_R_UNCHANGED;
 *   - on success 'newrdataset', if non-NULL, is bound to the new
 *     header.
 *
 * ISC_R_NOMEMORY is the visible failure value for allocation failures.
 *
 * NOTE(review): lines are missing from this section (return type,
 * braces, else branches, goto/return statements); only the visible
 * statements are described.
 */
6188 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6189 dns_rdataset_t *rdataset, unsigned int options,
6190 dns_rdataset_t *newrdataset)
6192 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6193 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6194 rbtdb_version_t *rbtversion = version;
6195 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6196 unsigned char *subresult;
6197 isc_region_t region;
6198 isc_result_t result;
6199 rbtdb_changed_t *changed;
6201 REQUIRE(VALID_RBTDB(rbtdb));
6202 REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
6204 if (rbtdb->common.methods == &zone_methods)
6205 REQUIRE(((rbtnode->nsec3 &&
6206 (rdataset->type == dns_rdatatype_nsec3 ||
6207 rdataset->covers == dns_rdatatype_nsec3)) ||
6209 rdataset->type != dns_rdatatype_nsec3 &&
6210 rdataset->covers != dns_rdatatype_nsec3)));
6212 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6214 sizeof(rdatasetheader_t));
6215 if (result != ISC_R_SUCCESS)
/* Temporary header describing the rdata to subtract. */
6217 newheader = (rdatasetheader_t *)region.base;
6218 init_rdataset(rbtdb, newheader);
6219 set_ttl(rbtdb, newheader, rdataset->ttl);
6220 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6222 newheader->attributes = 0;
6223 newheader->serial = rbtversion->serial;
6224 newheader->trust = 0;
6225 newheader->noqname = NULL;
6226 newheader->closest = NULL;
6227 newheader->count = init_count++;
6228 newheader->additional_auth = NULL;
6229 newheader->additional_glue = NULL;
6230 newheader->last_used = 0;
6231 newheader->node = rbtnode;
6232 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6233 newheader->attributes |= RDATASET_ATTR_RESIGN;
6234 newheader->resign = rdataset->resign;
6236 newheader->resign = 0;
6238 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6239 isc_rwlocktype_write);
/* On failure the temporary header is freed and the lock released. */
6241 changed = add_changed(rbtdb, rbtversion, rbtnode);
6242 if (changed == NULL) {
6243 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6244 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6245 isc_rwlocktype_write);
6246 return (ISC_R_NOMEMORY);
/* Find the top-level header of the same type, tracking its predecessor. */
6249 topheader_prev = NULL;
6250 for (topheader = rbtnode->data;
6252 topheader = topheader->next) {
6253 if (topheader->type == newheader->type)
6255 topheader_prev = topheader;
6258 * If header isn't NULL, we've found the right type. There may be
6259 * IGNORE rdatasets between the top of the chain and the first real
6260 * data. We skip over them.
6263 while (header != NULL && IGNORE(header))
6264 header = header->down;
6265 if (header != NULL && EXISTS(header)) {
6266 unsigned int flags = 0;
6268 result = ISC_R_SUCCESS;
/* Exact subtraction also requires the TTLs to match. */
6269 if ((options & DNS_DBSUB_EXACT) != 0) {
6270 flags |= DNS_RDATASLAB_EXACT;
6271 if (newheader->rdh_ttl != header->rdh_ttl)
6272 result = DNS_R_NOTEXACT;
6274 if (result == ISC_R_SUCCESS)
6275 result = dns_rdataslab_subtract(
6276 (unsigned char *)header,
6277 (unsigned char *)newheader,
6278 (unsigned int)(sizeof(*newheader)),
6280 rbtdb->common.rdclass,
6281 (dns_rdatatype_t)header->type,
6283 if (result == ISC_R_SUCCESS) {
6284 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6285 newheader = (rdatasetheader_t *)subresult;
6286 init_rdataset(rbtdb, newheader);
6288 * We have to set the serial since the rdataslab
6289 * subtraction routine copies the reserved portion of
6290 * header, not newheader.
6292 newheader->serial = rbtversion->serial;
6294 * XXXJT: dns_rdataslab_subtract() copied the pointers
6295 * to additional info. We need to clear these fields
6296 * to avoid having duplicated references.
6298 newheader->additional_auth = NULL;
6299 newheader->additional_glue = NULL;
6300 } else if (result == DNS_R_NXRRSET) {
6302 * This subtraction would remove all of the rdata;
6303 * add a nonexistent header instead.
6305 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6306 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6307 if (newheader == NULL) {
6308 result = ISC_R_NOMEMORY;
6311 set_ttl(rbtdb, newheader, 0);
6312 newheader->type = topheader->type;
6313 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6314 newheader->trust = 0;
6315 newheader->serial = rbtversion->serial;
6316 newheader->noqname = NULL;
6317 newheader->closest = NULL;
6318 newheader->count = 0;
6319 newheader->additional_auth = NULL;
6320 newheader->additional_glue = NULL;
6321 newheader->node = rbtnode;
6322 newheader->resign = 0;
6323 newheader->last_used = 0;
6325 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6330 * If we're here, we want to link newheader in front of
6333 INSIST(rbtversion->serial >= topheader->serial);
6334 if (topheader_prev != NULL)
6335 topheader_prev->next = newheader;
6337 rbtnode->data = newheader;
6338 newheader->next = topheader->next;
6339 newheader->down = topheader;
6340 topheader->next = newheader;
6342 changed->dirty = ISC_TRUE;
6345 * The rdataset doesn't exist, so we don't need to do anything
6346 * to satisfy the deletion request.
6348 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6349 if ((options & DNS_DBSUB_EXACT) != 0)
6350 result = DNS_R_NOTEXACT;
6352 result = DNS_R_UNCHANGED;
6355 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6356 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6359 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6360 isc_rwlocktype_write);
6363 * Update the zone's secure status. If version is non-NULL
6364 * this is deferred until closeversion() is called.
/*
 * NOTE(review): 'version' was required to be non-NULL at the top of
 * this function (via 'rbtversion'), so the 'version == NULL' test
 * below looks like it can never be true - confirm.
 */
6366 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6367 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * deleterdataset(): delete the rdataset of 'type'/'covers' at 'node'
 * by adding a NONEXISTENT header through add().
 *
 * Returns ISC_R_NOTIMPLEMENTED for type ANY and for RRSIG with
 * 'covers' == 0, and ISC_R_NOMEMORY when the header cannot be
 * allocated.  add() is called under the node write lock with
 * DNS_DBADD_FORCE, loading false, no 'addedrdataset' and 'now' 0.
 *
 * NOTE(review): lines are missing from this section (return type,
 * braces, an else, the final return); only the visible statements are
 * described.
 */
6373 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6374 dns_rdatatype_t type, dns_rdatatype_t covers)
6376 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6377 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6378 rbtdb_version_t *rbtversion = version;
6379 isc_result_t result;
6380 rdatasetheader_t *newheader;
6382 REQUIRE(VALID_RBTDB(rbtdb));
6383 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6385 if (type == dns_rdatatype_any)
6386 return (ISC_R_NOTIMPLEMENTED);
6387 if (type == dns_rdatatype_rrsig && covers == 0)
6388 return (ISC_R_NOTIMPLEMENTED);
/* Build the NONEXISTENT marker header with TTL 0 for (type, covers). */
6390 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6391 if (newheader == NULL)
6392 return (ISC_R_NOMEMORY);
6393 set_ttl(rbtdb, newheader, 0);
6394 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6395 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6396 newheader->trust = 0;
6397 newheader->noqname = NULL;
6398 newheader->closest = NULL;
6399 newheader->additional_auth = NULL;
6400 newheader->additional_glue = NULL;
/* The serial comes from the version when one was supplied. */
6401 if (rbtversion != NULL)
6402 newheader->serial = rbtversion->serial;
6404 newheader->serial = 0;
6405 newheader->count = 0;
6406 newheader->last_used = 0;
6407 newheader->node = rbtnode;
6409 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6410 isc_rwlocktype_write);
6412 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6413 ISC_FALSE, NULL, 0);
6415 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6416 isc_rwlocktype_write);
6419 * Update the zone's secure status. If version is non-NULL
6420 * this is deferred until closeversion() is called.
6422 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6423 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * Load callback: add one rdataset owned by name to the database that is
 * being loaded.  arg is the rbtdb_load_t load context.
 *
 * Checks visible below: an SOA whose owner is not the database origin is
 * refused with DNS_R_NOTZONETOP (non-cache databases only); NS data at a
 * wildcard owner is refused with DNS_R_INVALIDNS and NSEC3 data at a
 * wildcard owner with DNS_R_INVALIDNSEC3.
 *
 * Rdatasets of type NSEC3, or covering NSEC3, are added to the nsec3 tree;
 * all others go to the main tree.  The rdata is converted to a slab with
 * sizeof(rdatasetheader_t) passed as the reserved size, the header is
 * filled in (serial 1, TTL offset by the load time), and the result is
 * merged into the current version through add() with DNS_DBADD_MERGE.
 * DNS_R_UNCHANGED from add() is turned into ISC_R_SUCCESS.
 */
6429 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6430 rbtdb_load_t *loadctx = arg;
6431 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6432 dns_rbtnode_t *node;
6433 isc_result_t result;
6434 isc_region_t region;
6435 rdatasetheader_t *newheader;
6438 * This routine does no node locking. See comments in
6439 * 'load' below for more information on loading and
6445 * SOA records are only allowed at top of zone.
6447 if (rdataset->type == dns_rdatatype_soa &&
6448 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6449 return (DNS_R_NOTZONETOP);
/* Empty-wildcard bookkeeping is skipped for NSEC3 data and its signatures. */
6451 if (rdataset->type != dns_rdatatype_nsec3 &&
6452 rdataset->covers != dns_rdatatype_nsec3)
6453 add_empty_wildcards(rbtdb, name);
6455 if (dns_name_iswildcard(name)) {
6457 * NS record owners cannot legally be wild cards.
6459 if (rdataset->type == dns_rdatatype_ns)
6460 return (DNS_R_INVALIDNS);
6462 * NSEC3 record owners cannot legally be wild cards.
6464 if (rdataset->type == dns_rdatatype_nsec3)
6465 return (DNS_R_INVALIDNSEC3);
6466 result = add_wildcard_magic(rbtdb, name);
6467 if (result != ISC_R_SUCCESS)
/* Pick the tree by type: NSEC3-related data lives in its own tree. */
6472 if (rdataset->type == dns_rdatatype_nsec3 ||
6473 rdataset->covers == dns_rdatatype_nsec3) {
6474 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6475 if (result == ISC_R_SUCCESS)
6478 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6479 if (result == ISC_R_SUCCESS)
6482 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
/* A node that did not already exist still needs its lock bucket number. */
6484 if (result != ISC_R_EXISTS) {
6485 dns_name_t foundname;
6486 dns_name_init(&foundname, NULL);
6487 dns_rbt_namefromnode(node, &foundname);
6488 #ifdef DNS_RBT_USEHASH
6489 node->locknum = node->hashval % rbtdb->node_lock_count;
6491 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6492 rbtdb->node_lock_count;
6496 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6498 sizeof(rdatasetheader_t));
6499 if (result != ISC_R_SUCCESS)
/* The rdatasetheader_t occupies the start of the slab region. */
6501 newheader = (rdatasetheader_t *)region.base;
6502 init_rdataset(rbtdb, newheader);
6503 set_ttl(rbtdb, newheader,
6504 rdataset->ttl + loadctx->now); /* XXX overflow check */
6505 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6507 newheader->attributes = 0;
6508 newheader->trust = rdataset->trust;
6509 newheader->serial = 1;
6510 newheader->noqname = NULL;
6511 newheader->closest = NULL;
6512 newheader->count = init_count++;
6513 newheader->additional_auth = NULL;
6514 newheader->additional_glue = NULL;
6515 newheader->last_used = 0;
6516 newheader->node = node;
/* Carry the re-signing time over when the source rdataset has one. */
6517 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6518 newheader->attributes |= RDATASET_ATTR_RESIGN;
6519 newheader->resign = rdataset->resign;
6521 newheader->resign = 0;
6523 result = add(rbtdb, node, rbtdb->current_version, newheader,
6524 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6525 if (result == ISC_R_SUCCESS &&
6526 delegating_type(rbtdb, node, rdataset->type))
6527 node->find_callback = 1;
6528 else if (result == DNS_R_UNCHANGED)
6529 result = ISC_R_SUCCESS;
6535 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6536 rbtdb_load_t *loadctx;
6539 rbtdb = (dns_rbtdb_t *)db;
6541 REQUIRE(VALID_RBTDB(rbtdb));
6543 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6544 if (loadctx == NULL)
6545 return (ISC_R_NOMEMORY);
6547 loadctx->rbtdb = rbtdb;
6548 if (IS_CACHE(rbtdb))
6549 isc_stdtime_get(&loadctx->now);
6553 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6555 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6557 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6559 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6561 *addp = loading_addrdataset;
6564 return (ISC_R_SUCCESS);
6568 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6569 rbtdb_load_t *loadctx;
6570 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6572 REQUIRE(VALID_RBTDB(rbtdb));
6573 REQUIRE(dbloadp != NULL);
6575 REQUIRE(loadctx->rbtdb == rbtdb);
6577 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6579 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6580 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6582 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6583 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6585 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6588 * If there's a KEY rdataset at the zone origin containing a
6589 * zone key, we consider the zone secure.
6591 if (! IS_CACHE(rbtdb))
6592 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6596 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6598 return (ISC_R_SUCCESS);
6602 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6603 dns_masterformat_t masterformat) {
6606 rbtdb = (dns_rbtdb_t *)db;
6608 REQUIRE(VALID_RBTDB(rbtdb));
6610 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6611 &dns_master_style_default,
6612 filename, masterformat));
6616 delete_callback(void *data, void *arg) {
6617 dns_rbtdb_t *rbtdb = arg;
6618 rdatasetheader_t *current, *next;
6619 unsigned int locknum;
6622 locknum = current->node->locknum;
6623 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6624 while (current != NULL) {
6625 next = current->next;
6626 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6629 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6632 static isc_boolean_t
6633 issecure(dns_db_t *db) {
6635 isc_boolean_t secure;
6637 rbtdb = (dns_rbtdb_t *)db;
6639 REQUIRE(VALID_RBTDB(rbtdb));
6641 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6642 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6643 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6648 static isc_boolean_t
6649 isdnssec(dns_db_t *db) {
6651 isc_boolean_t dnssec;
6653 rbtdb = (dns_rbtdb_t *)db;
6655 REQUIRE(VALID_RBTDB(rbtdb));
6657 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6658 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6659 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6665 nodecount(dns_db_t *db) {
6669 rbtdb = (dns_rbtdb_t *)db;
6671 REQUIRE(VALID_RBTDB(rbtdb));
6673 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6674 count = dns_rbt_nodecount(rbtdb->tree);
6675 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6681 settask(dns_db_t *db, isc_task_t *task) {
6684 rbtdb = (dns_rbtdb_t *)db;
6686 REQUIRE(VALID_RBTDB(rbtdb));
6688 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6689 if (rbtdb->task != NULL)
6690 isc_task_detach(&rbtdb->task);
6692 isc_task_attach(task, &rbtdb->task);
6693 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Database method answering whether db is persistent.
 * NOTE(review): the body is not visible here; presumably it ignores db and
 * returns a constant -- confirm against the full file.
 */
6696 static isc_boolean_t
6697 ispersistent(dns_db_t *db) {
6703 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6704 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6705 dns_rbtnode_t *onode;
6706 isc_result_t result = ISC_R_SUCCESS;
6708 REQUIRE(VALID_RBTDB(rbtdb));
6709 REQUIRE(nodep != NULL && *nodep == NULL);
6711 /* Note that the access to origin_node doesn't require a DB lock */
6712 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6713 if (onode != NULL) {
6714 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6715 new_reference(rbtdb, onode);
6716 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6718 *nodep = rbtdb->origin_node;
6720 INSIST(IS_CACHE(rbtdb));
6721 result = ISC_R_NOTFOUND;
6728 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6729 isc_uint8_t *flags, isc_uint16_t *iterations,
6730 unsigned char *salt, size_t *salt_length)
6733 isc_result_t result = ISC_R_NOTFOUND;
6734 rbtdb_version_t *rbtversion = version;
6736 rbtdb = (dns_rbtdb_t *)db;
6738 REQUIRE(VALID_RBTDB(rbtdb));
6739 INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6741 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6743 if (rbtversion == NULL)
6744 rbtversion = rbtdb->current_version;
6746 if (rbtversion->havensec3) {
6748 *hash = rbtversion->hash;
6749 if (salt != NULL && salt_length != NULL) {
6750 REQUIRE(*salt_length >= rbtversion->salt_length);
6751 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6753 if (salt_length != NULL)
6754 *salt_length = rbtversion->salt_length;
6755 if (iterations != NULL)
6756 *iterations = rbtversion->iterations;
6758 *flags = rbtversion->flags;
6759 result = ISC_R_SUCCESS;
6761 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6767 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6768 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6769 isc_stdtime_t oldresign;
6770 isc_result_t result = ISC_R_SUCCESS;
6771 rdatasetheader_t *header;
6773 REQUIRE(VALID_RBTDB(rbtdb));
6774 REQUIRE(!IS_CACHE(rbtdb));
6775 REQUIRE(rdataset != NULL);
6777 header = rdataset->private3;
6780 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6781 isc_rwlocktype_write);
6783 oldresign = header->resign;
6784 header->resign = resign;
6785 if (header->heap_index != 0) {
6786 INSIST(RESIGN(header));
6788 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6789 header->heap_index);
6790 header->heap_index = 0;
6791 } else if (resign < oldresign)
6792 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6793 header->heap_index);
6795 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6796 header->heap_index);
6797 } else if (resign && header->heap_index == 0) {
6798 header->attributes |= RDATASET_ATTR_RESIGN;
6799 result = resign_insert(rbtdb, header->node->locknum, header);
6801 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6802 isc_rwlocktype_write);
/*
 * Find the rdataset that is due for re-signing first and bind it to
 * rdataset; when foundname is not NULL it receives the full owner name.
 *
 * With the tree lock held for reading, the loop takes each node lock in
 * read mode and looks at element 1 of the matching heap.  Candidates are
 * compared with isc_serial_lt() on their resign times.  When a candidate
 * replaces the previous best, the node lock of the previous best is the
 * one released; the lock guarding the chosen header is released only
 * after bind_rdataset() and the optional name lookup.
 * NOTE(review): the branches that skip an empty heap and that record the
 * first candidate are not visible here -- confirm against the full file.
 *
 * The result starts as ISC_R_NOTFOUND and becomes ISC_R_SUCCESS once a
 * header has been bound.
 */
6807 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6808 dns_name_t *foundname)
6810 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6811 rdatasetheader_t *header = NULL, *this;
6813 isc_result_t result = ISC_R_NOTFOUND;
6814 unsigned int locknum;
6816 REQUIRE(VALID_RBTDB(rbtdb));
6818 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6820 for (i = 0; i < rbtdb->node_lock_count; i++) {
6821 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6822 this = isc_heap_element(rbtdb->heaps[i], 1);
6824 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6825 isc_rwlocktype_read);
6830 else if (isc_serial_lt(this->resign, header->resign)) {
6831 locknum = header->node->locknum;
6832 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6833 isc_rwlocktype_read);
6836 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6837 isc_rwlocktype_read);
/* Bound with a now value of 0. */
6843 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6845 if (foundname != NULL)
6846 dns_rbt_fullnamefromnode(header->node, foundname);
6848 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6849 isc_rwlocktype_read);
6851 result = ISC_R_SUCCESS;
6854 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6860 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6862 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6863 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6864 dns_rbtnode_t *node;
6865 rdatasetheader_t *header;
6867 REQUIRE(VALID_RBTDB(rbtdb));
6868 REQUIRE(rdataset != NULL);
6869 REQUIRE(rdataset->methods == &rdataset_methods);
6870 REQUIRE(rbtdb->future_version == rbtversion);
6871 REQUIRE(rbtversion != NULL);
6872 REQUIRE(rbtversion->writer);
6873 REQUIRE(rbtversion->rbtdb == rbtdb);
6875 node = rdataset->private2;
6876 INSIST(node != NULL);
6877 header = rdataset->private3;
6878 INSIST(header != NULL);
6881 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6882 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6883 isc_rwlocktype_write);
6885 * Delete from heap and save to re-signed list so that it can
6886 * be restored if we backout of this change.
6888 new_reference(rbtdb, node);
6889 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6890 header->heap_index = 0;
6891 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6893 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6894 isc_rwlocktype_write);
6895 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6898 static dns_stats_t *
6899 getrrsetstats(dns_db_t *db) {
6900 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6902 REQUIRE(VALID_RBTDB(rbtdb));
6903 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6905 return (rbtdb->rrsetstats);
/*
 * Method table installed by the constructor below for every database type
 * other than dns_dbtype_cache (stub databases included).
 * NOTE(review): the initializer entries are not visible here -- confirm
 * their order against the dns_dbmethods_t definition.
 */
6908 static dns_dbmethods_t zone_methods = {
/*
 * Method table installed by the constructor below for databases of type
 * dns_dbtype_cache.
 * NOTE(review): the initializer entries are not visible here -- confirm
 * their order against the dns_dbmethods_t definition.
 */
6947 static dns_dbmethods_t cache_methods = {
/*
 * Database constructor (the function name depends on
 * DNS_RBTDB_VERSION64).  Builds a new red-black-tree database for origin
 * and rdclass and returns it through dbp.
 *
 * Steps visible below, in order:
 *  - allocate and zero the database object from mctx; argv[0], when used,
 *    supplies a separate memory context (hmctx) for the heaps;
 *  - select cache_methods for dns_dbtype_cache and zone_methods for every
 *    other type, setting DNS_DBATTR_CACHE or DNS_DBATTR_STUB as needed;
 *  - create the database lock and the tree lock;
 *  - choose node_lock_count (a cache may not use fewer than 2) and
 *    allocate the node lock array;
 *  - for a cache, create the rrset statistics and one rdataset list per
 *    node lock;
 *  - create one heap per node lock, ordered by ttl_sooner for a cache and
 *    by resign_sooner otherwise;
 *  - create one dead-node list per node lock and initialize every node
 *    lock and its reference counter;
 *  - attach to mctx and hmctx, copy the origin name, and create the main
 *    and NSEC3 trees;
 *  - for a non-cache database, add the origin node to both trees and give
 *    each node its lock number;
 *  - set the reference count to 1, create version 1 as the current
 *    version, put it on the open list, and set the magic numbers.
 *
 * Failures before the memory contexts are attached unwind through the
 * cleanup code at the end of the function; later failures release the
 * object with free_rbtdb().
 *
 * Returns ISC_R_SUCCESS, ISC_R_NOMEMORY, ISC_R_RANGE, or (presumably) the
 * failure code of whichever helper failed -- the return statements on
 * those paths are not visible here.
 */
6987 #ifdef DNS_RBTDB_VERSION64
6992 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6993 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6994 void *driverarg, dns_db_t **dbp)
6997 isc_result_t result;
7000 isc_boolean_t (*sooner)(void *, void *);
7001 isc_mem_t *hmctx = mctx;
7003 /* Keep the compiler happy. */
7006 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7008 return (ISC_R_NOMEMORY);
7011 * If argv[0] exists, it points to a memory context to use for heap
7014 hmctx = (isc_mem_t *) argv[0];
7016 memset(rbtdb, '\0', sizeof(*rbtdb));
7017 dns_name_init(&rbtdb->common.origin, NULL);
7018 rbtdb->common.attributes = 0;
/* The database type decides the method table and the attribute bits. */
7019 if (type == dns_dbtype_cache) {
7020 rbtdb->common.methods = &cache_methods;
7021 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7022 } else if (type == dns_dbtype_stub) {
7023 rbtdb->common.methods = &zone_methods;
7024 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7026 rbtdb->common.methods = &zone_methods;
7027 rbtdb->common.rdclass = rdclass;
7028 rbtdb->common.mctx = NULL;
7030 result = RBTDB_INITLOCK(&rbtdb->lock);
7031 if (result != ISC_R_SUCCESS)
7034 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7035 if (result != ISC_R_SUCCESS)
7039 * Initialize node_lock_count in a generic way to support future
7040 * extension which allows the user to specify this value on creation.
7041 * Note that when specified for a cache DB it must be larger than 1
7042 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7044 if (rbtdb->node_lock_count == 0) {
7045 if (IS_CACHE(rbtdb))
7046 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7048 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7049 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7050 result = ISC_R_RANGE;
7051 goto cleanup_tree_lock;
7053 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7054 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7055 sizeof(rbtdb_nodelock_t));
7056 if (rbtdb->node_locks == NULL) {
7057 result = ISC_R_NOMEMORY;
7058 goto cleanup_tree_lock;
7061 rbtdb->rrsetstats = NULL;
/* Statistics and per-lock rdataset lists are created for caches only. */
7062 if (IS_CACHE(rbtdb)) {
7063 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7064 if (result != ISC_R_SUCCESS)
7065 goto cleanup_node_locks;
7066 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7067 sizeof(rdatasetheaderlist_t));
7068 if (rbtdb->rdatasets == NULL) {
7069 result = ISC_R_NOMEMORY;
7070 goto cleanup_rrsetstats;
7072 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7073 ISC_LIST_INIT(rbtdb->rdatasets[i]);
7075 rbtdb->rdatasets = NULL;
/* The heap pointer array and the heaps themselves come from hmctx. */
7080 rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7081 sizeof(isc_heap_t *));
7082 if (rbtdb->heaps == NULL) {
7083 result = ISC_R_NOMEMORY;
7084 goto cleanup_rdatasets;
7086 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7087 rbtdb->heaps[i] = NULL;
7088 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7089 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7090 result = isc_heap_create(hmctx, sooner, set_index, 0,
7092 if (result != ISC_R_SUCCESS)
7097 * Create deadnode lists.
7099 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7100 sizeof(rbtnodelist_t));
7101 if (rbtdb->deadnodes == NULL) {
7102 result = ISC_R_NOMEMORY;
7105 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7106 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7108 rbtdb->active = rbtdb->node_lock_count;
7110 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7111 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7112 if (result == ISC_R_SUCCESS) {
7113 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7114 if (result != ISC_R_SUCCESS)
7115 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7117 if (result != ISC_R_SUCCESS) {
7119 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7120 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7121 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7123 goto cleanup_deadnodes;
7125 rbtdb->node_locks[i].exiting = ISC_FALSE;
7129 * Attach to the mctx. The database will persist so long as there
7130 * are references to it, and attaching to the mctx ensures that our
7131 * mctx won't disappear out from under us.
7133 isc_mem_attach(mctx, &rbtdb->common.mctx);
7134 isc_mem_attach(hmctx, &rbtdb->hmctx);
7137 * Must be initialized before free_rbtdb() is called.
7139 isc_ondestroy_init(&rbtdb->common.ondest);
7142 * Make a copy of the origin name.
7144 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7145 if (result != ISC_R_SUCCESS) {
7146 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7151 * Make the Red-Black Trees.
7153 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7154 if (result != ISC_R_SUCCESS) {
7155 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7159 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7160 if (result != ISC_R_SUCCESS) {
7161 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7166 * In order to set the node callback bit correctly in zone databases,
7167 * we need to know if the node has the origin name of the zone.
7168 * In loading_addrdataset() we could simply compare the new name
7169 * to the origin name, but this is expensive. Also, we don't know the
7170 * node name in addrdataset(), so we need another way of knowing the
7173 * We now explicitly create a node for the zone's origin, and then
7174 * we simply remember the node's address. This is safe, because
7175 * the top-of-zone node can never be deleted, nor can its address
7178 if (!IS_CACHE(rbtdb)) {
7179 dns_rbtnode_t *nsec3node;
7181 rbtdb->origin_node = NULL;
7182 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7183 &rbtdb->origin_node);
7184 if (result != ISC_R_SUCCESS) {
7185 INSIST(result != ISC_R_EXISTS);
7186 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7189 rbtdb->origin_node->nsec3 = 0;
7191 * We need to give the origin node the right locknum.
7193 dns_name_init(&name, NULL);
7194 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7195 #ifdef DNS_RBT_USEHASH
7196 rbtdb->origin_node->locknum =
7197 rbtdb->origin_node->hashval %
7198 rbtdb->node_lock_count;
7200 rbtdb->origin_node->locknum =
7201 dns_name_hash(&name, ISC_TRUE) %
7202 rbtdb->node_lock_count;
7205 * Add an apex node to the NSEC3 tree so that NSEC3 searches
7206 * return partial matches when there is only a single NSEC3
7207 * record in the tree.
7210 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7212 if (result != ISC_R_SUCCESS) {
7213 INSIST(result != ISC_R_EXISTS);
7214 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7217 nsec3node->nsec3 = 1;
7219 * We need to give the nsec3 origin node the right locknum.
7221 dns_name_init(&name, NULL);
7222 dns_rbt_namefromnode(nsec3node, &name);
7223 #ifdef DNS_RBT_USEHASH
7224 nsec3node->locknum = nsec3node->hashval %
7225 rbtdb->node_lock_count;
7227 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7228 rbtdb->node_lock_count;
7233 * Misc. Initialization.
7235 result = isc_refcount_init(&rbtdb->references, 1);
7236 if (result != ISC_R_SUCCESS) {
7237 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7240 rbtdb->attributes = 0;
7244 * Version Initialization.
7246 rbtdb->current_serial = 1;
7247 rbtdb->least_serial = 1;
7248 rbtdb->next_serial = 2;
7249 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7250 if (rbtdb->current_version == NULL) {
7251 isc_refcount_decrement(&rbtdb->references, NULL);
7252 isc_refcount_destroy(&rbtdb->references);
7253 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7254 return (ISC_R_NOMEMORY);
7256 rbtdb->current_version->rbtdb = rbtdb;
7257 rbtdb->current_version->secure = dns_db_insecure;
7258 rbtdb->current_version->havensec3 = ISC_FALSE;
7259 rbtdb->current_version->flags = 0;
7260 rbtdb->current_version->iterations = 0;
7261 rbtdb->current_version->hash = 0;
7262 rbtdb->current_version->salt_length = 0;
7263 memset(rbtdb->current_version->salt, 0,
7264 sizeof(rbtdb->current_version->salt));
7265 rbtdb->future_version = NULL;
7266 ISC_LIST_INIT(rbtdb->open_versions);
7268 * Keep the current version in the open list so that list operation
7269 * won't happen in normal lookup operations.
7271 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7273 rbtdb->common.magic = DNS_DB_MAGIC;
7274 rbtdb->common.impmagic = RBTDB_MAGIC;
7276 *dbp = (dns_db_t *)rbtdb;
7278 return (ISC_R_SUCCESS);
7281 isc_mem_put(mctx, rbtdb->deadnodes,
7282 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7285 if (rbtdb->heaps != NULL) {
7286 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7287 if (rbtdb->heaps[i] != NULL)
7288 isc_heap_destroy(&rbtdb->heaps[i]);
7289 isc_mem_put(hmctx, rbtdb->heaps,
7290 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7294 if (rbtdb->rdatasets != NULL)
7295 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7296 sizeof(rdatasetheaderlist_t));
7298 if (rbtdb->rrsetstats != NULL)
7299 dns_stats_detach(&rbtdb->rrsetstats);
7302 isc_mem_put(mctx, rbtdb->node_locks,
7303 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7306 isc_rwlock_destroy(&rbtdb->tree_lock);
7309 RBTDB_DESTROYLOCK(&rbtdb->lock);
7312 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7318 * Slabbed Rdataset Methods
7322 rdataset_disassociate(dns_rdataset_t *rdataset) {
7323 dns_db_t *db = rdataset->private1;
7324 dns_dbnode_t *node = rdataset->private2;
7326 detachnode(db, &node);
/*
 * Rdataset method: move the cursor to the first rdata of the slab that
 * private3 points at.
 *
 * The rdata count is read from the first two bytes of the slab, most
 * significant byte first.  The path that clears private5 returns
 * ISC_R_NOMORE (the guarding test is not visible here; presumably it is
 * a count of zero).  Otherwise private5 receives the cursor position,
 * privateuint4 receives the count as adjusted per the comment below, and
 * the result is ISC_R_SUCCESS.
 *
 * With DNS_RDATASET_FIXED and without DNS_RDATASETATTR_LOADORDER the
 * cursor is advanced by 2 + 4 * count bytes from the start of the slab.
 */
7330 rdataset_first(dns_rdataset_t *rdataset) {
7331 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7334 count = raw[0] * 256 + raw[1];
7336 rdataset->private5 = NULL;
7337 return (ISC_R_NOMORE);
7340 #if DNS_RDATASET_FIXED
7341 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7342 raw += 2 + (4 * count);
7348 * The privateuint4 field is the number of rdata beyond the
7349 * cursor position, so we decrement the total count by one
7350 * before storing it.
7352 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7353 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7354 * to the first entry in the offset table.
7357 rdataset->privateuint4 = count;
7358 rdataset->private5 = raw;
7360 return (ISC_R_SUCCESS);
/*
 * Rdataset method: advance the cursor kept in private5 by one entry.
 *
 * privateuint4 holds the number of entries still ahead of the cursor; it
 * is read, updated and written back, and ISC_R_NOMORE is returned when
 * nothing is left (the test and the decrement themselves are not visible
 * here).  The two bytes at the cursor give the length of the current
 * record, most significant byte first.  The stored cursor then moves
 * past the 4-byte (DNS_RDATASET_FIXED) or 2-byte prefix.
 * NOTE(review): the statement that adds length to raw is not visible
 * here -- confirm against the full file.
 *
 * Returns ISC_R_SUCCESS after a successful advance.
 */
7364 rdataset_next(dns_rdataset_t *rdataset) {
7366 unsigned int length;
7367 unsigned char *raw; /* RDATASLAB */
7369 count = rdataset->privateuint4;
7371 return (ISC_R_NOMORE);
7373 rdataset->privateuint4 = count;
7376 * Skip forward one record (length + 4) or one offset (4).
7378 raw = rdataset->private5;
7379 #if DNS_RDATASET_FIXED
7380 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7382 length = raw[0] * 256 + raw[1];
7384 #if DNS_RDATASET_FIXED
7386 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7388 rdataset->private5 = raw + 2; /* length(2) */
7391 return (ISC_R_SUCCESS);
/*
 * Rdataset method: fill rdata with the record at the cursor (private5),
 * which must not be NULL.
 *
 * With DNS_RDATASET_FIXED and DNS_RDATASETATTR_LOADORDER set, the cursor
 * points at a 4-byte big-endian offset and the record is located relative
 * to the slab start in private3.  The record length is the 2-byte
 * big-endian value at the record start.  For RRSIG rdatasets the first
 * data byte is tested against DNS_RDATASLAB_OFFLINE and, when set,
 * DNS_RDATA_OFFLINE is ORed into the flags of rdata.
 * NOTE(review): the statements that skip the prefix and fill the region r
 * are not visible here -- confirm against the full file.
 */
7395 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7396 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7397 #if DNS_RDATASET_FIXED
7398 unsigned int offset;
7400 unsigned int length;
7402 unsigned int flags = 0;
7404 REQUIRE(raw != NULL);
7407 * Find the start of the record if not already in private5
7408 * then skip the length and order fields.
7410 #if DNS_RDATASET_FIXED
7411 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7412 offset = (raw[0] << 24) + (raw[1] << 16) +
7413 (raw[2] << 8) + raw[3];
7414 raw = rdataset->private3;
7418 length = raw[0] * 256 + raw[1];
7419 #if DNS_RDATASET_FIXED
7424 if (rdataset->type == dns_rdatatype_rrsig) {
7425 if (*raw & DNS_RDATASLAB_OFFLINE)
7426 flags |= DNS_RDATA_OFFLINE;
7432 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7433 rdata->flags |= flags;
7437 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7438 dns_db_t *db = source->private1;
7439 dns_dbnode_t *node = source->private2;
7440 dns_dbnode_t *cloned_node = NULL;
7442 attachnode(db, node, &cloned_node);
7446 * Reset iterator state.
7448 target->privateuint4 = 0;
7449 target->private5 = NULL;
7453 rdataset_count(dns_rdataset_t *rdataset) {
7454 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7457 count = raw[0] * 256 + raw[1];
7463 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7464 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7466 dns_db_t *db = rdataset->private1;
7467 dns_dbnode_t *node = rdataset->private2;
7468 dns_dbnode_t *cloned_node;
7469 struct noqname *noqname = rdataset->private6;
7472 attachnode(db, node, &cloned_node);
7473 nsec->methods = &rdataset_methods;
7474 nsec->rdclass = db->rdclass;
7475 nsec->type = noqname->type;
7477 nsec->ttl = rdataset->ttl;
7478 nsec->trust = rdataset->trust;
7479 nsec->private1 = rdataset->private1;
7480 nsec->private2 = rdataset->private2;
7481 nsec->private3 = noqname->neg;
7482 nsec->privateuint4 = 0;
7483 nsec->private5 = NULL;
7484 nsec->private6 = NULL;
7485 nsec->private7 = NULL;
7488 attachnode(db, node, &cloned_node);
7489 nsecsig->methods = &rdataset_methods;
7490 nsecsig->rdclass = db->rdclass;
7491 nsecsig->type = dns_rdatatype_rrsig;
7492 nsecsig->covers = noqname->type;
7493 nsecsig->ttl = rdataset->ttl;
7494 nsecsig->trust = rdataset->trust;
7495 nsecsig->private1 = rdataset->private1;
7496 nsecsig->private2 = rdataset->private2;
7497 nsecsig->private3 = noqname->negsig;
7498 nsecsig->privateuint4 = 0;
7499 nsecsig->private5 = NULL;
7500 nsec->private6 = NULL;
7501 nsec->private7 = NULL;
7503 dns_name_clone(&noqname->name, name);
7505 return (ISC_R_SUCCESS);
7509 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7510 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7512 dns_db_t *db = rdataset->private1;
7513 dns_dbnode_t *node = rdataset->private2;
7514 dns_dbnode_t *cloned_node;
7515 struct noqname *closest = rdataset->private7;
7518 attachnode(db, node, &cloned_node);
7519 nsec->methods = &rdataset_methods;
7520 nsec->rdclass = db->rdclass;
7521 nsec->type = closest->type;
7523 nsec->ttl = rdataset->ttl;
7524 nsec->trust = rdataset->trust;
7525 nsec->private1 = rdataset->private1;
7526 nsec->private2 = rdataset->private2;
7527 nsec->private3 = closest->neg;
7528 nsec->privateuint4 = 0;
7529 nsec->private5 = NULL;
7530 nsec->private6 = NULL;
7531 nsec->private7 = NULL;
7534 attachnode(db, node, &cloned_node);
7535 nsecsig->methods = &rdataset_methods;
7536 nsecsig->rdclass = db->rdclass;
7537 nsecsig->type = dns_rdatatype_rrsig;
7538 nsecsig->covers = closest->type;
7539 nsecsig->ttl = rdataset->ttl;
7540 nsecsig->trust = rdataset->trust;
7541 nsecsig->private1 = rdataset->private1;
7542 nsecsig->private2 = rdataset->private2;
7543 nsecsig->private3 = closest->negsig;
7544 nsecsig->privateuint4 = 0;
7545 nsecsig->private5 = NULL;
7546 nsec->private6 = NULL;
7547 nsec->private7 = NULL;
7549 dns_name_clone(&closest->name, name);
7551 return (ISC_R_SUCCESS);
7555 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7556 dns_rbtdb_t *rbtdb = rdataset->private1;
7557 dns_rbtnode_t *rbtnode = rdataset->private2;
7558 rdatasetheader_t *header = rdataset->private3;
7561 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7562 isc_rwlocktype_write);
7563 header->trust = rdataset->trust = trust;
7564 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7565 isc_rwlocktype_write);
7569 rdataset_expire(dns_rdataset_t *rdataset) {
7570 dns_rbtdb_t *rbtdb = rdataset->private1;
7571 dns_rbtnode_t *rbtnode = rdataset->private2;
7572 rdatasetheader_t *header = rdataset->private3;
7575 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7576 isc_rwlocktype_write);
7577 expire_header(rbtdb, header, ISC_FALSE);
7578 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7579 isc_rwlocktype_write);
7583 * Rdataset Iterator Methods
7587 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7588 rbtdb_rdatasetiter_t *rbtiterator;
7590 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7592 if (rbtiterator->common.version != NULL)
7593 closeversion(rbtiterator->common.db,
7594 &rbtiterator->common.version, ISC_FALSE);
7595 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7596 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7597 sizeof(*rbtiterator));
/*
 * Rdataset iterator method: position the iterator on the first rdataset
 * of its node that is visible to the iterator.
 *
 * For a cache the time limit comes from the now field of the iterator;
 * for other databases the serial limit comes from the version of the
 * iterator.  With the node lock held for reading, each entry on the next
 * chain of the node is examined, following its down chain, for a header
 * whose serial does not exceed the limit and which is not flagged IGNORE.
 * A header that is NONEXISTENT, or (when now is non-zero) whose TTL is
 * before now, is tested for separately.
 * NOTE(review): the statements executed after those tests are not visible
 * here -- confirm against the full file.
 *
 * The header found (or NULL) is stored in the current field of the
 * iterator.  Returns ISC_R_SUCCESS or ISC_R_NOMORE.
 */
7603 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7604 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7605 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7606 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7607 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7608 rdatasetheader_t *header, *top_next;
7609 rbtdb_serial_t serial;
7612 if (IS_CACHE(rbtdb)) {
7614 now = rbtiterator->common.now;
7616 serial = rbtversion->serial;
7620 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7621 isc_rwlocktype_read);
7623 for (header = rbtnode->data; header != NULL; header = top_next) {
7624 top_next = header->next;
7626 if (header->serial <= serial && !IGNORE(header)) {
7628 * Is this a "this rdataset doesn't exist"
7629 * record? Or is it too old in the cache?
7631 * Note: unlike everywhere else, we
7632 * check for now > header->rdh_ttl instead
7633 * of now >= header->rdh_ttl. This allows
7634 * ANY and RRSIG queries for 0 TTL
7635 * rdatasets to work.
7637 if (NONEXISTENT(header) ||
7638 (now != 0 && now > header->rdh_ttl))
7642 header = header->down;
7643 } while (header != NULL);
7648 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7649 isc_rwlocktype_read);
7651 rbtiterator->current = header;
7654 return (ISC_R_NOMORE);
7656 return (ISC_R_SUCCESS);
/*
 * Rdataset iterator method: advance the iterator to the next rdataset of
 * its node that is visible to the iterator.
 *
 * Starting after the current header, entries on the next chain are
 * examined with the node lock held for reading.  Entries whose type
 * equals the type of the current header, or equals the negtype value
 * computed below from the current header, are passed over.  For the
 * remaining entries the down chain is followed for a header whose serial
 * does not exceed the limit; a header with RDATASET_ATTR_NONEXISTENT set,
 * or (when now is non-zero) whose TTL is before now, is tested for
 * separately.
 * NOTE(review): the second half of the serial test and the statements
 * executed after the tests are not visible here -- confirm against the
 * full file.
 *
 * The header found (or NULL) is stored in the current field of the
 * iterator.  Returns ISC_R_SUCCESS or ISC_R_NOMORE.
 */
7660 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7661 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7662 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7663 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7664 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7665 rdatasetheader_t *header, *top_next;
7666 rbtdb_serial_t serial;
7668 rbtdb_rdatatype_t type, negtype;
7669 dns_rdatatype_t rdtype, covers;
7671 header = rbtiterator->current;
7673 return (ISC_R_NOMORE);
7675 if (IS_CACHE(rbtdb)) {
7677 now = rbtiterator->common.now;
7679 serial = rbtversion->serial;
7683 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7684 isc_rwlocktype_read);
7686 type = header->type;
7687 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7688 if (NEGATIVE(header)) {
7689 covers = RBTDB_RDATATYPE_EXT(header->type);
7690 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7692 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7693 for (header = header->next; header != NULL; header = top_next) {
7694 top_next = header->next;
7696 * If not walking back up the down list.
7698 if (header->type != type && header->type != negtype) {
7700 if (header->serial <= serial &&
7703 * Is this a "this rdataset doesn't
7706 * Note: unlike everywhere else, we
7707 * check for now > header->ttl instead
7708 * of now >= header->ttl. This allows
7709 * ANY and RRSIG queries for 0 TTL
7710 * rdatasets to work.
7712 if ((header->attributes &
7713 RDATASET_ATTR_NONEXISTENT) != 0 ||
7714 (now != 0 && now > header->rdh_ttl))
7718 header = header->down;
7719 } while (header != NULL);
7725 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7726 isc_rwlocktype_read);
7728 rbtiterator->current = header;
7731 return (ISC_R_NOMORE);
7733 return (ISC_R_SUCCESS);
7737 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7738 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7739 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7740 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7741 rdatasetheader_t *header;
7743 header = rbtiterator->current;
7744 REQUIRE(header != NULL);
7746 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7747 isc_rwlocktype_read);
7749 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7752 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7753 isc_rwlocktype_read);
7758 * Database Iterator Methods
7762 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7763 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7764 dns_rbtnode_t *node = rbtdbiter->node;
7769 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7770 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7774 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7775 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7776 dns_rbtnode_t *node = rbtdbiter->node;
7782 lock = &rbtdb->node_locks[node->locknum].lock;
7783 NODE_LOCK(lock, isc_rwlocktype_read);
7784 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7785 rbtdbiter->tree_locked, ISC_FALSE);
7786 NODE_UNLOCK(lock, isc_rwlocktype_read);
7788 rbtdbiter->node = NULL;
/*
 * Release the references held on every node queued in the deletions
 * array of the database iterator.
 *
 * Nothing happens when the delete counter is zero.  Otherwise a debug
 * message is logged, a read-held tree lock is released, and the tree
 * lock is taken in write mode.  Each queued node then has its reference
 * dropped under its node lock (read mode), the counter is reset to zero,
 * and the write lock is released.  If the iterator held the read lock on
 * entry it is re-acquired; the tree_locked field is kept in step with
 * each of these transitions.
 * NOTE(review): the tree lock is not held between the read unlock and
 * the write lock -- confirm callers tolerate that window.
 */
7792 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7793 dns_rbtnode_t *node;
7794 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7795 isc_boolean_t was_read_locked = ISC_FALSE;
7799 if (rbtdbiter->delete != 0) {
7801 * Note that "%d node of %d in tree" can report things like
7802 * "flush_deletions: 59 nodes of 41 in tree". This means
7803 * That some nodes appear on the deletions list more than
7804 * once. Only the last occurence will actually be deleted.
7806 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7807 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7808 "flush_deletions: %d nodes of %d in tree",
7810 dns_rbt_nodecount(rbtdb->tree));
7812 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7813 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7814 was_read_locked = ISC_TRUE;
7816 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7817 rbtdbiter->tree_locked = isc_rwlocktype_write;
7819 for (i = 0; i < rbtdbiter->delete; i++) {
7820 node = rbtdbiter->deletions[i];
7821 lock = &rbtdb->node_locks[node->locknum].lock;
7823 NODE_LOCK(lock, isc_rwlocktype_read);
7824 decrement_reference(rbtdb, node, 0,
7825 isc_rwlocktype_read,
7826 rbtdbiter->tree_locked, ISC_FALSE);
7827 NODE_UNLOCK(lock, isc_rwlocktype_read);
7830 rbtdbiter->delete = 0;
7832 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7833 if (was_read_locked) {
7834 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7835 rbtdbiter->tree_locked = isc_rwlocktype_read;
7838 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * Resume a paused database iterator: take the tree read lock, record it in
 * rbtdbiter->tree_locked and clear the paused flag.
 *
 * The iterator must be paused and must not be holding the tree lock.
 */
7844 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7845 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7847 REQUIRE(rbtdbiter->paused);
7848 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7850 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7851 rbtdbiter->tree_locked = isc_rwlocktype_read;
7853 rbtdbiter->paused = ISC_FALSE;
/*
 * Tear down a database iterator.
 *
 * A held tree read lock is released, the current node is dereferenced,
 * pending deletions are flushed, the database reference of the iterator
 * is detached, both node chains are reset and the iterator memory is
 * returned to the memory context of the database.
 */
7857 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7858 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7859 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7860 dns_db_t *db = NULL;
7862 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7863 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7864 rbtdbiter->tree_locked = isc_rwlocktype_none;
/* Any lock state other than read must already be none at this point. */
7866 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7868 dereference_iter_node(rbtdbiter);
7870 flush_deletions(rbtdbiter);
/*
 * Keep a temporary reference in db so that db->mctx is still reachable
 * for isc_mem_put() after common.db has been detached.
 */
7872 dns_db_attach(rbtdbiter->common.db, &db);
7873 dns_db_detach(&rbtdbiter->common.db);
7875 dns_rbtnodechain_reset(&rbtdbiter->chain);
7876 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7877 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * Position the database iterator on the first node.
 *
 * With nsec3only set the NSEC3 tree is used.  Otherwise the main tree is
 * used, and when it reports ISC_R_NOTFOUND and nonsec3 is not set the
 * iterator switches to the NSEC3 chain.  On success the node is fetched
 * from the chain, new_origin is set and the node is referenced.  The
 * outcome is stored in rbtdbiter->result.
 */
7884 dbiterator_first(dns_dbiterator_t *iterator) {
7885 isc_result_t result;
7886 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7887 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7888 dns_name_t *name, *origin;
/* A stored result other than success or NOMORE is returned unchanged. */
7890 if (rbtdbiter->result != ISC_R_SUCCESS &&
7891 rbtdbiter->result != ISC_R_NOMORE)
7892 return (rbtdbiter->result);
7894 if (rbtdbiter->paused)
7895 resume_iteration(rbtdbiter);
/* Let go of the previous position and start both chains afresh. */
7897 dereference_iter_node(rbtdbiter);
7899 name = dns_fixedname_name(&rbtdbiter->name);
7900 origin = dns_fixedname_name(&rbtdbiter->origin);
7901 dns_rbtnodechain_reset(&rbtdbiter->chain);
7902 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/* Choose the chain to start on according to the iterator flags. */
7904 if (rbtdbiter->nsec3only) {
7905 rbtdbiter->current = &rbtdbiter->nsec3chain;
7906 result = dns_rbtnodechain_first(rbtdbiter->current,
7907 rbtdb->nsec3, name, origin);
7909 rbtdbiter->current = &rbtdbiter->chain;
7910 result = dns_rbtnodechain_first(rbtdbiter->current,
7911 rbtdb->tree, name, origin);
7912 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7913 rbtdbiter->current = &rbtdbiter->nsec3chain;
7914 result = dns_rbtnodechain_first(rbtdbiter->current,
/* Fetch and reference the node the chosen chain is positioned on. */
7919 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7920 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7921 NULL, &rbtdbiter->node);
7922 if (result == ISC_R_SUCCESS) {
7923 rbtdbiter->new_origin = ISC_TRUE;
7924 reference_iter_node(rbtdbiter);
7927 INSIST(result == ISC_R_NOTFOUND);
7928 result = ISC_R_NOMORE; /* The tree is empty. */
7931 rbtdbiter->result = result;
/*
 * Position the database iterator on the last node.
 *
 * The NSEC3 tree is tried when nsec3only is set and nonsec3 is not.  The
 * main tree is tried when nsec3only is not set and nothing has been found
 * so far.  On success the node is fetched from the chain, new_origin is
 * set and the node is referenced.  The outcome is stored in
 * rbtdbiter->result.
 *
 * NOTE(review): with neither flag set only the main tree is consulted
 * here, whereas dbiterator_next() continues from the main chain into the
 * NSEC3 chain - confirm that this asymmetry is intended.
 */
7937 dbiterator_last(dns_dbiterator_t *iterator) {
7938 isc_result_t result;
7939 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7940 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7941 dns_name_t *name, *origin;
/* A stored result other than success or NOMORE is returned unchanged. */
7943 if (rbtdbiter->result != ISC_R_SUCCESS &&
7944 rbtdbiter->result != ISC_R_NOMORE)
7945 return (rbtdbiter->result);
7947 if (rbtdbiter->paused)
7948 resume_iteration(rbtdbiter);
/* Let go of the previous position and start both chains afresh. */
7950 dereference_iter_node(rbtdbiter);
7952 name = dns_fixedname_name(&rbtdbiter->name);
7953 origin = dns_fixedname_name(&rbtdbiter->origin);
7954 dns_rbtnodechain_reset(&rbtdbiter->chain);
7955 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/* NOTFOUND is the starting value so the main tree test below can fire. */
7957 result = ISC_R_NOTFOUND;
7958 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7959 rbtdbiter->current = &rbtdbiter->nsec3chain;
7960 result = dns_rbtnodechain_last(rbtdbiter->current,
7961 rbtdb->nsec3, name, origin);
7963 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7964 rbtdbiter->current = &rbtdbiter->chain;
7965 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
/* Fetch and reference the node the chosen chain is positioned on. */
7968 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7969 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7970 NULL, &rbtdbiter->node);
7971 if (result == ISC_R_SUCCESS) {
7972 rbtdbiter->new_origin = ISC_TRUE;
7973 reference_iter_node(rbtdbiter);
7976 INSIST(result == ISC_R_NOTFOUND);
7977 result = ISC_R_NOMORE; /* The tree is empty. */
7980 rbtdbiter->result = result;
/*
 * Position the database iterator by looking up name.
 *
 * A stored iterator result other than ISC_R_SUCCESS, ISC_R_NOTFOUND or
 * ISC_R_NOMORE is returned unchanged.  Otherwise the current node is
 * released, both chains are reset, and dns_rbt_findnode() is called with
 * DNS_RBTFIND_EMPTYDATA on the NSEC3 tree (nsec3only), on the main tree
 * (nonsec3), or on the main tree followed by a second lookup in the NSEC3
 * tree when the main tree only yields DNS_R_PARTIALMATCH.
 */
7986 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7987 isc_result_t result, tresult;
7988 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7989 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7990 dns_name_t *iname, *origin;
/* Unlike first/last, a stored NOTFOUND does not block a new seek. */
7992 if (rbtdbiter->result != ISC_R_SUCCESS &&
7993 rbtdbiter->result != ISC_R_NOTFOUND &&
7994 rbtdbiter->result != ISC_R_NOMORE)
7995 return (rbtdbiter->result);
7997 if (rbtdbiter->paused)
7998 resume_iteration(rbtdbiter);
/* Let go of the previous position and start both chains afresh. */
8000 dereference_iter_node(rbtdbiter);
8002 iname = dns_fixedname_name(&rbtdbiter->name);
8003 origin = dns_fixedname_name(&rbtdbiter->origin);
8004 dns_rbtnodechain_reset(&rbtdbiter->chain);
8005 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
/* Pick the tree (or trees) to search according to the iterator flags. */
8007 if (rbtdbiter->nsec3only) {
8008 rbtdbiter->current = &rbtdbiter->nsec3chain;
8009 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8012 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8013 } else if (rbtdbiter->nonsec3) {
8014 rbtdbiter->current = &rbtdbiter->chain;
8015 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8018 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8021 * Stay on main chain if not found on either chain.
8023 rbtdbiter->current = &rbtdbiter->chain;
8024 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8027 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8028 if (result == DNS_R_PARTIALMATCH) {
8029 dns_rbtnode_t *node = NULL;
8030 tresult = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8031 &node, &rbtdbiter->nsec3chain,
8032 DNS_RBTFIND_EMPTYDATA,
/* An exact hit in the NSEC3 tree moves the iterator onto that chain. */
8034 if (tresult == ISC_R_SUCCESS) {
8035 rbtdbiter->node = node;
8036 rbtdbiter->current = &rbtdbiter->nsec3chain;
/*
 * NOTE(review): two result-handling sequences follow.  They look like the
 * alternative arms of a conditional-compilation block - confirm which one
 * is active.  The first reports a partial match as ISC_R_NOTFOUND with no
 * node; the second stores a partial match as ISC_R_SUCCESS.
 */
8043 if (result == ISC_R_SUCCESS) {
8044 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8046 if (result == ISC_R_SUCCESS) {
8047 rbtdbiter->new_origin = ISC_TRUE;
8048 reference_iter_node(rbtdbiter);
8050 } else if (result == DNS_R_PARTIALMATCH) {
8051 result = ISC_R_NOTFOUND;
8052 rbtdbiter->node = NULL;
8055 rbtdbiter->result = result;
8057 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8058 isc_result_t tresult;
8059 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8061 if (tresult == ISC_R_SUCCESS) {
8062 rbtdbiter->new_origin = ISC_TRUE;
8063 reference_iter_node(rbtdbiter);
8066 rbtdbiter->node = NULL;
8069 rbtdbiter->node = NULL;
8071 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8072 ISC_R_SUCCESS : result;
/*
 * Step the database iterator to the previous node.
 *
 * The iterator must currently have a node.  A stored result other than
 * ISC_R_SUCCESS is returned unchanged.  When the NSEC3 chain runs out
 * (ISC_R_NOMORE) and the iterator is restricted to neither tree,
 * iteration continues from the last node of the main tree; ISC_R_NOTFOUND
 * from that lookup is turned into ISC_R_NOMORE.  The outcome is stored in
 * rbtdbiter->result.
 */
8079 dbiterator_prev(dns_dbiterator_t *iterator) {
8080 isc_result_t result;
8081 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8082 dns_name_t *name, *origin;
8083 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8085 REQUIRE(rbtdbiter->node != NULL);
8087 if (rbtdbiter->result != ISC_R_SUCCESS)
8088 return (rbtdbiter->result);
8090 if (rbtdbiter->paused)
8091 resume_iteration(rbtdbiter);
8093 name = dns_fixedname_name(&rbtdbiter->name);
8094 origin = dns_fixedname_name(&rbtdbiter->origin);
8095 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
/* Start of the NSEC3 chain reached: carry on at the end of the main tree. */
8096 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8097 !rbtdbiter->nonsec3 &&
8098 &rbtdbiter->nsec3chain == rbtdbiter->current) {
8099 rbtdbiter->current = &rbtdbiter->chain;
8100 dns_rbtnodechain_reset(rbtdbiter->current);
8101 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8103 if (result == ISC_R_NOTFOUND)
8104 result = ISC_R_NOMORE;
/* The old node is released before the new position is picked up. */
8107 dereference_iter_node(rbtdbiter);
/* new_origin is true only when the chain reported DNS_R_NEWORIGIN. */
8109 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8110 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8111 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8112 NULL, &rbtdbiter->node);
8115 if (result == ISC_R_SUCCESS)
8116 reference_iter_node(rbtdbiter);
8118 rbtdbiter->result = result;
/*
 * Step the database iterator to the next node.
 *
 * The iterator must currently have a node.  A stored result other than
 * ISC_R_SUCCESS is returned unchanged.  When the main chain runs out
 * (ISC_R_NOMORE) and the iterator is restricted to neither tree,
 * iteration continues from the first node of the NSEC3 tree;
 * ISC_R_NOTFOUND from that lookup is turned into ISC_R_NOMORE.  The
 * outcome is stored in rbtdbiter->result.
 */
8124 dbiterator_next(dns_dbiterator_t *iterator) {
8125 isc_result_t result;
8126 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8127 dns_name_t *name, *origin;
8128 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8130 REQUIRE(rbtdbiter->node != NULL);
8132 if (rbtdbiter->result != ISC_R_SUCCESS)
8133 return (rbtdbiter->result);
8135 if (rbtdbiter->paused)
8136 resume_iteration(rbtdbiter);
8138 name = dns_fixedname_name(&rbtdbiter->name);
8139 origin = dns_fixedname_name(&rbtdbiter->origin);
8140 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
/* End of the main chain reached: carry on at the start of the NSEC3 tree. */
8141 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8142 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8143 rbtdbiter->current = &rbtdbiter->nsec3chain;
8144 dns_rbtnodechain_reset(rbtdbiter->current);
8145 result = dns_rbtnodechain_first(rbtdbiter->current,
8146 rbtdb->nsec3, name, origin);
8147 if (result == ISC_R_NOTFOUND)
8148 result = ISC_R_NOMORE;
/* The old node is released before the new position is picked up. */
8151 dereference_iter_node(rbtdbiter);
/* new_origin is true only when the chain reported DNS_R_NEWORIGIN. */
8153 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8154 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8155 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8156 NULL, &rbtdbiter->node);
8158 if (result == ISC_R_SUCCESS)
8159 reference_iter_node(rbtdbiter);
8161 rbtdbiter->result = result;
/*
 * Return the node at the current iterator position through *nodep, taking
 * a new reference on it under the node lock.
 *
 * The stored iterator result must be ISC_R_SUCCESS and a current node
 * must exist.  When relative names are in effect and a new origin is
 * pending, the result is DNS_R_NEWORIGIN rather than ISC_R_SUCCESS.
 *
 * For a cleaning iterator with an ISC_R_SUCCESS result the node is also
 * passed to expirenode().  If that succeeds and the node has no down
 * pointer, the node is queued in rbtdbiter->deletions[] with an extra
 * reference, which flush_deletions() later drops.  The queue is flushed
 * first when it already holds DELETION_BATCH_MAX entries.
 */
8167 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8170 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8171 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8172 dns_rbtnode_t *node = rbtdbiter->node;
8173 isc_result_t result;
8174 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8175 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8177 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8178 REQUIRE(rbtdbiter->node != NULL);
8180 if (rbtdbiter->paused)
8181 resume_iteration(rbtdbiter);
8184 if (rbtdbiter->common.relative_names)
8186 result = dns_name_concatenate(nodename, origin, name, NULL);
8187 if (result != ISC_R_SUCCESS)
8189 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8190 result = DNS_R_NEWORIGIN;
8192 result = ISC_R_SUCCESS;
/* The reference handed to the caller is taken under the node lock. */
8194 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8195 new_reference(rbtdb, node);
8196 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8198 *nodep = rbtdbiter->node;
8200 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8201 isc_result_t expire_result;
8204 * If the deletion array is full, flush it before trying
8205 * to expire the current node. The current node can't
8206 * fully deleted while the iteration cursor is still on it.
8208 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8209 flush_deletions(rbtdbiter);
8211 expire_result = expirenode(iterator->db, *nodep, 0);
8214 * expirenode() currently always returns success.
8216 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8219 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8220 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8221 dns_rbtnode_refincrement(node, &refs);
8223 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the database iterator.
 *
 * A stored result other than ISC_R_SUCCESS or ISC_R_NOMORE is returned
 * unchanged.  Pausing an already paused iterator returns ISC_R_SUCCESS
 * without further work.  Otherwise the paused flag is set, a held tree
 * read lock is released, pending deletions are flushed and ISC_R_SUCCESS
 * is returned.
 */
8231 dbiterator_pause(dns_dbiterator_t *iterator) {
8232 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8233 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8235 if (rbtdbiter->result != ISC_R_SUCCESS &&
8236 rbtdbiter->result != ISC_R_NOMORE)
8237 return (rbtdbiter->result);
8239 if (rbtdbiter->paused)
8240 return (ISC_R_SUCCESS);
8242 rbtdbiter->paused = ISC_TRUE;
/* Only a read lock may be held here; it is given up while paused. */
8244 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8245 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8246 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8247 rbtdbiter->tree_locked = isc_rwlocktype_none;
8250 flush_deletions(rbtdbiter);
8252 return (ISC_R_SUCCESS);
/*
 * Copy the origin name stored in the iterator into name and return the
 * result of dns_name_copy().  A stored iterator result other than
 * ISC_R_SUCCESS is returned instead, without copying.
 */
8256 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8257 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8258 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8260 if (rbtdbiter->result != ISC_R_SUCCESS)
8261 return (rbtdbiter->result);
8263 return (dns_name_copy(origin, name, NULL));
8267 * Additional cache routines.
/*
 * Look up the acache entry stored for the slot selected by rdataset.
 *
 * The slot index is total_count - current_count - 1, where total_count is
 * read from the first two bytes of the slab and current_count comes from
 * rdataset->privateuint4.  The per-header array is chosen by type
 * (additional_auth or additional_glue).
 *
 * ISC_R_NOTFOUND is returned when there is no array or the slot is empty.
 * Otherwise the entry is attached under the node read lock, queried with
 * dns_acache_getentry() after the lock is released, and detached again.
 */
8270 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8271 dns_rdatatype_t qtype, dns_acache_t *acache,
8272 dns_zone_t **zonep, dns_db_t **dbp,
8273 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8274 dns_name_t *fname, dns_message_t *msg,
8277 dns_rbtdb_t *rbtdb = rdataset->private1;
8278 dns_rbtnode_t *rbtnode = rdataset->private2;
8279 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8280 unsigned int current_count = rdataset->privateuint4;
8282 rdatasetheader_t *header;
8283 nodelock_t *nodelock;
8284 unsigned int total_count;
8285 acachectl_t *acarray;
8286 dns_acacheentry_t *entry;
8287 isc_result_t result;
8289 UNUSED(qtype); /* we do not use this value at least for now */
/* The header is located immediately in front of the raw slab data. */
8292 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* The first two slab bytes hold the count, high byte first. */
8294 total_count = raw[0] * 256 + raw[1];
8295 INSIST(total_count > current_count);
8296 count = total_count - current_count - 1;
8300 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8301 NODE_LOCK(nodelock, isc_rwlocktype_read);
8304 case dns_rdatasetadditional_fromauth:
8305 acarray = header->additional_auth;
8307 case dns_rdatasetadditional_fromcache:
8310 case dns_rdatasetadditional_fromglue:
8311 acarray = header->additional_glue;
/* A missing array counts as a query miss except for the fromcache type. */
8317 if (acarray == NULL) {
8318 if (type != dns_rdatasetadditional_fromcache)
8319 dns_acache_countquerymiss(acache);
8320 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8321 return (ISC_R_NOTFOUND);
8324 if (acarray[count].entry == NULL) {
8325 dns_acache_countquerymiss(acache);
8326 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8327 return (ISC_R_NOTFOUND);
/* Attach while locked so the entry can be used after the unlock. */
8331 dns_acache_attachentry(acarray[count].entry, &entry);
8333 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8335 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8336 nodep, fname, msg, now);
8338 dns_acache_detachentry(&entry);
/*
 * Callback handed to dns_acache_createentry() by rdataset_setadditional().
 *
 * Under the node write lock the header array selected by cbarg->type is
 * looked up.  If the slot cbarg->count still refers to entry, the slot is
 * cleared.  The callback argument is returned to the memory context and
 * the entry is detached.  After the lock is released the node and
 * database references taken from the callback argument are detached.
 */
8344 acache_callback(dns_acacheentry_t *entry, void **arg) {
8346 dns_rbtnode_t *rbtnode;
8347 nodelock_t *nodelock;
8348 acachectl_t *acarray = NULL;
8349 acache_cbarg_t *cbarg;
8352 REQUIRE(arg != NULL);
8356 * The caller must hold the entry lock.
8359 rbtdb = (dns_rbtdb_t *)cbarg->db;
8360 rbtnode = (dns_rbtnode_t *)cbarg->node;
8362 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8363 NODE_LOCK(nodelock, isc_rwlocktype_write);
8365 switch (cbarg->type) {
8366 case dns_rdatasetadditional_fromauth:
8367 acarray = cbarg->header->additional_auth;
8369 case dns_rdatasetadditional_fromglue:
8370 acarray = cbarg->header->additional_glue;
/*
 * The slot is cleared only if it still refers to this entry.
 * NOTE(review): the second isc_mem_put() presumably belongs to the
 * alternative branch, so cbarg is freed exactly once - confirm.
 */
8376 count = cbarg->count;
8377 if (acarray != NULL && acarray[count].entry == entry) {
8378 acarray[count].entry = NULL;
8379 INSIST(acarray[count].cbarg == cbarg);
8380 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8381 acarray[count].cbarg = NULL;
8383 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8385 dns_acache_detachentry(&entry);
8387 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* Release the node and database references that came with cbarg. */
8389 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8390 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an acache entry and dispose of its callback argument.
 *
 * dns_acache_cancelentry() is called on entry, the node and database
 * references stored in the callback argument are detached, and the
 * argument itself is returned to mctx.  mctx, entry, cbargp and *cbargp
 * must all be non-NULL.
 */
8396 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8397 acache_cbarg_t **cbargp)
8399 acache_cbarg_t *cbarg;
8401 REQUIRE(mctx != NULL);
8402 REQUIRE(entry != NULL);
8403 REQUIRE(cbargp != NULL && *cbargp != NULL);
8407 dns_acache_cancelentry(entry);
8408 dns_db_detachnode(cbarg->db, &cbarg->node);
8409 dns_db_detach(&cbarg->db);
8411 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Install a new acache entry in the slot selected by rdataset.
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache; ISC_R_SUCCESS
 * is returned at once.  Otherwise a callback argument holding references
 * to the database and the node is allocated, an acache entry is created
 * with acache_callback() as its callback and filled in by
 * dns_acache_setentry(), and the entry is stored in the additional_auth
 * or additional_glue array of the header under the node write lock.  The
 * array is allocated with total_count cleared slots when it does not
 * exist yet.  An entry that already occupies the slot is cancelled and
 * detached after the lock has been released.
 *
 * ISC_R_NOMEMORY is returned when the callback argument cannot be
 * allocated.
 */
8417 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8418 dns_rdatatype_t qtype, dns_acache_t *acache,
8419 dns_zone_t *zone, dns_db_t *db,
8420 dns_dbversion_t *version, dns_dbnode_t *node,
8423 dns_rbtdb_t *rbtdb = rdataset->private1;
8424 dns_rbtnode_t *rbtnode = rdataset->private2;
8425 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8426 unsigned int current_count = rdataset->privateuint4;
8427 rdatasetheader_t *header;
8428 unsigned int total_count, count;
8429 nodelock_t *nodelock;
8430 isc_result_t result;
8431 acachectl_t *acarray;
8432 dns_acacheentry_t *newentry, *oldentry = NULL;
8433 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8437 if (type == dns_rdatasetadditional_fromcache)
8438 return (ISC_R_SUCCESS);
/* The header is located immediately in front of the raw slab data. */
8440 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* The first two slab bytes hold the count, high byte first. */
8442 total_count = raw[0] * 256 + raw[1];
8443 INSIST(total_count > current_count);
8444 count = total_count - current_count - 1; /* should be private data */
/* Build the callback argument; it pins both the database and the node. */
8446 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8447 if (newcbarg == NULL)
8448 return (ISC_R_NOMEMORY);
8449 newcbarg->type = type;
8450 newcbarg->count = count;
8451 newcbarg->header = header;
8452 newcbarg->db = NULL;
8453 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8454 newcbarg->node = NULL;
8455 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8458 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8459 acache_callback, newcbarg, &newentry);
8460 if (result != ISC_R_SUCCESS)
8462 /* Set cache data in the new entry. */
8463 result = dns_acache_setentry(acache, newentry, zone, db,
8464 version, node, fname);
8465 if (result != ISC_R_SUCCESS)
8468 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8469 NODE_LOCK(nodelock, isc_rwlocktype_write);
8473 case dns_rdatasetadditional_fromauth:
8474 acarray = header->additional_auth;
8476 case dns_rdatasetadditional_fromglue:
8477 acarray = header->additional_glue;
/* First use for this header and type: allocate and clear the slot array. */
8483 if (acarray == NULL) {
8486 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8487 sizeof(acachectl_t));
/*
 * NOTE(review): no assignment to result is visible on this allocation
 * failure path - confirm the caller cannot see the earlier ISC_R_SUCCESS.
 */
8489 if (acarray == NULL) {
8490 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8494 for (i = 0; i < total_count; i++) {
8495 acarray[i].entry = NULL;
8496 acarray[i].cbarg = NULL;
8500 case dns_rdatasetadditional_fromauth:
8501 header->additional_auth = acarray;
8503 case dns_rdatasetadditional_fromglue:
8504 header->additional_glue = acarray;
8510 if (acarray[count].entry != NULL) {
8512 * Swap the entry. Delay cleaning-up the old entry since
8513 * it would require a node lock.
8515 oldentry = acarray[count].entry;
8516 INSIST(acarray[count].cbarg != NULL);
8517 oldcbarg = acarray[count].cbarg;
8519 acarray[count].entry = newentry;
8520 acarray[count].cbarg = newcbarg;
8522 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
/* The displaced entry is torn down outside the node lock. */
8524 if (oldentry != NULL) {
8525 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8526 dns_acache_detachentry(&oldentry);
8529 return (ISC_R_SUCCESS);
/* Cleanup of the new entry and callback argument. */
8532 if (newcbarg != NULL) {
8533 if (newentry != NULL) {
8534 acache_cancelentry(rbtdb->common.mctx, newentry,
8536 dns_acache_detachentry(&newentry);
8538 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8539 dns_db_detach(&newcbarg->db);
8540 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Remove the acache entry stored in the slot selected by rdataset.
 *
 * Nothing is done for dns_rdatasetadditional_fromcache; ISC_R_SUCCESS is
 * returned at once.  ISC_R_NOTFOUND is returned when the header has no
 * array for the given type or the slot is empty.  Otherwise the slot is
 * cleared under the node write lock, and the entry is cancelled and
 * detached after the lock has been released.
 */
8549 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8550 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8552 dns_rbtdb_t *rbtdb = rdataset->private1;
8553 dns_rbtnode_t *rbtnode = rdataset->private2;
8554 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8555 unsigned int current_count = rdataset->privateuint4;
8556 rdatasetheader_t *header;
8557 nodelock_t *nodelock;
8558 unsigned int total_count, count;
8559 acachectl_t *acarray;
8560 dns_acacheentry_t *entry;
8561 acache_cbarg_t *cbarg;
8563 UNUSED(qtype); /* we do not use this value at least for now */
8566 if (type == dns_rdatasetadditional_fromcache)
8567 return (ISC_R_SUCCESS);
/* The header is located immediately in front of the raw slab data. */
8569 header = (struct rdatasetheader *)(raw - sizeof(*header));
/* The first two slab bytes hold the count, high byte first. */
8571 total_count = raw[0] * 256 + raw[1];
8572 INSIST(total_count > current_count);
8573 count = total_count - current_count - 1;
8578 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8579 NODE_LOCK(nodelock, isc_rwlocktype_write);
8582 case dns_rdatasetadditional_fromauth:
8583 acarray = header->additional_auth;
8585 case dns_rdatasetadditional_fromglue:
8586 acarray = header->additional_glue;
8592 if (acarray == NULL) {
8593 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8594 return (ISC_R_NOTFOUND);
8597 entry = acarray[count].entry;
8598 if (entry == NULL) {
8599 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8600 return (ISC_R_NOTFOUND);
/* Empty the slot while locked; the teardown happens after the unlock. */
8603 acarray[count].entry = NULL;
8604 cbarg = acarray[count].cbarg;
8605 acarray[count].cbarg = NULL;
8607 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8609 if (entry != NULL) {
8611 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8612 dns_acache_detachentry(&entry);
8615 return (ISC_R_SUCCESS);
8619 * Routines for LRU-based cache management.
8623 * See if a given cache entry that is being reused needs to be updated
8624 * in the LRU-list. From the LRU management point of view, this function is
8625 * expected to return true for almost all cases. When used with threads,
8626 * however, this may cause a non-negligible performance penalty because a
8627 * writer lock will have to be acquired before updating the list.
8628 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8629 * function returns true if the entry has not been updated for some period of
8630 * time. We differentiate the NS or glue address case and the others since
8631 * experiments have shown that the former tends to be accessed relatively
8632 * infrequently and the cost of a cache miss is higher (e.g., missing NS records
8633 * may cause external queries at a higher level zone, involving more
8636 * Caller must hold the node (read or write) lock.
/*
 * Decide whether header is due for an LRU update at time now.
 *
 * When DNS_RBTDB_LIMITLRUUPDATE is non-zero the decision is based on
 * header->last_used: 60 seconds for NS headers and for A or AAAA headers
 * with glue trust, 300 seconds for all other headers.
 */
8638 static inline isc_boolean_t
8639 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
/* Headers flagged NONEXISTENT or STALE are handled before the time test. */
8640 if ((header->attributes &
8641 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8644 #if DNS_RBTDB_LIMITLRUUPDATE
/* NS headers and glue-trust address headers use the shorter interval. */
8645 if (header->type == dns_rdatatype_ns ||
8646 (header->trust == dns_trust_glue &&
8647 (header->type == dns_rdatatype_a ||
8648 header->type == dns_rdatatype_aaaa))) {
8650 * Glue records are updated if at least 60 seconds have passed
8651 * since the previous update time.
8653 return (header->last_used + 60 <= now);
8656 /* Other records are updated if 5 minutes have passed. */
8657 return (header->last_used + 300 <= now);
8666 * Update the timestamp of a given cache entry and move it to the head
8667 * of the corresponding LRU list.
8669 * Caller must hold the node (write) lock.
8671 * Note that we do NOT touch the heap here, as the TTL has not changed.
/*
 * Stamp header->last_used with now and move the header to the front of
 * its LRU list.  The list is rbtdb->rdatasets[] indexed by the locknum of
 * the node that owns the header.
 *
 * The database must be a cache and the header must already be linked
 * into a list (both are checked with INSIST).
 */
8674 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8677 INSIST(IS_CACHE(rbtdb));
8679 /* To be checked: can we really assume this? XXXMLG */
8680 INSIST(ISC_LINK_LINKED(header, link));
/* Unlink and prepend so the header becomes the most recently used one. */
8682 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8683 header->last_used = now;
8684 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8688 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8689 * under an overmem condition. To recover from this condition quickly, up to
8690 * 2 entries will be purged. This process is triggered while adding a new
8691 * entry, and we specifically avoid purging entries in the same LRU bucket as
8692 * the one to which the new entry will belong. Otherwise, we might purge
8693 * entries of the same name of different RR types while adding RRsets from a
8694 * single response (consider the case where we're adding A and AAAA glue records
8695 * of the same NS name).
/*
 * Expire headers from LRU buckets other than locknum_start.
 *
 * Buckets are visited in increasing order, wrapping at
 * rbtdb->node_lock_count, starting with the bucket after locknum_start.
 * The walk stops when it comes back to locknum_start or when purgecount
 * is no longer positive.  Each bucket is processed under its node write
 * lock: the element at index 1 of the bucket heap is expired when its TTL
 * is not later than now - RBTDB_VIRTUAL, and headers are then taken from
 * the tail of the bucket LRU list, unlinked and expired while purgecount
 * stays positive.
 */
8698 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8699 isc_stdtime_t now, isc_boolean_t tree_locked)
8701 rdatasetheader_t *header, *header_prev;
8702 unsigned int locknum;
8705 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8706 locknum != locknum_start && purgecount > 0;
8707 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8708 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8709 isc_rwlocktype_write);
/* Check the heap element at index 1 for an expired TTL first. */
8711 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8712 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8713 expire_header(rbtdb, header, tree_locked);
/* Then walk the LRU list from its tail; the predecessor is saved first. */
8717 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8718 header != NULL && purgecount > 0;
8719 header = header_prev) {
8720 header_prev = ISC_LIST_PREV(header, link);
8722 * Unlink the entry at this point to avoid checking it
8723 * again even if it's currently used someone else and
8724 * cannot be purged at this moment. This entry won't be
8725 * referenced any more (so unlinking is safe) since the
8726 * TTL was reset to 0.
8728 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8730 expire_header(rbtdb, header, tree_locked);
8734 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8735 isc_rwlocktype_write);
8740 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8741 isc_boolean_t tree_locked)
8743 set_ttl(rbtdb, header, 0);
8744 header->attributes |= RDATASET_ATTR_STALE;
8745 header->node->dirty = 1;
8748 * Caller must hold the node (write) lock.
8751 if (dns_rbtnode_refcurrent(header->node) == 0) {
8753 * If no one else is using the node, we can clean it up now.
8754 * We first need to gain a new reference to the node to meet a
8755 * requirement of decrement_reference().
8757 new_reference(rbtdb, header->node);
8758 decrement_reference(rbtdb, header->node, 0,
8759 isc_rwlocktype_write,
8760 tree_locked ? isc_rwlocktype_write :
8761 isc_rwlocktype_none, ISC_FALSE);