]> CyberLeo.Net >> Repos - FreeBSD/stable/8.git/blob - contrib/bind9/lib/dns/rbtdb.c
Update to 9.6-ESV-R8.
[FreeBSD/stable/8.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2012  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id$ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
101 #define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
102 #define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
/*
 * Pack base type 'b' into the low 16 bits and extension type 'e' into the
 * high 16 bits of an rbtdb_rdatatype_t.
 *
 * 'e' is widened to rbtdb_rdatatype_t before it is shifted.  A 16-bit
 * operand would otherwise be promoted to (signed) int, and shifting a value
 * of 0x8000 or more left by 16 overflows int, which is undefined behavior.
 * The result is still an integer constant expression when both arguments
 * are constants.
 */
#define RBTDB_RDATATYPE_VALUE(b, e) \
        (((rbtdb_rdatatype_t)(e) << 16) | (rbtdb_rdatatype_t)(b))
104
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_SIGDDS \
116                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
117 #define RBTDB_RDATATYPE_NCACHEANY \
118                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
119
120 /*
121  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is defined.
122  * Using rwlock is effective with regard to lookup performance only when
123  * it is implemented in an efficient way.
124  * Otherwise, it is generally wise to stick to the simple locking since rwlock
125  * would require more memory or can even make lookups slower due to its own
126  * overhead (when it internally calls mutex locks).
127  */
128 #ifdef ISC_RWLOCK_USEATOMIC
129 #define DNS_RBTDB_USERWLOCK 1
130 #else
131 #define DNS_RBTDB_USERWLOCK 0
132 #endif
133
134 #if DNS_RBTDB_USERWLOCK
135 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
136 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
137 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
138 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
139 #else
140 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
141 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
142 #define RBTDB_LOCK(l, t)        LOCK(l)
143 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
144 #endif
145
146 /*
147  * Since node locking is sensitive to both performance and memory footprint,
148  * we need some trick here.  If we have both high-performance rwlock and
149  * high performance and small-memory reference counters, we use rwlock for
150  * node lock and isc_refcount for node references.  In this case, we don't have
151  * to protect the access to the counters by locks.
152  * Otherwise, we simply use ordinary mutex lock for node locking, and use
153  * simple integers as reference counters which is protected by the lock.
154  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
155  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
156  * counters first and then protect other parts of a node as read-only data.
157  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
158  * provided for these special cases.  When we can use the efficient backend
159  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
160  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
161  * section including the access to the reference counter.
162  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
163  * section is also protected by NODE_STRONGLOCK().
164  */
165 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
166 typedef isc_rwlock_t nodelock_t;
167
168 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
169 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
170 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
171 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
172 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
173
174 #define NODE_STRONGLOCK(l)      ((void)0)
175 #define NODE_STRONGUNLOCK(l)    ((void)0)
176 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
177 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
178 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
179 #else
180 typedef isc_mutex_t nodelock_t;
181
182 #define NODE_INITLOCK(l)        isc_mutex_init(l)
183 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
184 #define NODE_LOCK(l, t)         LOCK(l)
185 #define NODE_UNLOCK(l, t)       UNLOCK(l)
186 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
187
188 #define NODE_STRONGLOCK(l)      LOCK(l)
189 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
190 #define NODE_WEAKLOCK(l, t)     ((void)0)
191 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
192 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
193 #endif
194
195 /*%
196  * Whether to rate-limit updating the LRU to avoid possible thread contention.
197  * Our performance measurement has shown the cost is marginal, so it's defined
198  * to be 0 by default either with or without threads.
199  */
200 #ifndef DNS_RBTDB_LIMITLRUUPDATE
201 #define DNS_RBTDB_LIMITLRUUPDATE 0
202 #endif
203
204 /*
205  * Allow clients with a virtual time of up to 5 minutes in the past to see
206  * records that would otherwise have expired.
207  */
208 #define RBTDB_VIRTUAL 300
209
210 struct noqname {
211         dns_name_t      name;
212         void *          neg;
213         void *          negsig;
214         dns_rdatatype_t type;
215 };
216
217 typedef struct acachectl acachectl_t;
218
/*%
 * Header record for one version of an rdataset stored at a tree node.  See
 * the 'next' and 'down' members for how headers are chained together.
 */
219 typedef struct rdatasetheader {
220         /*%
221          * Locked by the owning node's lock.
222          */
223         rbtdb_serial_t                  serial;
224         dns_ttl_t                       rdh_ttl;
            /*%<
             * Ordering key compared by ttl_sooner().  set_ttl() stores a new
             * value here and, for cache databases, repositions the header in
             * its heap.
             */
225         rbtdb_rdatatype_t               type;
            /*%<
             * Packed type value; update_rrsetstats() splits it with
             * RBTDB_RDATATYPE_BASE() and RBTDB_RDATATYPE_EXT().
             */
226         isc_uint16_t                    attributes;
            /*%<
             * Bitmask of RDATASET_ATTR_* flags, tested with the EXISTS(),
             * NONEXISTENT(), IGNORE(), ... predicate macros.
             */
227         dns_trust_t                     trust;
228         struct noqname                  *noqname;
229         struct noqname                  *closest;
230         /*%
231          * We don't use the LIST macros, because the LIST structure has
232          * both head and tail pointers, and is doubly linked.
             * NOTE(review): this presumably refers to the hand-rolled 'next'
             * and 'down' links that follow, not to 'closest' above -- confirm.
233          */
234
235         struct rdatasetheader           *next;
236         /*%<
237          * If this is the top header for an rdataset, 'next' points
238          * to the top header for the next rdataset (i.e., the next type).
239          * Otherwise, it points up to the header whose down pointer points
240          * at this header.
241          */
242
243         struct rdatasetheader           *down;
244         /*%<
245          * Points to the header for the next older version of
246          * this rdataset.
247          */
248
249         isc_uint32_t                    count;
250         /*%<
251          * Monotonically increased every time this rdataset is bound so that
252          * it is used as the base of the starting point in DNS responses
253          * when the "cyclic" rrset-order is required.  Since the ordering
254          * should not be so crucial, no lock is set for the counter for
255          * performance reasons.
256          */
257
258         acachectl_t                     *additional_auth;
259         acachectl_t                     *additional_glue;
260
261         dns_rbtnode_t                   *node;
            /*%<
             * set_ttl() uses node->locknum as the index into the database's
             * 'heaps' array.
             */
262         isc_stdtime_t                   last_used;
263         ISC_LINK(struct rdatasetheader) link;
264
265         unsigned int                    heap_index;
266         /*%<
267          * Used for TTL-based cache cleaning.
             * Written by set_index(); set_ttl() leaves the heap alone when
             * this is 0.
268          */
269         isc_stdtime_t                   resign;
            /*%< Ordering key compared by resign_sooner(). */
270 } rdatasetheader_t;
271
272 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
273 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
274
275 #define RDATASET_ATTR_NONEXISTENT       0x0001
276 #define RDATASET_ATTR_STALE             0x0002
277 #define RDATASET_ATTR_IGNORE            0x0004
278 #define RDATASET_ATTR_RETAIN            0x0008
279 #define RDATASET_ATTR_NXDOMAIN          0x0010
280 #define RDATASET_ATTR_RESIGN            0x0020
281 #define RDATASET_ATTR_STATCOUNT         0x0040
282 #define RDATASET_ATTR_OPTOUT            0x0080
283 #define RDATASET_ATTR_NEGATIVE          0x0100
284
285 typedef struct acache_cbarg {
286         dns_rdatasetadditional_t        type;
287         unsigned int                    count;
288         dns_db_t                        *db;
289         dns_dbnode_t                    *node;
290         rdatasetheader_t                *header;
291 } acache_cbarg_t;
292
293 struct acachectl {
294         dns_acacheentry_t               *entry;
295         acache_cbarg_t                  *cbarg;
296 };
297
298 /*
299  * XXX
300  * When the cache will pre-expire data (due to memory low or other
301  * situations) before the rdataset's TTL has expired, it MUST
302  * respect the RETAIN bit and not expire the data until its TTL is
303  * expired.
304  */
305
306 #undef IGNORE                   /* WIN32 winbase.h defines this. */
307
/*
 * Attribute predicates for a pointer to a header with an 'attributes'
 * bitmask.  Each expands to an int expression that is 1 when the condition
 * holds and 0 otherwise.  All of them go through HEADER_ATTR_ISSET() so the
 * mask test is written only once.
 */
#define HEADER_ATTR_ISSET(header, flag) \
        (((header)->attributes & (flag)) != 0)

#define EXISTS(header) \
        (!HEADER_ATTR_ISSET(header, RDATASET_ATTR_NONEXISTENT))
#define NONEXISTENT(header) \
        HEADER_ATTR_ISSET(header, RDATASET_ATTR_NONEXISTENT)
#define IGNORE(header) \
        HEADER_ATTR_ISSET(header, RDATASET_ATTR_IGNORE)
#define RETAIN(header) \
        HEADER_ATTR_ISSET(header, RDATASET_ATTR_RETAIN)
#define NXDOMAIN(header) \
        HEADER_ATTR_ISSET(header, RDATASET_ATTR_NXDOMAIN)
#define RESIGN(header) \
        HEADER_ATTR_ISSET(header, RDATASET_ATTR_RESIGN)
#define OPTOUT(header) \
        HEADER_ATTR_ISSET(header, RDATASET_ATTR_OPTOUT)
#define NEGATIVE(header) \
        HEADER_ATTR_ISSET(header, RDATASET_ATTR_NEGATIVE)
324
325 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
326
327 /*%
328  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
329  * There is a tradeoff issue about configuring this value: if this is too
330  * small, it may cause heavier contention between threads; if this is too large,
331  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
332  * The default value should work well for most environments, but this can
333  * also be configurable at compilation time via the
334  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
335  * 1 due to the assumption of overmem_purge().
336  */
337 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
338 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
339 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
340 #else
341 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
342 #endif
343 #else
344 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
345 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
346
347 typedef struct {
348         nodelock_t                      lock;
349         /* Protected in the refcount routines. */
350         isc_refcount_t                  references;
351         /* Locked by lock. */
352         isc_boolean_t                   exiting;
353 } rbtdb_nodelock_t;
354
355 typedef struct rbtdb_changed {
356         dns_rbtnode_t *                 node;
357         isc_boolean_t                   dirty;
358         ISC_LINK(struct rbtdb_changed)  link;
359 } rbtdb_changed_t;
360
361 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
362
363 typedef enum {
364         dns_db_insecure,
365         dns_db_partial,
366         dns_db_secure
367 } dns_db_secure_t;
368
369 typedef struct dns_rbtdb dns_rbtdb_t;
370
371 typedef struct rbtdb_version {
372         /* Not locked */
373         rbtdb_serial_t                  serial;
374         dns_rbtdb_t *                   rbtdb;
375         /*
376          * Protected in the refcount routines.
377          * XXXJT: should we change the lock policy based on the refcount
378          * performance?
379          */
380         isc_refcount_t                  references;
381         /* Locked by database lock. */
382         isc_boolean_t                   writer;
383         isc_boolean_t                   commit_ok;
384         rbtdb_changedlist_t             changed_list;
385         rdatasetheaderlist_t            resigned_list;
386         ISC_LINK(struct rbtdb_version)  link;
387         dns_db_secure_t                 secure;
388         isc_boolean_t                   havensec3;
389         /* NSEC3 parameters */
390         dns_hash_t                      hash;
391         isc_uint8_t                     flags;
392         isc_uint16_t                    iterations;
393         isc_uint8_t                     salt_length;
394         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
395 } rbtdb_version_t;
396
397 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
398
399 struct dns_rbtdb {
400         /* Unlocked. */
401         dns_db_t                        common;
402         /* Locks the data in this struct */
403 #if DNS_RBTDB_USERWLOCK
404         isc_rwlock_t                    lock;
405 #else
406         isc_mutex_t                     lock;
407 #endif
408         /* Locks the tree structure (prevents nodes appearing/disappearing) */
409         isc_rwlock_t                    tree_lock;
410         /* Locks for individual tree nodes */
411         unsigned int                    node_lock_count;
412         rbtdb_nodelock_t *              node_locks;
413         dns_rbtnode_t *                 origin_node;
414         dns_stats_t *                   rrsetstats; /* cache DB only */
415         /* Locked by lock. */
416         unsigned int                    active;
417         isc_refcount_t                  references;
418         unsigned int                    attributes;
419         rbtdb_serial_t                  current_serial;
420         rbtdb_serial_t                  least_serial;
421         rbtdb_serial_t                  next_serial;
422         rbtdb_version_t *               current_version;
423         rbtdb_version_t *               future_version;
424         rbtdb_versionlist_t             open_versions;
425         isc_task_t *                    task;
426         dns_dbnode_t                    *soanode;
427         dns_dbnode_t                    *nsnode;
428
429         /*
430          * This is a linked list used to implement the LRU cache.  There will
431          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
432          * placed on the linked list rdatasets[1].
433          */
434         rdatasetheaderlist_t            *rdatasets;
435
436         /*%
437          * Temporary storage for stale cache nodes and dynamically deleted
438          * nodes that await being cleaned up.
439          */
440         rbtnodelist_t                   *deadnodes;
441
442         /*
443          * Heaps.  These are used for TTL based expiry in a cache,
444          * or for zone resigning in a zone DB.  hmctx is the memory
445          * context to use for the heap (which differs from the main
446          * database memory context in the case of a cache).
447          */
448         isc_mem_t *                     hmctx;
449         isc_heap_t                      **heaps;
450
451         /* Locked by tree_lock. */
452         dns_rbt_t *                     tree;
453         dns_rbt_t *                     nsec3;
454
455         /* Unlocked */
456         unsigned int                    quantum;
457 };
458
459 #define RBTDB_ATTR_LOADED               0x01
460 #define RBTDB_ATTR_LOADING              0x02
461
462 /*%
463  * Search Context
464  */
465 typedef struct {
466         dns_rbtdb_t *           rbtdb;
467         rbtdb_version_t *       rbtversion;
468         rbtdb_serial_t          serial;
469         unsigned int            options;
470         dns_rbtnodechain_t      chain;
471         isc_boolean_t           copy_name;
472         isc_boolean_t           need_cleanup;
473         isc_boolean_t           wild;
474         dns_rbtnode_t *         zonecut;
475         rdatasetheader_t *      zonecut_rdataset;
476         rdatasetheader_t *      zonecut_sigrdataset;
477         dns_fixedname_t         zonecut_name;
478         isc_stdtime_t           now;
479 } rbtdb_search_t;
480
481 /*%
482  * Load Context
483  */
484 typedef struct {
485         dns_rbtdb_t *           rbtdb;
486         isc_stdtime_t           now;
487 } rbtdb_load_t;
488
489 static void rdataset_disassociate(dns_rdataset_t *rdataset);
490 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
491 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
492 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
493 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
494 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
495 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
496                                         dns_name_t *name,
497                                         dns_rdataset_t *neg,
498                                         dns_rdataset_t *negsig);
499 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
500                                         dns_name_t *name,
501                                         dns_rdataset_t *neg,
502                                         dns_rdataset_t *negsig);
503 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
504                                            dns_rdatasetadditional_t type,
505                                            dns_rdatatype_t qtype,
506                                            dns_acache_t *acache,
507                                            dns_zone_t **zonep,
508                                            dns_db_t **dbp,
509                                            dns_dbversion_t **versionp,
510                                            dns_dbnode_t **nodep,
511                                            dns_name_t *fname,
512                                            dns_message_t *msg,
513                                            isc_stdtime_t now);
514 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
515                                            dns_rdatasetadditional_t type,
516                                            dns_rdatatype_t qtype,
517                                            dns_acache_t *acache,
518                                            dns_zone_t *zone,
519                                            dns_db_t *db,
520                                            dns_dbversion_t *version,
521                                            dns_dbnode_t *node,
522                                            dns_name_t *fname);
523 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
524                                            dns_rdataset_t *rdataset,
525                                            dns_rdatasetadditional_t type,
526                                            dns_rdatatype_t qtype);
527 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
528                                               isc_stdtime_t now);
529 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
530                           isc_stdtime_t now);
531 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
532                           isc_boolean_t tree_locked);
533 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
534                           isc_stdtime_t now, isc_boolean_t tree_locked);
535 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
536                                   rdatasetheader_t *newheader);
537 static void prune_tree(isc_task_t *task, isc_event_t *event);
538 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
539 static void rdataset_expire(dns_rdataset_t *rdataset);
540
541 static dns_rdatasetmethods_t rdataset_methods = {
542         rdataset_disassociate,
543         rdataset_first,
544         rdataset_next,
545         rdataset_current,
546         rdataset_clone,
547         rdataset_count,
548         NULL,
549         rdataset_getnoqname,
550         NULL,
551         rdataset_getclosest,
552         rdataset_getadditional,
553         rdataset_setadditional,
554         rdataset_putadditional,
555         rdataset_settrust,
556         rdataset_expire
557 };
558
559 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
560 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
561 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
562 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
563                                  dns_rdataset_t *rdataset);
564
565 static dns_rdatasetitermethods_t rdatasetiter_methods = {
566         rdatasetiter_destroy,
567         rdatasetiter_first,
568         rdatasetiter_next,
569         rdatasetiter_current
570 };
571
572 typedef struct rbtdb_rdatasetiter {
573         dns_rdatasetiter_t              common;
574         rdatasetheader_t *              current;
575 } rbtdb_rdatasetiter_t;
576
577 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
578 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
579 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
580 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
581                                         dns_name_t *name);
582 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
583 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
584 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
585                                            dns_dbnode_t **nodep,
586                                            dns_name_t *name);
587 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
588 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
589                                           dns_name_t *name);
590
591 static dns_dbiteratormethods_t dbiterator_methods = {
592         dbiterator_destroy,
593         dbiterator_first,
594         dbiterator_last,
595         dbiterator_seek,
596         dbiterator_prev,
597         dbiterator_next,
598         dbiterator_current,
599         dbiterator_pause,
600         dbiterator_origin
601 };
602
603 #define DELETION_BATCH_MAX 64
604
605 /*
606  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
607  */
608 typedef struct rbtdb_dbiterator {
609         dns_dbiterator_t                common;
610         isc_boolean_t                   paused;
611         isc_boolean_t                   new_origin;
612         isc_rwlocktype_t                tree_locked;
613         isc_result_t                    result;
614         dns_fixedname_t                 name;
615         dns_fixedname_t                 origin;
616         dns_rbtnodechain_t              chain;
617         dns_rbtnodechain_t              nsec3chain;
618         dns_rbtnodechain_t              *current;
619         dns_rbtnode_t                   *node;
620         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
621         int                             delete;
622         isc_boolean_t                   nsec3only;
623         isc_boolean_t                   nonsec3;
624 } rbtdb_dbiterator_t;
625
626
627 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
628 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
629
630 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
631                        isc_event_t *event);
632 static void overmem(dns_db_t *db, isc_boolean_t overmem);
633 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
634                                isc_boolean_t *nsec3createflag);
635
636 /*%
637  * 'init_count' is used to initialize 'newheader->count' which in turn
638  * is used to determine where in the cycle rrset-order cyclic starts.
639  * We don't lock this as we don't care about simultaneous updates.
640  *
641  * Note:
642  *      Both init_count and header->count can be ISC_UINT32_MAX.
643  *      The count on the returned rdataset however can't be as
644  *      that indicates that the database does not implement cyclic
645  *      processing.
646  */
647 static unsigned int init_count;
648
649 /*
650  * Locking
651  *
652  * If a routine is going to lock more than one lock in this module, then
653  * the locking must be done in the following order:
654  *
655  *      Tree Lock
656  *
657  *      Node Lock       (Only one from the set may be locked at one time by
658  *                       any caller)
659  *
660  *      Database Lock
661  *
662  * Failure to follow this hierarchy can result in deadlock.
663  */
664
665 /*
666  * Deleting Nodes
667  *
668  * For zone databases the node for the origin of the zone MUST NOT be deleted.
669  */
670
671
672 /*
673  * DB Routines
674  */
675
676 static void
677 attach(dns_db_t *source, dns_db_t **targetp) {
678         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
679
680         REQUIRE(VALID_RBTDB(rbtdb));
681
682         isc_refcount_increment(&rbtdb->references, NULL);
683
684         *targetp = source;
685 }
686
687 static void
688 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
689         dns_rbtdb_t *rbtdb = event->ev_arg;
690
691         UNUSED(task);
692
693         free_rbtdb(rbtdb, ISC_TRUE, event);
694 }
695
696 static void
697 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
698                   isc_boolean_t increment)
699 {
700         dns_rdatastatstype_t statattributes = 0;
701         dns_rdatastatstype_t base = 0;
702         dns_rdatastatstype_t type;
703
704         /* At the moment we count statistics only for cache DB */
705         INSIST(IS_CACHE(rbtdb));
706
707         if (NEGATIVE(header)) {
708                 if (NXDOMAIN(header))
709                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
710                 else {
711                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
712                         base = RBTDB_RDATATYPE_EXT(header->type);
713                 }
714         } else
715                 base = RBTDB_RDATATYPE_BASE(header->type);
716
717         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
718         if (increment)
719                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
720         else
721                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
722 }
723
724 static void
725 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
726         int idx;
727         isc_heap_t *heap;
728         dns_ttl_t oldttl;
729
730         oldttl = header->rdh_ttl;
731         header->rdh_ttl = newttl;
732
733         if (!IS_CACHE(rbtdb))
734                 return;
735
736         /*
737          * It's possible the rbtdb is not a cache.  If this is the case,
738          * we will not have a heap, and we move on.  If we do, though,
739          * we might need to adjust things.
740          */
741         if (header->heap_index == 0 || newttl == oldttl)
742                 return;
743         idx = header->node->locknum;
744         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
745             return;
746         heap = rbtdb->heaps[idx];
747
748         if (newttl < oldttl)
749                 isc_heap_increased(heap, header->heap_index);
750         else
751                 isc_heap_decreased(heap, header->heap_index);
752 }
753
754 /*%
755  * These functions allow the heap code to rank the priority of each
756  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
757  */
758 static isc_boolean_t
759 ttl_sooner(void *v1, void *v2) {
760         rdatasetheader_t *h1 = v1;
761         rdatasetheader_t *h2 = v2;
762
763         if (h1->rdh_ttl < h2->rdh_ttl)
764                 return (ISC_TRUE);
765         return (ISC_FALSE);
766 }
767
768 static isc_boolean_t
769 resign_sooner(void *v1, void *v2) {
770         rdatasetheader_t *h1 = v1;
771         rdatasetheader_t *h2 = v2;
772
773         if (h1->resign < h2->resign)
774                 return (ISC_TRUE);
775         return (ISC_FALSE);
776 }
777
778 /*%
779  * This function sets the heap index into the header.
780  */
781 static void
782 set_index(void *what, unsigned int index) {
783         rdatasetheader_t *h = what;
784
785         h->heap_index = index;
786 }
787
788 /*%
789  * Work out how many nodes can be deleted in the time between two
790  * requests to the nameserver.  Smooth the resulting number and use it
791  * as a estimate for the number of nodes to be deleted in the next
792  * iteration.
793  */
794 static unsigned int
795 adjust_quantum(unsigned int old, isc_time_t *start) {
796         unsigned int pps = dns_pps;     /* packets per second */
797         unsigned int interval;
798         isc_uint64_t usecs;
799         isc_time_t end;
800         unsigned int new;
801
802         if (pps < 100)
803                 pps = 100;
804         isc_time_now(&end);
805
806         interval = 1000000 / pps;       /* interval in usec */
807         if (interval == 0)
808                 interval = 1;
809         usecs = isc_time_microdiff(&end, start);
810         if (usecs == 0) {
811                 /*
812                  * We were unable to measure the amount of time taken.
813                  * Double the nodes deleted next time.
814                  */
815                 old *= 2;
816                 if (old > 1000)
817                         old = 1000;
818                 return (old);
819         }
820         new = old * interval;
821         new /= (unsigned int)usecs;
822         if (new == 0)
823                 new = 1;
824         else if (new > 1000)
825                 new = 1000;
826
827         /* Smooth */
828         new = (new + old * 3) / 4;
829
830         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
831                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
832
833         return (new);
834 }
835
836 static void
837 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
838         unsigned int i;
839         isc_ondestroy_t ondest;
840         isc_result_t result;
841         char buf[DNS_NAME_FORMATSIZE];
842         isc_time_t start;
843
844         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
845                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
846
847         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
848         REQUIRE(rbtdb->future_version == NULL);
849
850         if (rbtdb->current_version != NULL) {
851                 unsigned int refs;
852
853                 isc_refcount_decrement(&rbtdb->current_version->references,
854                                        &refs);
855                 INSIST(refs == 0);
856                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
857                 isc_refcount_destroy(&rbtdb->current_version->references);
858                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
859                             sizeof(rbtdb_version_t));
860         }
861
862         /*
863          * We assume the number of remaining dead nodes is reasonably small;
864          * the overhead of unlinking all nodes here should be negligible.
865          */
866         for (i = 0; i < rbtdb->node_lock_count; i++) {
867                 dns_rbtnode_t *node;
868
869                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
870                 while (node != NULL) {
871                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
872                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
873                 }
874         }
875
876         if (event == NULL)
877                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
878  again:
879         if (rbtdb->tree != NULL) {
880                 isc_time_now(&start);
881                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
882                 if (result == ISC_R_QUOTA) {
883                         INSIST(rbtdb->task != NULL);
884                         if (rbtdb->quantum != 0)
885                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
886                                                                 &start);
887                         if (event == NULL)
888                                 event = isc_event_allocate(rbtdb->common.mctx,
889                                                            NULL,
890                                                          DNS_EVENT_FREESTORAGE,
891                                                            free_rbtdb_callback,
892                                                            rbtdb,
893                                                            sizeof(isc_event_t));
894                         if (event == NULL)
895                                 goto again;
896                         isc_task_send(rbtdb->task, &event);
897                         return;
898                 }
899                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
900         }
901
902         if (rbtdb->nsec3 != NULL) {
903                 isc_time_now(&start);
904                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
905                 if (result == ISC_R_QUOTA) {
906                         INSIST(rbtdb->task != NULL);
907                         if (rbtdb->quantum != 0)
908                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
909                                                                 &start);
910                         if (event == NULL)
911                                 event = isc_event_allocate(rbtdb->common.mctx,
912                                                            NULL,
913                                                          DNS_EVENT_FREESTORAGE,
914                                                            free_rbtdb_callback,
915                                                            rbtdb,
916                                                            sizeof(isc_event_t));
917                         if (event == NULL)
918                                 goto again;
919                         isc_task_send(rbtdb->task, &event);
920                         return;
921                 }
922                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
923         }
924
925         if (event != NULL)
926                 isc_event_free(&event);
927         if (log) {
928                 if (dns_name_dynamic(&rbtdb->common.origin))
929                         dns_name_format(&rbtdb->common.origin, buf,
930                                         sizeof(buf));
931                 else
932                         strcpy(buf, "<UNKNOWN>");
933                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
934                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
935                               "done free_rbtdb(%s)", buf);
936         }
937         if (dns_name_dynamic(&rbtdb->common.origin))
938                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
939         for (i = 0; i < rbtdb->node_lock_count; i++) {
940                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
941                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
942         }
943
944         /*
945          * Clean up LRU / re-signing order lists.
946          */
947         if (rbtdb->rdatasets != NULL) {
948                 for (i = 0; i < rbtdb->node_lock_count; i++)
949                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
950                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
951                             rbtdb->node_lock_count *
952                             sizeof(rdatasetheaderlist_t));
953         }
954         /*
955          * Clean up dead node buckets.
956          */
957         if (rbtdb->deadnodes != NULL) {
958                 for (i = 0; i < rbtdb->node_lock_count; i++)
959                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
960                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
961                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
962         }
963         /*
964          * Clean up heap objects.
965          */
966         if (rbtdb->heaps != NULL) {
967                 for (i = 0; i < rbtdb->node_lock_count; i++)
968                         isc_heap_destroy(&rbtdb->heaps[i]);
969                 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
970                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
971         }
972
973         if (rbtdb->rrsetstats != NULL)
974                 dns_stats_detach(&rbtdb->rrsetstats);
975
976         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
977                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
978         isc_rwlock_destroy(&rbtdb->tree_lock);
979         isc_refcount_destroy(&rbtdb->references);
980         if (rbtdb->task != NULL)
981                 isc_task_detach(&rbtdb->task);
982
983         RBTDB_DESTROYLOCK(&rbtdb->lock);
984         rbtdb->common.magic = 0;
985         rbtdb->common.impmagic = 0;
986         ondest = rbtdb->common.ondest;
987         isc_mem_detach(&rbtdb->hmctx);
988         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
989         isc_ondestroy_notify(&ondest, rbtdb);
990 }
991
992 static inline void
993 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
994         isc_boolean_t want_free = ISC_FALSE;
995         unsigned int i;
996         unsigned int inactive = 0;
997
998         /* XXX check for open versions here */
999
1000         if (rbtdb->soanode != NULL)
1001                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1002         if (rbtdb->nsnode != NULL)
1003                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1004
1005         /*
1006          * Even though there are no external direct references, there still
1007          * may be nodes in use.
1008          */
1009         for (i = 0; i < rbtdb->node_lock_count; i++) {
1010                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1011                 rbtdb->node_locks[i].exiting = ISC_TRUE;
1012                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1013                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1014                     == 0) {
1015                         inactive++;
1016                 }
1017         }
1018
1019         if (inactive != 0) {
1020                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1021                 rbtdb->active -= inactive;
1022                 if (rbtdb->active == 0)
1023                         want_free = ISC_TRUE;
1024                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1025                 if (want_free) {
1026                         char buf[DNS_NAME_FORMATSIZE];
1027                         if (dns_name_dynamic(&rbtdb->common.origin))
1028                                 dns_name_format(&rbtdb->common.origin, buf,
1029                                                 sizeof(buf));
1030                         else
1031                                 strcpy(buf, "<UNKNOWN>");
1032                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1033                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1034                                       "calling free_rbtdb(%s)", buf);
1035                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1036                 }
1037         }
1038 }
1039
1040 static void
1041 detach(dns_db_t **dbp) {
1042         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1043         unsigned int refs;
1044
1045         REQUIRE(VALID_RBTDB(rbtdb));
1046
1047         isc_refcount_decrement(&rbtdb->references, &refs);
1048
1049         if (refs == 0)
1050                 maybe_free_rbtdb(rbtdb);
1051
1052         *dbp = NULL;
1053 }
1054
1055 static void
1056 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1057         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1058         rbtdb_version_t *version;
1059         unsigned int refs;
1060
1061         REQUIRE(VALID_RBTDB(rbtdb));
1062
1063         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1064         version = rbtdb->current_version;
1065         isc_refcount_increment(&version->references, &refs);
1066         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1067
1068         *versionp = (dns_dbversion_t *)version;
1069 }
1070
1071 static inline rbtdb_version_t *
1072 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1073                  unsigned int references, isc_boolean_t writer)
1074 {
1075         isc_result_t result;
1076         rbtdb_version_t *version;
1077
1078         version = isc_mem_get(mctx, sizeof(*version));
1079         if (version == NULL)
1080                 return (NULL);
1081         version->serial = serial;
1082         result = isc_refcount_init(&version->references, references);
1083         if (result != ISC_R_SUCCESS) {
1084                 isc_mem_put(mctx, version, sizeof(*version));
1085                 return (NULL);
1086         }
1087         version->writer = writer;
1088         version->commit_ok = ISC_FALSE;
1089         ISC_LIST_INIT(version->changed_list);
1090         ISC_LIST_INIT(version->resigned_list);
1091         ISC_LINK_INIT(version, link);
1092
1093         return (version);
1094 }
1095
1096 static isc_result_t
1097 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1098         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1099         rbtdb_version_t *version;
1100
1101         REQUIRE(VALID_RBTDB(rbtdb));
1102         REQUIRE(versionp != NULL && *versionp == NULL);
1103         REQUIRE(rbtdb->future_version == NULL);
1104
1105         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1106         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1107         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1108                                    ISC_TRUE);
1109         if (version != NULL) {
1110                 version->rbtdb = rbtdb;
1111                 version->commit_ok = ISC_TRUE;
1112                 version->secure = rbtdb->current_version->secure;
1113                 version->havensec3 = rbtdb->current_version->havensec3;
1114                 if (version->havensec3) {
1115                         version->flags = rbtdb->current_version->flags;
1116                         version->iterations =
1117                                 rbtdb->current_version->iterations;
1118                         version->hash = rbtdb->current_version->hash;
1119                         version->salt_length =
1120                                 rbtdb->current_version->salt_length;
1121                         memcpy(version->salt, rbtdb->current_version->salt,
1122                                version->salt_length);
1123                 } else {
1124                         version->flags = 0;
1125                         version->iterations = 0;
1126                         version->hash = 0;
1127                         version->salt_length = 0;
1128                         memset(version->salt, 0, sizeof(version->salt));
1129                 }
1130                 rbtdb->next_serial++;
1131                 rbtdb->future_version = version;
1132         }
1133         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1134
1135         if (version == NULL)
1136                 return (ISC_R_NOMEMORY);
1137
1138         *versionp = version;
1139
1140         return (ISC_R_SUCCESS);
1141 }
1142
1143 static void
1144 attachversion(dns_db_t *db, dns_dbversion_t *source,
1145               dns_dbversion_t **targetp)
1146 {
1147         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1148         rbtdb_version_t *rbtversion = source;
1149         unsigned int refs;
1150
1151         REQUIRE(VALID_RBTDB(rbtdb));
1152         INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
1153
1154         isc_refcount_increment(&rbtversion->references, &refs);
1155         INSIST(refs > 1);
1156
1157         *targetp = rbtversion;
1158 }
1159
1160 static rbtdb_changed_t *
1161 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1162             dns_rbtnode_t *node)
1163 {
1164         rbtdb_changed_t *changed;
1165         unsigned int refs;
1166
1167         /*
1168          * Caller must be holding the node lock if its reference must be
1169          * protected by the lock.
1170          */
1171
1172         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1173
1174         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1175
1176         REQUIRE(version->writer);
1177
1178         if (changed != NULL) {
1179                 dns_rbtnode_refincrement(node, &refs);
1180                 INSIST(refs != 0);
1181                 changed->node = node;
1182                 changed->dirty = ISC_FALSE;
1183                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1184         } else
1185                 version->commit_ok = ISC_FALSE;
1186
1187         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1188
1189         return (changed);
1190 }
1191
1192 static void
1193 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1194                  acachectl_t *array)
1195 {
1196         unsigned int count;
1197         unsigned int i;
1198         unsigned char *raw;     /* RDATASLAB */
1199
1200         /*
1201          * The caller must be holding the corresponding node lock.
1202          */
1203
1204         if (array == NULL)
1205                 return;
1206
1207         raw = (unsigned char *)header + sizeof(*header);
1208         count = raw[0] * 256 + raw[1];
1209
1210         /*
1211          * Sanity check: since an additional cache entry has a reference to
1212          * the original DB node (in the callback arg), there should be no
1213          * acache entries when the node can be freed.
1214          */
1215         for (i = 0; i < count; i++)
1216                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1217
1218         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1219 }
1220
1221 static inline void
1222 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1223
1224         if (dns_name_dynamic(&(*noqname)->name))
1225                 dns_name_free(&(*noqname)->name, mctx);
1226         if ((*noqname)->neg != NULL)
1227                 isc_mem_put(mctx, (*noqname)->neg,
1228                             dns_rdataslab_size((*noqname)->neg, 0));
1229         if ((*noqname)->negsig != NULL)
1230                 isc_mem_put(mctx, (*noqname)->negsig,
1231                             dns_rdataslab_size((*noqname)->negsig, 0));
1232         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1233         *noqname = NULL;
1234 }
1235
1236 static inline void
1237 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1238 {
1239         ISC_LINK_INIT(h, link);
1240         h->heap_index = 0;
1241
1242 #if TRACE_HEADER
1243         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1244                 fprintf(stderr, "initialized header: %p\n", h);
1245 #else
1246         UNUSED(rbtdb);
1247 #endif
1248 }
1249
1250 static inline rdatasetheader_t *
1251 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1252 {
1253         rdatasetheader_t *h;
1254
1255         h = isc_mem_get(mctx, sizeof(*h));
1256         if (h == NULL)
1257                 return (NULL);
1258
1259 #if TRACE_HEADER
1260         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1261                 fprintf(stderr, "allocated header: %p\n", h);
1262 #endif
1263         init_rdataset(rbtdb, h);
1264         return (h);
1265 }
1266
1267 static inline void
1268 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1269 {
1270         unsigned int size;
1271         int idx;
1272
1273         if (EXISTS(rdataset) &&
1274             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1275                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1276         }
1277
1278         idx = rdataset->node->locknum;
1279         if (ISC_LINK_LINKED(rdataset, link)) {
1280                 INSIST(IS_CACHE(rbtdb));
1281                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1282         }
1283         if (rdataset->heap_index != 0)
1284                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1285         rdataset->heap_index = 0;
1286
1287         if (rdataset->noqname != NULL)
1288                 free_noqname(mctx, &rdataset->noqname);
1289         if (rdataset->closest != NULL)
1290                 free_noqname(mctx, &rdataset->closest);
1291
1292         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1293         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1294
1295         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1296                 size = sizeof(*rdataset);
1297         else
1298                 size = dns_rdataslab_size((unsigned char *)rdataset,
1299                                           sizeof(*rdataset));
1300         isc_mem_put(mctx, rdataset, size);
1301 }
1302
1303 static inline void
1304 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1305         rdatasetheader_t *header, *dcurrent;
1306         isc_boolean_t make_dirty = ISC_FALSE;
1307
1308         /*
1309          * Caller must hold the node lock.
1310          */
1311
1312         /*
1313          * We set the IGNORE attribute on rdatasets with serial number
1314          * 'serial'.  When the reference count goes to zero, these rdatasets
1315          * will be cleaned up; until that time, they will be ignored.
1316          */
1317         for (header = node->data; header != NULL; header = header->next) {
1318                 if (header->serial == serial) {
1319                         header->attributes |= RDATASET_ATTR_IGNORE;
1320                         make_dirty = ISC_TRUE;
1321                 }
1322                 for (dcurrent = header->down;
1323                      dcurrent != NULL;
1324                      dcurrent = dcurrent->down) {
1325                         if (dcurrent->serial == serial) {
1326                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1327                                 make_dirty = ISC_TRUE;
1328                         }
1329                 }
1330         }
1331         if (make_dirty)
1332                 node->dirty = 1;
1333 }
1334
1335 static inline void
1336 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1337 {
1338         rdatasetheader_t *d, *down_next;
1339
1340         for (d = top->down; d != NULL; d = down_next) {
1341                 down_next = d->down;
1342                 free_rdataset(rbtdb, mctx, d);
1343         }
1344         top->down = NULL;
1345 }
1346
1347 static inline void
1348 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1349         rdatasetheader_t *current, *top_prev, *top_next;
1350         isc_mem_t *mctx = rbtdb->common.mctx;
1351
1352         /*
1353          * Caller must be holding the node lock.
1354          */
1355
1356         top_prev = NULL;
1357         for (current = node->data; current != NULL; current = top_next) {
1358                 top_next = current->next;
1359                 clean_stale_headers(rbtdb, mctx, current);
1360                 /*
1361                  * If current is nonexistent or stale, we can clean it up.
1362                  */
1363                 if ((current->attributes &
1364                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1365                         if (top_prev != NULL)
1366                                 top_prev->next = current->next;
1367                         else
1368                                 node->data = current->next;
1369                         free_rdataset(rbtdb, mctx, current);
1370                 } else
1371                         top_prev = current;
1372         }
1373         node->dirty = 0;
1374 }
1375
1376 static inline void
1377 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1378                 rbtdb_serial_t least_serial)
1379 {
1380         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1381         rdatasetheader_t *top_prev, *top_next;
1382         isc_mem_t *mctx = rbtdb->common.mctx;
1383         isc_boolean_t still_dirty = ISC_FALSE;
1384
1385         /*
1386          * Caller must be holding the node lock.
1387          */
1388         REQUIRE(least_serial != 0);
1389
1390         top_prev = NULL;
1391         for (current = node->data; current != NULL; current = top_next) {
1392                 top_next = current->next;
1393
1394                 /*
1395                  * First, we clean up any instances of multiple rdatasets
1396                  * with the same serial number, or that have the IGNORE
1397                  * attribute.
1398                  */
1399                 dparent = current;
1400                 for (dcurrent = current->down;
1401                      dcurrent != NULL;
1402                      dcurrent = down_next) {
1403                         down_next = dcurrent->down;
1404                         INSIST(dcurrent->serial <= dparent->serial);
1405                         if (dcurrent->serial == dparent->serial ||
1406                             IGNORE(dcurrent)) {
1407                                 if (down_next != NULL)
1408                                         down_next->next = dparent;
1409                                 dparent->down = down_next;
1410                                 free_rdataset(rbtdb, mctx, dcurrent);
1411                         } else
1412                                 dparent = dcurrent;
1413                 }
1414
1415                 /*
1416                  * We've now eliminated all IGNORE datasets with the possible
1417                  * exception of current, which we now check.
1418                  */
1419                 if (IGNORE(current)) {
1420                         down_next = current->down;
1421                         if (down_next == NULL) {
1422                                 if (top_prev != NULL)
1423                                         top_prev->next = current->next;
1424                                 else
1425                                         node->data = current->next;
1426                                 free_rdataset(rbtdb, mctx, current);
1427                                 /*
1428                                  * current no longer exists, so we can
1429                                  * just continue with the loop.
1430                                  */
1431                                 continue;
1432                         } else {
1433                                 /*
1434                                  * Pull up current->down, making it the new
1435                                  * current.
1436                                  */
1437                                 if (top_prev != NULL)
1438                                         top_prev->next = down_next;
1439                                 else
1440                                         node->data = down_next;
1441                                 down_next->next = top_next;
1442                                 free_rdataset(rbtdb, mctx, current);
1443                                 current = down_next;
1444                         }
1445                 }
1446
1447                 /*
1448                  * We now try to find the first down node less than the
1449                  * least serial.
1450                  */
1451                 dparent = current;
1452                 for (dcurrent = current->down;
1453                      dcurrent != NULL;
1454                      dcurrent = down_next) {
1455                         down_next = dcurrent->down;
1456                         if (dcurrent->serial < least_serial)
1457                                 break;
1458                         dparent = dcurrent;
1459                 }
1460
1461                 /*
1462                  * If there is a such an rdataset, delete it and any older
1463                  * versions.
1464                  */
1465                 if (dcurrent != NULL) {
1466                         do {
1467                                 down_next = dcurrent->down;
1468                                 INSIST(dcurrent->serial <= least_serial);
1469                                 free_rdataset(rbtdb, mctx, dcurrent);
1470                                 dcurrent = down_next;
1471                         } while (dcurrent != NULL);
1472                         dparent->down = NULL;
1473                 }
1474
1475                 /*
1476                  * Note.  The serial number of 'current' might be less than
1477                  * least_serial too, but we cannot delete it because it is
1478                  * the most recent version, unless it is a NONEXISTENT
1479                  * rdataset.
1480                  */
1481                 if (current->down != NULL) {
1482                         still_dirty = ISC_TRUE;
1483                         top_prev = current;
1484                 } else {
1485                         /*
1486                          * If this is a NONEXISTENT rdataset, we can delete it.
1487                          */
1488                         if (NONEXISTENT(current)) {
1489                                 if (top_prev != NULL)
1490                                         top_prev->next = current->next;
1491                                 else
1492                                         node->data = current->next;
1493                                 free_rdataset(rbtdb, mctx, current);
1494                         } else
1495                                 top_prev = current;
1496                 }
1497         }
1498         if (!still_dirty)
1499                 node->dirty = 0;
1500 }
1501
1502 /*%
1503  * Clean up dead nodes.  These are nodes which have no references, and
1504  * have no data.  They are dead but we could not or chose not to delete
1505  * them when we deleted all the data at that node because we did not want
1506  * to wait for the tree write lock.
1507  *
1508  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1509  */
1510 static void
1511 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1512         dns_rbtnode_t *node;
1513         isc_result_t result;
1514         int count = 10;         /* XXXJT: should be adjustable */
1515
1516         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1517         while (node != NULL && count > 0) {
1518                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1519
1520                 /*
1521                  * Since we're holding a tree write lock, it should be
1522                  * impossible for this node to be referenced by others.
1523                  */
1524                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1525                        node->data == NULL);
1526
1527                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1528                 if (node->nsec3)
1529                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1530                                                     ISC_FALSE);
1531                 else
1532                         result = dns_rbt_deletenode(rbtdb->tree, node,
1533                                                     ISC_FALSE);
1534                 if (result != ISC_R_SUCCESS)
1535                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1536                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1537                                       "cleanup_dead_nodes: "
1538                                       "dns_rbt_deletenode: %s",
1539                                       isc_result_totext(result));
1540                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1541                 count--;
1542         }
1543 }
1544
1545 /*
1546  * Caller must be holding the node lock.
1547  */
1548 static inline void
1549 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1550         unsigned int lockrefs, noderefs;
1551         isc_refcount_t *lockref;
1552
1553         INSIST(!ISC_LINK_LINKED(node, deadlink));
1554         dns_rbtnode_refincrement0(node, &noderefs);
1555         if (noderefs == 1) {    /* this is the first reference to the node */
1556                 lockref = &rbtdb->node_locks[node->locknum].references;
1557                 isc_refcount_increment0(lockref, &lockrefs);
1558                 INSIST(lockrefs != 0);
1559         }
1560         INSIST(noderefs != 0);
1561 }
1562
1563 /*
1564  * This function is assumed to be called when a node is newly referenced
1565  * and can be in the deadnode list.  In that case the node must be retrieved
1566  * from the list because it is going to be used.  In addition, if the caller
1567  * happens to hold a write lock on the tree, it's a good chance to purge dead
1568  * nodes.
1569  * Note: while a new reference is gained in multiple places, there are only very
1570  * few cases where the node can be in the deadnode list (only empty nodes can
1571  * have been added to the list).
1572  */
1573 static inline void
1574 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1575                 isc_rwlocktype_t treelocktype)
1576 {
1577         isc_rwlocktype_t locktype = isc_rwlocktype_read;
1578         nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
1579         isc_boolean_t maybe_cleanup = ISC_FALSE;
1580
1581         POST(locktype);
1582
1583         NODE_STRONGLOCK(nodelock);
1584         NODE_WEAKLOCK(nodelock, locktype);
1585
1586         /*
1587          * Check if we can possibly cleanup the dead node.  If so, upgrade
1588          * the node lock below to perform the cleanup.
1589          */
1590         if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1591             treelocktype == isc_rwlocktype_write) {
1592                 maybe_cleanup = ISC_TRUE;
1593         }
1594
1595         if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
1596                 /*
1597                  * Upgrade the lock and test if we still need to unlink.
1598                  */
1599                 NODE_WEAKUNLOCK(nodelock, locktype);
1600                 locktype = isc_rwlocktype_write;
1601                 POST(locktype);
1602                 NODE_WEAKLOCK(nodelock, locktype);
1603                 if (ISC_LINK_LINKED(node, deadlink))
1604                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1605                                         node, deadlink);
1606                 if (maybe_cleanup)
1607                         cleanup_dead_nodes(rbtdb, node->locknum);
1608         }
1609
1610         new_reference(rbtdb, node);
1611
1612         NODE_WEAKUNLOCK(nodelock, locktype);
1613         NODE_STRONGUNLOCK(nodelock);
1614 }
1615
1616 /*
1617  * Caller must be holding the node lock; either the "strong", read or write
1618  * lock.  Note that the lock must be held even when node references are
1619  * atomically modified; in that case the decrement operation itself does not
1620  * have to be protected, but we must avoid a race condition where multiple
1621  * threads are decreasing the reference to zero simultaneously and at least
1622  * one of them is going to free the node.
1623  * This function returns ISC_TRUE if and only if the node reference decreases
1624  * to zero.
      *
      * Exception to the above: when the count reaches zero but a pruning event
      * is dispatched for the node, a new reference is taken on behalf of that
      * event and ISC_FALSE is returned.
      *
      * Parameters:
      *   rbtdb        - database owning 'node'.
      *   node         - node whose reference count is decremented.
      *   least_serial - serial passed to clean_zone_node() for a dirty node of
      *                  a non-cache database; 0 means "unknown", in which case
      *                  rbtdb->least_serial is read under the database lock.
      *   nlock        - type of node lock the caller holds.  A read lock is
      *                  temporarily replaced by a write lock on the slow path
      *                  and downgraded again before returning.
      *   tlock        - type of tree lock the caller holds (none, read or
      *                  write).  Unless it is already a write lock, a write
      *                  lock is attempted with a try-lock/try-upgrade only, and
      *                  the caller's original state is restored on return.
      *   pruning      - ISC_TRUE when called from prune_tree(); suppresses
      *                  dispatching another pruning event.
1625  */
1626 static isc_boolean_t
1627 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1628                     rbtdb_serial_t least_serial,
1629                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1630                     isc_boolean_t pruning)
1631 {
1632         isc_result_t result;
1633         isc_boolean_t write_locked;
1634         rbtdb_nodelock_t *nodelock;
1635         unsigned int refs, nrefs;
1636         int bucket = node->locknum;
1637         isc_boolean_t no_reference = ISC_TRUE;
1638
1639         nodelock = &rbtdb->node_locks[bucket];
1640
1641         /* Handle easy and typical case first. */
             /*
              * A node that is not dirty and still has data or a subtree needs
              * no cleaning and cannot be deleted: just drop the reference, and
              * the bucket's reference as well if it was the node's last one.
              */
1642         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1643                 dns_rbtnode_refdecrement(node, &nrefs);
1644                 INSIST((int)nrefs >= 0);
1645                 if (nrefs == 0) {
1646                         isc_refcount_decrement(&nodelock->references, &refs);
1647                         INSIST((int)refs >= 0);
1648                 }
1649                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1650         }
1651
1652         /* Upgrade the lock? */
1653         if (nlock == isc_rwlocktype_read) {
1654                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1655                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1656         }
1657
1658         dns_rbtnode_refdecrement(node, &nrefs);
1659         INSIST((int)nrefs >= 0);
1660         if (nrefs > 0) {
1661                 /* Restore the lock? */
1662                 if (nlock == isc_rwlocktype_read)
1663                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1664                 return (ISC_FALSE);
1665         }
1666
             /* From here on the node's reference count is zero. */
1667         if (node->dirty) {
1668                 if (IS_CACHE(rbtdb))
1669                         clean_cache_node(rbtdb, node);
1670                 else {
1671                         if (least_serial == 0) {
1672                                 /*
1673                                  * Caller doesn't know the least serial.
1674                                  * Get it.
1675                                  */
1676                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1677                                 least_serial = rbtdb->least_serial;
1678                                 RBTDB_UNLOCK(&rbtdb->lock,
1679                                              isc_rwlocktype_read);
1680                         }
1681                         clean_zone_node(rbtdb, node, least_serial);
1682                 }
1683         }
1684
1685         /*
1686          * Attempt to switch to a write lock on the tree.  If this fails,
1687          * we will add this node to a linked list of nodes in this locking
1688          * bucket which we will free later.
1689          */
1690         if (tlock != isc_rwlocktype_write) {
1691                 /*
1692                  * Locking hierarchy notwithstanding, we don't need to free
1693                  * the node lock before acquiring the tree write lock because
1694                  * we only do a trylock.
1695                  */
1696                 if (tlock == isc_rwlocktype_read)
1697                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1698                 else
1699                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1700                                                     isc_rwlocktype_write);
1701                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1702                               result == ISC_R_LOCKBUSY);
1703
1704                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1705         } else
1706                 write_locked = ISC_TRUE;
1707
             /*
              * The node's last reference is gone, so drop the reference it
              * held on its lock bucket.
              */
1708         isc_refcount_decrement(&nodelock->references, &refs);
1709         INSIST((int)refs >= 0);
1710
1711         /*
1712          * XXXDCL should this only be done for cache zones?
              *
              * A node that still has data or a subtree stays in the tree.
1713          */
1714         if (node->data != NULL || node->down != NULL)
1715                 goto restore_locks;
1716
1717         if (write_locked) {
1718                 /*
1719                  * We can now delete the node.
1720                  */
1721
1722                 /*
1723                  * If this node is the only one in the level it's in, deleting
1724                  * this node may recursively make its parent the only node in
1725                  * the parent level; if so, and if no one is currently using
1726                  * the parent node, this is almost the only opportunity to
1727                  * clean it up.  But the recursive cleanup is not that trivial
1728                  * since the child and parent may be in different lock buckets,
1729                  * which would cause a lock order reversal problem.  To avoid
1730                  * the trouble, we'll dispatch a separate event for batch
1731                  * cleaning.  We need to check whether we're deleting the node
1732                  * as a result of pruning to avoid infinite dispatching.
1733                  * Note: pruning happens only when a task has been set for the
1734                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1735                  * it's their responsibility to purge stale leaves (e.g. by
1736                  * periodic walk-through).
1737                  */
1738                 if (!pruning && node->parent != NULL &&
1739                     node->parent->down == node && node->left == NULL &&
1740                     node->right == NULL && rbtdb->task != NULL) {
1741                         isc_event_t *ev;
1742                         dns_db_t *db;
1743
1744                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1745                                                 DNS_EVENT_RBTPRUNE,
1746                                                 prune_tree, node,
1747                                                 sizeof(isc_event_t));
1748                         if (ev != NULL) {
                                     /*
                                      * The event carries its own reference to
                                      * the node and to the database (attached
                                      * as the event sender), so the node is
                                      * reported as still referenced.
                                      */
1749                                 new_reference(rbtdb, node);
1750                                 db = NULL;
1751                                 attach((dns_db_t *)rbtdb, &db);
1752                                 ev->ev_sender = db;
1753                                 isc_task_send(rbtdb->task, &ev);
1754                                 no_reference = ISC_FALSE;
1755                         } else {
1756                                 /*
1757                                  * XXX: this is a weird situation.  We could
1758                                  * ignore this error case, but then the stale
1759                                  * node will unlikely be purged except via a
1760                                  * rare condition such as manual cleanup.  So
1761                                  * we queue it in the deadnodes list, hoping
1762                                  * the memory shortage is temporary and the node
1763                                  * will be deleted later.
1764                                  */
1765                                 isc_log_write(dns_lctx,
1766                                               DNS_LOGCATEGORY_DATABASE,
1767                                               DNS_LOGMODULE_CACHE,
1768                                               ISC_LOG_INFO,
1769                                               "decrement_reference: failed to "
1770                                               "allocate pruning event");
1771                                 INSIST(node->data == NULL);
1772                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1773                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1774                                                 deadlink);
1775                         }
1776                 } else {
1777                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1778                                 char printname[DNS_NAME_FORMATSIZE];
1779
1780                                 isc_log_write(dns_lctx,
1781                                               DNS_LOGCATEGORY_DATABASE,
1782                                               DNS_LOGMODULE_CACHE,
1783                                               ISC_LOG_DEBUG(1),
1784                                               "decrement_reference: "
1785                                               "delete from rbt: %p %s",
1786                                               node,
1787                                               dns_rbt_formatnodename(node,
1788                                                         printname,
1789                                                         sizeof(printname)));
1790                         }
1791
                             /*
                              * Remove the node from whichever tree it lives in;
                              * a failure is logged but otherwise ignored.
                              */
1792                         INSIST(!ISC_LINK_LINKED(node, deadlink));
1793                         if (node->nsec3)
1794                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1795                                                             ISC_FALSE);
1796                         else
1797                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1798                                                             ISC_FALSE);
1799                         if (result != ISC_R_SUCCESS) {
1800                                 isc_log_write(dns_lctx,
1801                                               DNS_LOGCATEGORY_DATABASE,
1802                                               DNS_LOGMODULE_CACHE,
1803                                               ISC_LOG_WARNING,
1804                                               "decrement_reference: "
1805                                               "dns_rbt_deletenode: %s",
1806                                               isc_result_totext(result));
1807                         }
1808                 }
1809         } else {
                     /*
                      * No tree write lock could be obtained: queue the node on
                      * this bucket's dead-nodes list instead of deleting it now.
                      */
1810                 INSIST(node->data == NULL);
1811                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1812                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1813         }
1814
1815  restore_locks:
1816         /* Restore the lock? */
1817         if (nlock == isc_rwlocktype_read)
1818                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1819
1820         /*
1821          * Relock a read lock, or unlock the write lock if no lock was held.
1822          */
1823         if (tlock == isc_rwlocktype_none)
1824                 if (write_locked)
1825                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1826
1827         if (tlock == isc_rwlocktype_read)
1828                 if (write_locked)
1829                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1830
1831         return (no_reference);
1832 }
1833
1834 /*
1835  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1836  * case, the number of iteration is the number of tree levels, which is at
1837  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1838  * should be much smaller (only a few times), and even the worst case would be
1839  * acceptable for a single event.
1840  */
1841 static void
1842 prune_tree(isc_task_t *task, isc_event_t *event) {
1843         dns_rbtdb_t *rbtdb = event->ev_sender;
1844         dns_rbtnode_t *node = event->ev_arg;
1845         dns_rbtnode_t *parent;
1846         unsigned int locknum;
1847
1848         UNUSED(task);
1849
1850         isc_event_free(&event);
1851
1852         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1853         locknum = node->locknum;
1854         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1855         do {
1856                 parent = node->parent;
1857                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1858                                     isc_rwlocktype_write, ISC_TRUE);
1859
1860                 if (parent != NULL && parent->down == NULL) {
1861                         /*
1862                          * node was the only down child of the parent and has
1863                          * just been removed.  We'll then need to examine the
1864                          * parent.  Keep the lock if possible; otherwise,
1865                          * release the old lock and acquire one for the parent.
1866                          */
1867                         if (parent->locknum != locknum) {
1868                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1869                                             isc_rwlocktype_write);
1870                                 locknum = parent->locknum;
1871                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1872                                           isc_rwlocktype_write);
1873                         }
1874
1875                         /*
1876                          * We need to gain a reference to the node before
1877                          * decrementing it in the next iteration.  In addition,
1878                          * if the node is in the dead-nodes list, extract it
1879                          * from the list beforehand as we do in
1880                          * reactivate_node().
1881                          */
1882                         if (ISC_LINK_LINKED(parent, deadlink))
1883                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1884                                                 parent, deadlink);
1885                         new_reference(rbtdb, parent);
1886                 } else
1887                         parent = NULL;
1888
1889                 node = parent;
1890         } while (node != NULL);
1891         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1892         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1893
1894         detach((dns_db_t **)&rbtdb);
1895 }
1896
1897 static inline void
1898 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1899                    rbtdb_changedlist_t *cleanup_list)
1900 {
1901         /*
1902          * Caller must be holding the database lock.
1903          */
1904
1905         rbtdb->least_serial = version->serial;
1906         *cleanup_list = version->changed_list;
1907         ISC_LIST_INIT(version->changed_list);
1908 }
1909
1910 static inline void
1911 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1912         rbtdb_changed_t *changed, *next_changed;
1913
1914         /*
1915          * If the changed record is dirty, then
1916          * an update created multiple versions of
1917          * a given rdataset.  We keep this list
1918          * until we're the least open version, at
1919          * which point it's safe to get rid of any
1920          * older versions.
1921          *
1922          * If the changed record isn't dirty, then
1923          * we don't need it anymore since we're
1924          * committing and not rolling back.
1925          *
1926          * The caller must be holding the database lock.
1927          */
1928         for (changed = HEAD(version->changed_list);
1929              changed != NULL;
1930              changed = next_changed) {
1931                 next_changed = NEXT(changed, link);
1932                 if (!changed->dirty) {
1933                         UNLINK(version->changed_list,
1934                                changed, link);
1935                         APPEND(*cleanup_list,
1936                                changed, link);
1937                 }
1938         }
1939 }
1940
1941 static void
1942 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1943         dns_rdataset_t keyset;
1944         dns_rdataset_t nsecset, signsecset;
1945         dns_rdata_t rdata = DNS_RDATA_INIT;
1946         isc_boolean_t haszonekey = ISC_FALSE;
1947         isc_boolean_t hasnsec = ISC_FALSE;
1948         isc_boolean_t hasoptbit = ISC_FALSE;
1949         isc_boolean_t nsec3createflag = ISC_FALSE;
1950         isc_result_t result;
1951
1952         dns_rdataset_init(&keyset);
1953         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1954                                      0, 0, &keyset, NULL);
1955         if (result == ISC_R_SUCCESS) {
1956                 result = dns_rdataset_first(&keyset);
1957                 while (result == ISC_R_SUCCESS) {
1958                         dns_rdata_t keyrdata = DNS_RDATA_INIT;
1959                         dns_rdataset_current(&keyset, &keyrdata);
1960                         if (dns_zonekey_iszonekey(&keyrdata)) {
1961                                 haszonekey = ISC_TRUE;
1962                                 break;
1963                         }
1964                         result = dns_rdataset_next(&keyset);
1965                 }
1966                 dns_rdataset_disassociate(&keyset);
1967         }
1968         if (!haszonekey) {
1969                 version->secure = dns_db_insecure;
1970                 version->havensec3 = ISC_FALSE;
1971                 return;
1972         }
1973
1974         dns_rdataset_init(&nsecset);
1975         dns_rdataset_init(&signsecset);
1976         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1977                                      0, 0, &nsecset, &signsecset);
1978         if (result == ISC_R_SUCCESS) {
1979                 if (dns_rdataset_isassociated(&signsecset)) {
1980                         hasnsec = ISC_TRUE;
1981                         result = dns_rdataset_first(&nsecset);
1982                         if (result == ISC_R_SUCCESS) {
1983                                 dns_rdataset_current(&nsecset, &rdata);
1984                                 hasoptbit = dns_nsec_typepresent(&rdata,
1985                                                              dns_rdatatype_opt);
1986                         }
1987                         dns_rdataset_disassociate(&signsecset);
1988                 }
1989                 dns_rdataset_disassociate(&nsecset);
1990         }
1991
1992         setnsec3parameters(db, version, &nsec3createflag);
1993
1994         /*
1995          * Do we have a valid NSEC/NSEC3 chain?
1996          */
1997         if (version->havensec3 || (hasnsec && !hasoptbit))
1998                 version->secure = dns_db_secure;
1999         /*
2000          * Do we have a NSEC/NSEC3 chain under creation?
2001          */
2002         else if (hasoptbit || nsec3createflag)
2003                 version->secure = dns_db_partial;
2004         else
2005                 version->secure = dns_db_insecure;
2006 }
2007
2008 /*%<
2009  * Walk the origin node looking for NSEC3PARAM records.
2010  * Cache the nsec3 parameters.
2011  */
2012 static void
2013 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
2014                    isc_boolean_t *nsec3createflag)
2015 {
2016         dns_rbtnode_t *node;
2017         dns_rdata_nsec3param_t nsec3param;
2018         dns_rdata_t rdata = DNS_RDATA_INIT;
2019         isc_region_t region;
2020         isc_result_t result;
2021         rdatasetheader_t *header, *header_next;
2022         unsigned char *raw;             /* RDATASLAB */
2023         unsigned int count, length;
2024         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2025
2026         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2027         version->havensec3 = ISC_FALSE;
2028         node = rbtdb->origin_node;
2029         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2030                   isc_rwlocktype_read);
2031         for (header = node->data;
2032              header != NULL;
2033              header = header_next) {
2034                 header_next = header->next;
2035                 do {
2036                         if (header->serial <= version->serial &&
2037                             !IGNORE(header)) {
2038                                 if (NONEXISTENT(header))
2039                                         header = NULL;
2040                                 break;
2041                         } else
2042                                 header = header->down;
2043                 } while (header != NULL);
2044
2045                 if (header != NULL &&
2046                     header->type == dns_rdatatype_nsec3param) {
2047                         /*
2048                          * Find A NSEC3PARAM with a supported algorithm.
2049                          */
2050                         raw = (unsigned char *)header + sizeof(*header);
2051                         count = raw[0] * 256 + raw[1]; /* count */
2052 #if DNS_RDATASET_FIXED
2053                         raw += count * 4 + 2;
2054 #else
2055                         raw += 2;
2056 #endif
2057                         while (count-- > 0U) {
2058                                 length = raw[0] * 256 + raw[1];
2059 #if DNS_RDATASET_FIXED
2060                                 raw += 4;
2061 #else
2062                                 raw += 2;
2063 #endif
2064                                 region.base = raw;
2065                                 region.length = length;
2066                                 raw += length;
2067                                 dns_rdata_fromregion(&rdata,
2068                                                      rbtdb->common.rdclass,
2069                                                      dns_rdatatype_nsec3param,
2070                                                      &region);
2071                                 result = dns_rdata_tostruct(&rdata,
2072                                                             &nsec3param,
2073                                                             NULL);
2074                                 INSIST(result == ISC_R_SUCCESS);
2075                                 dns_rdata_reset(&rdata);
2076
2077                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2078                                     !dns_nsec3_supportedhash(nsec3param.hash))
2079                                         continue;
2080
2081 #ifdef RFC5155_STRICT
2082                                 if (nsec3param.flags != 0)
2083                                         continue;
2084 #else
2085                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2086                                     != 0)
2087                                         *nsec3createflag = ISC_TRUE;
2088                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2089                                     != 0)
2090                                         continue;
2091 #endif
2092
2093                                 memcpy(version->salt, nsec3param.salt,
2094                                        nsec3param.salt_length);
2095                                 version->hash = nsec3param.hash;
2096                                 version->salt_length = nsec3param.salt_length;
2097                                 version->iterations = nsec3param.iterations;
2098                                 version->flags = nsec3param.flags;
2099                                 version->havensec3 = ISC_TRUE;
2100                                 /*
2101                                  * Look for a better algorithm than the
2102                                  * unknown test algorithm.
2103                                  */
2104                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2105                                         goto unlock;
2106                         }
2107                 }
2108         }
2109  unlock:
2110         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2111                     isc_rwlocktype_read);
2112         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2113 }
2114
2115 static void
2116 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2117         dns_rbtdb_t *rbtdb = event->ev_arg;
2118         isc_boolean_t again = ISC_FALSE;
2119         unsigned int locknum;
2120         unsigned int refs;
2121
2122         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2123         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2124                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2125                           isc_rwlocktype_write);
2126                 cleanup_dead_nodes(rbtdb, locknum);
2127                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2128                         again = ISC_TRUE;
2129                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2130                             isc_rwlocktype_write);
2131         }
2132         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2133         if (again)
2134                 isc_task_send(task, &event);
2135         else {
2136                 isc_event_free(&event);
2137                 isc_refcount_decrement(&rbtdb->references, &refs);
2138                 if (refs == 0)
2139                         maybe_free_rbtdb(rbtdb);
2140         }
2141 }
2142
2143 static void
2144 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2145         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2146         rbtdb_version_t *version, *cleanup_version, *least_greater;
2147         isc_boolean_t rollback = ISC_FALSE;
2148         rbtdb_changedlist_t cleanup_list;
2149         rdatasetheaderlist_t resigned_list;
2150         rbtdb_changed_t *changed, *next_changed;
2151         rbtdb_serial_t serial, least_serial;
2152         dns_rbtnode_t *rbtnode;
2153         unsigned int refs;
2154         rdatasetheader_t *header;
2155         isc_boolean_t writer;
2156
2157         REQUIRE(VALID_RBTDB(rbtdb));
2158         version = (rbtdb_version_t *)*versionp;
2159         INSIST(version->rbtdb == rbtdb);
2160
2161         cleanup_version = NULL;
2162         ISC_LIST_INIT(cleanup_list);
2163         ISC_LIST_INIT(resigned_list);
2164
2165         isc_refcount_decrement(&version->references, &refs);
2166         if (refs > 0) {         /* typical and easy case first */
2167                 if (commit) {
2168                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2169                         INSIST(!version->writer);
2170                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2171                 }
2172                 goto end;
2173         }
2174
2175         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2176         serial = version->serial;
2177         writer = version->writer;
2178         if (version->writer) {
2179                 if (commit) {
2180                         unsigned cur_ref;
2181                         rbtdb_version_t *cur_version;
2182
2183                         INSIST(version->commit_ok);
2184                         INSIST(version == rbtdb->future_version);
2185                         /*
2186                          * The current version is going to be replaced.
2187                          * Release the (likely last) reference to it from the
2188                          * DB itself and unlink it from the open list.
2189                          */
2190                         cur_version = rbtdb->current_version;
2191                         isc_refcount_decrement(&cur_version->references,
2192                                                &cur_ref);
2193                         if (cur_ref == 0) {
2194                                 if (cur_version->serial == rbtdb->least_serial)
2195                                         INSIST(EMPTY(cur_version->changed_list));
2196                                 UNLINK(rbtdb->open_versions,
2197                                        cur_version, link);
2198                         }
2199                         if (EMPTY(rbtdb->open_versions)) {
2200                                 /*
2201                                  * We're going to become the least open
2202                                  * version.
2203                                  */
2204                                 make_least_version(rbtdb, version,
2205                                                    &cleanup_list);
2206                         } else {
2207                                 /*
2208                                  * Some other open version is the
2209                                  * least version.  We can't cleanup
2210                                  * records that were changed in this
2211                                  * version because the older versions
2212                                  * may still be in use by an open
2213                                  * version.
2214                                  *
2215                                  * We can, however, discard the
2216                                  * changed records for things that
2217                                  * we've added that didn't exist in
2218                                  * prior versions.
2219                                  */
2220                                 cleanup_nondirty(version, &cleanup_list);
2221                         }
2222                         /*
2223                          * If the (soon to be former) current version
2224                          * isn't being used by anyone, we can clean
2225                          * it up.
2226                          */
2227                         if (cur_ref == 0) {
2228                                 cleanup_version = cur_version;
2229                                 APPENDLIST(version->changed_list,
2230                                            cleanup_version->changed_list,
2231                                            link);
2232                         }
2233                         /*
2234                          * Become the current version.
2235                          */
2236                         version->writer = ISC_FALSE;
2237                         rbtdb->current_version = version;
2238                         rbtdb->current_serial = version->serial;
2239                         rbtdb->future_version = NULL;
2240
2241                         /*
2242                          * Keep the current version in the open list, and
2243                          * gain a reference for the DB itself (see the DB
2244                          * creation function below).  This must be the only
2245                          * case where we need to increment the counter from
2246                          * zero and need to use isc_refcount_increment0().
2247                          */
2248                         isc_refcount_increment0(&version->references,
2249                                                 &cur_ref);
2250                         INSIST(cur_ref == 1);
2251                         PREPEND(rbtdb->open_versions,
2252                                 rbtdb->current_version, link);
2253                         resigned_list = version->resigned_list;
2254                         ISC_LIST_INIT(version->resigned_list);
2255                 } else {
2256                         /*
2257                          * We're rolling back this transaction.
2258                          */
2259                         cleanup_list = version->changed_list;
2260                         ISC_LIST_INIT(version->changed_list);
2261                         resigned_list = version->resigned_list;
2262                         ISC_LIST_INIT(version->resigned_list);
2263                         rollback = ISC_TRUE;
2264                         cleanup_version = version;
2265                         rbtdb->future_version = NULL;
2266                 }
2267         } else {
2268                 if (version != rbtdb->current_version) {
2269                         /*
2270                          * There are no external or internal references
2271                          * to this version and it can be cleaned up.
2272                          */
2273                         cleanup_version = version;
2274
2275                         /*
2276                          * Find the version with the least serial
2277                          * number greater than ours.
2278                          */
2279                         least_greater = PREV(version, link);
2280                         if (least_greater == NULL)
2281                                 least_greater = rbtdb->current_version;
2282
2283                         INSIST(version->serial < least_greater->serial);
2284                         /*
2285                          * Is this the least open version?
2286                          */
2287                         if (version->serial == rbtdb->least_serial) {
2288                                 /*
2289                                  * Yes.  Install the new least open
2290                                  * version.
2291                                  */
2292                                 make_least_version(rbtdb,
2293                                                    least_greater,
2294                                                    &cleanup_list);
2295                         } else {
2296                                 /*
2297                                  * Add any unexecuted cleanups to
2298                                  * those of the least greater version.
2299                                  */
2300                                 APPENDLIST(least_greater->changed_list,
2301                                            version->changed_list,
2302                                            link);
2303                         }
2304                 } else if (version->serial == rbtdb->least_serial)
2305                         INSIST(EMPTY(version->changed_list));
2306                 UNLINK(rbtdb->open_versions, version, link);
2307         }
2308         least_serial = rbtdb->least_serial;
2309         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2310
2311         /*
2312          * Update the zone's secure status.
2313          */
2314         if (writer && commit && !IS_CACHE(rbtdb))
2315                 iszonesecure(db, version, rbtdb->origin_node);
2316
2317         if (cleanup_version != NULL) {
2318                 INSIST(EMPTY(cleanup_version->changed_list));
2319                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2320                             sizeof(*cleanup_version));
2321         }
2322
2323         /*
2324          * Commit/rollback re-signed headers.
2325          */
2326         for (header = HEAD(resigned_list);
2327              header != NULL;
2328              header = HEAD(resigned_list)) {
2329                 nodelock_t *lock;
2330
2331                 ISC_LIST_UNLINK(resigned_list, header, link);
2332
2333                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2334                 NODE_LOCK(lock, isc_rwlocktype_write);
2335                 if (rollback)
2336                         resign_insert(rbtdb, header->node->locknum, header);
2337                 decrement_reference(rbtdb, header->node, least_serial,
2338                                     isc_rwlocktype_write, isc_rwlocktype_none,
2339                                     ISC_FALSE);
2340                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2341         }
2342
2343         if (!EMPTY(cleanup_list)) {
2344                 isc_event_t *event = NULL;
2345                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2346
2347                 if (rbtdb->task != NULL)
2348                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
2349                                                    DNS_EVENT_RBTDEADNODES,
2350                                                    cleanup_dead_nodes_callback,
2351                                                    rbtdb, sizeof(isc_event_t));
2352                 if (event == NULL) {
2353                         /*
2354                          * We acquire a tree write lock here in order to make
2355                          * sure that stale nodes will be removed in
2356                          * decrement_reference().  If we didn't have the lock,
2357                          * those nodes could miss the chance to be removed
2358                          * until the server stops.  The write lock is
2359                          * expensive, but this event should be rare enough
2360                          * to justify the cost.
2361                          */
2362                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2363                         tlock = isc_rwlocktype_write;
2364                 }
2365
2366                 for (changed = HEAD(cleanup_list);
2367                      changed != NULL;
2368                      changed = next_changed) {
2369                         nodelock_t *lock;
2370
2371                         next_changed = NEXT(changed, link);
2372                         rbtnode = changed->node;
2373                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2374
2375                         NODE_LOCK(lock, isc_rwlocktype_write);
2376                         /*
2377                          * This is a good opportunity to purge any dead nodes,
2378                          * so use it.
2379                          */
2380                         if (event == NULL)
2381                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2382
2383                         if (rollback)
2384                                 rollback_node(rbtnode, serial);
2385                         decrement_reference(rbtdb, rbtnode, least_serial,
2386                                             isc_rwlocktype_write, tlock,
2387                                             ISC_FALSE);
2388
2389                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2390
2391                         isc_mem_put(rbtdb->common.mctx, changed,
2392                                     sizeof(*changed));
2393                 }
2394                 if (event != NULL) {
2395                         isc_refcount_increment(&rbtdb->references, NULL);
2396                         isc_task_send(rbtdb->task, &event);
2397                 } else
2398                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2399         }
2400
2401  end:
2402         *versionp = NULL;
2403 }
2404
2405 /*
2406  * Add the necessary magic for the wildcard name 'name'
2407  * to be found in 'rbtdb'.
2408  *
2409  * In order for wildcard matching to work correctly in
2410  * zone_find(), we must ensure that a node for the wildcarding
2411  * level exists in the database, and has its 'find_callback'
2412  * and 'wild' bits set.
2413  *
2414  * E.g. if the wildcard name is "*.sub.example." then we
2415  * must ensure that "sub.example." exists and is marked as
2416  * a wildcard level.
      *
      * 'name' must have at least two labels (enforced with INSIST).
      *
      * Returns ISC_R_SUCCESS when the wildcard-level node was added or
      * already existed (ISC_R_EXISTS is treated as success); any other
      * result from dns_rbt_addnode() is returned unchanged.
2417  */
2418 static isc_result_t
2419 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2420         isc_result_t result;
2421         dns_name_t foundname;
2422         dns_offsets_t offsets;
2423         unsigned int n;
2424         dns_rbtnode_t *node = NULL;
2425
2426         dns_name_init(&foundname, offsets);
2427         n = dns_name_countlabels(name);
2428         INSIST(n >= 2);
             /*
              * Drop the leftmost label: 'foundname' becomes labels 1..n-1
              * of 'name', i.e. the parent of the wildcard label.
              */
2429         n--;
2430         dns_name_getlabelsequence(name, 1, n, &foundname);
2431         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
             /* An already existing node is fine; it only needs flagging. */
2432         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2433                 return (result);
             /* The node lives in the main tree, so it is not an NSEC3 node. */
2434         node->nsec3 = 0;
2435         node->find_callback = 1;
2436         node->wild = 1;
2437         return (ISC_R_SUCCESS);
2438 }
2439
/*
 * Examine every trailing label sequence of 'name' that is longer than the
 * database origin but shorter than 'name' itself.  For each such suffix
 * that is a wildcard name, mark its wildcard level via add_wildcard_magic()
 * and make sure a node for the suffix exists in the main tree.
 *
 * 'name' itself is never examined here (the loop stops before the full
 * label count).
 *
 * Returns ISC_R_SUCCESS, or the first failure reported by
 * add_wildcard_magic() or dns_rbt_addnode() (ISC_R_EXISTS from the latter
 * is not a failure).
 */
2440 static isc_result_t
2441 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2442         isc_result_t result;
2443         dns_name_t foundname;
2444         dns_offsets_t offsets;
2445         unsigned int n, l, i;
2446
2447         dns_name_init(&foundname, offsets);
2448         n = dns_name_countlabels(name);
2449         l = dns_name_countlabels(&rbtdb->common.origin);
             /* 'i' is the number of trailing labels of 'name' to look at. */
2450         i = l + 1;
2451         while (i < n) {
2452                 dns_rbtnode_t *node = NULL;     /* dummy */
                     /* 'foundname' = the last 'i' labels of 'name'. */
2453                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2454                 if (dns_name_iswildcard(&foundname)) {
2455                         result = add_wildcard_magic(rbtdb, &foundname);
2456                         if (result != ISC_R_SUCCESS)
2457                                 return (result);
                             /*
                              * Ensure the wildcard name itself has a node in
                              * the main tree; an existing node is acceptable.
                              */
2458                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2459                                                  &node);
2460                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2461                                 return (result);
2462                         node->nsec3 = 0;
2463                 }
2464                 i++;
2465         }
2466         return (ISC_R_SUCCESS);
2467 }
2468
/*
 * Find the node for 'name' in 'tree', optionally creating it.  'tree' must
 * be either the main tree or the NSEC3 tree of 'rbtdb' (enforced with
 * INSIST).
 *
 * The lookup runs under the tree read lock.  If it does not return
 * ISC_R_SUCCESS:
 *   - with 'create' false, the lookup result is returned, except that
 *     DNS_R_PARTIALMATCH is reported as ISC_R_NOTFOUND;
 *   - with 'create' true, the read lock is released, the tree write lock
 *     is taken and the node is added with dns_rbt_addnode().
 *
 * On success the node is passed to reactivate_node(), stored in '*nodep',
 * and ISC_R_SUCCESS is returned.  The tree lock is released on every
 * return path.
 */
2469 static isc_result_t
2470 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
2471                isc_boolean_t create, dns_dbnode_t **nodep)
2472 {
2473         dns_rbtnode_t *node = NULL;
2474         dns_name_t nodename;
2475         isc_result_t result;
2476         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2477
2478         INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
2479
2480         dns_name_init(&nodename, NULL);
2481         RWLOCK(&rbtdb->tree_lock, locktype);
2482         result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
2483                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2484         if (result != ISC_R_SUCCESS) {
2485                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2486                 if (!create) {
2487                         if (result == DNS_R_PARTIALMATCH)
2488                                 result = ISC_R_NOTFOUND;
2489                         return (result);
2490                 }
2491                 /*
2492                  * It would be nice to try to upgrade the lock instead of
2493                  * unlocking then relocking.
2494                  */
2495                 locktype = isc_rwlocktype_write;
2496                 RWLOCK(&rbtdb->tree_lock, locktype);
2497                 node = NULL;
2498                 result = dns_rbt_addnode(tree, name, &node);
2499                 if (result == ISC_R_SUCCESS) {
                             /*
                              * Freshly created node: pick its node lock bucket
                              * and set its tree-specific flags.
                              */
2500                         dns_rbt_namefromnode(node, &nodename);
2501 #ifdef DNS_RBT_USEHASH
2502                         node->locknum = node->hashval % rbtdb->node_lock_count;
2503 #else
2504                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2505                                 rbtdb->node_lock_count;
2506 #endif
2507                         if (tree == rbtdb->tree) {
2508                                 node->nsec3 = 0;
                                     /*
                                      * NOTE(review): the result of
                                      * add_empty_wildcards() is discarded here,
                                      * unlike add_wildcard_magic() below --
                                      * confirm this is intentional.
                                      */
2509                                 add_empty_wildcards(rbtdb, name);
2510
2511                                 if (dns_name_iswildcard(name)) {
2512                                         result = add_wildcard_magic(rbtdb,
2513                                                                     name);
2514                                         if (result != ISC_R_SUCCESS) {
2515                                                 RWUNLOCK(&rbtdb->tree_lock,
2516                                                          locktype);
2517                                                 return (result);
2518                                         }
2519                                 }
2520                         }
2521                         if (tree == rbtdb->nsec3)
2522                                 node->nsec3 = 1;
2523                 } else if (result != ISC_R_EXISTS) {
2524                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2525                         return (result);
2526                 }
                     /*
                      * ISC_R_EXISTS falls through: the code below relies on
                      * dns_rbt_addnode() having set 'node' in that case.
                      * Presumably the node was added by someone else while
                      * the tree lock was released above -- TODO confirm.
                      */
2527         }
2528
2529         if (tree == rbtdb->nsec3)
2530                 INSIST(node->nsec3 == 1);
2531
             /* 'locktype' tells reactivate_node() which tree lock we hold. */
2532         reactivate_node(rbtdb, node, locktype);
2533         RWUNLOCK(&rbtdb->tree_lock, locktype);
2534
2535         *nodep = (dns_dbnode_t *)node;
2536
2537         return (ISC_R_SUCCESS);
2538 }
2539
/*
 * Find (or, if 'create' is true, create) the node for 'name' in the main
 * tree of 'db'.  'db' must be a valid rbtdb.  The result is whatever
 * findnodeintree() returns.
 */
2540 static isc_result_t
2541 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2542               dns_dbnode_t **nodep)
2543 {
2544         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2545
2546         REQUIRE(VALID_RBTDB(rbtdb));
2547
2548         return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
2549 }
2550
/*
 * Find (or, if 'create' is true, create) the node for 'name' in the NSEC3
 * tree of 'db'.  'db' must be a valid rbtdb.  The result is whatever
 * findnodeintree() returns.
 */
2551 static isc_result_t
2552 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2553               dns_dbnode_t **nodep)
2554 {
2555         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2556
2557         REQUIRE(VALID_RBTDB(rbtdb));
2558
2559         return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
2560 }
2561
/*
 * Per-node callback for a zone search: remember the topmost zone cut
 * (an NS or DNAME rdataset active in the version being searched) in the
 * search block passed as 'arg'.
 *
 * 'node' is the node being visited, 'name' its name, and 'arg' the
 * rbtdb_search_t of the search in progress.
 *
 * Returns DNS_R_PARTIALMATCH when a zone cut is found at 'node' and the
 * search options do not include DNS_DBFIND_GLUEOK; DNS_R_CONTINUE
 * otherwise (including when a zone cut was already recorded).
 *
 * NOTE(review): presumably invoked by the RBT lookup for nodes whose
 * 'find_callback' bit is set -- the registration is not visible here.
 */
2562 static isc_result_t
2563 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2564         rbtdb_search_t *search = arg;
2565         rdatasetheader_t *header, *header_next;
2566         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2567         rdatasetheader_t *found;
2568         isc_result_t result;
2569         dns_rbtnode_t *onode;
2570
2571         /*
2572          * We only want to remember the topmost zone cut, since it's the one
2573          * that counts, so we'll just continue if we've already found a
2574          * zonecut.
2575          */
2576         if (search->zonecut != NULL)
2577                 return (DNS_R_CONTINUE);
2578
2579         found = NULL;
2580         result = DNS_R_CONTINUE;
2581         onode = search->rbtdb->origin_node;
2582
2583         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2584                   isc_rwlocktype_read);
2585
2586         /*
2587          * Look for an NS or DNAME rdataset active in our version.
2588          */
2589         ns_header = NULL;
2590         dname_header = NULL;
2591         sigdname_header = NULL;
2592         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Save the sibling now: 'header' is reused below to walk
                      * the 'down' chain and may end up NULL.
                      */
2593                 header_next = header->next;
2594                 if (header->type == dns_rdatatype_ns ||
2595                     header->type == dns_rdatatype_dname ||
2596                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
                             /*
                              * Follow 'down' to the first header whose serial
                              * is not newer than the search serial and which
                              * is not marked IGNORE.
                              */
2597                         do {
2598                                 if (header->serial <= search->serial &&
2599                                     !IGNORE(header)) {
2600                                         /*
2601                                          * Is this a "this rdataset doesn't
2602                                          * exist" record?
2603                                          */
2604                                         if (NONEXISTENT(header))
2605                                                 header = NULL;
2606                                         break;
2607                                 } else
2608                                         header = header->down;
2609                         } while (header != NULL);
2610                         if (header != NULL) {
2611                                 if (header->type == dns_rdatatype_dname)
2612                                         dname_header = header;
2613                                 else if (header->type ==
2614                                            RBTDB_RDATATYPE_SIGDNAME)
2615                                         sigdname_header = header;
2616                                 else if (node != onode ||
2617                                          IS_STUB(search->rbtdb)) {
2618                                         /*
2619                                          * We've found an NS rdataset that
2620                                          * isn't at the origin node.  We check
2621                                          * that they're not at the origin node,
2622                                          * because otherwise we'd erroneously
2623                                          * treat the zone top as if it were
2624                                          * a delegation.
2625                                          */
2626                                         ns_header = header;
2627                                 }
2628                         }
2629                 }
2630         }
2631
2632         /*
2633          * Did we find anything?
2634          */
2635         if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2636             ns_header != NULL) {
2637                 /*
2638                  * Note that NS has precedence over DNAME if both exist
2639                  * in a zone.  Otherwise DNAME takes precedence over NS.
2640                  */
2641                 found = ns_header;
2642                 search->zonecut_sigrdataset = NULL;
2643         } else if (dname_header != NULL) {
2644                 found = dname_header;
2645                 search->zonecut_sigrdataset = sigdname_header;
2646         } else if (ns_header != NULL) {
2647                 found = ns_header;
2648                 search->zonecut_sigrdataset = NULL;
2649         }
2650
2651         if (found != NULL) {
2652                 /*
2653                  * We increment the reference count on node to ensure that
2654                  * search->zonecut_rdataset will still be valid later.
2655                  */
2656                 new_reference(search->rbtdb, node);
2657                 search->zonecut = node;
2658                 search->zonecut_rdataset = found;
2659                 search->need_cleanup = ISC_TRUE;
2660                 /*
2661                  * Since we've found a zonecut, anything beneath it is
2662                  * glue and is not subject to wildcard matching, so we
2663                  * may clear search->wild.
2664                  */
2665                 search->wild = ISC_FALSE;
2666                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2667                         /*
2668                          * If the caller does not want to find glue, then
2669                          * this is the best answer and the search should
2670                          * stop now.
2671                          */
2672                         result = DNS_R_PARTIALMATCH;
2673                 } else {
2674                         dns_name_t *zcname;
2675
2676                         /*
2677                          * The search will continue beneath the zone cut.
2678                          * This may or may not be the best match.  In case it
2679                          * is, we need to remember the node name.
2680                          */
2681                         zcname = dns_fixedname_name(&search->zonecut_name);
2682                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2683                                       ISC_R_SUCCESS);
2684                         search->copy_name = ISC_TRUE;
2685                 }
2686         } else {
2687                 /*
2688                  * There is no zonecut at this node which is active in this
2689                  * version.
2690                  *
2691                  * If this is a "wild" node and the caller hasn't disabled
2692                  * wildcard matching, remember that we've seen a wild node
2693                  * in case we need to go searching for wildcard matches
2694                  * later on.
2695                  */
2696                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2697                         search->wild = ISC_TRUE;
2698         }
2699
2700         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2701                     isc_rwlocktype_read);
2702
2703         return (result);
2704 }
2705
/*
 * Associate 'rdataset' with the rdataset header 'header' stored at 'node'.
 *
 * Does nothing when 'rdataset' is NULL.  Otherwise a new reference on
 * 'node' is taken, 'rdataset' (which must be disassociated, i.e. have no
 * methods set) is filled in from the header, and its iterator state is
 * reset.  The TTL stored in 'rdataset' is the header's 'rdh_ttl' minus
 * 'now'.
 */
2706 static inline void
2707 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2708               rdatasetheader_t *header, isc_stdtime_t now,
2709               dns_rdataset_t *rdataset)
2710 {
2711         unsigned char *raw;     /* RDATASLAB */
2712
2713         /*
2714          * Caller must be holding the node reader lock.
2715          * XXXJT: technically, we need a writer lock, since we'll increment
2716          * the header count below.  However, since the actual counter value
2717          * doesn't matter, we prioritize performance here.  (We may want to
2718          * use atomic increment when available).
2719          */
2720
2721         if (rdataset == NULL)
2722                 return;
2723
2724         new_reference(rbtdb, node);
2725
2726         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2727
2728         rdataset->methods = &rdataset_methods;
2729         rdataset->rdclass = rbtdb->common.rdclass;
2730         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2731         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2732         rdataset->ttl = header->rdh_ttl - now;
2733         rdataset->trust = header->trust;
             /* Attribute bits are OR-ed in; existing bits are left untouched. */
2734         if (NEGATIVE(header))
2735                 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2736         if (NXDOMAIN(header))
2737                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2738         if (OPTOUT(header))
2739                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2740         rdataset->private1 = rbtdb;
2741         rdataset->private2 = node;
             /* The raw slab data starts immediately after the header struct. */
2742         raw = (unsigned char *)header + sizeof(*header);
2743         rdataset->private3 = raw;
             /*
              * Hand out the header's current counter value and bump the
              * counter; a value of ISC_UINT32_MAX is reported as 0 instead.
              */
2744         rdataset->count = header->count++;
2745         if (rdataset->count == ISC_UINT32_MAX)
2746                 rdataset->count = 0;
2747
2748         /*
2749          * Reset iterator state.
2750          */
2751         rdataset->privateuint4 = 0;
2752         rdataset->private5 = NULL;
2753
2754         /*
2755          * Add noqname proof.
2756          */
2757         rdataset->private6 = header->noqname;
2758         if (rdataset->private6 != NULL)
2759                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2760         rdataset->private7 = header->closest;
2761         if (rdataset->private7 != NULL)
2762                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2763
2764         /*
2765          * Copy out re-signing information.
2766          */
2767         if (RESIGN(header)) {
2768                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2769                 rdataset->resign = header->resign;
2770         } else
2771                 rdataset->resign = 0;
2772 }
2773
/*
 * Report the zone cut recorded in 'search' to the caller.
 *
 * 'search->zonecut' and 'search->zonecut_rdataset' are dereferenced
 * unconditionally, so a zone cut must already have been recorded.
 *
 * Each output argument is optional:
 *   - 'foundname' receives the saved zone cut name, but only when
 *     'search->copy_name' is set;
 *   - 'nodep' receives the zone cut node, taking over the reference held
 *     by the search block;
 *   - 'rdataset' is bound to the zone cut rdataset; 'sigrdataset' is bound
 *     to its signature rdataset when one was recorded.  Note that
 *     'sigrdataset' is only considered when 'rdataset' is non-NULL.
 *
 * Returns DNS_R_DNAME when the zone cut rdataset's type is DNAME,
 * DNS_R_DELEGATION otherwise, or the dns_name_copy() failure code if
 * copying the name fails (in which case nothing else has been modified).
 */
2774 static inline isc_result_t
2775 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2776                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2777                  dns_rdataset_t *sigrdataset)
2778 {
2779         isc_result_t result;
2780         dns_name_t *zcname;
2781         rbtdb_rdatatype_t type;
2782         dns_rbtnode_t *node;
2783
2784         /*
2785          * The caller MUST NOT be holding any node locks.
2786          */
2787
2788         node = search->zonecut;
2789         type = search->zonecut_rdataset->type;
2790
2791         /*
2792          * If we have to set foundname, we do it before anything else.
2793          * If we were to set foundname after we had set nodep or bound the
2794          * rdataset, then we'd have to undo that work if dns_name_copy()
2795          * failed.  By setting foundname first, there's nothing to undo if
2796          * we have trouble.
2797          */
2798         if (foundname != NULL && search->copy_name) {
2799                 zcname = dns_fixedname_name(&search->zonecut_name);
2800                 result = dns_name_copy(zcname, foundname, NULL);
2801                 if (result != ISC_R_SUCCESS)
2802                         return (result);
2803         }
2804         if (nodep != NULL) {
2805                 /*
2806                  * Note that we don't have to increment the node's reference
2807                  * count here because we're going to use the reference we
2808                  * already have in the search block.
2809                  */
2810                 *nodep = node;
2811                 search->need_cleanup = ISC_FALSE;
2812         }
2813         if (rdataset != NULL) {
                     /* bind_rdataset() requires the node lock to be held. */
2814                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2815                           isc_rwlocktype_read);
2816                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2817                               search->now, rdataset);
2818                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2819                         bind_rdataset(search->rbtdb, node,
2820                                       search->zonecut_sigrdataset,
2821                                       search->now, sigrdataset);
2822                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2823                             isc_rwlocktype_read);
2824         }
2825
2826         if (type == dns_rdatatype_dname)
2827                 return (DNS_R_DNAME);
2828         return (DNS_R_DELEGATION);
2829 }
2830
/*
 * Decide whether an rdataset of type 'type' at 'node', owned by 'name',
 * is acceptable glue for the zone cut recorded in 'search'.
 *
 * The type must be A, AAAA or A6, or NS when 'node' is the zone cut node
 * itself.  In addition, 'name' must compare equal to one of the names
 * stored in the zone cut rdataset's slab.
 *
 * Returns ISC_TRUE when both conditions hold, ISC_FALSE otherwise.
 *
 * NOTE(review): every rdata in the slab is parsed as a bare domain name
 * (as NS rdata would be); confirm callers only get here when the zone cut
 * rdataset is an NS rdataset.
 */
2831 static inline isc_boolean_t
2832 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2833            dns_rbtnode_t *node)
2834 {
2835         unsigned char *raw;     /* RDATASLAB */
2836         unsigned int count, size;
2837         dns_name_t ns_name;
2838         isc_boolean_t valid = ISC_FALSE;
2839         dns_offsets_t offsets;
2840         isc_region_t region;
2841         rdatasetheader_t *header;
2842
2843         /*
2844          * No additional locking is required.
2845          */
2846
2847         /*
2848          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2849          * if it occurs at a zone cut, but is not valid below it.
2850          */
2851         if (type == dns_rdatatype_ns) {
2852                 if (node != search->zonecut) {
2853                         return (ISC_FALSE);
2854                 }
2855         } else if (type != dns_rdatatype_a &&
2856                    type != dns_rdatatype_aaaa &&
2857                    type != dns_rdatatype_a6) {
2858                 return (ISC_FALSE);
2859         }
2860
             /*
              * The slab follows the header struct and begins with a two-byte,
              * big-endian record count.  With DNS_RDATASET_FIXED a further
              * 4 * count bytes are skipped before the first record.
              */
2861         header = search->zonecut_rdataset;
2862         raw = (unsigned char *)header + sizeof(*header);
2863         count = raw[0] * 256 + raw[1];
2864 #if DNS_RDATASET_FIXED
2865         raw += 2 + (4 * count);
2866 #else
2867         raw += 2;
2868 #endif
2869
2870         while (count > 0) {
2871                 count--;
                     /*
                      * Each record starts with a two-byte, big-endian rdata
                      * length; the per-record prefix is 4 bytes long with
                      * DNS_RDATASET_FIXED and 2 bytes long otherwise.
                      */
2872                 size = raw[0] * 256 + raw[1];
2873 #if DNS_RDATASET_FIXED
2874                 raw += 4;
2875 #else
2876                 raw += 2;
2877 #endif
2878                 region.base = raw;
2879                 region.length = size;
2880                 raw += size;
2881                 /*
2882                  * XXX Until we have rdata structures, we have no choice but
2883                  * to directly access the rdata format.
2884                  */
2885                 dns_name_init(&ns_name, offsets);
2886                 dns_name_fromregion(&ns_name, &region);
2887                 if (dns_name_compare(&ns_name, name) == 0) {
2888                         valid = ISC_TRUE;
2889                         break;
2890                 }
2891         }
2892
2893         return (valid);
2894 }
2895
/*
 * Advance 'chain' to the next node that has at least one top-level header
 * that is visible at the search serial, not marked IGNORE, and EXISTS;
 * then report whether that node's full name is a subdomain of 'name'.
 *
 * Only the headers linked through 'next' are inspected; the 'down' chains
 * are not followed.  Each node is examined under its node read lock.
 *
 * 'chain' is modified by this function (it is left wherever the scan
 * stopped).
 *
 * Returns ISC_TRUE when such a node is found and its name is a subdomain
 * of 'name'; ISC_FALSE otherwise, including when the chain is exhausted
 * or a chain/name operation fails.
 */
2896 static inline isc_boolean_t
2897 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2898             dns_name_t *name)
2899 {
2900         dns_fixedname_t fnext;
2901         dns_fixedname_t forigin;
2902         dns_name_t *next;
2903         dns_name_t *origin;
2904         dns_name_t prefix;
2905         dns_rbtdb_t *rbtdb;
2906         dns_rbtnode_t *node;
2907         isc_result_t result;
2908         isc_boolean_t answer = ISC_FALSE;
2909         rdatasetheader_t *header;
2910
2911         rbtdb = search->rbtdb;
2912
2913         dns_name_init(&prefix, NULL);
2914         dns_fixedname_init(&fnext);
2915         next = dns_fixedname_name(&fnext);
2916         dns_fixedname_init(&forigin);
2917         origin = dns_fixedname_name(&forigin);
2918
2919         result = dns_rbtnodechain_next(chain, NULL, NULL);
2920         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2921                 node = NULL;
2922                 result = dns_rbtnodechain_current(chain, &prefix,
2923                                                   origin, &node);
2924                 if (result != ISC_R_SUCCESS)
2925                         break;
2926                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2927                           isc_rwlocktype_read);
2928                 for (header = node->data;
2929                      header != NULL;
2930                      header = header->next) {
2931                         if (header->serial <= search->serial &&
2932                             !IGNORE(header) && EXISTS(header))
2933                                 break;
2934                 }
2935                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2936                             isc_rwlocktype_read);
                     /*
                      * A non-NULL 'header' means this node has active data;
                      * 'result' is still ISC_R_SUCCESS at this point.
                      */
2937                 if (header != NULL)
2938                         break;
2939                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2940         }
             /* Build the found node's full name from its prefix and origin. */
2941         if (result == ISC_R_SUCCESS)
2942                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2943         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2944                 answer = ISC_TRUE;
2945         return (answer);
2946 }
2947
2948 static inline isc_boolean_t
2949 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2950         dns_fixedname_t fnext;
2951         dns_fixedname_t forigin;
2952         dns_fixedname_t fprev;
2953         dns_name_t *next;
2954         dns_name_t *origin;
2955         dns_name_t *prev;
2956         dns_name_t name;
2957         dns_name_t rname;
2958         dns_name_t tname;
2959         dns_rbtdb_t *rbtdb;
2960         dns_rbtnode_t *node;
2961         dns_rbtnodechain_t chain;
2962         isc_boolean_t check_next = ISC_TRUE;
2963         isc_boolean_t check_prev = ISC_TRUE;
2964         isc_boolean_t answer = ISC_FALSE;
2965         isc_result_t result;
2966         rdatasetheader_t *header;
2967         unsigned int n;
2968
2969         rbtdb = search->rbtdb;
2970
2971         dns_name_init(&name, NULL);
2972         dns_name_init(&tname, NULL);
2973         dns_name_init(&rname, NULL);
2974         dns_fixedname_init(&fnext);
2975         next = dns_fixedname_name(&fnext);
2976         dns_fixedname_init(&fprev);
2977         prev = dns_fixedname_name(&fprev);
2978         dns_fixedname_init(&forigin);
2979         origin = dns_fixedname_name(&forigin);
2980
2981         /*
2982          * Find if qname is at or below a empty node.
2983          * Use our own copy of the chain.
2984          */
2985
2986         chain = search->chain;
2987         do {
2988                 node = NULL;
2989                 result = dns_rbtnodechain_current(&chain, &name,
2990                                                   origin, &node);
2991                 if (result != ISC_R_SUCCESS)
2992                         break;
2993                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2994                           isc_rwlocktype_read);
2995                 for (header = node->data;
2996                      header != NULL;
2997                      header = header->next) {
2998                         if (header->serial <= search->serial &&
2999                             !IGNORE(header) && EXISTS(header))
3000                                 break;
3001                 }
3002                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3003                             isc_rwlocktype_read);
3004                 if (header != NULL)
3005                         break;
3006                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3007         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3008         if (result == ISC_R_SUCCESS)
3009                 result = dns_name_concatenate(&name, origin, prev, NULL);
3010         if (result != ISC_R_SUCCESS)
3011                 check_prev = ISC_FALSE;
3012
3013         result = dns_rbtnodechain_next(&chain, NULL, NULL);
3014         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3015                 node = NULL;
3016                 result = dns_rbtnodechain_current(&chain, &name,
3017                                                   origin, &node);
3018                 if (result != ISC_R_SUCCESS)
3019                         break;
3020                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3021                           isc_rwlocktype_read);
3022                 for (header = node->data;
3023                      header != NULL;
3024                      header = header->next) {
3025                         if (header->serial <= search->serial &&
3026                             !IGNORE(header) && EXISTS(header))
3027                                 break;
3028                 }
3029                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3030                             isc_rwlocktype_read);
3031                 if (header != NULL)
3032                         break;
3033                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3034         }
3035         if (result == ISC_R_SUCCESS)
3036                 result = dns_name_concatenate(&name, origin, next, NULL);
3037         if (result != ISC_R_SUCCESS)
3038                 check_next = ISC_FALSE;
3039
3040         dns_name_clone(qname, &rname);
3041
3042         /*
3043          * Remove the wildcard label to find the terminal name.
3044          */
3045         n = dns_name_countlabels(wname);
3046         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3047
3048         do {
3049                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3050                     (check_next && dns_name_issubdomain(next, &rname))) {
3051                         answer = ISC_TRUE;
3052                         break;
3053                 }
3054                 /*
3055                  * Remove the left hand label.
3056                  */
3057                 n = dns_name_countlabels(&rname);
3058                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3059         } while (!dns_name_equal(&rname, &tname));
3060         return (answer);
3061 }
3062
3063 static inline isc_result_t
3064 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3065               dns_name_t *qname)
3066 {
3067         unsigned int i, j;
3068         dns_rbtnode_t *node, *level_node, *wnode;
3069         rdatasetheader_t *header;
3070         isc_result_t result = ISC_R_NOTFOUND;
3071         dns_name_t name;
3072         dns_name_t *wname;
3073         dns_fixedname_t fwname;
3074         dns_rbtdb_t *rbtdb;
3075         isc_boolean_t done, wild, active;
3076         dns_rbtnodechain_t wchain;
3077
3078         /*
3079          * Caller must be holding the tree lock and MUST NOT be holding
3080          * any node locks.
3081          */
3082
3083         /*
3084          * Examine each ancestor level.  If the level's wild bit
3085          * is set, then construct the corresponding wildcard name and
3086          * search for it.  If the wildcard node exists, and is active in
3087          * this version, we're done.  If not, then we next check to see
3088          * if the ancestor is active in this version.  If so, then there
3089          * can be no possible wildcard match and again we're done.  If not,
3090          * continue the search.
3091          */
3092
3093         rbtdb = search->rbtdb;
3094         i = search->chain.level_matches;
3095         done = ISC_FALSE;
3096         node = *nodep;
3097         do {
3098                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3099                           isc_rwlocktype_read);
3100
3101                 /*
3102                  * First we try to figure out if this node is active in
3103                  * the search's version.  We do this now, even though we
3104                  * may not need the information, because it simplifies the
3105                  * locking and code flow.
3106                  */
3107                 for (header = node->data;
3108                      header != NULL;
3109                      header = header->next) {
3110                         if (header->serial <= search->serial &&
3111                             !IGNORE(header) && EXISTS(header))
3112                                 break;
3113                 }
3114                 if (header != NULL)
3115                         active = ISC_TRUE;
3116                 else
3117                         active = ISC_FALSE;
3118
3119                 if (node->wild)
3120                         wild = ISC_TRUE;
3121                 else
3122                         wild = ISC_FALSE;
3123
3124                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3125                             isc_rwlocktype_read);
3126
3127                 if (wild) {
3128                         /*
3129                          * Construct the wildcard name for this level.
3130                          */
3131                         dns_name_init(&name, NULL);
3132                         dns_rbt_namefromnode(node, &name);
3133                         dns_fixedname_init(&fwname);
3134                         wname = dns_fixedname_name(&fwname);
3135                         result = dns_name_concatenate(dns_wildcardname, &name,
3136                                                       wname, NULL);
3137                         j = i;
3138                         while (result == ISC_R_SUCCESS && j != 0) {
3139                                 j--;
3140                                 level_node = search->chain.levels[j];
3141                                 dns_name_init(&name, NULL);
3142                                 dns_rbt_namefromnode(level_node, &name);
3143                                 result = dns_name_concatenate(wname,
3144                                                               &name,
3145                                                               wname,
3146                                                               NULL);
3147                         }
3148                         if (result != ISC_R_SUCCESS)
3149                                 break;
3150
3151                         wnode = NULL;
3152                         dns_rbtnodechain_init(&wchain, NULL);
3153                         result = dns_rbt_findnode(rbtdb->tree, wname,
3154                                                   NULL, &wnode, &wchain,
3155                                                   DNS_RBTFIND_EMPTYDATA,
3156                                                   NULL, NULL);
3157                         if (result == ISC_R_SUCCESS) {
3158                                 nodelock_t *lock;
3159
3160                                 /*
3161                                  * We have found the wildcard node.  If it
3162                                  * is active in the search's version, we're
3163                                  * done.
3164                                  */
3165                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3166                                 NODE_LOCK(lock, isc_rwlocktype_read);
3167                                 for (header = wnode->data;
3168                                      header != NULL;
3169                                      header = header->next) {
3170                                         if (header->serial <= search->serial &&
3171                                             !IGNORE(header) && EXISTS(header))
3172                                                 break;
3173                                 }
3174                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3175                                 if (header != NULL ||
3176                                     activeempty(search, &wchain, wname)) {
3177                                         if (activeemtpynode(search, qname,
3178                                                             wname)) {
3179                                                 return (ISC_R_NOTFOUND);
3180                                         }
3181                                         /*
3182                                          * The wildcard node is active!
3183                                          *
3184                                          * Note: result is still ISC_R_SUCCESS
3185                                          * so we don't have to set it.
3186                                          */
3187                                         *nodep = wnode;
3188                                         break;
3189                                 }
3190                         } else if (result != ISC_R_NOTFOUND &&
3191                                    result != DNS_R_PARTIALMATCH) {
3192                                 /*
3193                                  * An error has occurred.  Bail out.
3194                                  */
3195                                 break;
3196                         }
3197                 }
3198
3199                 if (active) {
3200                         /*
3201                          * The level node is active.  Any wildcarding
3202                          * present at higher levels has no
3203                          * effect and we're done.
3204                          */
3205                         result = ISC_R_NOTFOUND;
3206                         break;
3207                 }
3208
3209                 if (i > 0) {
3210                         i--;
3211                         node = search->chain.levels[i];
3212                 } else
3213                         done = ISC_TRUE;
3214         } while (!done);
3215
3216         return (result);
3217 }
3218
3219 static isc_boolean_t
3220 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3221 {
3222         dns_rdata_t rdata = DNS_RDATA_INIT;
3223         dns_rdata_nsec3_t nsec3;
3224         unsigned char *raw;                     /* RDATASLAB */
3225         unsigned int rdlen, count;
3226         isc_region_t region;
3227         isc_result_t result;
3228
3229         REQUIRE(header->type == dns_rdatatype_nsec3);
3230
3231         raw = (unsigned char *)header + sizeof(*header);
3232         count = raw[0] * 256 + raw[1]; /* count */
3233 #if DNS_RDATASET_FIXED
3234         raw += count * 4 + 2;
3235 #else
3236         raw += 2;
3237 #endif
3238         while (count-- > 0) {
3239                 rdlen = raw[0] * 256 + raw[1];
3240 #if DNS_RDATASET_FIXED
3241                 raw += 4;
3242 #else
3243                 raw += 2;
3244 #endif
3245                 region.base = raw;
3246                 region.length = rdlen;
3247                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3248                                      dns_rdatatype_nsec3, &region);
3249                 raw += rdlen;
3250                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3251                 INSIST(result == ISC_R_SUCCESS);
3252                 if (nsec3.hash == search->rbtversion->hash &&
3253                     nsec3.iterations == search->rbtversion->iterations &&
3254                     nsec3.salt_length == search->rbtversion->salt_length &&
3255                     memcmp(nsec3.salt, search->rbtversion->salt,
3256                            nsec3.salt_length) == 0)
3257                         return (ISC_TRUE);
3258                 dns_rdata_reset(&rdata);
3259         }
3260         return (ISC_FALSE);
3261 }
3262
3263 /*
3264  * Find node of the NSEC/NSEC3 record that is 'name'.
3265  */
3266 static inline isc_result_t
3267 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3268                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3269                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3270                   dns_db_secure_t secure)
3271 {
3272         dns_rbtnode_t *node;
3273         rdatasetheader_t *header, *header_next, *found, *foundsig;
3274         isc_boolean_t empty_node;
3275         isc_result_t result;
3276         dns_fixedname_t fname, forigin;
3277         dns_name_t *name, *origin;
3278         dns_rdatatype_t type;
3279         rbtdb_rdatatype_t sigtype;
3280         isc_boolean_t wraps;
3281         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3282
3283         if (tree == search->rbtdb->nsec3) {
3284                 type = dns_rdatatype_nsec3;
3285                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3286                 wraps = ISC_TRUE;
3287         } else {
3288                 type = dns_rdatatype_nsec;
3289                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3290                 wraps = ISC_FALSE;
3291         }
3292
3293  again:
3294         do {
3295                 node = NULL;
3296                 dns_fixedname_init(&fname);
3297                 name = dns_fixedname_name(&fname);
3298                 dns_fixedname_init(&forigin);
3299                 origin = dns_fixedname_name(&forigin);
3300                 result = dns_rbtnodechain_current(&search->chain, name,
3301                                                   origin, &node);
3302                 if (result != ISC_R_SUCCESS)
3303                         return (result);
3304                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3305                           isc_rwlocktype_read);
3306                 found = NULL;
3307                 foundsig = NULL;
3308                 empty_node = ISC_TRUE;
3309                 for (header = node->data;
3310                      header != NULL;
3311                      header = header_next) {
3312                         header_next = header->next;
3313                         /*
3314                          * Look for an active, extant NSEC or RRSIG NSEC.
3315                          */
3316                         do {
3317                                 if (header->serial <= search->serial &&
3318                                     !IGNORE(header)) {
3319                                         /*
3320                                          * Is this a "this rdataset doesn't
3321                                          * exist" record?
3322                                          */
3323                                         if (NONEXISTENT(header))
3324                                                 header = NULL;
3325                                         break;
3326                                 } else
3327                                         header = header->down;
3328                         } while (header != NULL);
3329                         if (header != NULL) {
3330                                 /*
3331                                  * We now know that there is at least one
3332                                  * active rdataset at this node.
3333                                  */
3334                                 empty_node = ISC_FALSE;
3335                                 if (header->type == type) {
3336                                         found = header;
3337                                         if (foundsig != NULL)
3338                                                 break;
3339                                 } else if (header->type == sigtype) {
3340                                         foundsig = header;
3341                                         if (found != NULL)
3342                                                 break;
3343                                 }
3344                         }
3345                 }
3346                 if (!empty_node) {
3347                         if (found != NULL && search->rbtversion->havensec3 &&
3348                             found->type == dns_rdatatype_nsec3 &&
3349                             !matchparams(found, search)) {
3350                                 empty_node = ISC_TRUE;
3351                                 found = NULL;
3352                                 foundsig = NULL;
3353                                 result = dns_rbtnodechain_prev(&search->chain,
3354                                                                NULL, NULL);
3355                         } else if (found != NULL &&
3356                                    (foundsig != NULL || !need_sig))
3357                         {
3358                                 /*
3359                                  * We've found the right NSEC/NSEC3 record.
3360                                  *
3361                                  * Note: for this to really be the right
3362                                  * NSEC record, it's essential that the NSEC
3363                                  * records of any nodes obscured by a zone
3364                                  * cut have been removed; we assume this is
3365                                  * the case.
3366                                  */
3367                                 result = dns_name_concatenate(name, origin,
3368                                                               foundname, NULL);
3369                                 if (result == ISC_R_SUCCESS) {
3370                                         if (nodep != NULL) {
3371                                                 new_reference(search->rbtdb,
3372                                                               node);
3373                                                 *nodep = node;
3374                                         }
3375                                         bind_rdataset(search->rbtdb, node,
3376                                                       found, search->now,
3377                                                       rdataset);
3378                                         if (foundsig != NULL)
3379                                                 bind_rdataset(search->rbtdb,
3380                                                               node,
3381                                                               foundsig,
3382                                                               search->now,
3383                                                               sigrdataset);
3384                                 }
3385                         } else if (found == NULL && foundsig == NULL) {
3386                                 /*
3387                                  * This node is active, but has no NSEC or
3388                                  * RRSIG NSEC.  That means it's glue or
3389                                  * other obscured zone data that isn't
3390                                  * relevant for our search.  Treat the
3391                                  * node as if it were empty and keep looking.
3392                                  */
3393                                 empty_node = ISC_TRUE;
3394                                 result = dns_rbtnodechain_prev(&search->chain,
3395                                                                NULL, NULL);
3396                         } else {
3397                                 /*
3398                                  * We found an active node, but either the
3399                                  * NSEC or the RRSIG NSEC is missing.  This
3400                                  * shouldn't happen.
3401                                  */
3402                                 result = DNS_R_BADDB;
3403                         }
3404                 } else {
3405                         /*
3406                          * This node isn't active.  We've got to keep
3407                          * looking.
3408                          */
3409                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3410                                                        NULL);
3411                 }
3412                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3413                             isc_rwlocktype_read);
3414         } while (empty_node && result == ISC_R_SUCCESS);
3415
3416         if (result == ISC_R_NOMORE && wraps) {
3417                 result = dns_rbtnodechain_last(&search->chain, tree,
3418                                                NULL, NULL);
3419                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3420                         wraps = ISC_FALSE;
3421                         goto again;
3422                 }
3423         }
3424
3425         /*
3426          * If the result is ISC_R_NOMORE, then we got to the beginning of
3427          * the database and didn't find a NSEC record.  This shouldn't
3428          * happen.
3429          */
3430         if (result == ISC_R_NOMORE)
3431                 result = DNS_R_BADDB;
3432
3433         return (result);
3434 }
3435
3436 static isc_result_t
3437 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3438           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3439           dns_dbnode_t **nodep, dns_name_t *foundname,
3440           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3441 {
3442         dns_rbtnode_t *node = NULL;
3443         isc_result_t result;
3444         rbtdb_search_t search;
3445         isc_boolean_t cname_ok = ISC_TRUE;
3446         isc_boolean_t close_version = ISC_FALSE;
3447         isc_boolean_t maybe_zonecut = ISC_FALSE;
3448         isc_boolean_t at_zonecut = ISC_FALSE;
3449         isc_boolean_t wild;
3450         isc_boolean_t empty_node;
3451         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3452         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3453         rbtdb_rdatatype_t sigtype;
3454         isc_boolean_t active;
3455         dns_rbtnodechain_t chain;
3456         nodelock_t *lock;
3457         dns_rbt_t *tree;
3458
3459         search.rbtdb = (dns_rbtdb_t *)db;
3460
3461         REQUIRE(VALID_RBTDB(search.rbtdb));
3462         INSIST(version == NULL ||
3463                ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
3464
3465         /*
3466          * We don't care about 'now'.
3467          */
3468         UNUSED(now);
3469
3470         /*
3471          * If the caller didn't supply a version, attach to the current
3472          * version.
3473          */
3474         if (version == NULL) {
3475                 currentversion(db, &version);
3476                 close_version = ISC_TRUE;
3477         }
3478
3479         search.rbtversion = version;
3480         search.serial = search.rbtversion->serial;
3481         search.options = options;
3482         search.copy_name = ISC_FALSE;
3483         search.need_cleanup = ISC_FALSE;
3484         search.wild = ISC_FALSE;
3485         search.zonecut = NULL;
3486         dns_fixedname_init(&search.zonecut_name);
3487         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3488         search.now = 0;
3489
3490         /*
3491          * 'wild' will be true iff. we've matched a wildcard.
3492          */
3493         wild = ISC_FALSE;
3494
3495         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3496
3497         /*
3498          * Search down from the root of the tree.  If, while going down, we
3499          * encounter a callback node, zone_zonecut_callback() will search the
3500          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3501          */
3502         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3503                                                          search.rbtdb->tree;
3504         result = dns_rbt_findnode(tree, name, foundname, &node,
3505                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3506                                   zone_zonecut_callback, &search);
3507
3508         if (result == DNS_R_PARTIALMATCH) {
3509         partial_match:
3510                 if (search.zonecut != NULL) {
3511                     result = setup_delegation(&search, nodep, foundname,
3512                                               rdataset, sigrdataset);
3513                     goto tree_exit;
3514                 }
3515
3516                 if (search.wild) {
3517                         /*
3518                          * At least one of the levels in the search chain
3519                          * potentially has a wildcard.  For each such level,
3520                          * we must see if there's a matching wildcard active
3521                          * in the current version.
3522                          */
3523                         result = find_wildcard(&search, &node, name);
3524                         if (result == ISC_R_SUCCESS) {
3525                                 result = dns_name_copy(name, foundname, NULL);
3526                                 if (result != ISC_R_SUCCESS)
3527                                         goto tree_exit;
3528                                 wild = ISC_TRUE;
3529                                 goto found;
3530                         }
3531                         else if (result != ISC_R_NOTFOUND)
3532                                 goto tree_exit;
3533                 }
3534
3535                 chain = search.chain;
3536                 active = activeempty(&search, &chain, name);
3537
3538                 /*
3539                  * If we're here, then the name does not exist, is not
3540                  * beneath a zonecut, and there's no matching wildcard.
3541                  */
3542                 if ((search.rbtversion->secure == dns_db_secure &&
3543                      !search.rbtversion->havensec3) ||
3544                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3545                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3546                 {
3547                         result = find_closest_nsec(&search, nodep, foundname,
3548                                                    rdataset, sigrdataset, tree,
3549                                                    search.rbtversion->secure);
3550                         if (result == ISC_R_SUCCESS)
3551                                 result = active ? DNS_R_EMPTYNAME :
3552                                                   DNS_R_NXDOMAIN;
3553                 } else
3554                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3555                 goto tree_exit;
3556         } else if (result != ISC_R_SUCCESS)
3557                 goto tree_exit;
3558
3559  found:
3560         /*
3561          * We have found a node whose name is the desired name, or we
3562          * have matched a wildcard.
3563          */
3564
3565         if (search.zonecut != NULL) {
3566                 /*
3567                  * If we're beneath a zone cut, we don't want to look for
3568                  * CNAMEs because they're not legitimate zone glue.
3569                  */
3570                 cname_ok = ISC_FALSE;
3571         } else {
3572                 /*
3573                  * The node may be a zone cut itself.  If it might be one,
3574                  * make sure we check for it later.
3575                  *
3576                  * DS records live above the zone cut in ordinary zone so
3577                  * we want to ignore any referral.
3578                  *
3579                  * Stub zones don't have anything "above" the delegation so
3580                  * we always return a referral.
3581                  */
3582                 if (node->find_callback &&
3583                     ((node != search.rbtdb->origin_node &&
3584                       !dns_rdatatype_atparent(type)) ||
3585                      IS_STUB(search.rbtdb)))
3586                         maybe_zonecut = ISC_TRUE;
3587         }
3588
3589         /*
3590          * Certain DNSSEC types are not subject to CNAME matching
3591          * (RFC4035, section 2.5 and RFC3007).
3592          *
3593          * We don't check for RRSIG, because we don't store RRSIG records
3594          * directly.
3595          */
3596         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3597                 cname_ok = ISC_FALSE;
3598
3599         /*
3600          * We now go looking for rdata...
3601          */
3602
3603         lock = &search.rbtdb->node_locks[node->locknum].lock;
3604         NODE_LOCK(lock, isc_rwlocktype_read);
3605
3606         found = NULL;
3607         foundsig = NULL;
3608         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3609         nsecheader = NULL;
3610         nsecsig = NULL;
3611         cnamesig = NULL;
3612         empty_node = ISC_TRUE;
3613         for (header = node->data; header != NULL; header = header_next) {
3614                 header_next = header->next;
3615                 /*
3616                  * Look for an active, extant rdataset.
3617                  */
3618                 do {
3619                         if (header->serial <= search.serial &&
3620                             !IGNORE(header)) {
3621                                 /*
3622                                  * Is this a "this rdataset doesn't
3623                                  * exist" record?
3624                                  */
3625                                 if (NONEXISTENT(header))
3626                                         header = NULL;
3627                                 break;
3628                         } else
3629                                 header = header->down;
3630                 } while (header != NULL);
3631                 if (header != NULL) {
3632                         /*
3633                          * We now know that there is at least one active
3634                          * rdataset at this node.
3635                          */
3636                         empty_node = ISC_FALSE;
3637
3638                         /*
3639                          * Do special zone cut handling, if requested.
3640                          */
3641                         if (maybe_zonecut &&
3642                             header->type == dns_rdatatype_ns) {
3643                                 /*
3644                                  * We increment the reference count on node to
3645                                  * ensure that search->zonecut_rdataset will
3646                                  * still be valid later.
3647                                  */
3648                                 new_reference(search.rbtdb, node);
3649                                 search.zonecut = node;
3650                                 search.zonecut_rdataset = header;
3651                                 search.zonecut_sigrdataset = NULL;
3652                                 search.need_cleanup = ISC_TRUE;
3653                                 maybe_zonecut = ISC_FALSE;
3654                                 at_zonecut = ISC_TRUE;
3655                                 /*
3656                                  * It is not clear if KEY should still be
3657                                  * allowed at the parent side of the zone
3658                                  * cut or not.  It is needed for RFC3007
3659                                  * validated updates.
3660                                  */
3661                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3662                                     && type != dns_rdatatype_nsec
3663                                     && type != dns_rdatatype_key) {
3664                                         /*
3665                                          * Glue is not OK, but any answer we
3666                                          * could return would be glue.  Return
3667                                          * the delegation.
3668                                          */
3669                                         found = NULL;
3670                                         break;
3671                                 }
3672                                 if (found != NULL && foundsig != NULL)
3673                                         break;
3674                         }
3675
3676
3677                         /*
3678                          * If the NSEC3 record doesn't match the chain
3679                          * we are using behave as if it isn't here.
3680                          */
3681                         if (header->type == dns_rdatatype_nsec3 &&
3682                            !matchparams(header, &search)) {
3683                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3684                                 goto partial_match;
3685                         }
3686                         /*
3687                          * If we found a type we were looking for,
3688                          * remember it.
3689                          */
3690                         if (header->type == type ||
3691                             type == dns_rdatatype_any ||
3692                             (header->type == dns_rdatatype_cname &&
3693                              cname_ok)) {
3694                                 /*
3695                                  * We've found the answer!
3696                                  */
3697                                 found = header;
3698                                 if (header->type == dns_rdatatype_cname &&
3699                                     cname_ok) {
3700                                         /*
3701                                          * We may be finding a CNAME instead
3702                                          * of the desired type.
3703                                          *
3704                                          * If we've already got the CNAME RRSIG,
3705                                          * use it, otherwise change sigtype
3706                                          * so that we find it.
3707                                          */
3708                                         if (cnamesig != NULL)
3709                                                 foundsig = cnamesig;
3710                                         else
3711                                                 sigtype =
3712                                                     RBTDB_RDATATYPE_SIGCNAME;
3713                                 }
3714                                 /*
3715                                  * If we've got all we need, end the search.
3716                                  */
3717                                 if (!maybe_zonecut && foundsig != NULL)
3718                                         break;
3719                         } else if (header->type == sigtype) {
3720                                 /*
3721                                  * We've found the RRSIG rdataset for our
3722                                  * target type.  Remember it.
3723                                  */
3724                                 foundsig = header;
3725                                 /*
3726                                  * If we've got all we need, end the search.
3727                                  */
3728                                 if (!maybe_zonecut && found != NULL)
3729                                         break;
3730                         } else if (header->type == dns_rdatatype_nsec &&
3731                                    !search.rbtversion->havensec3) {
3732                                 /*
3733                                  * Remember a NSEC rdataset even if we're
3734                                  * not specifically looking for it, because
3735                                  * we might need it later.
3736                                  */
3737                                 nsecheader = header;
3738                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3739                                    !search.rbtversion->havensec3) {
3740                                 /*
3741                                  * If we need the NSEC rdataset, we'll also
3742                                  * need its signature.
3743                                  */
3744                                 nsecsig = header;
3745                         } else if (cname_ok &&
3746                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3747                                 /*
3748                                  * If we get a CNAME match, we'll also need
3749                                  * its signature.
3750                                  */
3751                                 cnamesig = header;
3752                         }
3753                 }
3754         }
3755
3756         if (empty_node) {
3757                 /*
3758                  * We have an exact match for the name, but there are no
3759                  * active rdatasets in the desired version.  That means that
3760                  * this node doesn't exist in the desired version, and that
3761                  * we really have a partial match.
3762                  */
3763                 if (!wild) {
3764                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3765                         goto partial_match;
3766                 }
3767         }
3768
3769         /*
3770          * If we didn't find what we were looking for...
3771          */
3772         if (found == NULL) {
3773                 if (search.zonecut != NULL) {
3774                         /*
3775                          * We were trying to find glue at a node beneath a
3776                          * zone cut, but didn't.
3777                          *
3778                          * Return the delegation.
3779                          */
3780                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3781                         result = setup_delegation(&search, nodep, foundname,
3782                                                   rdataset, sigrdataset);
3783                         goto tree_exit;
3784                 }
3785                 /*
3786                  * The desired type doesn't exist.
3787                  */
3788                 result = DNS_R_NXRRSET;
3789                 if (search.rbtversion->secure == dns_db_secure &&
3790                     !search.rbtversion->havensec3 &&
3791                     (nsecheader == NULL || nsecsig == NULL)) {
3792                         /*
3793                          * The zone is secure but there's no NSEC,
3794                          * or the NSEC has no signature!
3795                          */
3796                         if (!wild) {
3797                                 result = DNS_R_BADDB;
3798                                 goto node_exit;
3799                         }
3800
3801                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3802                         result = find_closest_nsec(&search, nodep, foundname,
3803                                                    rdataset, sigrdataset,
3804                                                    search.rbtdb->tree,
3805                                                    search.rbtversion->secure);
3806                         if (result == ISC_R_SUCCESS)
3807                                 result = DNS_R_EMPTYWILD;
3808                         goto tree_exit;
3809                 }
3810                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3811                     nsecheader == NULL)
3812                 {
3813                         /*
3814                          * There's no NSEC record, and we were told
3815                          * to find one.
3816                          */
3817                         result = DNS_R_BADDB;
3818                         goto node_exit;
3819                 }
3820                 if (nodep != NULL) {
3821                         new_reference(search.rbtdb, node);
3822                         *nodep = node;
3823                 }
3824                 if ((search.rbtversion->secure == dns_db_secure &&
3825                      !search.rbtversion->havensec3) ||
3826                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3827                 {
3828                         bind_rdataset(search.rbtdb, node, nsecheader,
3829                                       0, rdataset);
3830                         if (nsecsig != NULL)
3831                                 bind_rdataset(search.rbtdb, node,
3832                                               nsecsig, 0, sigrdataset);
3833                 }
3834                 if (wild)
3835                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3836                 goto node_exit;
3837         }
3838
3839         /*
3840          * We found what we were looking for, or we found a CNAME.
3841          */
3842
3843         if (type != found->type &&
3844             type != dns_rdatatype_any &&
3845             found->type == dns_rdatatype_cname) {
3846                 /*
3847                  * We weren't doing an ANY query and we found a CNAME instead
3848                  * of the type we were looking for, so we need to indicate
3849                  * that result to the caller.
3850                  */
3851                 result = DNS_R_CNAME;
3852         } else if (search.zonecut != NULL) {
3853                 /*
3854                  * If we're beneath a zone cut, we must indicate that the
3855                  * result is glue, unless we're actually at the zone cut
3856                  * and the type is NSEC or KEY.
3857                  */
3858                 if (search.zonecut == node) {
3859                         /*
3860                          * It is not clear if KEY should still be
3861                          * allowed at the parent side of the zone
3862                          * cut or not.  It is needed for RFC3007
3863                          * validated updates.
3864                          */
3865                         if (type == dns_rdatatype_nsec ||
3866                             type == dns_rdatatype_nsec3 ||
3867                             type == dns_rdatatype_key)
3868                                 result = ISC_R_SUCCESS;
3869                         else if (type == dns_rdatatype_any)
3870                                 result = DNS_R_ZONECUT;
3871                         else
3872                                 result = DNS_R_GLUE;
3873                 } else
3874                         result = DNS_R_GLUE;
3875                 /*
3876                  * We might have found data that isn't glue, but was occluded
3877                  * by a dynamic update.  If the caller cares about this, they
3878                  * will have told us to validate glue.
3879                  *
3880                  * XXX We should cache the glue validity state!
3881                  */
3882                 if (result == DNS_R_GLUE &&
3883                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3884                     !valid_glue(&search, foundname, type, node)) {
3885                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3886                         result = setup_delegation(&search, nodep, foundname,
3887                                                   rdataset, sigrdataset);
3888                     goto tree_exit;
3889                 }
3890         } else {
3891                 /*
3892                  * An ordinary successful query!
3893                  */
3894                 result = ISC_R_SUCCESS;
3895         }
3896
3897         if (nodep != NULL) {
3898                 if (!at_zonecut)
3899                         new_reference(search.rbtdb, node);
3900                 else
3901                         search.need_cleanup = ISC_FALSE;
3902                 *nodep = node;
3903         }
3904
3905         if (type != dns_rdatatype_any) {
3906                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3907                 if (foundsig != NULL)
3908                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3909                                       sigrdataset);
3910         }
3911
3912         if (wild)
3913                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3914
3915  node_exit:
3916         NODE_UNLOCK(lock, isc_rwlocktype_read);
3917
3918  tree_exit:
3919         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3920
3921         /*
3922          * If we found a zonecut but aren't going to use it, we have to
3923          * let go of it.
3924          */
3925         if (search.need_cleanup) {
3926                 node = search.zonecut;
3927                 INSIST(node != NULL);
3928                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3929
3930                 NODE_LOCK(lock, isc_rwlocktype_read);
3931                 decrement_reference(search.rbtdb, node, 0,
3932                                     isc_rwlocktype_read, isc_rwlocktype_none,
3933                                     ISC_FALSE);
3934                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3935         }
3936
3937         if (close_version)
3938                 closeversion(db, &version, ISC_FALSE);
3939
3940         dns_rbtnodechain_reset(&search.chain);
3941
3942         return (result);
3943 }
3944
3945 static isc_result_t
3946 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3947                  isc_stdtime_t now, dns_dbnode_t **nodep,
3948                  dns_name_t *foundname,
3949                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3950 {
3951         UNUSED(db);
3952         UNUSED(name);
3953         UNUSED(options);
3954         UNUSED(now);
3955         UNUSED(nodep);
3956         UNUSED(foundname);
3957         UNUSED(rdataset);
3958         UNUSED(sigrdataset);
3959
3960         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3961
3962         return (ISC_R_NOTIMPLEMENTED);
3963 }
3964
3965 static isc_result_t
3966 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3967         rbtdb_search_t *search = arg;
3968         rdatasetheader_t *header, *header_prev, *header_next;
3969         rdatasetheader_t *dname_header, *sigdname_header;
3970         isc_result_t result;
3971         nodelock_t *lock;
3972         isc_rwlocktype_t locktype;
3973
3974         /* XXX comment */
3975
3976         REQUIRE(search->zonecut == NULL);
3977
3978         /*
3979          * Keep compiler silent.
3980          */
3981         UNUSED(name);
3982
3983         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3984         locktype = isc_rwlocktype_read;
3985         NODE_LOCK(lock, locktype);
3986
3987         /*
3988          * Look for a DNAME or RRSIG DNAME rdataset.
3989          */
3990         dname_header = NULL;
3991         sigdname_header = NULL;
3992         header_prev = NULL;
3993         for (header = node->data; header != NULL; header = header_next) {
3994                 header_next = header->next;
3995                 if (header->rdh_ttl <= search->now) {
3996                         /*
3997                          * This rdataset is stale.  If no one else is
3998                          * using the node, we can clean it up right
3999                          * now, otherwise we mark it as stale, and
4000                          * the node as dirty, so it will get cleaned
4001                          * up later.
4002                          */
4003                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4004                             (locktype == isc_rwlocktype_write ||
4005                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4006                                 /*
4007                                  * We update the node's status only when we
4008                                  * can get write access; otherwise, we leave
4009                                  * others to this work.  Periodical cleaning
4010                                  * will eventually take the job as the last
4011                                  * resort.
4012                                  * We won't downgrade the lock, since other
4013                                  * rdatasets are probably stale, too.
4014                                  */
4015                                 locktype = isc_rwlocktype_write;
4016
4017                                 if (dns_rbtnode_refcurrent(node) == 0) {
4018                                         isc_mem_t *mctx;
4019
4020                                         /*
4021                                          * header->down can be non-NULL if the
4022                                          * refcount has just decremented to 0
4023                                          * but decrement_reference() has not
4024                                          * performed clean_cache_node(), in
4025                                          * which case we need to purge the
4026                                          * stale headers first.
4027                                          */
4028                                         mctx = search->rbtdb->common.mctx;
4029                                         clean_stale_headers(search->rbtdb,
4030                                                             mctx,
4031                                                             header);
4032                                         if (header_prev != NULL)
4033                                                 header_prev->next =
4034                                                         header->next;
4035                                         else
4036                                                 node->data = header->next;
4037                                         free_rdataset(search->rbtdb, mctx,
4038                                                       header);
4039                                 } else {
4040                                         header->attributes |=
4041                                                 RDATASET_ATTR_STALE;
4042                                         node->dirty = 1;
4043                                         header_prev = header;
4044                                 }
4045                         } else
4046                                 header_prev = header;
4047                 } else if (header->type == dns_rdatatype_dname &&
4048                            EXISTS(header)) {
4049                         dname_header = header;
4050                         header_prev = header;
4051                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4052                          EXISTS(header)) {
4053                         sigdname_header = header;
4054                         header_prev = header;
4055                 } else
4056                         header_prev = header;
4057         }
4058
4059         if (dname_header != NULL &&
4060             (!DNS_TRUST_PENDING(dname_header->trust) ||
4061              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4062                 /*
4063                  * We increment the reference count on node to ensure that
4064                  * search->zonecut_rdataset will still be valid later.
4065                  */
4066                 new_reference(search->rbtdb, node);
4067                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4068                 search->zonecut = node;
4069                 search->zonecut_rdataset = dname_header;
4070                 search->zonecut_sigrdataset = sigdname_header;
4071                 search->need_cleanup = ISC_TRUE;
4072                 result = DNS_R_PARTIALMATCH;
4073         } else
4074                 result = DNS_R_CONTINUE;
4075
4076         NODE_UNLOCK(lock, locktype);
4077
4078         return (result);
4079 }
4080
4081 static inline isc_result_t
4082 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4083                      dns_dbnode_t **nodep, dns_name_t *foundname,
4084                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4085 {
4086         unsigned int i;
4087         dns_rbtnode_t *level_node;
4088         rdatasetheader_t *header, *header_prev, *header_next;
4089         rdatasetheader_t *found, *foundsig;
4090         isc_result_t result = ISC_R_NOTFOUND;
4091         dns_name_t name;
4092         dns_rbtdb_t *rbtdb;
4093         isc_boolean_t done;
4094         nodelock_t *lock;
4095         isc_rwlocktype_t locktype;
4096
4097         /*
4098          * Caller must be holding the tree lock.
4099          */
4100
4101         rbtdb = search->rbtdb;
4102         i = search->chain.level_matches;
4103         done = ISC_FALSE;
4104         do {
4105                 locktype = isc_rwlocktype_read;
4106                 lock = &rbtdb->node_locks[node->locknum].lock;
4107                 NODE_LOCK(lock, locktype);
4108
4109                 /*
4110                  * Look for NS and RRSIG NS rdatasets.
4111                  */
4112                 found = NULL;
4113                 foundsig = NULL;
4114                 header_prev = NULL;
4115                 for (header = node->data;
4116                      header != NULL;
4117                      header = header_next) {
4118                         header_next = header->next;
4119                         if (header->rdh_ttl <= search->now) {
4120                                 /*
4121                                  * This rdataset is stale.  If no one else is
4122                                  * using the node, we can clean it up right
4123                                  * now, otherwise we mark it as stale, and
4124                                  * the node as dirty, so it will get cleaned
4125                                  * up later.
4126                                  */
4127                                 if ((header->rdh_ttl <= search->now -
4128                                                     RBTDB_VIRTUAL) &&
4129                                     (locktype == isc_rwlocktype_write ||
4130                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4131                                         /*
4132                                          * We update the node's status only
4133                                          * when we can get write access.
4134                                          */
4135                                         locktype = isc_rwlocktype_write;
4136
4137                                         if (dns_rbtnode_refcurrent(node)
4138                                             == 0) {
4139                                                 isc_mem_t *m;
4140
4141                                                 m = search->rbtdb->common.mctx;
4142                                                 clean_stale_headers(
4143                                                         search->rbtdb,
4144                                                         m, header);
4145                                                 if (header_prev != NULL)
4146                                                         header_prev->next =
4147                                                                 header->next;
4148                                                 else
4149                                                         node->data =
4150                                                                 header->next;
4151                                                 free_rdataset(rbtdb, m,
4152                                                               header);
4153                                         } else {
4154                                                 header->attributes |=
4155                                                         RDATASET_ATTR_STALE;
4156                                                 node->dirty = 1;
4157                                                 header_prev = header;
4158                                         }
4159                                 } else
4160                                         header_prev = header;
4161                         } else if (EXISTS(header)) {
4162                                 /*
4163                                  * We've found an extant rdataset.  See if
4164                                  * we're interested in it.
4165                                  */
4166                                 if (header->type == dns_rdatatype_ns) {
4167                                         found = header;
4168                                         if (foundsig != NULL)
4169                                                 break;
4170                                 } else if (header->type ==
4171                                            RBTDB_RDATATYPE_SIGNS) {
4172                                         foundsig = header;
4173                                         if (found != NULL)
4174                                                 break;
4175                                 }
4176                                 header_prev = header;
4177                         } else
4178                                 header_prev = header;
4179                 }
4180
4181                 if (found != NULL) {
4182                         /*
4183                          * If we have to set foundname, we do it before
4184                          * anything else.  If we were to set foundname after
4185                          * we had set nodep or bound the rdataset, then we'd
4186                          * have to undo that work if dns_name_concatenate()
4187                          * failed.  By setting foundname first, there's
4188                          * nothing to undo if we have trouble.
4189                          */
4190                         if (foundname != NULL) {
4191                                 dns_name_init(&name, NULL);
4192                                 dns_rbt_namefromnode(node, &name);
4193                                 result = dns_name_copy(&name, foundname, NULL);
4194                                 while (result == ISC_R_SUCCESS && i > 0) {
4195                                         i--;
4196                                         level_node = search->chain.levels[i];
4197                                         dns_name_init(&name, NULL);
4198                                         dns_rbt_namefromnode(level_node,
4199                                                              &name);
4200                                         result =
4201                                                 dns_name_concatenate(foundname,
4202                                                                      &name,
4203                                                                      foundname,
4204                                                                      NULL);
4205                                 }
4206                                 if (result != ISC_R_SUCCESS) {
4207                                         *nodep = NULL;
4208                                         goto node_exit;
4209                                 }
4210                         }
4211                         result = DNS_R_DELEGATION;
4212                         if (nodep != NULL) {
4213                                 new_reference(search->rbtdb, node);
4214                                 *nodep = node;
4215                         }
4216                         bind_rdataset(search->rbtdb, node, found, search->now,
4217                                       rdataset);
4218                         if (foundsig != NULL)
4219                                 bind_rdataset(search->rbtdb, node, foundsig,
4220                                               search->now, sigrdataset);
4221                         if (need_headerupdate(found, search->now) ||
4222                             (foundsig != NULL &&
4223                              need_headerupdate(foundsig, search->now))) {
4224                                 if (locktype != isc_rwlocktype_write) {
4225                                         NODE_UNLOCK(lock, locktype);
4226                                         NODE_LOCK(lock, isc_rwlocktype_write);
4227                                         locktype = isc_rwlocktype_write;
4228                                         POST(locktype);
4229                                 }
4230                                 if (need_headerupdate(found, search->now))
4231                                         update_header(search->rbtdb, found,
4232                                                       search->now);
4233                                 if (foundsig != NULL &&
4234                                     need_headerupdate(foundsig, search->now)) {
4235                                         update_header(search->rbtdb, foundsig,
4236                                                       search->now);
4237                                 }
4238                         }
4239                 }
4240
4241         node_exit:
4242                 NODE_UNLOCK(lock, locktype);
4243
4244                 if (found == NULL && i > 0) {
4245                         i--;
4246                         node = search->chain.levels[i];
4247                 } else
4248                         done = ISC_TRUE;
4249
4250         } while (!done);
4251
4252         return (result);
4253 }
4254
/*
 * Walk backwards along the rbt node chain held in 'search', beginning with
 * the node reported by dns_rbtnodechain_current(), looking for an NSEC
 * rdataset.
 *
 * Each visited node's header list is scanned under its node lock.  Headers
 * whose TTL is not after 'now' are skipped; those that are at least
 * RBTDB_VIRTUAL older than 'now' are additionally freed (node reference
 * count is 0) or marked stale (otherwise), but only when write access to
 * the node lock is already held or can be obtained with NODE_TRYUPGRADE.
 * A node with no remaining usable header counts as empty and the chain is
 * stepped to the previous node.
 *
 * Returns:
 *   DNS_R_COVERINGNSEC  an NSEC header was found; 'foundname', 'rdataset'
 *                       and '*nodep' (with a new reference) are set, and
 *                       'sigrdataset' too when an RRSIG(NSEC) header is
 *                       present.
 *   ISC_R_NOTFOUND      the first non-empty node carries no NSEC header.
 *   anything else       as reported by dns_rbtnodechain_current(),
 *                       dns_rbtnodechain_prev() or dns_name_concatenate().
 *
 * NOTE(review): '*nodep' is stored without a NULL check, whereas
 * cache_find(), which passes its own 'nodep' straight through, guards its
 * other stores with 'nodep != NULL' -- confirm that callers never combine a
 * NULL nodep with DNS_DBFIND_COVERINGNSEC.
 */
4255 static isc_result_t
4256 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4257                   isc_stdtime_t now, dns_name_t *foundname,
4258                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4259 {
4260         dns_rbtnode_t *node;
4261         rdatasetheader_t *header, *header_next, *header_prev;
4262         rdatasetheader_t *found, *foundsig;
4263         isc_boolean_t empty_node;
4264         isc_result_t result;
4265         dns_fixedname_t fname, forigin;
4266         dns_name_t *name, *origin;
4267         rbtdb_rdatatype_t matchtype, sigmatchtype;
4268         nodelock_t *lock;
4269         isc_rwlocktype_t locktype;
4270
4271         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4272         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4273                                              dns_rdatatype_nsec);
4274
4275         do {
4276                 node = NULL;
4277                 dns_fixedname_init(&fname);
4278                 name = dns_fixedname_name(&fname);
4279                 dns_fixedname_init(&forigin);
4280                 origin = dns_fixedname_name(&forigin);
4281                 result = dns_rbtnodechain_current(&search->chain, name,
4282                                                   origin, &node);
4283                 if (result != ISC_R_SUCCESS)
4284                         return (result);
4285                 locktype = isc_rwlocktype_read;
4286                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4287                 NODE_LOCK(lock, locktype);
4288                 found = NULL;
4289                 foundsig = NULL;
4290                 empty_node = ISC_TRUE;
4291                 header_prev = NULL;
                     /*
                      * header_prev trails the last header left on the list,
                      * so that a freed header can be unlinked from it (or
                      * from node->data when it was the first one).
                      */
4292                 for (header = node->data;
4293                      header != NULL;
4294                      header = header_next) {
4295                         header_next = header->next;
4296                         if (header->rdh_ttl <= now) {
4297                                 /*
4298                                  * This rdataset is stale.  If no one else is
4299                                  * using the node, we can clean it up right
4300                                  * now, otherwise we mark it as stale, and the
4301                                  * node as dirty, so it will get cleaned up
4302                                  * later.
4303                                  */
4304                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4305                                     (locktype == isc_rwlocktype_write ||
4306                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4307                                         /*
4308                                          * We update the node's status only
4309                                          * when we can get write access.
4310                                          */
4311                                         locktype = isc_rwlocktype_write;
4312
4313                                         if (dns_rbtnode_refcurrent(node)
4314                                             == 0) {
4315                                                 isc_mem_t *m;
4316
4317                                                 m = search->rbtdb->common.mctx;
4318                                                 clean_stale_headers(
4319                                                         search->rbtdb,
4320                                                         m, header);
4321                                                 if (header_prev != NULL)
4322                                                         header_prev->next =
4323                                                                 header->next;
4324                                                 else
4325                                                         node->data = header->next;
4326                                                 free_rdataset(search->rbtdb, m,
4327                                                               header);
4328                                         } else {
4329                                                 header->attributes |=
4330                                                         RDATASET_ATTR_STALE;
4331                                                 node->dirty = 1;
4332                                                 header_prev = header;
4333                                         }
4334                                 } else
4335                                         header_prev = header;
4336                                 continue;
4337                         }
                             /*
                              * Headers flagged NONEXISTENT, and those whose
                              * base type is 0 (the form cache_find() treats as
                              * negative cache entries), do not make the node
                              * non-empty.
                              */
4338                         if (NONEXISTENT(header) ||
4339                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4340                                 header_prev = header;
4341                                 continue;
4342                         }
4343                         empty_node = ISC_FALSE;
4344                         if (header->type == matchtype)
4345                                 found = header;
4346                         else if (header->type == sigmatchtype)
4347                                 foundsig = header;
4348                         header_prev = header;
4349                 }
4350                 if (found != NULL) {
4351                         result = dns_name_concatenate(name, origin,
4352                                                       foundname, NULL);
4353                         if (result != ISC_R_SUCCESS)
4354                                 goto unlock_node;
4355                         bind_rdataset(search->rbtdb, node, found,
4356                                       now, rdataset);
4357                         if (foundsig != NULL)
4358                                 bind_rdataset(search->rbtdb, node, foundsig,
4359                                               now, sigrdataset);
4360                         new_reference(search->rbtdb, node);
4361                         *nodep = node;
4362                         result = DNS_R_COVERINGNSEC;
4363                 } else if (!empty_node) {
4364                         result = ISC_R_NOTFOUND;
4365                 } else
4366                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4367                                                        NULL);
                     /*
                      * Every path through the loop body ends here; the lock
                      * is released in whichever mode 'locktype' says it is
                      * currently held.
                      */
4368  unlock_node:
4369                 NODE_UNLOCK(lock, locktype);
             /*
              * found != NULL implies !empty_node, so the loop repeats only
              * after a successful step to the previous node.
              */
4370         } while (empty_node && result == ISC_R_SUCCESS);
4371         return (result);
4372 }
4373
/*
 * Look up rdatasets of 'type' at 'name' in the cache database 'db'.
 *
 * 'version' must be NULL.  If 'now' is 0 the current time is used.  The
 * database tree lock is held for reading from the tree search until the
 * tree_exit label.
 *
 * Outline:
 *   1. dns_rbt_findnode() searches the tree with cache_zonecut_callback()
 *      as its callback.
 *   2. On DNS_R_PARTIALMATCH: if DNS_DBFIND_COVERINGNSEC is set, try
 *      find_coveringnsec() and return if it yields DNS_R_COVERINGNSEC;
 *      otherwise use setup_delegation() when search.zonecut was set, or
 *      find_deepest_zonecut() when it was not.
 *   3. On an exact match: scan the node's headers under the node lock,
 *      cleaning up or marking stale headers where write access is
 *      available, and remember the answer header, its RRSIG, and the
 *      NS / RRSIG(NS) / RRSIG(CNAME) headers.
 *   4. If the node turned out to be empty, or there is no acceptable answer
 *      and no NS header at the node, release the node lock and jump to the
 *      find_ns label inside step 2.
 *
 * Results set directly here are ISC_R_SUCCESS, DNS_R_CNAME,
 * DNS_R_NCACHENXDOMAIN, DNS_R_NCACHENXRRSET and DNS_R_DELEGATION; any other
 * value comes from dns_rbt_findnode() or one of the helpers named above.
 * When 'nodep' is not NULL and an answer or delegation is produced from the
 * matched node, '*nodep' receives a new reference to that node.
 */
4374 static isc_result_t
4375 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4376            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4377            dns_dbnode_t **nodep, dns_name_t *foundname,
4378            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4379 {
4380         dns_rbtnode_t *node = NULL;
4381         isc_result_t result;
4382         rbtdb_search_t search;
4383         isc_boolean_t cname_ok = ISC_TRUE;
4384         isc_boolean_t empty_node;
4385         nodelock_t *lock;
4386         isc_rwlocktype_t locktype;
4387         rdatasetheader_t *header, *header_prev, *header_next;
4388         rdatasetheader_t *found, *nsheader;
4389         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4390         rdatasetheader_t *update, *updatesig;
4391         rbtdb_rdatatype_t sigtype, negtype;
4392
4393         UNUSED(version);
4394
4395         search.rbtdb = (dns_rbtdb_t *)db;
4396
4397         REQUIRE(VALID_RBTDB(search.rbtdb));
4398         REQUIRE(version == NULL);
4399
4400         if (now == 0)
4401                 isc_stdtime_get(&now);
4402
4403         search.rbtversion = NULL;
4404         search.serial = 1;
4405         search.options = options;
4406         search.copy_name = ISC_FALSE;
4407         search.need_cleanup = ISC_FALSE;
4408         search.wild = ISC_FALSE;
4409         search.zonecut = NULL;
4410         dns_fixedname_init(&search.zonecut_name);
4411         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4412         search.now = now;
4413         update = NULL;
4414         updatesig = NULL;
4415
4416         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4417
4418         /*
4419          * Search down from the root of the tree.  If, while going down, we
4420          * encounter a callback node, cache_zonecut_callback() will search the
4421          * rdatasets at the zone cut for a DNAME rdataset.
4422          */
4423         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4424                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4425                                   cache_zonecut_callback, &search);
4426
4427         if (result == DNS_R_PARTIALMATCH) {
4428                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4429                         result = find_coveringnsec(&search, nodep, now,
4430                                                    foundname, rdataset,
4431                                                    sigrdataset);
4432                         if (result == DNS_R_COVERINGNSEC)
4433                                 goto tree_exit;
4434                 }
4435                 if (search.zonecut != NULL) {
4436                     result = setup_delegation(&search, nodep, foundname,
4437                                               rdataset, sigrdataset);
4438                     goto tree_exit;
4439                 } else {
                     /*
                      * find_ns is also entered by goto from the exact-match
                      * code further down, after the node lock has been
                      * released but with the tree lock still held.
                      */
4440                 find_ns:
4441                         result = find_deepest_zonecut(&search, node, nodep,
4442                                                       foundname, rdataset,
4443                                                       sigrdataset);
4444                         goto tree_exit;
4445                 }
4446         } else if (result != ISC_R_SUCCESS)
4447                 goto tree_exit;
4448
4449         /*
4450          * Certain DNSSEC types are not subject to CNAME matching
4451          * (RFC4035, section 2.5 and RFC3007).
4452          *
4453          * We don't check for RRSIG, because we don't store RRSIG records
4454          * directly.
4455          */
4456         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4457                 cname_ok = ISC_FALSE;
4458
4459         /*
4460          * We now go looking for rdata...
4461          */
4462
4463         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4464         locktype = isc_rwlocktype_read;
4465         NODE_LOCK(lock, locktype);
4466
4467         found = NULL;
4468         foundsig = NULL;
4469         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4470         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4471         nsheader = NULL;
4472         nssig = NULL;
4473         cnamesig = NULL;
4474         empty_node = ISC_TRUE;
4475         header_prev = NULL;
4476         for (header = node->data; header != NULL; header = header_next) {
4477                 header_next = header->next;
4478                 if (header->rdh_ttl <= now) {
4479                         /*
4480                          * This rdataset is stale.  If no one else is using the
4481                          * node, we can clean it up right now, otherwise we
4482                          * mark it as stale, and the node as dirty, so it will
4483                          * get cleaned up later.
4484                          */
4485                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4486                             (locktype == isc_rwlocktype_write ||
4487                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4488                                 /*
4489                                  * We update the node's status only when we
4490                                  * can get write access.
4491                                  */
4492                                 locktype = isc_rwlocktype_write;
4493
4494                                 if (dns_rbtnode_refcurrent(node) == 0) {
4495                                         isc_mem_t *mctx;
4496
4497                                         mctx = search.rbtdb->common.mctx;
4498                                         clean_stale_headers(search.rbtdb, mctx,
4499                                                             header);
4500                                         if (header_prev != NULL)
4501                                                 header_prev->next =
4502                                                         header->next;
4503                                         else
4504                                                 node->data = header->next;
4505                                         free_rdataset(search.rbtdb, mctx,
4506                                                       header);
4507                                 } else {
4508                                         header->attributes |=
4509                                                 RDATASET_ATTR_STALE;
4510                                         node->dirty = 1;
4511                                         header_prev = header;
4512                                 }
4513                         } else
4514                                 header_prev = header;
4515                 } else if (EXISTS(header)) {
4516                         /*
4517                          * We now know that there is at least one active
4518                          * non-stale rdataset at this node.
4519                          */
4520                         empty_node = ISC_FALSE;
4521
4522                         /*
4523                          * If we found a type we were looking for, remember
4524                          * it.
4525                          */
4526                         if (header->type == type ||
4527                             (type == dns_rdatatype_any &&
4528                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4529                             (cname_ok && header->type ==
4530                              dns_rdatatype_cname)) {
4531                                 /*
4532                                  * We've found the answer.
4533                                  */
4534                                 found = header;
4535                                 if (header->type == dns_rdatatype_cname &&
4536                                     cname_ok &&
4537                                     cnamesig != NULL) {
4538                                         /*
4539                                          * If we've already got the
4540                                          * CNAME RRSIG, use it.
4541                                          */
4542                                         foundsig = cnamesig;
4543                                 }
4544                         } else if (header->type == sigtype) {
4545                                 /*
4546                                  * We've found the RRSIG rdataset for our
4547                                  * target type.  Remember it.
4548                                  */
4549                                 foundsig = header;
4550                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4551                                    header->type == negtype) {
4552                                 /*
4553                                  * We've found a negative cache entry.
4554                                  */
4555                                 found = header;
4556                         } else if (header->type == dns_rdatatype_ns) {
4557                                 /*
4558                                  * Remember a NS rdataset even if we're
4559                                  * not specifically looking for it, because
4560                                  * we might need it later.
4561                                  */
4562                                 nsheader = header;
4563                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4564                                 /*
4565                                  * If we need the NS rdataset, we'll also
4566                                  * need its signature.
4567                                  */
4568                                 nssig = header;
4569                         } else if (cname_ok &&
4570                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4571                                 /*
4572                                  * If we get a CNAME match, we'll also need
4573                                  * its signature.
4574                                  */
4575                                 cnamesig = header;
4576                         }
4577                         header_prev = header;
4578                 } else
4579                         header_prev = header;
4580         }
4581
4582         if (empty_node) {
4583                 /*
4584                  * We have an exact match for the name, but there are no
4585                  * extant rdatasets.  That means that this node doesn't
4586                  * meaningfully exist, and that we really have a partial match.
4587                  */
4588                 NODE_UNLOCK(lock, locktype);
4589                 goto find_ns;
4590         }
4591
4592         /*
4593          * If we didn't find what we were looking for...
4594          */
             /*
              * A header that was found is still rejected here when its
              * trust is additional, glue or pending and the matching
              * DNS_DBFIND_ADDITIONALOK / DNS_DBFIND_GLUEOK /
              * DNS_DBFIND_PENDINGOK option is not set.
              */
4595         if (found == NULL ||
4596             (DNS_TRUST_ADDITIONAL(found->trust) &&
4597              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4598             (found->trust == dns_trust_glue &&
4599              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4600             (DNS_TRUST_PENDING(found->trust) &&
4601              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4602                 /*
4603                  * If there is an NS rdataset at this node, then this is the
4604                  * deepest zone cut.
4605                  */
4606                 if (nsheader != NULL) {
4607                         if (nodep != NULL) {
4608                                 new_reference(search.rbtdb, node);
4609                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4610                                 *nodep = node;
4611                         }
4612                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4613                                       rdataset);
4614                         if (need_headerupdate(nsheader, search.now))
4615                                 update = nsheader;
4616                         if (nssig != NULL) {
4617                                 bind_rdataset(search.rbtdb, node, nssig,
4618                                               search.now, sigrdataset);
4619                                 if (need_headerupdate(nssig, search.now))
4620                                         updatesig = nssig;
4621                         }
4622                         result = DNS_R_DELEGATION;
4623                         goto node_exit;
4624                 }
4625
4626                 /*
4627                  * Go find the deepest zone cut.
4628                  */
4629                 NODE_UNLOCK(lock, locktype);
4630                 goto find_ns;
4631         }
4632
4633         /*
4634          * We found what we were looking for, or we found a CNAME.
4635          */
4636
4637         if (nodep != NULL) {
4638                 new_reference(search.rbtdb, node);
4639                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4640                 *nodep = node;
4641         }
4642
4643         if (NEGATIVE(found)) {
4644                 /*
4645                  * We found a negative cache entry.
4646                  */
4647                 if (NXDOMAIN(found))
4648                         result = DNS_R_NCACHENXDOMAIN;
4649                 else
4650                         result = DNS_R_NCACHENXRRSET;
4651         } else if (type != found->type &&
4652                    type != dns_rdatatype_any &&
4653                    found->type == dns_rdatatype_cname) {
4654                 /*
4655                  * We weren't doing an ANY query and we found a CNAME instead
4656                  * of the type we were looking for, so we need to indicate
4657                  * that result to the caller.
4658                  */
4659                 result = DNS_R_CNAME;
4660         } else {
4661                 /*
4662                  * An ordinary successful query!
4663                  */
4664                 result = ISC_R_SUCCESS;
4665         }
4666
             /*
              * For an ANY lookup that ended in ISC_R_SUCCESS nothing is
              * bound to 'rdataset' or 'sigrdataset'.  A signature is never
              * bound alongside a negative entry.
              */
4667         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4668             result == DNS_R_NCACHENXRRSET) {
4669                 bind_rdataset(search.rbtdb, node, found, search.now,
4670                               rdataset);
4671                 if (need_headerupdate(found, search.now))
4672                         update = found;
4673                 if (!NEGATIVE(found) && foundsig != NULL) {
4674                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4675                                       sigrdataset);
4676                         if (need_headerupdate(foundsig, search.now))
4677                                 updatesig = foundsig;
4678                 }
4679         }
4680
             /*
              * The node lock is still held here.  If a header needs
              * updating and the lock is not yet in write mode, it is dropped
              * and re-taken rather than upgraded, which is why
              * need_headerupdate() is evaluated again once the write lock
              * is held.
              */
4681  node_exit:
4682         if ((update != NULL || updatesig != NULL) &&
4683             locktype != isc_rwlocktype_write) {
4684                 NODE_UNLOCK(lock, locktype);
4685                 NODE_LOCK(lock, isc_rwlocktype_write);
4686                 locktype = isc_rwlocktype_write;
4687                 POST(locktype);
4688         }
4689         if (update != NULL && need_headerupdate(update, search.now))
4690                 update_header(search.rbtdb, update, search.now);
4691         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4692                 update_header(search.rbtdb, updatesig, search.now);
4693
4694         NODE_UNLOCK(lock, locktype);
4695
4696  tree_exit:
4697         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4698
4699         /*
4700          * If we found a zonecut but aren't going to use it, we have to
4701          * let go of it.
4702          */
4703         if (search.need_cleanup) {
4704                 node = search.zonecut;
4705                 INSIST(node != NULL);
4706                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4707
4708                 NODE_LOCK(lock, isc_rwlocktype_read);
4709                 decrement_reference(search.rbtdb, node, 0,
4710                                     isc_rwlocktype_read, isc_rwlocktype_none,
4711                                     ISC_FALSE);
4712                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4713         }
4714
4715         dns_rbtnodechain_reset(&search.chain);
4716
4717         return (result);
4718 }
4719
/*
 * Find an NS rdataset for 'name' in the cache database 'db': at the node
 * matching 'name' if it has one, otherwise whatever find_deepest_zonecut()
 * locates.
 *
 * If 'now' is 0 the current time is used.  DNS_DBFIND_NOEXACT in 'options'
 * is translated to DNS_RBTFIND_NOEXACT for the tree search, which is run
 * without a callback.  The tree lock is held for reading until tree_exit.
 *
 * When the NS header is taken from the exactly matched node it is bound to
 * 'rdataset', an RRSIG(NS) header (if one exists) is bound to
 * 'sigrdataset', and, if 'nodep' is not NULL, '*nodep' receives a new
 * reference to the node.
 *
 * A DNS_R_DELEGATION result is reported to the caller as ISC_R_SUCCESS;
 * other results are returned unchanged.
 */
4720 static isc_result_t
4721 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4722                   isc_stdtime_t now, dns_dbnode_t **nodep,
4723                   dns_name_t *foundname,
4724                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4725 {
4726         dns_rbtnode_t *node = NULL;
4727         nodelock_t *lock;
4728         isc_result_t result;
4729         rbtdb_search_t search;
4730         rdatasetheader_t *header, *header_prev, *header_next;
4731         rdatasetheader_t *found, *foundsig;
4732         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4733         isc_rwlocktype_t locktype;
4734
4735         search.rbtdb = (dns_rbtdb_t *)db;
4736
4737         REQUIRE(VALID_RBTDB(search.rbtdb));
4738
4739         if (now == 0)
4740                 isc_stdtime_get(&now);
4741
4742         search.rbtversion = NULL;
4743         search.serial = 1;
4744         search.options = options;
4745         search.copy_name = ISC_FALSE;
4746         search.need_cleanup = ISC_FALSE;
4747         search.wild = ISC_FALSE;
4748         search.zonecut = NULL;
4749         dns_fixedname_init(&search.zonecut_name);
4750         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4751         search.now = now;
4752
4753         if ((options & DNS_DBFIND_NOEXACT) != 0)
4754                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4755
4756         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4757
4758         /*
4759          * Search down from the root of the tree.
4760          */
4761         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4762                                   &search.chain, rbtoptions, NULL, &search);
4763
4764         if (result == DNS_R_PARTIALMATCH) {
             /*
              * find_ns is also reached by goto when the exactly matched
              * node turns out to have no NS rdataset; the node lock has
              * been released by then.
              */
4765         find_ns:
4766                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4767                                               rdataset, sigrdataset);
4768                 goto tree_exit;
4769         } else if (result != ISC_R_SUCCESS)
4770                 goto tree_exit;
4771
4772         /*
4773          * We now go looking for an NS rdataset at the node.
4774          */
4775
4776         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4777         locktype = isc_rwlocktype_read;
4778         NODE_LOCK(lock, locktype);
4779
4780         found = NULL;
4781         foundsig = NULL;
4782         header_prev = NULL;
4783         for (header = node->data; header != NULL; header = header_next) {
4784                 header_next = header->next;
4785                 if (header->rdh_ttl <= now) {
4786                         /*
4787                          * This rdataset is stale.  If no one else is using the
4788                          * node, we can clean it up right now, otherwise we
4789                          * mark it as stale, and the node as dirty, so it will
4790                          * get cleaned up later.
4791                          */
4792                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4793                             (locktype == isc_rwlocktype_write ||
4794                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4795                                 /*
4796                                  * We update the node's status only when we
4797                                  * can get write access.
4798                                  */
4799                                 locktype = isc_rwlocktype_write;
4800
4801                                 if (dns_rbtnode_refcurrent(node) == 0) {
4802                                         isc_mem_t *mctx;
4803
4804                                         mctx = search.rbtdb->common.mctx;
4805                                         clean_stale_headers(search.rbtdb, mctx,
4806                                                             header);
4807                                         if (header_prev != NULL)
4808                                                 header_prev->next =
4809                                                         header->next;
4810                                         else
4811                                                 node->data = header->next;
4812                                         free_rdataset(search.rbtdb, mctx,
4813                                                       header);
4814                                 } else {
4815                                         header->attributes |=
4816                                                 RDATASET_ATTR_STALE;
4817                                         node->dirty = 1;
4818                                         header_prev = header;
4819                                 }
4820                         } else
4821                                 header_prev = header;
4822                 } else if (EXISTS(header)) {
4823                         /*
4824                          * If we found a type we were looking for, remember
4825                          * it.
4826                          */
4827                         if (header->type == dns_rdatatype_ns) {
4828                                 /*
4829                                  * Remember a NS rdataset even if we're
4830                                  * not specifically looking for it, because
4831                                  * we might need it later.
4832                                  */
4833                                 found = header;
4834                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4835                                 /*
4836                                  * If we need the NS rdataset, we'll also
4837                                  * need its signature.
4838                                  */
4839                                 foundsig = header;
4840                         }
4841                         header_prev = header;
4842                 } else
4843                         header_prev = header;
4844         }
4845
4846         if (found == NULL) {
4847                 /*
4848                  * No NS records here.
4849                  */
4850                 NODE_UNLOCK(lock, locktype);
4851                 goto find_ns;
4852         }
4853
4854         if (nodep != NULL) {
4855                 new_reference(search.rbtdb, node);
4856                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4857                 *nodep = node;
4858         }
4859
4860         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4861         if (foundsig != NULL)
4862                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4863                               sigrdataset);
4864
             /*
              * If either header needs updating, make sure the node lock is
              * held in write mode (dropping and re-taking it if necessary)
              * and then test need_headerupdate() again under that lock
              * before calling update_header().
              */
4865         if (need_headerupdate(found, search.now) ||
4866             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4867                 if (locktype != isc_rwlocktype_write) {
4868                         NODE_UNLOCK(lock, locktype);
4869                         NODE_LOCK(lock, isc_rwlocktype_write);
4870                         locktype = isc_rwlocktype_write;
4871                         POST(locktype);
4872                 }
4873                 if (need_headerupdate(found, search.now))
4874                         update_header(search.rbtdb, found, search.now);
4875                 if (foundsig != NULL &&
4876                     need_headerupdate(foundsig, search.now)) {
4877                         update_header(search.rbtdb, foundsig, search.now);
4878                 }
4879         }
4880
4881         NODE_UNLOCK(lock, locktype);
4882
4883  tree_exit:
4884         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4885
             /*
              * No callback was passed to dns_rbt_findnode() above, so
              * nothing is expected to have requested zone cut cleanup.
              */
4886         INSIST(!search.need_cleanup);
4887
4888         dns_rbtnodechain_reset(&search.chain);
4889
             /*
              * A delegation result from find_deepest_zonecut() is what this
              * function was asked to find, so report it as success.
              */
4890         if (result == DNS_R_DELEGATION)
4891                 result = ISC_R_SUCCESS;
4892
4893         return (result);
4894 }
4895
/*
 * Attach '*targetp' to the node 'source' by taking one more reference on
 * it.
 *
 * Requires 'db' to be a valid rbtdb and 'targetp' to be non-NULL and to
 * point to a NULL pointer.  The node's reference count is incremented
 * between NODE_STRONGLOCK and NODE_STRONGUNLOCK on the lock selected by
 * node->locknum.
 */
4896 static void
4897 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4898         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4899         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4900         unsigned int refs;
4901
4902         REQUIRE(VALID_RBTDB(rbtdb));
4903         REQUIRE(targetp != NULL && *targetp == NULL);
4904
4905         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4906         dns_rbtnode_refincrement(node, &refs);
             /*
              * 'refs' is filled in by dns_rbtnode_refincrement()
              * (presumably the count after the increment -- confirm against
              * its definition); a zero value is treated as a fatal
              * inconsistency.
              */
4907         INSIST(refs != 0);
4908         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4909
4910         *targetp = source;
4911 }
4912
4913 static void
4914 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4915         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4916         dns_rbtnode_t *node;
4917         isc_boolean_t want_free = ISC_FALSE;
4918         isc_boolean_t inactive = ISC_FALSE;
4919         rbtdb_nodelock_t *nodelock;
4920
4921         REQUIRE(VALID_RBTDB(rbtdb));
4922         REQUIRE(targetp != NULL && *targetp != NULL);
4923
4924         node = (dns_rbtnode_t *)(*targetp);
4925         nodelock = &rbtdb->node_locks[node->locknum];
4926
4927         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4928
4929         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4930                                 isc_rwlocktype_none, ISC_FALSE)) {
4931                 if (isc_refcount_current(&nodelock->references) == 0 &&
4932                     nodelock->exiting) {
4933                         inactive = ISC_TRUE;
4934                 }
4935         }
4936
4937         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4938
4939         *targetp = NULL;
4940
4941         if (inactive) {
4942                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4943                 rbtdb->active--;
4944                 if (rbtdb->active == 0)
4945                         want_free = ISC_TRUE;
4946                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4947                 if (want_free) {
4948                         char buf[DNS_NAME_FORMATSIZE];
4949                         if (dns_name_dynamic(&rbtdb->common.origin))
4950                                 dns_name_format(&rbtdb->common.origin, buf,
4951                                                 sizeof(buf));
4952                         else
4953                                 strcpy(buf, "<UNKNOWN>");
4954                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4955                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4956                                       "calling free_rbtdb(%s)", buf);
4957                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4958                 }
4959         }
4960 }
4961
4962 static isc_result_t
4963 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4964         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4965         dns_rbtnode_t *rbtnode = node;
4966         rdatasetheader_t *header;
4967         isc_boolean_t force_expire = ISC_FALSE;
4968         /*
4969          * These are the category and module used by the cache cleaner.
4970          */
4971         isc_boolean_t log = ISC_FALSE;
4972         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4973         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4974         int level = ISC_LOG_DEBUG(2);
4975         char printname[DNS_NAME_FORMATSIZE];
4976
4977         REQUIRE(VALID_RBTDB(rbtdb));
4978
4979         /*
4980          * Caller must hold a tree lock.
4981          */
4982
4983         if (now == 0)
4984                 isc_stdtime_get(&now);
4985
4986         if (isc_mem_isovermem(rbtdb->common.mctx)) {
4987                 isc_uint32_t val;
4988
4989                 isc_random_get(&val);
4990                 /*
4991                  * XXXDCL Could stand to have a better policy, like LRU.
4992                  */
4993                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4994
4995                 /*
4996                  * Note that 'log' can be true IFF overmem is also true.
4997                  * overmem can currently only be true for cache
4998                  * databases -- hence all of the "overmem cache" log strings.
4999                  */
5000                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5001                 if (log)
5002                         isc_log_write(dns_lctx, category, module, level,
5003                                       "overmem cache: %s %s",
5004                                       force_expire ? "FORCE" : "check",
5005                                       dns_rbt_formatnodename(rbtnode,
5006                                                            printname,
5007                                                            sizeof(printname)));
5008         }
5009
5010         /*
5011          * We may not need write access, but this code path is not performance
5012          * sensitive, so it should be okay to always lock as a writer.
5013          */
5014         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5015                   isc_rwlocktype_write);
5016
5017         for (header = rbtnode->data; header != NULL; header = header->next)
5018                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5019                         /*
5020                          * We don't check if refcurrent(rbtnode) == 0 and try
5021                          * to free like we do in cache_find(), because
5022                          * refcurrent(rbtnode) must be non-zero.  This is so
5023                          * because 'node' is an argument to the function.
5024                          */
5025                         header->attributes |= RDATASET_ATTR_STALE;
5026                         rbtnode->dirty = 1;
5027                         if (log)
5028                                 isc_log_write(dns_lctx, category, module,
5029                                               level, "overmem cache: stale %s",
5030                                               printname);
5031                 } else if (force_expire) {
5032                         if (! RETAIN(header)) {
5033                                 set_ttl(rbtdb, header, 0);
5034                                 header->attributes |= RDATASET_ATTR_STALE;
5035                                 rbtnode->dirty = 1;
5036                         } else if (log) {
5037                                 isc_log_write(dns_lctx, category, module,
5038                                               level, "overmem cache: "
5039                                               "reprieve by RETAIN() %s",
5040                                               printname);
5041                         }
5042                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5043                         isc_log_write(dns_lctx, category, module, level,
5044                                       "overmem cache: saved %s", printname);
5045
5046         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5047                     isc_rwlocktype_write);
5048
5049         return (ISC_R_SUCCESS);
5050 }
5051
5052 static void
5053 overmem(dns_db_t *db, isc_boolean_t overmem) {
5054         /* This is an empty callback.  See adb.c:water() */
5055
5056         UNUSED(db);
5057         UNUSED(overmem);
5058
5059         return;
5060 }
5061
5062 static void
5063 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5064         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5065         dns_rbtnode_t *rbtnode = node;
5066         isc_boolean_t first;
5067
5068         REQUIRE(VALID_RBTDB(rbtdb));
5069
5070         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5071                   isc_rwlocktype_read);
5072
5073         fprintf(out, "node %p, %u references, locknum = %u\n",
5074                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5075                 rbtnode->locknum);
5076         if (rbtnode->data != NULL) {
5077                 rdatasetheader_t *current, *top_next;
5078
5079                 for (current = rbtnode->data; current != NULL;
5080                      current = top_next) {
5081                         top_next = current->next;
5082                         first = ISC_TRUE;
5083                         fprintf(out, "\ttype %u", current->type);
5084                         do {
5085                                 if (!first)
5086                                         fprintf(out, "\t");
5087                                 first = ISC_FALSE;
5088                                 fprintf(out,
5089                                         "\tserial = %lu, ttl = %u, "
5090                                         "trust = %u, attributes = %u, "
5091                                         "resign = %u\n",
5092                                         (unsigned long)current->serial,
5093                                         current->rdh_ttl,
5094                                         current->trust,
5095                                         current->attributes,
5096                                         current->resign);
5097                                 current = current->down;
5098                         } while (current != NULL);
5099                 }
5100         } else
5101                 fprintf(out, "(empty)\n");
5102
5103         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5104                     isc_rwlocktype_read);
5105 }
5106
5107 static isc_result_t
5108 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5109 {
5110         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5111         rbtdb_dbiterator_t *rbtdbiter;
5112
5113         REQUIRE(VALID_RBTDB(rbtdb));
5114
5115         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5116         if (rbtdbiter == NULL)
5117                 return (ISC_R_NOMEMORY);
5118
5119         rbtdbiter->common.methods = &dbiterator_methods;
5120         rbtdbiter->common.db = NULL;
5121         dns_db_attach(db, &rbtdbiter->common.db);
5122         rbtdbiter->common.relative_names =
5123                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5124         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5125         rbtdbiter->common.cleaning = ISC_FALSE;
5126         rbtdbiter->paused = ISC_TRUE;
5127         rbtdbiter->tree_locked = isc_rwlocktype_none;
5128         rbtdbiter->result = ISC_R_SUCCESS;
5129         dns_fixedname_init(&rbtdbiter->name);
5130         dns_fixedname_init(&rbtdbiter->origin);
5131         rbtdbiter->node = NULL;
5132         rbtdbiter->delete = 0;
5133         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5134         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5135         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5136         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5137         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5138         if (rbtdbiter->nsec3only)
5139                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5140         else
5141                 rbtdbiter->current = &rbtdbiter->chain;
5142
5143         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5144
5145         return (ISC_R_SUCCESS);
5146 }
5147
5148 static isc_result_t
5149 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5150                   dns_rdatatype_t type, dns_rdatatype_t covers,
5151                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5152                   dns_rdataset_t *sigrdataset)
5153 {
5154         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5155         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5156         rdatasetheader_t *header, *header_next, *found, *foundsig;
5157         rbtdb_serial_t serial;
5158         rbtdb_version_t *rbtversion = version;
5159         isc_boolean_t close_version = ISC_FALSE;
5160         rbtdb_rdatatype_t matchtype, sigmatchtype;
5161
5162         REQUIRE(VALID_RBTDB(rbtdb));
5163         REQUIRE(type != dns_rdatatype_any);
5164         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
5165
5166         if (rbtversion == NULL) {
5167                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5168                 close_version = ISC_TRUE;
5169         }
5170         serial = rbtversion->serial;
5171         now = 0;
5172
5173         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5174                   isc_rwlocktype_read);
5175
5176         found = NULL;
5177         foundsig = NULL;
5178         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5179         if (covers == 0)
5180                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5181         else
5182                 sigmatchtype = 0;
5183
5184         for (header = rbtnode->data; header != NULL; header = header_next) {
5185                 header_next = header->next;
5186                 do {
5187                         if (header->serial <= serial &&
5188                             !IGNORE(header)) {
5189                                 /*
5190                                  * Is this a "this rdataset doesn't
5191                                  * exist" record?
5192                                  */
5193                                 if (NONEXISTENT(header))
5194                                         header = NULL;
5195                                 break;
5196                         } else
5197                                 header = header->down;
5198                 } while (header != NULL);
5199                 if (header != NULL) {
5200                         /*
5201                          * We have an active, extant rdataset.  If it's a
5202                          * type we're looking for, remember it.
5203                          */
5204                         if (header->type == matchtype) {
5205                                 found = header;
5206                                 if (foundsig != NULL)
5207                                         break;
5208                         } else if (header->type == sigmatchtype) {
5209                                 foundsig = header;
5210                                 if (found != NULL)
5211                                         break;
5212                         }
5213                 }
5214         }
5215         if (found != NULL) {
5216                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5217                 if (foundsig != NULL)
5218                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5219                                       sigrdataset);
5220         }
5221
5222         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5223                     isc_rwlocktype_read);
5224
5225         if (close_version)
5226                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5227                              ISC_FALSE);
5228
5229         if (found == NULL)
5230                 return (ISC_R_NOTFOUND);
5231
5232         return (ISC_R_SUCCESS);
5233 }
5234
5235 static isc_result_t
5236 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5237                    dns_rdatatype_t type, dns_rdatatype_t covers,
5238                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5239                    dns_rdataset_t *sigrdataset)
5240 {
5241         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5242         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5243         rdatasetheader_t *header, *header_next, *found, *foundsig;
5244         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5245         isc_result_t result;
5246         nodelock_t *lock;
5247         isc_rwlocktype_t locktype;
5248
5249         REQUIRE(VALID_RBTDB(rbtdb));
5250         REQUIRE(type != dns_rdatatype_any);
5251
5252         UNUSED(version);
5253
5254         result = ISC_R_SUCCESS;
5255
5256         if (now == 0)
5257                 isc_stdtime_get(&now);
5258
5259         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5260         locktype = isc_rwlocktype_read;
5261         NODE_LOCK(lock, locktype);
5262
5263         found = NULL;
5264         foundsig = NULL;
5265         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5266         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5267         if (covers == 0)
5268                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5269         else
5270                 sigmatchtype = 0;
5271
5272         for (header = rbtnode->data; header != NULL; header = header_next) {
5273                 header_next = header->next;
5274                 if (header->rdh_ttl <= now) {
5275                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5276                             (locktype == isc_rwlocktype_write ||
5277                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5278                                 /*
5279                                  * We update the node's status only when we
5280                                  * can get write access.
5281                                  */
5282                                 locktype = isc_rwlocktype_write;
5283
5284                                 /*
5285                                  * We don't check if refcurrent(rbtnode) == 0
5286                                  * and try to free like we do in cache_find(),
5287                                  * because refcurrent(rbtnode) must be
5288                                  * non-zero.  This is so because 'node' is an
5289                                  * argument to the function.
5290                                  */
5291                                 header->attributes |= RDATASET_ATTR_STALE;
5292                                 rbtnode->dirty = 1;
5293                         }
5294                 } else if (EXISTS(header)) {
5295                         if (header->type == matchtype)
5296                                 found = header;
5297                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5298                                  header->type == negtype)
5299                                 found = header;
5300                         else if (header->type == sigmatchtype)
5301                                 foundsig = header;
5302                 }
5303         }
5304         if (found != NULL) {
5305                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5306                 if (!NEGATIVE(found) && foundsig != NULL)
5307                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5308                                       sigrdataset);
5309         }
5310
5311         NODE_UNLOCK(lock, locktype);
5312
5313         if (found == NULL)
5314                 return (ISC_R_NOTFOUND);
5315
5316         if (NEGATIVE(found)) {
5317                 /*
5318                  * We found a negative cache entry.
5319                  */
5320                 if (NXDOMAIN(found))
5321                         result = DNS_R_NCACHENXDOMAIN;
5322                 else
5323                         result = DNS_R_NCACHENXRRSET;
5324         }
5325
5326         return (result);
5327 }
5328
5329 static isc_result_t
5330 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5331              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5332 {
5333         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5334         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5335         rbtdb_version_t *rbtversion = version;
5336         rbtdb_rdatasetiter_t *iterator;
5337         unsigned int refs;
5338
5339         REQUIRE(VALID_RBTDB(rbtdb));
5340
5341         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5342         if (iterator == NULL)
5343                 return (ISC_R_NOMEMORY);
5344
5345         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5346                 now = 0;
5347                 if (rbtversion == NULL)
5348                         currentversion(db,
5349                                  (dns_dbversion_t **) (void *)(&rbtversion));
5350                 else {
5351                         unsigned int refs;
5352
5353                         INSIST(rbtversion->rbtdb == rbtdb);
5354
5355                         isc_refcount_increment(&rbtversion->references,
5356                                                &refs);
5357                         INSIST(refs > 1);
5358                 }
5359         } else {
5360                 if (now == 0)
5361                         isc_stdtime_get(&now);
5362                 rbtversion = NULL;
5363         }
5364
5365         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5366         iterator->common.methods = &rdatasetiter_methods;
5367         iterator->common.db = db;
5368         iterator->common.node = node;
5369         iterator->common.version = (dns_dbversion_t *)rbtversion;
5370         iterator->common.now = now;
5371
5372         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5373
5374         dns_rbtnode_refincrement(rbtnode, &refs);
5375         INSIST(refs != 0);
5376
5377         iterator->current = NULL;
5378
5379         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5380
5381         *iteratorp = (dns_rdatasetiter_t *)iterator;
5382
5383         return (ISC_R_SUCCESS);
5384 }
5385
5386 static isc_boolean_t
5387 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5388         rdatasetheader_t *header, *header_next;
5389         isc_boolean_t cname, other_data;
5390         dns_rdatatype_t rdtype;
5391
5392         /*
5393          * The caller must hold the node lock.
5394          */
5395
5396         /*
5397          * Look for CNAME and "other data" rdatasets active in our version.
5398          */
5399         cname = ISC_FALSE;
5400         other_data = ISC_FALSE;
5401         for (header = node->data; header != NULL; header = header_next) {
5402                 header_next = header->next;
5403                 if (header->type == dns_rdatatype_cname) {
5404                         /*
5405                          * Look for an active extant CNAME.
5406                          */
5407                         do {
5408                                 if (header->serial <= serial &&
5409                                     !IGNORE(header)) {
5410                                         /*
5411                                          * Is this a "this rdataset doesn't
5412                                          * exist" record?
5413                                          */
5414                                         if (NONEXISTENT(header))
5415                                                 header = NULL;
5416                                         break;
5417                                 } else
5418                                         header = header->down;
5419                         } while (header != NULL);
5420                         if (header != NULL)
5421                                 cname = ISC_TRUE;
5422                 } else {
5423                         /*
5424                          * Look for active extant "other data".
5425                          *
5426                          * "Other data" is any rdataset whose type is not
5427                          * KEY, NSEC, SIG or RRSIG.
5428                          */
5429                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5430                         if (rdtype != dns_rdatatype_key &&
5431                             rdtype != dns_rdatatype_sig &&
5432                             rdtype != dns_rdatatype_nsec &&
5433                             rdtype != dns_rdatatype_rrsig) {
5434                                 /*
5435                                  * Is it active and extant?
5436                                  */
5437                                 do {
5438                                         if (header->serial <= serial &&
5439                                             !IGNORE(header)) {
5440                                                 /*
5441                                                  * Is this a "this rdataset
5442                                                  * doesn't exist" record?
5443                                                  */
5444                                                 if (NONEXISTENT(header))
5445                                                         header = NULL;
5446                                                 break;
5447                                         } else
5448                                                 header = header->down;
5449                                 } while (header != NULL);
5450                                 if (header != NULL)
5451                                         other_data = ISC_TRUE;
5452                         }
5453                 }
5454         }
5455
5456         if (cname && other_data)
5457                 return (ISC_TRUE);
5458
5459         return (ISC_FALSE);
5460 }
5461
5462 static isc_result_t
5463 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5464         isc_result_t result;
5465
5466         INSIST(!IS_CACHE(rbtdb));
5467         INSIST(newheader->heap_index == 0);
5468         INSIST(!ISC_LINK_LINKED(newheader, link));
5469
5470         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5471         return (result);
5472 }
5473
5474 static isc_result_t
5475 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5476     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5477     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5478 {
5479         rbtdb_changed_t *changed = NULL;
5480         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5481         unsigned char *merged;
5482         isc_result_t result;
5483         isc_boolean_t header_nx;
5484         isc_boolean_t newheader_nx;
5485         isc_boolean_t merge;
5486         dns_rdatatype_t rdtype, covers;
5487         rbtdb_rdatatype_t negtype, sigtype;
5488         dns_trust_t trust;
5489         int idx;
5490
5491         /*
5492          * Add an rdatasetheader_t to a node.
5493          */
5494
5495         /*
5496          * Caller must be holding the node lock.
5497          */
5498
5499         if ((options & DNS_DBADD_MERGE) != 0) {
5500                 REQUIRE(rbtversion != NULL);
5501                 merge = ISC_TRUE;
5502         } else
5503                 merge = ISC_FALSE;
5504
5505         if ((options & DNS_DBADD_FORCE) != 0)
5506                 trust = dns_trust_ultimate;
5507         else
5508                 trust = newheader->trust;
5509
5510         if (rbtversion != NULL && !loading) {
5511                 /*
5512                  * We always add a changed record, even if no changes end up
5513                  * being made to this node, because it's harmless and
5514                  * simplifies the code.
5515                  */
5516                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5517                 if (changed == NULL) {
5518                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5519                         return (ISC_R_NOMEMORY);
5520                 }
5521         }
5522
5523         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5524         topheader_prev = NULL;
5525         sigheader = NULL;
5526         negtype = 0;
5527         if (rbtversion == NULL && !newheader_nx) {
5528                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5529                 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5530                 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
5531                 if (NEGATIVE(newheader)) {
5532                         /*
5533                          * We're adding a negative cache entry.
5534                          */
5535                         for (topheader = rbtnode->data;
5536                              topheader != NULL;
5537                              topheader = topheader->next) {
5538                                 /*
5539                                  * If we're adding an negative cache entry
5540                                  * which covers all types (NXDOMAIN,
5541                                  * NODATA(QTYPE=ANY)).
5542                                  *
5543                                  * We make all other data stale so that the
5544                                  * only rdataset that can be found at this
5545                                  * node is the negative cache entry.
5546                                  *
5547                                  * Otherwise look for any RRSIGs of the
5548                                  * given type so they can be marked stale
5549                                  * later.
5550                                  */
5551                                 if (covers == dns_rdatatype_any) {
5552                                         set_ttl(rbtdb, topheader, 0);
5553                                         topheader->attributes |=
5554                                                 RDATASET_ATTR_STALE;
5555                                         rbtnode->dirty = 1;
5556                                 } else if (topheader->type == sigtype)
5557                                         sigheader = topheader;
5558                         }
5559                         if (covers == dns_rdatatype_any)
5560                                 goto find_header;
5561                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5562                 } else {
5563                         /*
5564                          * We're adding something that isn't a
5565                          * negative cache entry.  Look for an extant
5566                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5567                          * cache entry.  If we're adding an RRSIG, also
5568                          * check for an extant non-stale NODATA ncache
5569                          * entry which covers the same type as the RRSIG.
5570                          */
5571                         for (topheader = rbtnode->data;
5572                              topheader != NULL;
5573                              topheader = topheader->next) {
5574                                 if ((topheader->type ==
5575                                         RBTDB_RDATATYPE_NCACHEANY) ||
5576                                         (newheader->type == sigtype &&
5577                                         topheader->type ==
5578                                         RBTDB_RDATATYPE_VALUE(0, covers))) {
5579                                                 break;
5580                                         }
5581                         }
5582                         if (topheader != NULL && EXISTS(topheader) &&
5583                             topheader->rdh_ttl > now) {
5584                                 /*
5585                                  * Found one.
5586                                  */
5587                                 if (trust < topheader->trust) {
5588                                         /*
5589                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5590                                          * is more trusted.
5591                                          */
5592                                         free_rdataset(rbtdb,
5593                                                       rbtdb->common.mctx,
5594                                                       newheader);
5595                                         if (addedrdataset != NULL)
5596                                                 bind_rdataset(rbtdb, rbtnode,
5597                                                               topheader, now,
5598                                                               addedrdataset);
5599                                         return (DNS_R_UNCHANGED);
5600                                 }
5601                                 /*
5602                                  * The new rdataset is better.  Expire the
5603                                  * ncache entry.
5604                                  */
5605                                 set_ttl(rbtdb, topheader, 0);
5606                                 topheader->attributes |= RDATASET_ATTR_STALE;
5607                                 rbtnode->dirty = 1;
5608                                 topheader = NULL;
5609                                 goto find_header;
5610                         }
5611                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5612                 }
5613         }
5614
5615         for (topheader = rbtnode->data;
5616              topheader != NULL;
5617              topheader = topheader->next) {
5618                 if (topheader->type == newheader->type ||
5619                     topheader->type == negtype)
5620                         break;
5621                 topheader_prev = topheader;
5622         }
5623
5624  find_header:
5625         /*
5626          * If header isn't NULL, we've found the right type.  There may be
5627          * IGNORE rdatasets between the top of the chain and the first real
5628          * data.  We skip over them.
5629          */
5630         header = topheader;
5631         while (header != NULL && IGNORE(header))
5632                 header = header->down;
5633         if (header != NULL) {
5634                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5635
5636                 /*
5637                  * Deleting an already non-existent rdataset has no effect.
5638                  */
5639                 if (header_nx && newheader_nx) {
5640                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5641                         return (DNS_R_UNCHANGED);
5642                 }
5643
5644                 /*
5645                  * Trying to add an rdataset with lower trust to a cache DB
5646                  * has no effect, provided that the cache data isn't stale.
5647                  */
5648                 if (rbtversion == NULL && trust < header->trust &&
5649                     (header->rdh_ttl > now || header_nx)) {
5650                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5651                         if (addedrdataset != NULL)
5652                                 bind_rdataset(rbtdb, rbtnode, header, now,
5653                                               addedrdataset);
5654                         return (DNS_R_UNCHANGED);
5655                 }
5656
5657                 /*
5658                  * Don't merge if a nonexistent rdataset is involved.
5659                  */
5660                 if (merge && (header_nx || newheader_nx))
5661                         merge = ISC_FALSE;
5662
5663                 /*
5664                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5665                  * that is the union of 'newheader' and 'header'.
5666                  */
5667                 if (merge) {
5668                         unsigned int flags = 0;
5669                         INSIST(rbtversion->serial >= header->serial);
5670                         merged = NULL;
5671                         result = ISC_R_SUCCESS;
5672
5673                         if ((options & DNS_DBADD_EXACT) != 0)
5674                                 flags |= DNS_RDATASLAB_EXACT;
5675                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5676                              newheader->rdh_ttl != header->rdh_ttl)
5677                                         result = DNS_R_NOTEXACT;
5678                         else if (newheader->rdh_ttl != header->rdh_ttl)
5679                                 flags |= DNS_RDATASLAB_FORCE;
5680                         if (result == ISC_R_SUCCESS)
5681                                 result = dns_rdataslab_merge(
5682                                              (unsigned char *)header,
5683                                              (unsigned char *)newheader,
5684                                              (unsigned int)(sizeof(*newheader)),
5685                                              rbtdb->common.mctx,
5686                                              rbtdb->common.rdclass,
5687                                              (dns_rdatatype_t)header->type,
5688                                              flags, &merged);
5689                         if (result == ISC_R_SUCCESS) {
5690                                 /*
5691                                  * If 'header' has the same serial number as
5692                                  * we do, we could clean it up now if we knew
5693                                  * that our caller had no references to it.
5694                                  * We don't know this, however, so we leave it
5695                                  * alone.  It will get cleaned up when
5696                                  * clean_zone_node() runs.
5697                                  */
5698                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5699                                               newheader);
5700                                 newheader = (rdatasetheader_t *)merged;
5701                                 if (loading && RESIGN(newheader) &&
5702                                     RESIGN(header) &&
5703                                     header->resign < newheader->resign)
5704                                         newheader->resign = header->resign;
5705                         } else {
5706                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5707                                               newheader);
5708                                 return (result);
5709                         }
5710                 }
5711                 /*
5712                  * Don't replace existing NS, A and AAAA RRsets
5713                  * in the cache if they already exist.  This
5714                  * prevents named being locked to old servers.
5715                  * Don't lower trust of existing record if the
5716                  * update is forced.
5717                  */
5718                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5719                     header->type == dns_rdatatype_ns &&
5720                     !header_nx && !newheader_nx &&
5721                     header->trust >= newheader->trust &&
5722                     dns_rdataslab_equalx((unsigned char *)header,
5723                                          (unsigned char *)newheader,
5724                                          (unsigned int)(sizeof(*newheader)),
5725                                          rbtdb->common.rdclass,
5726                                          (dns_rdatatype_t)header->type)) {
5727                         /*
5728                          * Honour the new ttl if it is less than the
5729                          * older one.
5730                          */
5731                         if (header->rdh_ttl > newheader->rdh_ttl)
5732                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5733                         if (header->noqname == NULL &&
5734                             newheader->noqname != NULL) {
5735                                 header->noqname = newheader->noqname;
5736                                 newheader->noqname = NULL;
5737                         }
5738                         if (header->closest == NULL &&
5739                             newheader->closest != NULL) {
5740                                 header->closest = newheader->closest;
5741                                 newheader->closest = NULL;
5742                         }
5743                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5744                         if (addedrdataset != NULL)
5745                                 bind_rdataset(rbtdb, rbtnode, header, now,
5746                                               addedrdataset);
5747                         return (ISC_R_SUCCESS);
5748                 }
5749                 /*
5750                  * If we will be replacing an NS RRset, force its TTL
5751                  * to be no more than the current NS RRset's TTL.  This
5752                  * ensures the delegations that are withdrawn are honoured.
5753                  */
5754                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5755                     header->type == dns_rdatatype_ns &&
5756                     !header_nx && !newheader_nx &&
5757                     header->trust <= newheader->trust) {
5758                         if (newheader->rdh_ttl > header->rdh_ttl) {
5759                                 newheader->rdh_ttl = header->rdh_ttl;
5760                         }
5761                 }
5762                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5763                     (header->type == dns_rdatatype_a ||
5764                      header->type == dns_rdatatype_aaaa ||
5765                      header->type == dns_rdatatype_ds ||
5766                      header->type == RBTDB_RDATATYPE_SIGDDS) &&
5767                     !header_nx && !newheader_nx &&
5768                     header->trust >= newheader->trust &&
5769                     dns_rdataslab_equal((unsigned char *)header,
5770                                         (unsigned char *)newheader,
5771                                         (unsigned int)(sizeof(*newheader)))) {
5772                         /*
5773                          * Honour the new ttl if it is less than the
5774                          * older one.
5775                          */
5776                         if (header->rdh_ttl > newheader->rdh_ttl)
5777                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5778                         if (header->noqname == NULL &&
5779                             newheader->noqname != NULL) {
5780                                 header->noqname = newheader->noqname;
5781                                 newheader->noqname = NULL;
5782                         }
5783                         if (header->closest == NULL &&
5784                             newheader->closest != NULL) {
5785                                 header->closest = newheader->closest;
5786                                 newheader->closest = NULL;
5787                         }
5788                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5789                         if (addedrdataset != NULL)
5790                                 bind_rdataset(rbtdb, rbtnode, header, now,
5791                                               addedrdataset);
5792                         return (ISC_R_SUCCESS);
5793                 }
5794                 INSIST(rbtversion == NULL ||
5795                        rbtversion->serial >= topheader->serial);
5796                 if (topheader_prev != NULL)
5797                         topheader_prev->next = newheader;
5798                 else
5799                         rbtnode->data = newheader;
5800                 newheader->next = topheader->next;
5801                 if (loading) {
5802                         /*
5803                          * There are no other references to 'header' when
5804                          * loading, so we MAY clean up 'header' now.
5805                          * Since we don't generate changed records when
5806                          * loading, we MUST clean up 'header' now.
5807                          */
5808                         newheader->down = NULL;
5809                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5810                 } else {
5811                         newheader->down = topheader;
5812                         topheader->next = newheader;
5813                         rbtnode->dirty = 1;
5814                         if (changed != NULL)
5815                                 changed->dirty = ISC_TRUE;
5816                         if (rbtversion == NULL) {
5817                                 set_ttl(rbtdb, header, 0);
5818                                 header->attributes |= RDATASET_ATTR_STALE;
5819                                 if (sigheader != NULL) {
5820                                         set_ttl(rbtdb, sigheader, 0);
5821                                         sigheader->attributes |=
5822                                                  RDATASET_ATTR_STALE;
5823                                 }
5824                         }
5825                         idx = newheader->node->locknum;
5826                         if (IS_CACHE(rbtdb)) {
5827                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5828                                                  newheader, link);
5829                                 /*
5830                                  * XXXMLG We don't check the return value
5831                                  * here.  If it fails, we will not do TTL
5832                                  * based expiry on this node.  However, we
5833                                  * will do it on the LRU side, so memory
5834                                  * will not leak... for long.
5835                                  */
5836                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5837                         } else if (RESIGN(newheader))
5838                                 resign_insert(rbtdb, idx, newheader);
5839                 }
5840         } else {
5841                 /*
5842                  * No non-IGNORED rdatasets of the given type exist at
5843                  * this node.
5844                  */
5845
5846                 /*
5847                  * If we're trying to delete the type, don't bother.
5848                  */
5849                 if (newheader_nx) {
5850                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5851                         return (DNS_R_UNCHANGED);
5852                 }
5853
5854                 if (topheader != NULL) {
5855                         /*
5856                          * We have a list of rdatasets of the given type,
5857                          * but they're all marked IGNORE.  We simply insert
5858                          * the new rdataset at the head of the list.
5859                          *
5860                          * Ignored rdatasets cannot occur during loading, so
5861                          * we INSIST on it.
5862                          */
5863                         INSIST(!loading);
5864                         INSIST(rbtversion == NULL ||
5865                                rbtversion->serial >= topheader->serial);
5866                         if (topheader_prev != NULL)
5867                                 topheader_prev->next = newheader;
5868                         else
5869                                 rbtnode->data = newheader;
5870                         newheader->next = topheader->next;
5871                         newheader->down = topheader;
5872                         topheader->next = newheader;
5873                         rbtnode->dirty = 1;
5874                         if (changed != NULL)
5875                                 changed->dirty = ISC_TRUE;
5876                 } else {
5877                         /*
5878                          * No rdatasets of the given type exist at the node.
5879                          */
5880                         newheader->next = rbtnode->data;
5881                         newheader->down = NULL;
5882                         rbtnode->data = newheader;
5883                 }
5884                 idx = newheader->node->locknum;
5885                 if (IS_CACHE(rbtdb)) {
5886                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5887                                          newheader, link);
5888                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5889                 } else if (RESIGN(newheader)) {
5890                         resign_insert(rbtdb, idx, newheader);
5891                 }
5892         }
5893
5894         /*
5895          * Check if the node now contains CNAME and other data.
5896          */
5897         if (rbtversion != NULL &&
5898             cname_and_other_data(rbtnode, rbtversion->serial))
5899                 return (DNS_R_CNAMEANDOTHER);
5900
5901         if (addedrdataset != NULL)
5902                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5903
5904         return (ISC_R_SUCCESS);
5905 }
5906
5907 static inline isc_boolean_t
5908 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5909                 rbtdb_rdatatype_t type)
5910 {
5911         if (IS_CACHE(rbtdb)) {
5912                 if (type == dns_rdatatype_dname)
5913                         return (ISC_TRUE);
5914                 else
5915                         return (ISC_FALSE);
5916         } else if (type == dns_rdatatype_dname ||
5917                    (type == dns_rdatatype_ns &&
5918                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5919                 return (ISC_TRUE);
5920         return (ISC_FALSE);
5921 }
5922
5923 static inline isc_result_t
5924 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5925            dns_rdataset_t *rdataset)
5926 {
5927         struct noqname *noqname;
5928         isc_mem_t *mctx = rbtdb->common.mctx;
5929         dns_name_t name;
5930         dns_rdataset_t neg, negsig;
5931         isc_result_t result;
5932         isc_region_t r;
5933
5934         dns_name_init(&name, NULL);
5935         dns_rdataset_init(&neg);
5936         dns_rdataset_init(&negsig);
5937
5938         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5939         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5940
5941         noqname = isc_mem_get(mctx, sizeof(*noqname));
5942         if (noqname == NULL) {
5943                 result = ISC_R_NOMEMORY;
5944                 goto cleanup;
5945         }
5946         dns_name_init(&noqname->name, NULL);
5947         noqname->neg = NULL;
5948         noqname->negsig = NULL;
5949         noqname->type = neg.type;
5950         result = dns_name_dup(&name, mctx, &noqname->name);
5951         if (result != ISC_R_SUCCESS)
5952                 goto cleanup;
5953         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5954         if (result != ISC_R_SUCCESS)
5955                 goto cleanup;
5956         noqname->neg = r.base;
5957         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5958         if (result != ISC_R_SUCCESS)
5959                 goto cleanup;
5960         noqname->negsig = r.base;
5961         dns_rdataset_disassociate(&neg);
5962         dns_rdataset_disassociate(&negsig);
5963         newheader->noqname = noqname;
5964         return (ISC_R_SUCCESS);
5965
5966 cleanup:
5967         dns_rdataset_disassociate(&neg);
5968         dns_rdataset_disassociate(&negsig);
5969         free_noqname(mctx, &noqname);
5970         return(result);
5971 }
5972
5973 static inline isc_result_t
5974 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5975            dns_rdataset_t *rdataset)
5976 {
5977         struct noqname *closest;
5978         isc_mem_t *mctx = rbtdb->common.mctx;
5979         dns_name_t name;
5980         dns_rdataset_t neg, negsig;
5981         isc_result_t result;
5982         isc_region_t r;
5983
5984         dns_name_init(&name, NULL);
5985         dns_rdataset_init(&neg);
5986         dns_rdataset_init(&negsig);
5987
5988         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5989         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5990
5991         closest = isc_mem_get(mctx, sizeof(*closest));
5992         if (closest == NULL) {
5993                 result = ISC_R_NOMEMORY;
5994                 goto cleanup;
5995         }
5996         dns_name_init(&closest->name, NULL);
5997         closest->neg = NULL;
5998         closest->negsig = NULL;
5999         closest->type = neg.type;
6000         result = dns_name_dup(&name, mctx, &closest->name);
6001         if (result != ISC_R_SUCCESS)
6002                 goto cleanup;
6003         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6004         if (result != ISC_R_SUCCESS)
6005                 goto cleanup;
6006         closest->neg = r.base;
6007         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6008         if (result != ISC_R_SUCCESS)
6009                 goto cleanup;
6010         closest->negsig = r.base;
6011         dns_rdataset_disassociate(&neg);
6012         dns_rdataset_disassociate(&negsig);
6013         newheader->closest = closest;
6014         return (ISC_R_SUCCESS);
6015
6016  cleanup:
6017         dns_rdataset_disassociate(&neg);
6018         dns_rdataset_disassociate(&negsig);
6019         free_noqname(mctx, &closest);
6020         return(result);
6021 }
6022
6023 static dns_dbmethods_t zone_methods;
6024
6025 static isc_result_t
6026 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6027             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6028             dns_rdataset_t *addedrdataset)
6029 {
6030         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6031         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6032         rbtdb_version_t *rbtversion = version;
6033         isc_region_t region;
6034         rdatasetheader_t *newheader;
6035         rdatasetheader_t *header;
6036         isc_result_t result;
6037         isc_boolean_t delegating;
6038         isc_boolean_t tree_locked = ISC_FALSE;
6039         isc_boolean_t cache_is_overmem = ISC_FALSE;
6040
6041         REQUIRE(VALID_RBTDB(rbtdb));
6042         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6043
6044         if (rbtdb->common.methods == &zone_methods)
6045                 REQUIRE(((rbtnode->nsec3 &&
6046                           (rdataset->type == dns_rdatatype_nsec3 ||
6047                            rdataset->covers == dns_rdatatype_nsec3)) ||
6048                          (!rbtnode->nsec3 &&
6049                            rdataset->type != dns_rdatatype_nsec3 &&
6050                            rdataset->covers != dns_rdatatype_nsec3)));
6051
6052         if (rbtversion == NULL) {
6053                 if (now == 0)
6054                         isc_stdtime_get(&now);
6055         } else
6056                 now = 0;
6057
6058         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6059                                             &region, sizeof(rdatasetheader_t));
6060         if (result != ISC_R_SUCCESS)
6061                 return (result);
6062
6063         newheader = (rdatasetheader_t *)region.base;
6064         init_rdataset(rbtdb, newheader);
6065         set_ttl(rbtdb, newheader, rdataset->ttl + now);
6066         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6067                                                 rdataset->covers);
6068         newheader->attributes = 0;
6069         newheader->noqname = NULL;
6070         newheader->closest = NULL;
6071         newheader->count = init_count++;
6072         newheader->trust = rdataset->trust;
6073         newheader->additional_auth = NULL;
6074         newheader->additional_glue = NULL;
6075         newheader->last_used = now;
6076         newheader->node = rbtnode;
6077         if (rbtversion != NULL) {
6078                 newheader->serial = rbtversion->serial;
6079                 now = 0;
6080
6081                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6082                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6083                         newheader->resign = rdataset->resign;
6084                 } else
6085                         newheader->resign = 0;
6086         } else {
6087                 newheader->serial = 1;
6088                 newheader->resign = 0;
6089                 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6090                         newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6091                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6092                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6093                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6094                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6095                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6096                         result = addnoqname(rbtdb, newheader, rdataset);
6097                         if (result != ISC_R_SUCCESS) {
6098                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6099                                               newheader);
6100                                 return (result);
6101                         }
6102                 }
6103                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6104                         result = addclosest(rbtdb, newheader, rdataset);
6105                         if (result != ISC_R_SUCCESS) {
6106                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6107                                               newheader);
6108                                 return (result);
6109                         }
6110                 }
6111         }
6112
6113         /*
6114          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6115          * just DNAME for the cache), then we need to set the callback bit
6116          * on the node.
6117          */
6118         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6119                 delegating = ISC_TRUE;
6120         else
6121                 delegating = ISC_FALSE;
6122
6123         /*
6124          * If we're adding a delegation type or the DB is a cache in an overmem
6125          * state, hold an exclusive lock on the tree.  In the latter case
6126          * the lock does not necessarily have to be acquired but it will help
6127          * purge stale entries more effectively.
6128          */
6129         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6130                 cache_is_overmem = ISC_TRUE;
6131         if (delegating || cache_is_overmem) {
6132                 tree_locked = ISC_TRUE;
6133                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6134         }
6135
6136         if (cache_is_overmem)
6137                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6138
6139         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6140                   isc_rwlocktype_write);
6141
6142         if (rbtdb->rrsetstats != NULL) {
6143                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6144                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6145         }
6146
6147         if (IS_CACHE(rbtdb)) {
6148                 if (tree_locked)
6149                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6150
6151                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6152                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6153                         expire_header(rbtdb, header, tree_locked);
6154
6155                 /*
6156                  * If we've been holding a write lock on the tree just for
6157                  * cleaning, we can release it now.  However, we still need the
6158                  * node lock.
6159                  */
6160                 if (tree_locked && !delegating) {
6161                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6162                         tree_locked = ISC_FALSE;
6163                 }
6164         }
6165
6166         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6167                      addedrdataset, now);
6168         if (result == ISC_R_SUCCESS && delegating)
6169                 rbtnode->find_callback = 1;
6170
6171         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6172                     isc_rwlocktype_write);
6173
6174         if (tree_locked)
6175                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6176
6177         /*
6178          * Update the zone's secure status.  If version is non-NULL
6179          * this is deferred until closeversion() is called.
6180          */
6181         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6182                 iszonesecure(db, version, rbtdb->origin_node);
6183
6184         return (result);
6185 }
6186
6187 static isc_result_t
6188 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6189                  dns_rdataset_t *rdataset, unsigned int options,
6190                  dns_rdataset_t *newrdataset)
6191 {
6192         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6193         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6194         rbtdb_version_t *rbtversion = version;
6195         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6196         unsigned char *subresult;
6197         isc_region_t region;
6198         isc_result_t result;
6199         rbtdb_changed_t *changed;
6200
6201         REQUIRE(VALID_RBTDB(rbtdb));
6202         REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
6203
6204         if (rbtdb->common.methods == &zone_methods)
6205                 REQUIRE(((rbtnode->nsec3 &&
6206                           (rdataset->type == dns_rdatatype_nsec3 ||
6207                            rdataset->covers == dns_rdatatype_nsec3)) ||
6208                          (!rbtnode->nsec3 &&
6209                            rdataset->type != dns_rdatatype_nsec3 &&
6210                            rdataset->covers != dns_rdatatype_nsec3)));
6211
6212         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6213                                             &region,
6214                                             sizeof(rdatasetheader_t));
6215         if (result != ISC_R_SUCCESS)
6216                 return (result);
6217         newheader = (rdatasetheader_t *)region.base;
6218         init_rdataset(rbtdb, newheader);
6219         set_ttl(rbtdb, newheader, rdataset->ttl);
6220         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6221                                                 rdataset->covers);
6222         newheader->attributes = 0;
6223         newheader->serial = rbtversion->serial;
6224         newheader->trust = 0;
6225         newheader->noqname = NULL;
6226         newheader->closest = NULL;
6227         newheader->count = init_count++;
6228         newheader->additional_auth = NULL;
6229         newheader->additional_glue = NULL;
6230         newheader->last_used = 0;
6231         newheader->node = rbtnode;
6232         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6233                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6234                 newheader->resign = rdataset->resign;
6235         } else
6236                 newheader->resign = 0;
6237
6238         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6239                   isc_rwlocktype_write);
6240
6241         changed = add_changed(rbtdb, rbtversion, rbtnode);
6242         if (changed == NULL) {
6243                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6244                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6245                             isc_rwlocktype_write);
6246                 return (ISC_R_NOMEMORY);
6247         }
6248
6249         topheader_prev = NULL;
6250         for (topheader = rbtnode->data;
6251              topheader != NULL;
6252              topheader = topheader->next) {
6253                 if (topheader->type == newheader->type)
6254                         break;
6255                 topheader_prev = topheader;
6256         }
6257         /*
6258          * If header isn't NULL, we've found the right type.  There may be
6259          * IGNORE rdatasets between the top of the chain and the first real
6260          * data.  We skip over them.
6261          */
6262         header = topheader;
6263         while (header != NULL && IGNORE(header))
6264                 header = header->down;
6265         if (header != NULL && EXISTS(header)) {
6266                 unsigned int flags = 0;
6267                 subresult = NULL;
6268                 result = ISC_R_SUCCESS;
6269                 if ((options & DNS_DBSUB_EXACT) != 0) {
6270                         flags |= DNS_RDATASLAB_EXACT;
6271                         if (newheader->rdh_ttl != header->rdh_ttl)
6272                                 result = DNS_R_NOTEXACT;
6273                 }
6274                 if (result == ISC_R_SUCCESS)
6275                         result = dns_rdataslab_subtract(
6276                                         (unsigned char *)header,
6277                                         (unsigned char *)newheader,
6278                                         (unsigned int)(sizeof(*newheader)),
6279                                         rbtdb->common.mctx,
6280                                         rbtdb->common.rdclass,
6281                                         (dns_rdatatype_t)header->type,
6282                                         flags, &subresult);
6283                 if (result == ISC_R_SUCCESS) {
6284                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6285                         newheader = (rdatasetheader_t *)subresult;
6286                         init_rdataset(rbtdb, newheader);
6287                         /*
6288                          * We have to set the serial since the rdataslab
6289                          * subtraction routine copies the reserved portion of
6290                          * header, not newheader.
6291                          */
6292                         newheader->serial = rbtversion->serial;
6293                         /*
6294                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6295                          * to additional info.  We need to clear these fields
6296                          * to avoid having duplicated references.
6297                          */
6298                         newheader->additional_auth = NULL;
6299                         newheader->additional_glue = NULL;
6300                 } else if (result == DNS_R_NXRRSET) {
6301                         /*
6302                          * This subtraction would remove all of the rdata;
6303                          * add a nonexistent header instead.
6304                          */
6305                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6306                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6307                         if (newheader == NULL) {
6308                                 result = ISC_R_NOMEMORY;
6309                                 goto unlock;
6310                         }
6311                         set_ttl(rbtdb, newheader, 0);
6312                         newheader->type = topheader->type;
6313                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6314                         newheader->trust = 0;
6315                         newheader->serial = rbtversion->serial;
6316                         newheader->noqname = NULL;
6317                         newheader->closest = NULL;
6318                         newheader->count = 0;
6319                         newheader->additional_auth = NULL;
6320                         newheader->additional_glue = NULL;
6321                         newheader->node = rbtnode;
6322                         newheader->resign = 0;
6323                         newheader->last_used = 0;
6324                 } else {
6325                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6326                         goto unlock;
6327                 }
6328
6329                 /*
6330                  * If we're here, we want to link newheader in front of
6331                  * topheader.
6332                  */
6333                 INSIST(rbtversion->serial >= topheader->serial);
6334                 if (topheader_prev != NULL)
6335                         topheader_prev->next = newheader;
6336                 else
6337                         rbtnode->data = newheader;
6338                 newheader->next = topheader->next;
6339                 newheader->down = topheader;
6340                 topheader->next = newheader;
6341                 rbtnode->dirty = 1;
6342                 changed->dirty = ISC_TRUE;
6343         } else {
6344                 /*
6345                  * The rdataset doesn't exist, so we don't need to do anything
6346                  * to satisfy the deletion request.
6347                  */
6348                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6349                 if ((options & DNS_DBSUB_EXACT) != 0)
6350                         result = DNS_R_NOTEXACT;
6351                 else
6352                         result = DNS_R_UNCHANGED;
6353         }
6354
6355         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6356                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6357
6358  unlock:
6359         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6360                     isc_rwlocktype_write);
6361
6362         /*
6363          * Update the zone's secure status.  If version is non-NULL
6364          * this is deferred until closeversion() is called.
6365          */
6366         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6367                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6368
6369         return (result);
6370 }
6371
6372 static isc_result_t
6373 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6374                dns_rdatatype_t type, dns_rdatatype_t covers)
6375 {
6376         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6377         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6378         rbtdb_version_t *rbtversion = version;
6379         isc_result_t result;
6380         rdatasetheader_t *newheader;
6381
6382         REQUIRE(VALID_RBTDB(rbtdb));
6383         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6384
6385         if (type == dns_rdatatype_any)
6386                 return (ISC_R_NOTIMPLEMENTED);
6387         if (type == dns_rdatatype_rrsig && covers == 0)
6388                 return (ISC_R_NOTIMPLEMENTED);
6389
6390         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6391         if (newheader == NULL)
6392                 return (ISC_R_NOMEMORY);
6393         set_ttl(rbtdb, newheader, 0);
6394         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6395         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6396         newheader->trust = 0;
6397         newheader->noqname = NULL;
6398         newheader->closest = NULL;
6399         newheader->additional_auth = NULL;
6400         newheader->additional_glue = NULL;
6401         if (rbtversion != NULL)
6402                 newheader->serial = rbtversion->serial;
6403         else
6404                 newheader->serial = 0;
6405         newheader->count = 0;
6406         newheader->last_used = 0;
6407         newheader->node = rbtnode;
6408
6409         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6410                   isc_rwlocktype_write);
6411
6412         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6413                      ISC_FALSE, NULL, 0);
6414
6415         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6416                     isc_rwlocktype_write);
6417
6418         /*
6419          * Update the zone's secure status.  If version is non-NULL
6420          * this is deferred until closeversion() is called.
6421          */
6422         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6423                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6424
6425         return (result);
6426 }
6427
6428 static isc_result_t
6429 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6430         rbtdb_load_t *loadctx = arg;
6431         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6432         dns_rbtnode_t *node;
6433         isc_result_t result;
6434         isc_region_t region;
6435         rdatasetheader_t *newheader;
6436
6437         /*
6438          * This routine does no node locking.  See comments in
6439          * 'load' below for more information on loading and
6440          * locking.
6441          */
6442
6443
6444         /*
6445          * SOA records are only allowed at top of zone.
6446          */
6447         if (rdataset->type == dns_rdatatype_soa &&
6448             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6449                 return (DNS_R_NOTZONETOP);
6450
6451         if (rdataset->type != dns_rdatatype_nsec3 &&
6452             rdataset->covers != dns_rdatatype_nsec3)
6453                 add_empty_wildcards(rbtdb, name);
6454
6455         if (dns_name_iswildcard(name)) {
6456                 /*
6457                  * NS record owners cannot legally be wild cards.
6458                  */
6459                 if (rdataset->type == dns_rdatatype_ns)
6460                         return (DNS_R_INVALIDNS);
6461                 /*
6462                  * NSEC3 record owners cannot legally be wild cards.
6463                  */
6464                 if (rdataset->type == dns_rdatatype_nsec3)
6465                         return (DNS_R_INVALIDNSEC3);
6466                 result = add_wildcard_magic(rbtdb, name);
6467                 if (result != ISC_R_SUCCESS)
6468                         return (result);
6469         }
6470
6471         node = NULL;
6472         if (rdataset->type == dns_rdatatype_nsec3 ||
6473             rdataset->covers == dns_rdatatype_nsec3) {
6474                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6475                 if (result == ISC_R_SUCCESS)
6476                         node->nsec3 = 1;
6477         } else {
6478                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6479                 if (result == ISC_R_SUCCESS)
6480                         node->nsec3 = 0;
6481         }
6482         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6483                 return (result);
6484         if (result != ISC_R_EXISTS) {
6485                 dns_name_t foundname;
6486                 dns_name_init(&foundname, NULL);
6487                 dns_rbt_namefromnode(node, &foundname);
6488 #ifdef DNS_RBT_USEHASH
6489                 node->locknum = node->hashval % rbtdb->node_lock_count;
6490 #else
6491                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6492                         rbtdb->node_lock_count;
6493 #endif
6494         }
6495
6496         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6497                                             &region,
6498                                             sizeof(rdatasetheader_t));
6499         if (result != ISC_R_SUCCESS)
6500                 return (result);
6501         newheader = (rdatasetheader_t *)region.base;
6502         init_rdataset(rbtdb, newheader);
6503         set_ttl(rbtdb, newheader,
6504                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6505         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6506                                                 rdataset->covers);
6507         newheader->attributes = 0;
6508         newheader->trust = rdataset->trust;
6509         newheader->serial = 1;
6510         newheader->noqname = NULL;
6511         newheader->closest = NULL;
6512         newheader->count = init_count++;
6513         newheader->additional_auth = NULL;
6514         newheader->additional_glue = NULL;
6515         newheader->last_used = 0;
6516         newheader->node = node;
6517         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6518                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6519                 newheader->resign = rdataset->resign;
6520         } else
6521                 newheader->resign = 0;
6522
6523         result = add(rbtdb, node, rbtdb->current_version, newheader,
6524                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6525         if (result == ISC_R_SUCCESS &&
6526             delegating_type(rbtdb, node, rdataset->type))
6527                 node->find_callback = 1;
6528         else if (result == DNS_R_UNCHANGED)
6529                 result = ISC_R_SUCCESS;
6530
6531         return (result);
6532 }
6533
6534 static isc_result_t
6535 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6536         rbtdb_load_t *loadctx;
6537         dns_rbtdb_t *rbtdb;
6538
6539         rbtdb = (dns_rbtdb_t *)db;
6540
6541         REQUIRE(VALID_RBTDB(rbtdb));
6542
6543         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6544         if (loadctx == NULL)
6545                 return (ISC_R_NOMEMORY);
6546
6547         loadctx->rbtdb = rbtdb;
6548         if (IS_CACHE(rbtdb))
6549                 isc_stdtime_get(&loadctx->now);
6550         else
6551                 loadctx->now = 0;
6552
6553         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6554
6555         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6556                 == 0);
6557         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6558
6559         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6560
6561         *addp = loading_addrdataset;
6562         *dbloadp = loadctx;
6563
6564         return (ISC_R_SUCCESS);
6565 }
6566
6567 static isc_result_t
6568 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6569         rbtdb_load_t *loadctx;
6570         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6571
6572         REQUIRE(VALID_RBTDB(rbtdb));
6573         REQUIRE(dbloadp != NULL);
6574         loadctx = *dbloadp;
6575         REQUIRE(loadctx->rbtdb == rbtdb);
6576
6577         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6578
6579         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6580         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6581
6582         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6583         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6584
6585         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6586
6587         /*
6588          * If there's a KEY rdataset at the zone origin containing a
6589          * zone key, we consider the zone secure.
6590          */
6591         if (! IS_CACHE(rbtdb))
6592                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6593
6594         *dbloadp = NULL;
6595
6596         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6597
6598         return (ISC_R_SUCCESS);
6599 }
6600
6601 static isc_result_t
6602 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6603      dns_masterformat_t masterformat) {
6604         dns_rbtdb_t *rbtdb;
6605
6606         rbtdb = (dns_rbtdb_t *)db;
6607
6608         REQUIRE(VALID_RBTDB(rbtdb));
6609
6610         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6611                                  &dns_master_style_default,
6612                                  filename, masterformat));
6613 }
6614
6615 static void
6616 delete_callback(void *data, void *arg) {
6617         dns_rbtdb_t *rbtdb = arg;
6618         rdatasetheader_t *current, *next;
6619         unsigned int locknum;
6620
6621         current = data;
6622         locknum = current->node->locknum;
6623         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6624         while (current != NULL) {
6625                 next = current->next;
6626                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6627                 current = next;
6628         }
6629         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6630 }
6631
6632 static isc_boolean_t
6633 issecure(dns_db_t *db) {
6634         dns_rbtdb_t *rbtdb;
6635         isc_boolean_t secure;
6636
6637         rbtdb = (dns_rbtdb_t *)db;
6638
6639         REQUIRE(VALID_RBTDB(rbtdb));
6640
6641         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6642         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6643         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6644
6645         return (secure);
6646 }
6647
6648 static isc_boolean_t
6649 isdnssec(dns_db_t *db) {
6650         dns_rbtdb_t *rbtdb;
6651         isc_boolean_t dnssec;
6652
6653         rbtdb = (dns_rbtdb_t *)db;
6654
6655         REQUIRE(VALID_RBTDB(rbtdb));
6656
6657         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6658         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6659         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6660
6661         return (dnssec);
6662 }
6663
6664 static unsigned int
6665 nodecount(dns_db_t *db) {
6666         dns_rbtdb_t *rbtdb;
6667         unsigned int count;
6668
6669         rbtdb = (dns_rbtdb_t *)db;
6670
6671         REQUIRE(VALID_RBTDB(rbtdb));
6672
6673         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6674         count = dns_rbt_nodecount(rbtdb->tree);
6675         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6676
6677         return (count);
6678 }
6679
6680 static void
6681 settask(dns_db_t *db, isc_task_t *task) {
6682         dns_rbtdb_t *rbtdb;
6683
6684         rbtdb = (dns_rbtdb_t *)db;
6685
6686         REQUIRE(VALID_RBTDB(rbtdb));
6687
6688         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6689         if (rbtdb->task != NULL)
6690                 isc_task_detach(&rbtdb->task);
6691         if (task != NULL)
6692                 isc_task_attach(task, &rbtdb->task);
6693         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6694 }
6695
6696 static isc_boolean_t
6697 ispersistent(dns_db_t *db) {
6698         UNUSED(db);
6699         return (ISC_FALSE);
6700 }
6701
6702 static isc_result_t
6703 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6704         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6705         dns_rbtnode_t *onode;
6706         isc_result_t result = ISC_R_SUCCESS;
6707
6708         REQUIRE(VALID_RBTDB(rbtdb));
6709         REQUIRE(nodep != NULL && *nodep == NULL);
6710
6711         /* Note that the access to origin_node doesn't require a DB lock */
6712         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6713         if (onode != NULL) {
6714                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6715                 new_reference(rbtdb, onode);
6716                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6717
6718                 *nodep = rbtdb->origin_node;
6719         } else {
6720                 INSIST(IS_CACHE(rbtdb));
6721                 result = ISC_R_NOTFOUND;
6722         }
6723
6724         return (result);
6725 }
6726
6727 static isc_result_t
6728 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6729                    isc_uint8_t *flags, isc_uint16_t *iterations,
6730                    unsigned char *salt, size_t *salt_length)
6731 {
6732         dns_rbtdb_t *rbtdb;
6733         isc_result_t result = ISC_R_NOTFOUND;
6734         rbtdb_version_t *rbtversion = version;
6735
6736         rbtdb = (dns_rbtdb_t *)db;
6737
6738         REQUIRE(VALID_RBTDB(rbtdb));
6739         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6740
6741         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6742
6743         if (rbtversion == NULL)
6744                 rbtversion = rbtdb->current_version;
6745
6746         if (rbtversion->havensec3) {
6747                 if (hash != NULL)
6748                         *hash = rbtversion->hash;
6749                 if (salt != NULL && salt_length != NULL) {
6750                         REQUIRE(*salt_length >= rbtversion->salt_length);
6751                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6752                 }
6753                 if (salt_length != NULL)
6754                         *salt_length = rbtversion->salt_length;
6755                 if (iterations != NULL)
6756                         *iterations = rbtversion->iterations;
6757                 if (flags != NULL)
6758                         *flags = rbtversion->flags;
6759                 result = ISC_R_SUCCESS;
6760         }
6761         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6762
6763         return (result);
6764 }
6765
6766 static isc_result_t
6767 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6768         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6769         isc_stdtime_t oldresign;
6770         isc_result_t result = ISC_R_SUCCESS;
6771         rdatasetheader_t *header;
6772
6773         REQUIRE(VALID_RBTDB(rbtdb));
6774         REQUIRE(!IS_CACHE(rbtdb));
6775         REQUIRE(rdataset != NULL);
6776
6777         header = rdataset->private3;
6778         header--;
6779
6780         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6781                   isc_rwlocktype_write);
6782
6783         oldresign = header->resign;
6784         header->resign = resign;
6785         if (header->heap_index != 0) {
6786                 INSIST(RESIGN(header));
6787                 if (resign == 0) {
6788                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6789                                         header->heap_index);
6790                         header->heap_index = 0;
6791                 } else if (resign < oldresign)
6792                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6793                                            header->heap_index);
6794                 else
6795                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6796                                            header->heap_index);
6797         } else if (resign && header->heap_index == 0) {
6798                 header->attributes |= RDATASET_ATTR_RESIGN;
6799                 result = resign_insert(rbtdb, header->node->locknum, header);
6800         }
6801         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6802                     isc_rwlocktype_write);
6803         return (result);
6804 }
6805
6806 static isc_result_t
6807 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6808                dns_name_t *foundname)
6809 {
6810         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6811         rdatasetheader_t *header = NULL, *this;
6812         unsigned int i;
6813         isc_result_t result = ISC_R_NOTFOUND;
6814         unsigned int locknum;
6815
6816         REQUIRE(VALID_RBTDB(rbtdb));
6817
6818         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6819
6820         for (i = 0; i < rbtdb->node_lock_count; i++) {
6821                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6822                 this = isc_heap_element(rbtdb->heaps[i], 1);
6823                 if (this == NULL) {
6824                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6825                                     isc_rwlocktype_read);
6826                         continue;
6827                 }
6828                 if (header == NULL)
6829                         header = this;
6830                 else if (isc_serial_lt(this->resign, header->resign)) {
6831                         locknum = header->node->locknum;
6832                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6833                                     isc_rwlocktype_read);
6834                         header = this;
6835                 } else
6836                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6837                                     isc_rwlocktype_read);
6838         }
6839
6840         if (header == NULL)
6841                 goto unlock;
6842
6843         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6844
6845         if (foundname != NULL)
6846                 dns_rbt_fullnamefromnode(header->node, foundname);
6847
6848         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6849                     isc_rwlocktype_read);
6850
6851         result = ISC_R_SUCCESS;
6852
6853  unlock:
6854         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6855
6856         return (result);
6857 }
6858
6859 static void
6860 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6861 {
6862         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6863         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6864         dns_rbtnode_t *node;
6865         rdatasetheader_t *header;
6866
6867         REQUIRE(VALID_RBTDB(rbtdb));
6868         REQUIRE(rdataset != NULL);
6869         REQUIRE(rdataset->methods == &rdataset_methods);
6870         REQUIRE(rbtdb->future_version == rbtversion);
6871         REQUIRE(rbtversion != NULL);
6872         REQUIRE(rbtversion->writer);
6873         REQUIRE(rbtversion->rbtdb == rbtdb);
6874
6875         node = rdataset->private2;
6876         INSIST(node != NULL);
6877         header = rdataset->private3;
6878         INSIST(header != NULL);
6879         header--;
6880
6881         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6882         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6883                   isc_rwlocktype_write);
6884         /*
6885          * Delete from heap and save to re-signed list so that it can
6886          * be restored if we backout of this change.
6887          */
6888         new_reference(rbtdb, node);
6889         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6890         header->heap_index = 0;
6891         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6892
6893         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6894                     isc_rwlocktype_write);
6895         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6896 }
6897
6898 static dns_stats_t *
6899 getrrsetstats(dns_db_t *db) {
6900         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6901
6902         REQUIRE(VALID_RBTDB(rbtdb));
6903         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6904
6905         return (rbtdb->rrsetstats);
6906 }
6907
6908 static dns_dbmethods_t zone_methods = {
6909         attach,
6910         detach,
6911         beginload,
6912         endload,
6913         dump,
6914         currentversion,
6915         newversion,
6916         attachversion,
6917         closeversion,
6918         findnode,
6919         zone_find,
6920         zone_findzonecut,
6921         attachnode,
6922         detachnode,
6923         expirenode,
6924         printnode,
6925         createiterator,
6926         zone_findrdataset,
6927         allrdatasets,
6928         addrdataset,
6929         subtractrdataset,
6930         deleterdataset,
6931         issecure,
6932         nodecount,
6933         ispersistent,
6934         overmem,
6935         settask,
6936         getoriginnode,
6937         NULL,
6938         getnsec3parameters,
6939         findnsec3node,
6940         setsigningtime,
6941         getsigningtime,
6942         resigned,
6943         isdnssec,
6944         NULL
6945 };
6946
6947 static dns_dbmethods_t cache_methods = {
6948         attach,
6949         detach,
6950         beginload,
6951         endload,
6952         dump,
6953         currentversion,
6954         newversion,
6955         attachversion,
6956         closeversion,
6957         findnode,
6958         cache_find,
6959         cache_findzonecut,
6960         attachnode,
6961         detachnode,
6962         expirenode,
6963         printnode,
6964         createiterator,
6965         cache_findrdataset,
6966         allrdatasets,
6967         addrdataset,
6968         subtractrdataset,
6969         deleterdataset,
6970         issecure,
6971         nodecount,
6972         ispersistent,
6973         overmem,
6974         settask,
6975         getoriginnode,
6976         NULL,
6977         NULL,
6978         NULL,
6979         NULL,
6980         NULL,
6981         NULL,
6982         isdnssec,
6983         getrrsetstats
6984 };
6985
6986 isc_result_t
6987 #ifdef DNS_RBTDB_VERSION64
6988 dns_rbtdb64_create
6989 #else
6990 dns_rbtdb_create
6991 #endif
6992                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6993                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6994                  void *driverarg, dns_db_t **dbp)
6995 {
6996         dns_rbtdb_t *rbtdb;
6997         isc_result_t result;
6998         int i;
6999         dns_name_t name;
7000         isc_boolean_t (*sooner)(void *, void *);
7001         isc_mem_t *hmctx = mctx;
7002
7003         /* Keep the compiler happy. */
7004         UNUSED(driverarg);
7005
7006         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7007         if (rbtdb == NULL)
7008                 return (ISC_R_NOMEMORY);
7009
7010         /*
7011          * If argv[0] exists, it points to a memory context to use for heap
7012          */
7013         if (argc != 0)
7014                 hmctx = (isc_mem_t *) argv[0];
7015
7016         memset(rbtdb, '\0', sizeof(*rbtdb));
7017         dns_name_init(&rbtdb->common.origin, NULL);
7018         rbtdb->common.attributes = 0;
7019         if (type == dns_dbtype_cache) {
7020                 rbtdb->common.methods = &cache_methods;
7021                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7022         } else if (type == dns_dbtype_stub) {
7023                 rbtdb->common.methods = &zone_methods;
7024                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7025         } else
7026                 rbtdb->common.methods = &zone_methods;
7027         rbtdb->common.rdclass = rdclass;
7028         rbtdb->common.mctx = NULL;
7029
7030         result = RBTDB_INITLOCK(&rbtdb->lock);
7031         if (result != ISC_R_SUCCESS)
7032                 goto cleanup_rbtdb;
7033
7034         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7035         if (result != ISC_R_SUCCESS)
7036                 goto cleanup_lock;
7037
7038         /*
7039          * Initialize node_lock_count in a generic way to support future
7040          * extension which allows the user to specify this value on creation.
7041          * Note that when specified for a cache DB it must be larger than 1
7042          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7043          */
7044         if (rbtdb->node_lock_count == 0) {
7045                 if (IS_CACHE(rbtdb))
7046                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7047                 else
7048                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7049         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7050                 result = ISC_R_RANGE;
7051                 goto cleanup_tree_lock;
7052         }
7053         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7054         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7055                                         sizeof(rbtdb_nodelock_t));
7056         if (rbtdb->node_locks == NULL) {
7057                 result = ISC_R_NOMEMORY;
7058                 goto cleanup_tree_lock;
7059         }
7060
7061         rbtdb->rrsetstats = NULL;
7062         if (IS_CACHE(rbtdb)) {
7063                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7064                 if (result != ISC_R_SUCCESS)
7065                         goto cleanup_node_locks;
7066                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7067                                                sizeof(rdatasetheaderlist_t));
7068                 if (rbtdb->rdatasets == NULL) {
7069                         result = ISC_R_NOMEMORY;
7070                         goto cleanup_rrsetstats;
7071                 }
7072                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7073                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
7074         } else
7075                 rbtdb->rdatasets = NULL;
7076
7077         /*
7078          * Create the heaps.
7079          */
7080         rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7081                                    sizeof(isc_heap_t *));
7082         if (rbtdb->heaps == NULL) {
7083                 result = ISC_R_NOMEMORY;
7084                 goto cleanup_rdatasets;
7085         }
7086         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7087                 rbtdb->heaps[i] = NULL;
7088         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7089         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7090                 result = isc_heap_create(hmctx, sooner, set_index, 0,
7091                                          &rbtdb->heaps[i]);
7092                 if (result != ISC_R_SUCCESS)
7093                         goto cleanup_heaps;
7094         }
7095
7096         /*
7097          * Create deadnode lists.
7098          */
7099         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7100                                        sizeof(rbtnodelist_t));
7101         if (rbtdb->deadnodes == NULL) {
7102                 result = ISC_R_NOMEMORY;
7103                 goto cleanup_heaps;
7104         }
7105         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7106                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7107
7108         rbtdb->active = rbtdb->node_lock_count;
7109
7110         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7111                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7112                 if (result == ISC_R_SUCCESS) {
7113                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7114                         if (result != ISC_R_SUCCESS)
7115                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7116                 }
7117                 if (result != ISC_R_SUCCESS) {
7118                         while (i-- > 0) {
7119                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7120                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7121                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7122                         }
7123                         goto cleanup_deadnodes;
7124                 }
7125                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7126         }
7127
7128         /*
7129          * Attach to the mctx.  The database will persist so long as there
7130          * are references to it, and attaching to the mctx ensures that our
7131          * mctx won't disappear out from under us.
7132          */
7133         isc_mem_attach(mctx, &rbtdb->common.mctx);
7134         isc_mem_attach(hmctx, &rbtdb->hmctx);
7135
7136         /*
7137          * Must be initialized before free_rbtdb() is called.
7138          */
7139         isc_ondestroy_init(&rbtdb->common.ondest);
7140
7141         /*
7142          * Make a copy of the origin name.
7143          */
7144         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7145         if (result != ISC_R_SUCCESS) {
7146                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7147                 return (result);
7148         }
7149
7150         /*
7151          * Make the Red-Black Trees.
7152          */
7153         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7154         if (result != ISC_R_SUCCESS) {
7155                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7156                 return (result);
7157         }
7158
7159         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7160         if (result != ISC_R_SUCCESS) {
7161                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7162                 return (result);
7163         }
7164
7165         /*
7166          * In order to set the node callback bit correctly in zone databases,
7167          * we need to know if the node has the origin name of the zone.
7168          * In loading_addrdataset() we could simply compare the new name
7169          * to the origin name, but this is expensive.  Also, we don't know the
7170          * node name in addrdataset(), so we need another way of knowing the
7171          * zone's top.
7172          *
7173          * We now explicitly create a node for the zone's origin, and then
7174          * we simply remember the node's address.  This is safe, because
7175          * the top-of-zone node can never be deleted, nor can its address
7176          * change.
7177          */
7178         if (!IS_CACHE(rbtdb)) {
7179                 dns_rbtnode_t *nsec3node;
7180
7181                 rbtdb->origin_node = NULL;
7182                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7183                                          &rbtdb->origin_node);
7184                 if (result != ISC_R_SUCCESS) {
7185                         INSIST(result != ISC_R_EXISTS);
7186                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7187                         return (result);
7188                 }
7189                 rbtdb->origin_node->nsec3 = 0;
7190                 /*
7191                  * We need to give the origin node the right locknum.
7192                  */
7193                 dns_name_init(&name, NULL);
7194                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7195 #ifdef DNS_RBT_USEHASH
7196                 rbtdb->origin_node->locknum =
7197                         rbtdb->origin_node->hashval %
7198                         rbtdb->node_lock_count;
7199 #else
7200                 rbtdb->origin_node->locknum =
7201                         dns_name_hash(&name, ISC_TRUE) %
7202                         rbtdb->node_lock_count;
7203 #endif
7204                 /*
7205                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7206                  * return partial matches when there is only a single NSEC3
7207                  * record in the tree.
7208                  */
7209                 nsec3node = NULL;
7210                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7211                                          &nsec3node);
7212                 if (result != ISC_R_SUCCESS) {
7213                         INSIST(result != ISC_R_EXISTS);
7214                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7215                         return (result);
7216                 }
7217                 nsec3node->nsec3 = 1;
7218                 /*
7219                  * We need to give the nsec3 origin node the right locknum.
7220                  */
7221                 dns_name_init(&name, NULL);
7222                 dns_rbt_namefromnode(nsec3node, &name);
7223 #ifdef DNS_RBT_USEHASH
7224                 nsec3node->locknum = nsec3node->hashval %
7225                         rbtdb->node_lock_count;
7226 #else
7227                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7228                         rbtdb->node_lock_count;
7229 #endif
7230         }
7231
7232         /*
7233          * Misc. Initialization.
7234          */
7235         result = isc_refcount_init(&rbtdb->references, 1);
7236         if (result != ISC_R_SUCCESS) {
7237                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7238                 return (result);
7239         }
7240         rbtdb->attributes = 0;
7241         rbtdb->task = NULL;
7242
7243         /*
7244          * Version Initialization.
7245          */
7246         rbtdb->current_serial = 1;
7247         rbtdb->least_serial = 1;
7248         rbtdb->next_serial = 2;
7249         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7250         if (rbtdb->current_version == NULL) {
7251                 isc_refcount_decrement(&rbtdb->references, NULL);
7252                 isc_refcount_destroy(&rbtdb->references);
7253                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7254                 return (ISC_R_NOMEMORY);
7255         }
7256         rbtdb->current_version->rbtdb = rbtdb;
7257         rbtdb->current_version->secure = dns_db_insecure;
7258         rbtdb->current_version->havensec3 = ISC_FALSE;
7259         rbtdb->current_version->flags = 0;
7260         rbtdb->current_version->iterations = 0;
7261         rbtdb->current_version->hash = 0;
7262         rbtdb->current_version->salt_length = 0;
7263         memset(rbtdb->current_version->salt, 0,
7264                sizeof(rbtdb->current_version->salt));
7265         rbtdb->future_version = NULL;
7266         ISC_LIST_INIT(rbtdb->open_versions);
7267         /*
7268          * Keep the current version in the open list so that list operation
7269          * won't happen in normal lookup operations.
7270          */
7271         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7272
7273         rbtdb->common.magic = DNS_DB_MAGIC;
7274         rbtdb->common.impmagic = RBTDB_MAGIC;
7275
7276         *dbp = (dns_db_t *)rbtdb;
7277
7278         return (ISC_R_SUCCESS);
7279
7280  cleanup_deadnodes:
7281         isc_mem_put(mctx, rbtdb->deadnodes,
7282                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7283
7284  cleanup_heaps:
7285         if (rbtdb->heaps != NULL) {
7286                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7287                         if (rbtdb->heaps[i] != NULL)
7288                                 isc_heap_destroy(&rbtdb->heaps[i]);
7289                 isc_mem_put(hmctx, rbtdb->heaps,
7290                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7291         }
7292
7293  cleanup_rdatasets:
7294         if (rbtdb->rdatasets != NULL)
7295                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7296                             sizeof(rdatasetheaderlist_t));
7297  cleanup_rrsetstats:
7298         if (rbtdb->rrsetstats != NULL)
7299                 dns_stats_detach(&rbtdb->rrsetstats);
7300
7301  cleanup_node_locks:
7302         isc_mem_put(mctx, rbtdb->node_locks,
7303                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7304
7305  cleanup_tree_lock:
7306         isc_rwlock_destroy(&rbtdb->tree_lock);
7307
7308  cleanup_lock:
7309         RBTDB_DESTROYLOCK(&rbtdb->lock);
7310
7311  cleanup_rbtdb:
7312         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7313         return (result);
7314 }
7315
7316
7317 /*
7318  * Slabbed Rdataset Methods
7319  */
7320
7321 static void
7322 rdataset_disassociate(dns_rdataset_t *rdataset) {
7323         dns_db_t *db = rdataset->private1;
7324         dns_dbnode_t *node = rdataset->private2;
7325
7326         detachnode(db, &node);
7327 }
7328
7329 static isc_result_t
7330 rdataset_first(dns_rdataset_t *rdataset) {
7331         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7332         unsigned int count;
7333
7334         count = raw[0] * 256 + raw[1];
7335         if (count == 0) {
7336                 rdataset->private5 = NULL;
7337                 return (ISC_R_NOMORE);
7338         }
7339
7340 #if DNS_RDATASET_FIXED
7341         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7342                 raw += 2 + (4 * count);
7343         else
7344 #endif
7345                 raw += 2;
7346
7347         /*
7348          * The privateuint4 field is the number of rdata beyond the
7349          * cursor position, so we decrement the total count by one
7350          * before storing it.
7351          *
7352          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7353          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7354          * to the first entry in the offset table.
7355          */
7356         count--;
7357         rdataset->privateuint4 = count;
7358         rdataset->private5 = raw;
7359
7360         return (ISC_R_SUCCESS);
7361 }
7362
7363 static isc_result_t
7364 rdataset_next(dns_rdataset_t *rdataset) {
7365         unsigned int count;
7366         unsigned int length;
7367         unsigned char *raw;     /* RDATASLAB */
7368
7369         count = rdataset->privateuint4;
7370         if (count == 0)
7371                 return (ISC_R_NOMORE);
7372         count--;
7373         rdataset->privateuint4 = count;
7374
7375         /*
7376          * Skip forward one record (length + 4) or one offset (4).
7377          */
7378         raw = rdataset->private5;
7379 #if DNS_RDATASET_FIXED
7380         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7381 #endif
7382                 length = raw[0] * 256 + raw[1];
7383                 raw += length;
7384 #if DNS_RDATASET_FIXED
7385         }
7386         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7387 #else
7388         rdataset->private5 = raw + 2;           /* length(2) */
7389 #endif
7390
7391         return (ISC_R_SUCCESS);
7392 }
7393
7394 static void
7395 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7396         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7397 #if DNS_RDATASET_FIXED
7398         unsigned int offset;
7399 #endif
7400         unsigned int length;
7401         isc_region_t r;
7402         unsigned int flags = 0;
7403
7404         REQUIRE(raw != NULL);
7405
7406         /*
7407          * Find the start of the record if not already in private5
7408          * then skip the length and order fields.
7409          */
7410 #if DNS_RDATASET_FIXED
7411         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7412                 offset = (raw[0] << 24) + (raw[1] << 16) +
7413                          (raw[2] << 8) + raw[3];
7414                 raw = rdataset->private3;
7415                 raw += offset;
7416         }
7417 #endif
7418         length = raw[0] * 256 + raw[1];
7419 #if DNS_RDATASET_FIXED
7420         raw += 4;
7421 #else
7422         raw += 2;
7423 #endif
7424         if (rdataset->type == dns_rdatatype_rrsig) {
7425                 if (*raw & DNS_RDATASLAB_OFFLINE)
7426                         flags |= DNS_RDATA_OFFLINE;
7427                 length--;
7428                 raw++;
7429         }
7430         r.length = length;
7431         r.base = raw;
7432         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7433         rdata->flags |= flags;
7434 }
7435
7436 static void
7437 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7438         dns_db_t *db = source->private1;
7439         dns_dbnode_t *node = source->private2;
7440         dns_dbnode_t *cloned_node = NULL;
7441
7442         attachnode(db, node, &cloned_node);
7443         *target = *source;
7444
7445         /*
7446          * Reset iterator state.
7447          */
7448         target->privateuint4 = 0;
7449         target->private5 = NULL;
7450 }
7451
7452 static unsigned int
7453 rdataset_count(dns_rdataset_t *rdataset) {
7454         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7455         unsigned int count;
7456
7457         count = raw[0] * 256 + raw[1];
7458
7459         return (count);
7460 }
7461
7462 static isc_result_t
7463 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7464                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7465 {
7466         dns_db_t *db = rdataset->private1;
7467         dns_dbnode_t *node = rdataset->private2;
7468         dns_dbnode_t *cloned_node;
7469         struct noqname *noqname = rdataset->private6;
7470
7471         cloned_node = NULL;
7472         attachnode(db, node, &cloned_node);
7473         nsec->methods = &rdataset_methods;
7474         nsec->rdclass = db->rdclass;
7475         nsec->type = noqname->type;
7476         nsec->covers = 0;
7477         nsec->ttl = rdataset->ttl;
7478         nsec->trust = rdataset->trust;
7479         nsec->private1 = rdataset->private1;
7480         nsec->private2 = rdataset->private2;
7481         nsec->private3 = noqname->neg;
7482         nsec->privateuint4 = 0;
7483         nsec->private5 = NULL;
7484         nsec->private6 = NULL;
7485         nsec->private7 = NULL;
7486
7487         cloned_node = NULL;
7488         attachnode(db, node, &cloned_node);
7489         nsecsig->methods = &rdataset_methods;
7490         nsecsig->rdclass = db->rdclass;
7491         nsecsig->type = dns_rdatatype_rrsig;
7492         nsecsig->covers = noqname->type;
7493         nsecsig->ttl = rdataset->ttl;
7494         nsecsig->trust = rdataset->trust;
7495         nsecsig->private1 = rdataset->private1;
7496         nsecsig->private2 = rdataset->private2;
7497         nsecsig->private3 = noqname->negsig;
7498         nsecsig->privateuint4 = 0;
7499         nsecsig->private5 = NULL;
7500         nsec->private6 = NULL;
7501         nsec->private7 = NULL;
7502
7503         dns_name_clone(&noqname->name, name);
7504
7505         return (ISC_R_SUCCESS);
7506 }
7507
7508 static isc_result_t
7509 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7510                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7511 {
7512         dns_db_t *db = rdataset->private1;
7513         dns_dbnode_t *node = rdataset->private2;
7514         dns_dbnode_t *cloned_node;
7515         struct noqname *closest = rdataset->private7;
7516
7517         cloned_node = NULL;
7518         attachnode(db, node, &cloned_node);
7519         nsec->methods = &rdataset_methods;
7520         nsec->rdclass = db->rdclass;
7521         nsec->type = closest->type;
7522         nsec->covers = 0;
7523         nsec->ttl = rdataset->ttl;
7524         nsec->trust = rdataset->trust;
7525         nsec->private1 = rdataset->private1;
7526         nsec->private2 = rdataset->private2;
7527         nsec->private3 = closest->neg;
7528         nsec->privateuint4 = 0;
7529         nsec->private5 = NULL;
7530         nsec->private6 = NULL;
7531         nsec->private7 = NULL;
7532
7533         cloned_node = NULL;
7534         attachnode(db, node, &cloned_node);
7535         nsecsig->methods = &rdataset_methods;
7536         nsecsig->rdclass = db->rdclass;
7537         nsecsig->type = dns_rdatatype_rrsig;
7538         nsecsig->covers = closest->type;
7539         nsecsig->ttl = rdataset->ttl;
7540         nsecsig->trust = rdataset->trust;
7541         nsecsig->private1 = rdataset->private1;
7542         nsecsig->private2 = rdataset->private2;
7543         nsecsig->private3 = closest->negsig;
7544         nsecsig->privateuint4 = 0;
7545         nsecsig->private5 = NULL;
7546         nsec->private6 = NULL;
7547         nsec->private7 = NULL;
7548
7549         dns_name_clone(&closest->name, name);
7550
7551         return (ISC_R_SUCCESS);
7552 }
7553
7554 static void
7555 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7556         dns_rbtdb_t *rbtdb = rdataset->private1;
7557         dns_rbtnode_t *rbtnode = rdataset->private2;
7558         rdatasetheader_t *header = rdataset->private3;
7559
7560         header--;
7561         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7562                   isc_rwlocktype_write);
7563         header->trust = rdataset->trust = trust;
7564         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7565                   isc_rwlocktype_write);
7566 }
7567
7568 static void
7569 rdataset_expire(dns_rdataset_t *rdataset) {
7570         dns_rbtdb_t *rbtdb = rdataset->private1;
7571         dns_rbtnode_t *rbtnode = rdataset->private2;
7572         rdatasetheader_t *header = rdataset->private3;
7573
7574         header--;
7575         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7576                   isc_rwlocktype_write);
7577         expire_header(rbtdb, header, ISC_FALSE);
7578         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7579                   isc_rwlocktype_write);
7580 }
7581
7582 /*
7583  * Rdataset Iterator Methods
7584  */
7585
7586 static void
7587 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7588         rbtdb_rdatasetiter_t *rbtiterator;
7589
7590         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7591
7592         if (rbtiterator->common.version != NULL)
7593                 closeversion(rbtiterator->common.db,
7594                              &rbtiterator->common.version, ISC_FALSE);
7595         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7596         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7597                     sizeof(*rbtiterator));
7598
7599         *iteratorp = NULL;
7600 }
7601
7602 static isc_result_t
7603 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7604         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7605         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7606         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7607         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7608         rdatasetheader_t *header, *top_next;
7609         rbtdb_serial_t serial;
7610         isc_stdtime_t now;
7611
7612         if (IS_CACHE(rbtdb)) {
7613                 serial = 1;
7614                 now = rbtiterator->common.now;
7615         } else {
7616                 serial = rbtversion->serial;
7617                 now = 0;
7618         }
7619
7620         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7621                   isc_rwlocktype_read);
7622
7623         for (header = rbtnode->data; header != NULL; header = top_next) {
7624                 top_next = header->next;
7625                 do {
7626                         if (header->serial <= serial && !IGNORE(header)) {
7627                                 /*
7628                                  * Is this a "this rdataset doesn't exist"
7629                                  * record?  Or is it too old in the cache?
7630                                  *
7631                                  * Note: unlike everywhere else, we
7632                                  * check for now > header->rdh_ttl instead
7633                                  * of now >= header->rdh_ttl.  This allows
7634                                  * ANY and RRSIG queries for 0 TTL
7635                                  * rdatasets to work.
7636                                  */
7637                                 if (NONEXISTENT(header) ||
7638                                     (now != 0 && now > header->rdh_ttl))
7639                                         header = NULL;
7640                                 break;
7641                         } else
7642                                 header = header->down;
7643                 } while (header != NULL);
7644                 if (header != NULL)
7645                         break;
7646         }
7647
7648         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7649                     isc_rwlocktype_read);
7650
7651         rbtiterator->current = header;
7652
7653         if (header == NULL)
7654                 return (ISC_R_NOMORE);
7655
7656         return (ISC_R_SUCCESS);
7657 }
7658
7659 static isc_result_t
7660 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7661         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7662         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7663         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7664         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7665         rdatasetheader_t *header, *top_next;
7666         rbtdb_serial_t serial;
7667         isc_stdtime_t now;
7668         rbtdb_rdatatype_t type, negtype;
7669         dns_rdatatype_t rdtype, covers;
7670
7671         header = rbtiterator->current;
7672         if (header == NULL)
7673                 return (ISC_R_NOMORE);
7674
7675         if (IS_CACHE(rbtdb)) {
7676                 serial = 1;
7677                 now = rbtiterator->common.now;
7678         } else {
7679                 serial = rbtversion->serial;
7680                 now = 0;
7681         }
7682
7683         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7684                   isc_rwlocktype_read);
7685
7686         type = header->type;
7687         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7688         if (NEGATIVE(header)) {
7689                 covers = RBTDB_RDATATYPE_EXT(header->type);
7690                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7691         } else
7692                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7693         for (header = header->next; header != NULL; header = top_next) {
7694                 top_next = header->next;
7695                 /*
7696                  * If not walking back up the down list.
7697                  */
7698                 if (header->type != type && header->type != negtype) {
7699                         do {
7700                                 if (header->serial <= serial &&
7701                                     !IGNORE(header)) {
7702                                         /*
7703                                          * Is this a "this rdataset doesn't
7704                                          * exist" record?
7705                                          *
7706                                          * Note: unlike everywhere else, we
7707                                          * check for now > header->ttl instead
7708                                          * of now >= header->ttl.  This allows
7709                                          * ANY and RRSIG queries for 0 TTL
7710                                          * rdatasets to work.
7711                                          */
7712                                         if ((header->attributes &
7713                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7714                                             (now != 0 && now > header->rdh_ttl))
7715                                                 header = NULL;
7716                                         break;
7717                                 } else
7718                                         header = header->down;
7719                         } while (header != NULL);
7720                         if (header != NULL)
7721                                 break;
7722                 }
7723         }
7724
7725         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7726                     isc_rwlocktype_read);
7727
7728         rbtiterator->current = header;
7729
7730         if (header == NULL)
7731                 return (ISC_R_NOMORE);
7732
7733         return (ISC_R_SUCCESS);
7734 }
7735
7736 static void
7737 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7738         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7739         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7740         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7741         rdatasetheader_t *header;
7742
7743         header = rbtiterator->current;
7744         REQUIRE(header != NULL);
7745
7746         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7747                   isc_rwlocktype_read);
7748
7749         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7750                       rdataset);
7751
7752         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7753                     isc_rwlocktype_read);
7754 }
7755
7756
7757 /*
7758  * Database Iterator Methods
7759  */
7760
7761 static inline void
7762 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7763         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7764         dns_rbtnode_t *node = rbtdbiter->node;
7765
7766         if (node == NULL)
7767                 return;
7768
7769         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7770         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7771 }
7772
7773 static inline void
7774 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7775         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7776         dns_rbtnode_t *node = rbtdbiter->node;
7777         nodelock_t *lock;
7778
7779         if (node == NULL)
7780                 return;
7781
7782         lock = &rbtdb->node_locks[node->locknum].lock;
7783         NODE_LOCK(lock, isc_rwlocktype_read);
7784         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7785                             rbtdbiter->tree_locked, ISC_FALSE);
7786         NODE_UNLOCK(lock, isc_rwlocktype_read);
7787
7788         rbtdbiter->node = NULL;
7789 }
7790
7791 static void
7792 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7793         dns_rbtnode_t *node;
7794         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7795         isc_boolean_t was_read_locked = ISC_FALSE;
7796         nodelock_t *lock;
7797         int i;
7798
7799         if (rbtdbiter->delete != 0) {
7800                 /*
7801                  * Note that "%d node of %d in tree" can report things like
7802                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7803                  * That some nodes appear on the deletions list more than
7804                  * once.  Only the last occurence will actually be deleted.
7805                  */
7806                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7807                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7808                               "flush_deletions: %d nodes of %d in tree",
7809                               rbtdbiter->delete,
7810                               dns_rbt_nodecount(rbtdb->tree));
7811
7812                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7813                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7814                         was_read_locked = ISC_TRUE;
7815                 }
7816                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7817                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7818
7819                 for (i = 0; i < rbtdbiter->delete; i++) {
7820                         node = rbtdbiter->deletions[i];
7821                         lock = &rbtdb->node_locks[node->locknum].lock;
7822
7823                         NODE_LOCK(lock, isc_rwlocktype_read);
7824                         decrement_reference(rbtdb, node, 0,
7825                                             isc_rwlocktype_read,
7826                                             rbtdbiter->tree_locked, ISC_FALSE);
7827                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7828                 }
7829
7830                 rbtdbiter->delete = 0;
7831
7832                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7833                 if (was_read_locked) {
7834                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7835                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7836
7837                 } else {
7838                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7839                 }
7840         }
7841 }
7842
7843 static inline void
7844 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7845         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7846
7847         REQUIRE(rbtdbiter->paused);
7848         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7849
7850         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7851         rbtdbiter->tree_locked = isc_rwlocktype_read;
7852
7853         rbtdbiter->paused = ISC_FALSE;
7854 }
7855
7856 static void
7857 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7858         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7859         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7860         dns_db_t *db = NULL;
7861
7862         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7863                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7864                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7865         } else
7866                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7867
7868         dereference_iter_node(rbtdbiter);
7869
7870         flush_deletions(rbtdbiter);
7871
7872         dns_db_attach(rbtdbiter->common.db, &db);
7873         dns_db_detach(&rbtdbiter->common.db);
7874
7875         dns_rbtnodechain_reset(&rbtdbiter->chain);
7876         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7877         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7878         dns_db_detach(&db);
7879
7880         *iteratorp = NULL;
7881 }
7882
7883 static isc_result_t
7884 dbiterator_first(dns_dbiterator_t *iterator) {
7885         isc_result_t result;
7886         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7887         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7888         dns_name_t *name, *origin;
7889
7890         if (rbtdbiter->result != ISC_R_SUCCESS &&
7891             rbtdbiter->result != ISC_R_NOMORE)
7892                 return (rbtdbiter->result);
7893
7894         if (rbtdbiter->paused)
7895                 resume_iteration(rbtdbiter);
7896
7897         dereference_iter_node(rbtdbiter);
7898
7899         name = dns_fixedname_name(&rbtdbiter->name);
7900         origin = dns_fixedname_name(&rbtdbiter->origin);
7901         dns_rbtnodechain_reset(&rbtdbiter->chain);
7902         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7903
7904         if (rbtdbiter->nsec3only) {
7905                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7906                 result = dns_rbtnodechain_first(rbtdbiter->current,
7907                                                 rbtdb->nsec3, name, origin);
7908         } else {
7909                 rbtdbiter->current = &rbtdbiter->chain;
7910                 result = dns_rbtnodechain_first(rbtdbiter->current,
7911                                                 rbtdb->tree, name, origin);
7912                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7913                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7914                         result = dns_rbtnodechain_first(rbtdbiter->current,
7915                                                         rbtdb->nsec3, name,
7916                                                         origin);
7917                 }
7918         }
7919         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7920                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7921                                                   NULL, &rbtdbiter->node);
7922                 if (result == ISC_R_SUCCESS) {
7923                         rbtdbiter->new_origin = ISC_TRUE;
7924                         reference_iter_node(rbtdbiter);
7925                 }
7926         } else {
7927                 INSIST(result == ISC_R_NOTFOUND);
7928                 result = ISC_R_NOMORE; /* The tree is empty. */
7929         }
7930
7931         rbtdbiter->result = result;
7932
7933         return (result);
7934 }
7935
7936 static isc_result_t
7937 dbiterator_last(dns_dbiterator_t *iterator) {
7938         isc_result_t result;
7939         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7940         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7941         dns_name_t *name, *origin;
7942
7943         if (rbtdbiter->result != ISC_R_SUCCESS &&
7944             rbtdbiter->result != ISC_R_NOMORE)
7945                 return (rbtdbiter->result);
7946
7947         if (rbtdbiter->paused)
7948                 resume_iteration(rbtdbiter);
7949
7950         dereference_iter_node(rbtdbiter);
7951
7952         name = dns_fixedname_name(&rbtdbiter->name);
7953         origin = dns_fixedname_name(&rbtdbiter->origin);
7954         dns_rbtnodechain_reset(&rbtdbiter->chain);
7955         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7956
7957         result = ISC_R_NOTFOUND;
7958         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7959                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7960                 result = dns_rbtnodechain_last(rbtdbiter->current,
7961                                                rbtdb->nsec3, name, origin);
7962         }
7963         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7964                 rbtdbiter->current = &rbtdbiter->chain;
7965                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7966                                                name, origin);
7967         }
7968         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7969                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7970                                                   NULL, &rbtdbiter->node);
7971                 if (result == ISC_R_SUCCESS) {
7972                         rbtdbiter->new_origin = ISC_TRUE;
7973                         reference_iter_node(rbtdbiter);
7974                 }
7975         } else {
7976                 INSIST(result == ISC_R_NOTFOUND);
7977                 result = ISC_R_NOMORE; /* The tree is empty. */
7978         }
7979
7980         rbtdbiter->result = result;
7981
7982         return (result);
7983 }
7984
7985 static isc_result_t
7986 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7987         isc_result_t result, tresult;
7988         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7989         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7990         dns_name_t *iname, *origin;
7991
7992         if (rbtdbiter->result != ISC_R_SUCCESS &&
7993             rbtdbiter->result != ISC_R_NOTFOUND &&
7994             rbtdbiter->result != ISC_R_NOMORE)
7995                 return (rbtdbiter->result);
7996
7997         if (rbtdbiter->paused)
7998                 resume_iteration(rbtdbiter);
7999
8000         dereference_iter_node(rbtdbiter);
8001
8002         iname = dns_fixedname_name(&rbtdbiter->name);
8003         origin = dns_fixedname_name(&rbtdbiter->origin);
8004         dns_rbtnodechain_reset(&rbtdbiter->chain);
8005         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8006
8007         if (rbtdbiter->nsec3only) {
8008                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8009                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8010                                           &rbtdbiter->node,
8011                                           rbtdbiter->current,
8012                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8013         } else if (rbtdbiter->nonsec3) {
8014                 rbtdbiter->current = &rbtdbiter->chain;
8015                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8016                                           &rbtdbiter->node,
8017                                           rbtdbiter->current,
8018                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8019         } else {
8020                 /*
8021                  * Stay on main chain if not found on either chain.
8022                  */
8023                 rbtdbiter->current = &rbtdbiter->chain;
8024                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8025                                           &rbtdbiter->node,
8026                                           rbtdbiter->current,
8027                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8028                 if (result == DNS_R_PARTIALMATCH) {
8029                         dns_rbtnode_t *node = NULL;
8030                         tresult = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8031                                                   &node, &rbtdbiter->nsec3chain,
8032                                                   DNS_RBTFIND_EMPTYDATA,
8033                                                   NULL, NULL);
8034                         if (tresult == ISC_R_SUCCESS) {
8035                                 rbtdbiter->node = node;
8036                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8037                                 result = tresult;
8038                         }
8039                 }
8040         }
8041
8042 #if 1
8043         if (result == ISC_R_SUCCESS) {
8044                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8045                                                   origin, NULL);
8046                 if (result == ISC_R_SUCCESS) {
8047                         rbtdbiter->new_origin = ISC_TRUE;
8048                         reference_iter_node(rbtdbiter);
8049                 }
8050         } else if (result == DNS_R_PARTIALMATCH) {
8051                 result = ISC_R_NOTFOUND;
8052                 rbtdbiter->node = NULL;
8053         }
8054
8055         rbtdbiter->result = result;
8056 #else
8057         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8058                 isc_result_t tresult;
8059                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8060                                                    origin, NULL);
8061                 if (tresult == ISC_R_SUCCESS) {
8062                         rbtdbiter->new_origin = ISC_TRUE;
8063                         reference_iter_node(rbtdbiter);
8064                 } else {
8065                         result = tresult;
8066                         rbtdbiter->node = NULL;
8067                 }
8068         } else
8069                 rbtdbiter->node = NULL;
8070
8071         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8072                             ISC_R_SUCCESS : result;
8073 #endif
8074
8075         return (result);
8076 }
8077
8078 static isc_result_t
8079 dbiterator_prev(dns_dbiterator_t *iterator) {
8080         isc_result_t result;
8081         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8082         dns_name_t *name, *origin;
8083         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8084
8085         REQUIRE(rbtdbiter->node != NULL);
8086
8087         if (rbtdbiter->result != ISC_R_SUCCESS)
8088                 return (rbtdbiter->result);
8089
8090         if (rbtdbiter->paused)
8091                 resume_iteration(rbtdbiter);
8092
8093         name = dns_fixedname_name(&rbtdbiter->name);
8094         origin = dns_fixedname_name(&rbtdbiter->origin);
8095         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8096         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8097             !rbtdbiter->nonsec3 &&
8098             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8099                 rbtdbiter->current = &rbtdbiter->chain;
8100                 dns_rbtnodechain_reset(rbtdbiter->current);
8101                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8102                                                name, origin);
8103                 if (result == ISC_R_NOTFOUND)
8104                         result = ISC_R_NOMORE;
8105         }
8106
8107         dereference_iter_node(rbtdbiter);
8108
8109         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8110                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8111                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8112                                                   NULL, &rbtdbiter->node);
8113         }
8114
8115         if (result == ISC_R_SUCCESS)
8116                 reference_iter_node(rbtdbiter);
8117
8118         rbtdbiter->result = result;
8119
8120         return (result);
8121 }
8122
8123 static isc_result_t
8124 dbiterator_next(dns_dbiterator_t *iterator) {
8125         isc_result_t result;
8126         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8127         dns_name_t *name, *origin;
8128         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8129
8130         REQUIRE(rbtdbiter->node != NULL);
8131
8132         if (rbtdbiter->result != ISC_R_SUCCESS)
8133                 return (rbtdbiter->result);
8134
8135         if (rbtdbiter->paused)
8136                 resume_iteration(rbtdbiter);
8137
8138         name = dns_fixedname_name(&rbtdbiter->name);
8139         origin = dns_fixedname_name(&rbtdbiter->origin);
8140         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8141         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8142             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8143                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8144                 dns_rbtnodechain_reset(rbtdbiter->current);
8145                 result = dns_rbtnodechain_first(rbtdbiter->current,
8146                                                 rbtdb->nsec3, name, origin);
8147                 if (result == ISC_R_NOTFOUND)
8148                         result = ISC_R_NOMORE;
8149         }
8150
8151         dereference_iter_node(rbtdbiter);
8152
8153         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8154                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8155                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8156                                                   NULL, &rbtdbiter->node);
8157         }
8158         if (result == ISC_R_SUCCESS)
8159                 reference_iter_node(rbtdbiter);
8160
8161         rbtdbiter->result = result;
8162
8163         return (result);
8164 }
8165
8166 static isc_result_t
8167 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8168                    dns_name_t *name)
8169 {
8170         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8171         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8172         dns_rbtnode_t *node = rbtdbiter->node;
8173         isc_result_t result;
8174         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8175         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8176
8177         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8178         REQUIRE(rbtdbiter->node != NULL);
8179
8180         if (rbtdbiter->paused)
8181                 resume_iteration(rbtdbiter);
8182
8183         if (name != NULL) {
8184                 if (rbtdbiter->common.relative_names)
8185                         origin = NULL;
8186                 result = dns_name_concatenate(nodename, origin, name, NULL);
8187                 if (result != ISC_R_SUCCESS)
8188                         return (result);
8189                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8190                         result = DNS_R_NEWORIGIN;
8191         } else
8192                 result = ISC_R_SUCCESS;
8193
8194         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8195         new_reference(rbtdb, node);
8196         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8197
8198         *nodep = rbtdbiter->node;
8199
8200         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8201                 isc_result_t expire_result;
8202
8203                 /*
8204                  * If the deletion array is full, flush it before trying
8205                  * to expire the current node.  The current node can't
8206                  * fully deleted while the iteration cursor is still on it.
8207                  */
8208                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8209                         flush_deletions(rbtdbiter);
8210
8211                 expire_result = expirenode(iterator->db, *nodep, 0);
8212
8213                 /*
8214                  * expirenode() currently always returns success.
8215                  */
8216                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8217                         unsigned int refs;
8218
8219                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8220                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8221                         dns_rbtnode_refincrement(node, &refs);
8222                         INSIST(refs != 0);
8223                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8224                 }
8225         }
8226
8227         return (result);
8228 }
8229
8230 static isc_result_t
8231 dbiterator_pause(dns_dbiterator_t *iterator) {
8232         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8233         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8234
8235         if (rbtdbiter->result != ISC_R_SUCCESS &&
8236             rbtdbiter->result != ISC_R_NOMORE)
8237                 return (rbtdbiter->result);
8238
8239         if (rbtdbiter->paused)
8240                 return (ISC_R_SUCCESS);
8241
8242         rbtdbiter->paused = ISC_TRUE;
8243
8244         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8245                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8246                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8247                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8248         }
8249
8250         flush_deletions(rbtdbiter);
8251
8252         return (ISC_R_SUCCESS);
8253 }
8254
8255 static isc_result_t
8256 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8257         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8258         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8259
8260         if (rbtdbiter->result != ISC_R_SUCCESS)
8261                 return (rbtdbiter->result);
8262
8263         return (dns_name_copy(origin, name, NULL));
8264 }
8265
8266 /*%
8267  * Additional cache routines.
8268  */
8269 static isc_result_t
8270 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8271                        dns_rdatatype_t qtype, dns_acache_t *acache,
8272                        dns_zone_t **zonep, dns_db_t **dbp,
8273                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8274                        dns_name_t *fname, dns_message_t *msg,
8275                        isc_stdtime_t now)
8276 {
8277         dns_rbtdb_t *rbtdb = rdataset->private1;
8278         dns_rbtnode_t *rbtnode = rdataset->private2;
8279         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8280         unsigned int current_count = rdataset->privateuint4;
8281         unsigned int count;
8282         rdatasetheader_t *header;
8283         nodelock_t *nodelock;
8284         unsigned int total_count;
8285         acachectl_t *acarray;
8286         dns_acacheentry_t *entry;
8287         isc_result_t result;
8288
8289         UNUSED(qtype); /* we do not use this value at least for now */
8290         UNUSED(acache);
8291
8292         header = (struct rdatasetheader *)(raw - sizeof(*header));
8293
8294         total_count = raw[0] * 256 + raw[1];
8295         INSIST(total_count > current_count);
8296         count = total_count - current_count - 1;
8297
8298         acarray = NULL;
8299
8300         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8301         NODE_LOCK(nodelock, isc_rwlocktype_read);
8302
8303         switch (type) {
8304         case dns_rdatasetadditional_fromauth:
8305                 acarray = header->additional_auth;
8306                 break;
8307         case dns_rdatasetadditional_fromcache:
8308                 acarray = NULL;
8309                 break;
8310         case dns_rdatasetadditional_fromglue:
8311                 acarray = header->additional_glue;
8312                 break;
8313         default:
8314                 INSIST(0);
8315         }
8316
8317         if (acarray == NULL) {
8318                 if (type != dns_rdatasetadditional_fromcache)
8319                         dns_acache_countquerymiss(acache);
8320                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8321                 return (ISC_R_NOTFOUND);
8322         }
8323
8324         if (acarray[count].entry == NULL) {
8325                 dns_acache_countquerymiss(acache);
8326                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8327                 return (ISC_R_NOTFOUND);
8328         }
8329
8330         entry = NULL;
8331         dns_acache_attachentry(acarray[count].entry, &entry);
8332
8333         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8334
8335         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8336                                      nodep, fname, msg, now);
8337
8338         dns_acache_detachentry(&entry);
8339
8340         return (result);
8341 }
8342
8343 static void
8344 acache_callback(dns_acacheentry_t *entry, void **arg) {
8345         dns_rbtdb_t *rbtdb;
8346         dns_rbtnode_t *rbtnode;
8347         nodelock_t *nodelock;
8348         acachectl_t *acarray = NULL;
8349         acache_cbarg_t *cbarg;
8350         unsigned int count;
8351
8352         REQUIRE(arg != NULL);
8353         cbarg = *arg;
8354
8355         /*
8356          * The caller must hold the entry lock.
8357          */
8358
8359         rbtdb = (dns_rbtdb_t *)cbarg->db;
8360         rbtnode = (dns_rbtnode_t *)cbarg->node;
8361
8362         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8363         NODE_LOCK(nodelock, isc_rwlocktype_write);
8364
8365         switch (cbarg->type) {
8366         case dns_rdatasetadditional_fromauth:
8367                 acarray = cbarg->header->additional_auth;
8368                 break;
8369         case dns_rdatasetadditional_fromglue:
8370                 acarray = cbarg->header->additional_glue;
8371                 break;
8372         default:
8373                 INSIST(0);
8374         }
8375
8376         count = cbarg->count;
8377         if (acarray != NULL && acarray[count].entry == entry) {
8378                 acarray[count].entry = NULL;
8379                 INSIST(acarray[count].cbarg == cbarg);
8380                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8381                 acarray[count].cbarg = NULL;
8382         } else
8383                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8384
8385         dns_acache_detachentry(&entry);
8386
8387         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8388
8389         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8390         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8391
8392         *arg = NULL;
8393 }
8394
8395 static void
8396 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8397                       acache_cbarg_t **cbargp)
8398 {
8399         acache_cbarg_t *cbarg;
8400
8401         REQUIRE(mctx != NULL);
8402         REQUIRE(entry != NULL);
8403         REQUIRE(cbargp != NULL && *cbargp != NULL);
8404
8405         cbarg = *cbargp;
8406
8407         dns_acache_cancelentry(entry);
8408         dns_db_detachnode(cbarg->db, &cbarg->node);
8409         dns_db_detach(&cbarg->db);
8410
8411         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8412
8413         *cbargp = NULL;
8414 }
8415
8416 static isc_result_t
8417 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8418                        dns_rdatatype_t qtype, dns_acache_t *acache,
8419                        dns_zone_t *zone, dns_db_t *db,
8420                        dns_dbversion_t *version, dns_dbnode_t *node,
8421                        dns_name_t *fname)
8422 {
8423         dns_rbtdb_t *rbtdb = rdataset->private1;
8424         dns_rbtnode_t *rbtnode = rdataset->private2;
8425         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8426         unsigned int current_count = rdataset->privateuint4;
8427         rdatasetheader_t *header;
8428         unsigned int total_count, count;
8429         nodelock_t *nodelock;
8430         isc_result_t result;
8431         acachectl_t *acarray;
8432         dns_acacheentry_t *newentry, *oldentry = NULL;
8433         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8434
8435         UNUSED(qtype);
8436
8437         if (type == dns_rdatasetadditional_fromcache)
8438                 return (ISC_R_SUCCESS);
8439
8440         header = (struct rdatasetheader *)(raw - sizeof(*header));
8441
8442         total_count = raw[0] * 256 + raw[1];
8443         INSIST(total_count > current_count);
8444         count = total_count - current_count - 1; /* should be private data */
8445
8446         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8447         if (newcbarg == NULL)
8448                 return (ISC_R_NOMEMORY);
8449         newcbarg->type = type;
8450         newcbarg->count = count;
8451         newcbarg->header = header;
8452         newcbarg->db = NULL;
8453         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8454         newcbarg->node = NULL;
8455         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8456                           &newcbarg->node);
8457         newentry = NULL;
8458         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8459                                         acache_callback, newcbarg, &newentry);
8460         if (result != ISC_R_SUCCESS)
8461                 goto fail;
8462         /* Set cache data in the new entry. */
8463         result = dns_acache_setentry(acache, newentry, zone, db,
8464                                      version, node, fname);
8465         if (result != ISC_R_SUCCESS)
8466                 goto fail;
8467
8468         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8469         NODE_LOCK(nodelock, isc_rwlocktype_write);
8470
8471         acarray = NULL;
8472         switch (type) {
8473         case dns_rdatasetadditional_fromauth:
8474                 acarray = header->additional_auth;
8475                 break;
8476         case dns_rdatasetadditional_fromglue:
8477                 acarray = header->additional_glue;
8478                 break;
8479         default:
8480                 INSIST(0);
8481         }
8482
8483         if (acarray == NULL) {
8484                 unsigned int i;
8485
8486                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8487                                       sizeof(acachectl_t));
8488
8489                 if (acarray == NULL) {
8490                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8491                         goto fail;
8492                 }
8493
8494                 for (i = 0; i < total_count; i++) {
8495                         acarray[i].entry = NULL;
8496                         acarray[i].cbarg = NULL;
8497                 }
8498         }
8499         switch (type) {
8500         case dns_rdatasetadditional_fromauth:
8501                 header->additional_auth = acarray;
8502                 break;
8503         case dns_rdatasetadditional_fromglue:
8504                 header->additional_glue = acarray;
8505                 break;
8506         default:
8507                 INSIST(0);
8508         }
8509
8510         if (acarray[count].entry != NULL) {
8511                 /*
8512                  * Swap the entry.  Delay cleaning-up the old entry since
8513                  * it would require a node lock.
8514                  */
8515                 oldentry = acarray[count].entry;
8516                 INSIST(acarray[count].cbarg != NULL);
8517                 oldcbarg = acarray[count].cbarg;
8518         }
8519         acarray[count].entry = newentry;
8520         acarray[count].cbarg = newcbarg;
8521
8522         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8523
8524         if (oldentry != NULL) {
8525                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8526                 dns_acache_detachentry(&oldentry);
8527         }
8528
8529         return (ISC_R_SUCCESS);
8530
8531  fail:
8532         if (newcbarg != NULL) {
8533                 if (newentry != NULL) {
8534                         acache_cancelentry(rbtdb->common.mctx, newentry,
8535                                            &newcbarg);
8536                         dns_acache_detachentry(&newentry);
8537                 } else {
8538                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8539                         dns_db_detach(&newcbarg->db);
8540                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8541                             sizeof(*newcbarg));
8542                 }
8543         }
8544
8545         return (result);
8546 }
8547
8548 static isc_result_t
8549 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8550                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8551 {
8552         dns_rbtdb_t *rbtdb = rdataset->private1;
8553         dns_rbtnode_t *rbtnode = rdataset->private2;
8554         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8555         unsigned int current_count = rdataset->privateuint4;
8556         rdatasetheader_t *header;
8557         nodelock_t *nodelock;
8558         unsigned int total_count, count;
8559         acachectl_t *acarray;
8560         dns_acacheentry_t *entry;
8561         acache_cbarg_t *cbarg;
8562
8563         UNUSED(qtype);          /* we do not use this value at least for now */
8564         UNUSED(acache);
8565
8566         if (type == dns_rdatasetadditional_fromcache)
8567                 return (ISC_R_SUCCESS);
8568
8569         header = (struct rdatasetheader *)(raw - sizeof(*header));
8570
8571         total_count = raw[0] * 256 + raw[1];
8572         INSIST(total_count > current_count);
8573         count = total_count - current_count - 1;
8574
8575         acarray = NULL;
8576         entry = NULL;
8577
8578         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8579         NODE_LOCK(nodelock, isc_rwlocktype_write);
8580
8581         switch (type) {
8582         case dns_rdatasetadditional_fromauth:
8583                 acarray = header->additional_auth;
8584                 break;
8585         case dns_rdatasetadditional_fromglue:
8586                 acarray = header->additional_glue;
8587                 break;
8588         default:
8589                 INSIST(0);
8590         }
8591
8592         if (acarray == NULL) {
8593                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8594                 return (ISC_R_NOTFOUND);
8595         }
8596
8597         entry = acarray[count].entry;
8598         if (entry == NULL) {
8599                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8600                 return (ISC_R_NOTFOUND);
8601         }
8602
8603         acarray[count].entry = NULL;
8604         cbarg = acarray[count].cbarg;
8605         acarray[count].cbarg = NULL;
8606
8607         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8608
8609         if (entry != NULL) {
8610                 if (cbarg != NULL)
8611                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8612                 dns_acache_detachentry(&entry);
8613         }
8614
8615         return (ISC_R_SUCCESS);
8616 }
8617
8618 /*%
8619  * Routines for LRU-based cache management.
8620  */
8621
8622 /*%
8623  * See if a given cache entry that is being reused needs to be updated
8624  * in the LRU-list.  From the LRU management point of view, this function is
8625  * expected to return true for almost all cases.  When used with threads,
8626  * however, this may cause a non-negligible performance penalty because a
8627  * writer lock will have to be acquired before updating the list.
8628  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8629  * function returns true if the entry has not been updated for some period of
8630  * time.  We differentiate the NS or glue address case and the others since
8631  * experiments have shown that the former tends to be accessed relatively
8632  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8633  * may cause external queries at a higher level zone, involving more
8634  * transactions).
8635  *
8636  * Caller must hold the node (read or write) lock.
8637  */
8638 static inline isc_boolean_t
8639 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8640         if ((header->attributes &
8641              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8642                 return (ISC_FALSE);
8643
8644 #if DNS_RBTDB_LIMITLRUUPDATE
8645         if (header->type == dns_rdatatype_ns ||
8646             (header->trust == dns_trust_glue &&
8647              (header->type == dns_rdatatype_a ||
8648               header->type == dns_rdatatype_aaaa))) {
8649                 /*
8650                  * Glue records are updated if at least 60 seconds have passed
8651                  * since the previous update time.
8652                  */
8653                 return (header->last_used + 60 <= now);
8654         }
8655
8656         /* Other records are updated if 5 minutes have passed. */
8657         return (header->last_used + 300 <= now);
8658 #else
8659         UNUSED(now);
8660
8661         return (ISC_TRUE);
8662 #endif
8663 }
8664
8665 /*%
8666  * Update the timestamp of a given cache entry and move it to the head
8667  * of the corresponding LRU list.
8668  *
8669  * Caller must hold the node (write) lock.
8670  *
8671  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8672  */
8673 static void
8674 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8675               isc_stdtime_t now)
8676 {
8677         INSIST(IS_CACHE(rbtdb));
8678
8679         /* To be checked: can we really assume this? XXXMLG */
8680         INSIST(ISC_LINK_LINKED(header, link));
8681
8682         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8683         header->last_used = now;
8684         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8685 }
8686
8687 /*%
8688  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8689  * under an overmem condition.  To recover from this condition quickly, up to
8690  * 2 entries will be purged.  This process is triggered while adding a new
8691  * entry, and we specifically avoid purging entries in the same LRU bucket as
8692  * the one to which the new entry will belong.  Otherwise, we might purge
8693  * entries of the same name of different RR types while adding RRsets from a
8694  * single response (consider the case where we're adding A and AAAA glue records
8695  * of the same NS name).
8696  */
8697 static void
8698 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8699               isc_stdtime_t now, isc_boolean_t tree_locked)
8700 {
8701         rdatasetheader_t *header, *header_prev;
8702         unsigned int locknum;
8703         int purgecount = 2;
8704
8705         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8706              locknum != locknum_start && purgecount > 0;
8707              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8708                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8709                           isc_rwlocktype_write);
8710
8711                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8712                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8713                         expire_header(rbtdb, header, tree_locked);
8714                         purgecount--;
8715                 }
8716
8717                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8718                      header != NULL && purgecount > 0;
8719                      header = header_prev) {
8720                         header_prev = ISC_LIST_PREV(header, link);
8721                         /*
8722                          * Unlink the entry at this point to avoid checking it
8723                          * again even if it's currently used someone else and
8724                          * cannot be purged at this moment.  This entry won't be
8725                          * referenced any more (so unlinking is safe) since the
8726                          * TTL was reset to 0.
8727                          */
8728                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8729                                         link);
8730                         expire_header(rbtdb, header, tree_locked);
8731                         purgecount--;
8732                 }
8733
8734                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8735                                     isc_rwlocktype_write);
8736         }
8737 }
8738
8739 static void
8740 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8741               isc_boolean_t tree_locked)
8742 {
8743         set_ttl(rbtdb, header, 0);
8744         header->attributes |= RDATASET_ATTR_STALE;
8745         header->node->dirty = 1;
8746
8747         /*
8748          * Caller must hold the node (write) lock.
8749          */
8750
8751         if (dns_rbtnode_refcurrent(header->node) == 0) {
8752                 /*
8753                  * If no one else is using the node, we can clean it up now.
8754                  * We first need to gain a new reference to the node to meet a
8755                  * requirement of decrement_reference().
8756                  */
8757                 new_reference(rbtdb, header->node);
8758                 decrement_reference(rbtdb, header->node, 0,
8759                                     isc_rwlocktype_write,
8760                                     tree_locked ? isc_rwlocktype_write :
8761                                     isc_rwlocktype_none, ISC_FALSE);
8762         }
8763 }