]> CyberLeo.Net >> Repos - FreeBSD/releng/9.2.git/blob - contrib/bind9/lib/dns/rbtdb.c
- Copy stable/9 to releng/9.2 as part of the 9.2-RELEASE cycle.
[FreeBSD/releng/9.2.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2012  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id$ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rpz.h>
57 #include <dns/rdata.h>
58 #include <dns/rdataset.h>
59 #include <dns/rdatasetiter.h>
60 #include <dns/rdataslab.h>
61 #include <dns/rdatastruct.h>
62 #include <dns/result.h>
63 #include <dns/stats.h>
64 #include <dns/view.h>
65 #include <dns/zone.h>
66 #include <dns/zonekey.h>
67
68 #ifdef DNS_RBTDB_VERSION64
69 #include "rbtdb64.h"
70 #else
71 #include "rbtdb.h"
72 #endif
73
74 #ifdef DNS_RBTDB_VERSION64
75 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
76 #else
77 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
78 #endif
79
80 /*%
81  * Note that "impmagic" is not the first four bytes of the struct, so
82  * ISC_MAGIC_VALID cannot be used.
83  */
84 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
85                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
86
87 #ifdef DNS_RBTDB_VERSION64
88 typedef isc_uint64_t                    rbtdb_serial_t;
89 /*%
90  * Make casting easier in symbolic debuggers by using different names
91  * for the 64 bit version.
92  */
93 #define dns_rbtdb_t dns_rbtdb64_t
94 #define rdatasetheader_t rdatasetheader64_t
95 #define rbtdb_version_t rbtdb_version64_t
96 #else
97 typedef isc_uint32_t                    rbtdb_serial_t;
98 #endif
99
100 typedef isc_uint32_t                    rbtdb_rdatatype_t;
101
/*
 * An rbtdb_rdatatype_t is a 32-bit value made of two 16-bit halves:
 * the "base" type lives in the low half and the "ext" type in the high
 * half.  RBTDB_RDATATYPE_BASE() and RBTDB_RDATATYPE_EXT() pull the halves
 * back out; RBTDB_RDATATYPE_VALUE(b, e) builds the packed value.
 *
 * Both arguments of RBTDB_RDATATYPE_VALUE() are converted to
 * rbtdb_rdatatype_t *before* any shifting so that the shift is carried out
 * in an unsigned 32-bit type.  Shifting the promoted (signed int) value
 * would be undefined for 'e' >= 0x8000.  The base is masked to 16 bits so
 * that it can never spill into the ext half.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)((((rbtdb_rdatatype_t)(e)) << 16) | \
                             (((rbtdb_rdatatype_t)(b)) & 0xFFFF)))
105
106 #define RBTDB_RDATATYPE_SIGNSEC \
107                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
108 #define RBTDB_RDATATYPE_SIGNSEC3 \
109                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
110 #define RBTDB_RDATATYPE_SIGNS \
111                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
112 #define RBTDB_RDATATYPE_SIGCNAME \
113                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
114 #define RBTDB_RDATATYPE_SIGDNAME \
115                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
116 #define RBTDB_RDATATYPE_SIGDDS \
117                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
118 #define RBTDB_RDATATYPE_NCACHEANY \
119                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
120
121 /*
122  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
123  * Using rwlock is effective with regard to lookup performance only when
124  * it is implemented in an efficient way.
125  * Otherwise, it is generally wise to stick to the simple locking since rwlock
126  * would require more memory or can even make lookups slower due to its own
127  * overhead (when it internally calls mutex locks).
128  */
129 #ifdef ISC_RWLOCK_USEATOMIC
130 #define DNS_RBTDB_USERWLOCK 1
131 #else
132 #define DNS_RBTDB_USERWLOCK 0
133 #endif
134
135 #if DNS_RBTDB_USERWLOCK
136 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
137 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
138 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
139 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
140 #else
141 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
142 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
143 #define RBTDB_LOCK(l, t)        LOCK(l)
144 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
145 #endif
146
147 /*
148  * Since node locking is sensitive to both performance and memory footprint,
149  * we need some trick here.  If we have both high-performance rwlock and
150  * high performance and small-memory reference counters, we use rwlock for
151  * node lock and isc_refcount for node references.  In this case, we don't have
152  * to protect the access to the counters by locks.
153  * Otherwise, we simply use ordinary mutex lock for node locking, and use
154  * simple integers as reference counters which is protected by the lock.
155  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
156  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
157  * counters first and then protect other parts of a node as read-only data.
158  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
159  * provided for these special cases.  When we can use the efficient backend
160  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
161  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
162  * section including the access to the reference counter.
163  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
164  * section is also protected by NODE_STRONGLOCK().
165  */
166 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
167 typedef isc_rwlock_t nodelock_t;
168
169 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
170 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
171 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
172 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
173 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
174
175 #define NODE_STRONGLOCK(l)      ((void)0)
176 #define NODE_STRONGUNLOCK(l)    ((void)0)
177 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
178 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
179 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
180 #else
181 typedef isc_mutex_t nodelock_t;
182
183 #define NODE_INITLOCK(l)        isc_mutex_init(l)
184 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
185 #define NODE_LOCK(l, t)         LOCK(l)
186 #define NODE_UNLOCK(l, t)       UNLOCK(l)
187 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
188
189 #define NODE_STRONGLOCK(l)      LOCK(l)
190 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
191 #define NODE_WEAKLOCK(l, t)     ((void)0)
192 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
193 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
194 #endif
195
196 /*%
197  * Whether to rate-limit updating the LRU to avoid possible thread contention.
198  * Our performance measurement has shown the cost is marginal, so it's defined
199  * to be 0 by default either with or without threads.
200  */
201 #ifndef DNS_RBTDB_LIMITLRUUPDATE
202 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 #endif
204
/*
 * Allow clients with a virtual time of up to 5 minutes in the past to see
 * records that would otherwise have expired.
 */
209 #define RBTDB_VIRTUAL 300
210
/*
 * Auxiliary data that an rdatasetheader_t can point to through its
 * 'noqname' and 'closest' members.
 *
 * NOTE(review): 'neg' and 'negsig' are untyped here; presumably they hold
 * the stored negative-proof rdataset and its signature -- confirm against
 * the code that fills them in.
 */
struct noqname {
        dns_name_t      name;
        void *          neg;
        void *          negsig;
        dns_rdatatype_t type;
};
217
218 typedef struct acachectl acachectl_t;
219
/*
 * Header describing one rdataset stored at a node.  Headers are chained
 * through 'next' (to the next type) and 'down' (to older versions of the
 * same rdataset); see the comments on those members.
 */
typedef struct rdatasetheader {
        /*%
         * Locked by the owning node's lock.
         */
        rbtdb_serial_t                  serial;
        /* Compared by ttl_sooner(); changed through set_ttl(). */
        dns_ttl_t                       rdh_ttl;
        /* Packed type; split with RBTDB_RDATATYPE_BASE()/_EXT(). */
        rbtdb_rdatatype_t               type;
        /* Bitwise OR of RDATASET_ATTR_* flags. */
        isc_uint16_t                    attributes;
        dns_trust_t                     trust;
        struct noqname                  *noqname;
        struct noqname                  *closest;
        /*
         * We don't use the LIST macros for the 'next'/'down' chains
         * below, because the LIST structure has both head and tail
         * pointers, and is doubly linked.
         */

        struct rdatasetheader           *next;
        /*%<
         * If this is the top header for an rdataset, 'next' points
         * to the top header for the next rdataset (i.e., the next type).
         * Otherwise, it points up to the header whose down pointer points
         * at this header.
         */

        struct rdatasetheader           *down;
        /*%<
         * Points to the header for the next older version of
         * this rdataset.
         */

        isc_uint32_t                    count;
        /*%<
         * Monotonically increased every time this rdataset is bound so
         * that it is used as the base of the starting point in DNS
         * responses when the "cyclic" rrset-order is required.  Since the
         * ordering should not be so crucial, no lock is set for the
         * counter for performance reasons.
         */

        acachectl_t                     *additional_auth;
        acachectl_t                     *additional_glue;

        dns_rbtnode_t                   *node;
        isc_stdtime_t                   last_used;
        ISC_LINK(struct rdatasetheader) link;

        unsigned int                    heap_index;
        /*%<
         * Used for TTL-based cache cleaning.  Written by set_index();
         * set_ttl() leaves the heap alone when this is 0.
         */
        /* Compared by resign_sooner(). */
        isc_stdtime_t                   resign;
} rdatasetheader_t;
272
273 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
274 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
275
276 #define RDATASET_ATTR_NONEXISTENT       0x0001
277 #define RDATASET_ATTR_STALE             0x0002
278 #define RDATASET_ATTR_IGNORE            0x0004
279 #define RDATASET_ATTR_RETAIN            0x0008
280 #define RDATASET_ATTR_NXDOMAIN          0x0010
281 #define RDATASET_ATTR_RESIGN            0x0020
282 #define RDATASET_ATTR_STATCOUNT         0x0040
283 #define RDATASET_ATTR_OPTOUT            0x0080
284 #define RDATASET_ATTR_NEGATIVE          0x0100
285
286 typedef struct acache_cbarg {
287         dns_rdatasetadditional_t        type;
288         unsigned int                    count;
289         dns_db_t                        *db;
290         dns_dbnode_t                    *node;
291         rdatasetheader_t                *header;
292 } acache_cbarg_t;
293
294 struct acachectl {
295         dns_acacheentry_t               *entry;
296         acache_cbarg_t                  *cbarg;
297 };
298
299 /*
300  * XXX
301  * When the cache will pre-expire data (due to memory low or other
302  * situations) before the rdataset's TTL has expired, it MUST
303  * respect the RETAIN bit and not expire the data until its TTL is
304  * expired.
305  */
306
307 #undef IGNORE                   /* WIN32 winbase.h defines this. */
308
309 #define EXISTS(header) \
310         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
311 #define NONEXISTENT(header) \
312         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
313 #define IGNORE(header) \
314         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
315 #define RETAIN(header) \
316         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
317 #define NXDOMAIN(header) \
318         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
319 #define RESIGN(header) \
320         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
321 #define OPTOUT(header) \
322         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
323 #define NEGATIVE(header) \
324         (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
325
326 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
327
328 /*%
329  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
330  * There is a tradeoff issue about configuring this value: if this is too
331  * small, it may cause heavier contention between threads; if this is too large,
332  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
333  * The default value should work well for most environments, but this can
334  * also be configurable at compilation time via the
335  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
336  * 1 due to the assumption of overmem_purge().
337  */
338 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
339 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
340 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
341 #else
342 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
343 #endif
344 #else
345 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
346 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
347
/*
 * One node-lock bucket.  dns_rbtdb_t refers to these through its
 * 'node_locks' member.
 */
typedef struct {
        nodelock_t                      lock;
        /* Protected in the refcount routines. */
        isc_refcount_t                  references;
        /* Locked by lock. */
        isc_boolean_t                   exiting;
} rbtdb_nodelock_t;
355
/*
 * Element of an rbtdb_changedlist_t (a version keeps one such list in
 * 'changed_list'); each element refers to one tree node.
 *
 * NOTE(review): the exact meaning of 'dirty' is not visible in this part
 * of the file -- confirm against the code that sets it.
 */
typedef struct rbtdb_changed {
        dns_rbtnode_t *                 node;
        isc_boolean_t                   dirty;
        ISC_LINK(struct rbtdb_changed)  link;
} rbtdb_changed_t;
361
362 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
363
364 typedef enum {
365         dns_db_insecure,
366         dns_db_partial,
367         dns_db_secure
368 } dns_db_secure_t;
369
370 typedef struct dns_rbtdb dns_rbtdb_t;
371
/*
 * One version of the database.  Versions are linked into the database's
 * 'open_versions' list through 'link', and the database also points at
 * its 'current_version' and 'future_version'.
 */
typedef struct rbtdb_version {
        /* Not locked */
        rbtdb_serial_t                  serial;
        dns_rbtdb_t *                   rbtdb;
        /*
         * Protected in the refcount routines.
         * XXXJT: should we change the lock policy based on the refcount
         * performance?
         */
        isc_refcount_t                  references;
        /* Locked by database lock. */
        isc_boolean_t                   writer;
        isc_boolean_t                   commit_ok;
        rbtdb_changedlist_t             changed_list;
        rdatasetheaderlist_t            resigned_list;
        ISC_LINK(struct rbtdb_version)  link;
        dns_db_secure_t                 secure;
        isc_boolean_t                   havensec3;
        /* NSEC3 parameters */
        dns_hash_t                      hash;
        isc_uint8_t                     flags;
        isc_uint16_t                    iterations;
        /* NOTE(review): presumably the number of bytes used in 'salt'. */
        isc_uint8_t                     salt_length;
        unsigned char                   salt[DNS_NSEC3_SALTSIZE];
} rbtdb_version_t;
397
398 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
399
/*
 * The database object itself.  'common' is the first member, so a
 * dns_db_t pointer handed to this implementation can be cast to a
 * dns_rbtdb_t pointer (as attach() does); VALID_RBTDB() then checks
 * common.impmagic.
 */
struct dns_rbtdb {
        /* Unlocked. */
        dns_db_t                        common;
        /* Locks the data in this struct */
#if DNS_RBTDB_USERWLOCK
        isc_rwlock_t                    lock;
#else
        isc_mutex_t                     lock;
#endif
        /* Locks the tree structure (prevents nodes appearing/disappearing) */
        isc_rwlock_t                    tree_lock;
        /* Locks for individual tree nodes */
        unsigned int                    node_lock_count;
        rbtdb_nodelock_t *              node_locks;
        dns_rbtnode_t *                 origin_node;
        dns_stats_t *                   rrsetstats; /* cache DB only */
        /* Locked by lock. */
        unsigned int                    active;
        isc_refcount_t                  references;
        /* Bitwise OR of RBTDB_ATTR_* flags. */
        unsigned int                    attributes;
        rbtdb_serial_t                  current_serial;
        rbtdb_serial_t                  least_serial;
        rbtdb_serial_t                  next_serial;
        rbtdb_version_t *               current_version;
        rbtdb_version_t *               future_version;
        rbtdb_versionlist_t             open_versions;
        isc_task_t *                    task;
        dns_dbnode_t                    *soanode;
        dns_dbnode_t                    *nsnode;

        /*
         * This is a linked list used to implement the LRU cache.  There will
         * be node_lock_count linked lists here.  Nodes in bucket 1 will be
         * placed on the linked list rdatasets[1].
         */
        rdatasetheaderlist_t            *rdatasets;

        /*%
         * Temporary storage for stale cache nodes and dynamically deleted
         * nodes that await being cleaned up.
         */
        rbtnodelist_t                   *deadnodes;

        /*
         * Heaps.  These are used for TTL based expiry in a cache,
         * or for zone resigning in a zone DB.  hmctx is the memory
         * context to use for the heap (which differs from the main
         * database memory context in the case of a cache).
         * set_ttl() indexes 'heaps' by the owning node's 'locknum' and
         * tolerates both 'heaps' and an individual entry being NULL.
         */
        isc_mem_t *                     hmctx;
        isc_heap_t                      **heaps;

        /* Locked by tree_lock. */
        dns_rbt_t *                     tree;
        dns_rbt_t *                     nsec;
        dns_rbt_t *                     nsec3;
        dns_rpz_cidr_t *                rpz_cidr;

        /* Unlocked */
        unsigned int                    quantum;
};
461
462 #define RBTDB_ATTR_LOADED               0x01
463 #define RBTDB_ATTR_LOADING              0x02
464
465 /*%
466  * Search Context
467  */
468 typedef struct {
469         dns_rbtdb_t *           rbtdb;
470         rbtdb_version_t *       rbtversion;
471         rbtdb_serial_t          serial;
472         unsigned int            options;
473         dns_rbtnodechain_t      chain;
474         isc_boolean_t           copy_name;
475         isc_boolean_t           need_cleanup;
476         isc_boolean_t           wild;
477         dns_rbtnode_t *         zonecut;
478         rdatasetheader_t *      zonecut_rdataset;
479         rdatasetheader_t *      zonecut_sigrdataset;
480         dns_fixedname_t         zonecut_name;
481         isc_stdtime_t           now;
482 } rbtdb_search_t;
483
484 /*%
485  * Load Context
486  */
487 typedef struct {
488         dns_rbtdb_t *           rbtdb;
489         isc_stdtime_t           now;
490 } rbtdb_load_t;
491
492 static void rdataset_disassociate(dns_rdataset_t *rdataset);
493 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
494 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
495 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
496 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
497 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
498 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
499                                         dns_name_t *name,
500                                         dns_rdataset_t *neg,
501                                         dns_rdataset_t *negsig);
502 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
503                                         dns_name_t *name,
504                                         dns_rdataset_t *neg,
505                                         dns_rdataset_t *negsig);
506 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
507                                            dns_rdatasetadditional_t type,
508                                            dns_rdatatype_t qtype,
509                                            dns_acache_t *acache,
510                                            dns_zone_t **zonep,
511                                            dns_db_t **dbp,
512                                            dns_dbversion_t **versionp,
513                                            dns_dbnode_t **nodep,
514                                            dns_name_t *fname,
515                                            dns_message_t *msg,
516                                            isc_stdtime_t now);
517 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
518                                            dns_rdatasetadditional_t type,
519                                            dns_rdatatype_t qtype,
520                                            dns_acache_t *acache,
521                                            dns_zone_t *zone,
522                                            dns_db_t *db,
523                                            dns_dbversion_t *version,
524                                            dns_dbnode_t *node,
525                                            dns_name_t *fname);
526 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
527                                            dns_rdataset_t *rdataset,
528                                            dns_rdatasetadditional_t type,
529                                            dns_rdatatype_t qtype);
530 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
531                                               isc_stdtime_t now);
532 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
533                           isc_stdtime_t now);
534 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
535                           isc_boolean_t tree_locked);
536 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
537                           isc_stdtime_t now, isc_boolean_t tree_locked);
538 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
539                                   rdatasetheader_t *newheader);
540 static void prune_tree(isc_task_t *task, isc_event_t *event);
541 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
542 static void rdataset_expire(dns_rdataset_t *rdataset);
543
544 static dns_rdatasetmethods_t rdataset_methods = {
545         rdataset_disassociate,
546         rdataset_first,
547         rdataset_next,
548         rdataset_current,
549         rdataset_clone,
550         rdataset_count,
551         NULL,
552         rdataset_getnoqname,
553         NULL,
554         rdataset_getclosest,
555         rdataset_getadditional,
556         rdataset_setadditional,
557         rdataset_putadditional,
558         rdataset_settrust,
559         rdataset_expire
560 };
561
562 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
563 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
564 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
565 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
566                                  dns_rdataset_t *rdataset);
567
568 static dns_rdatasetitermethods_t rdatasetiter_methods = {
569         rdatasetiter_destroy,
570         rdatasetiter_first,
571         rdatasetiter_next,
572         rdatasetiter_current
573 };
574
575 typedef struct rbtdb_rdatasetiter {
576         dns_rdatasetiter_t              common;
577         rdatasetheader_t *              current;
578 } rbtdb_rdatasetiter_t;
579
580 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
581 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
582 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
583 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
584                                         dns_name_t *name);
585 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
586 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
587 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
588                                            dns_dbnode_t **nodep,
589                                            dns_name_t *name);
590 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
591 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
592                                           dns_name_t *name);
593
594 static dns_dbiteratormethods_t dbiterator_methods = {
595         dbiterator_destroy,
596         dbiterator_first,
597         dbiterator_last,
598         dbiterator_seek,
599         dbiterator_prev,
600         dbiterator_next,
601         dbiterator_current,
602         dbiterator_pause,
603         dbiterator_origin
604 };
605
606 #define DELETION_BATCH_MAX 64
607
/*
 * State kept by this implementation's database iterator (the functions
 * listed in dbiterator_methods operate on it).
 *
 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
 */
typedef struct rbtdb_dbiterator {
        dns_dbiterator_t                common;
        isc_boolean_t                   paused;
        isc_boolean_t                   new_origin;
        isc_rwlocktype_t                tree_locked;
        isc_result_t                    result;
        dns_fixedname_t                 name;
        dns_fixedname_t                 origin;
        dns_rbtnodechain_t              chain;
        dns_rbtnodechain_t              nsec3chain;
        /* NOTE(review): presumably points at 'chain' or 'nsec3chain'. */
        dns_rbtnodechain_t              *current;
        dns_rbtnode_t                   *node;
        /* Holds at most DELETION_BATCH_MAX node pointers. */
        dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
        /* NOTE(review): presumably the number of 'deletions' slots in use. */
        int                             delete;
        isc_boolean_t                   nsec3only;
        isc_boolean_t                   nonsec3;
} rbtdb_dbiterator_t;
628
629
630 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
631 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
632
633 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
634                        isc_event_t *event);
635 static void overmem(dns_db_t *db, isc_boolean_t overmem);
636 #ifdef BIND9
637 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
638 #endif
639
/*%
 * 'init_count' is used to initialize 'newheader->count', which in turn
 * is used to determine where in the cycle "rrset-order cyclic" starts.
 * We don't lock this as we don't care about simultaneous updates.
 *
 * Note:
 *      Both init_count and header->count can be ISC_UINT32_MAX.
 *      The count on the returned rdataset, however, can't be, as
 *      that indicates that the database does not implement cyclic
 *      processing.
 */
651 static unsigned int init_count;
652
653 /*
654  * Locking
655  *
656  * If a routine is going to lock more than one lock in this module, then
657  * the locking must be done in the following order:
658  *
659  *      Tree Lock
660  *
661  *      Node Lock       (Only one from the set may be locked at one time by
662  *                       any caller)
663  *
664  *      Database Lock
665  *
666  * Failure to follow this hierarchy can result in deadlock.
667  */
668
669 /*
670  * Deleting Nodes
671  *
672  * For zone databases the node for the origin of the zone MUST NOT be deleted.
673  */
674
675
676 /*
677  * DB Routines
678  */
679
680 static void
681 attach(dns_db_t *source, dns_db_t **targetp) {
682         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
683
684         REQUIRE(VALID_RBTDB(rbtdb));
685
686         isc_refcount_increment(&rbtdb->references, NULL);
687
688         *targetp = source;
689 }
690
691 static void
692 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
693         dns_rbtdb_t *rbtdb = event->ev_arg;
694
695         UNUSED(task);
696
697         free_rbtdb(rbtdb, ISC_TRUE, event);
698 }
699
700 static void
701 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
702                   isc_boolean_t increment)
703 {
704         dns_rdatastatstype_t statattributes = 0;
705         dns_rdatastatstype_t base = 0;
706         dns_rdatastatstype_t type;
707
708         /* At the moment we count statistics only for cache DB */
709         INSIST(IS_CACHE(rbtdb));
710
711         if (NEGATIVE(header)) {
712                 if (NXDOMAIN(header))
713                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
714                 else {
715                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
716                         base = RBTDB_RDATATYPE_EXT(header->type);
717                 }
718         } else
719                 base = RBTDB_RDATATYPE_BASE(header->type);
720
721         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
722         if (increment)
723                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
724         else
725                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
726 }
727
728 static void
729 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
730         int idx;
731         isc_heap_t *heap;
732         dns_ttl_t oldttl;
733
734         oldttl = header->rdh_ttl;
735         header->rdh_ttl = newttl;
736
737         if (!IS_CACHE(rbtdb))
738                 return;
739
740         /*
741          * It's possible the rbtdb is not a cache.  If this is the case,
742          * we will not have a heap, and we move on.  If we do, though,
743          * we might need to adjust things.
744          */
745         if (header->heap_index == 0 || newttl == oldttl)
746                 return;
747         idx = header->node->locknum;
748         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
749             return;
750         heap = rbtdb->heaps[idx];
751
752         if (newttl < oldttl)
753                 isc_heap_increased(heap, header->heap_index);
754         else
755                 isc_heap_decreased(heap, header->heap_index);
756 }
757
758 /*%
759  * These functions allow the heap code to rank the priority of each
760  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
761  */
762 static isc_boolean_t
763 ttl_sooner(void *v1, void *v2) {
764         rdatasetheader_t *h1 = v1;
765         rdatasetheader_t *h2 = v2;
766
767         if (h1->rdh_ttl < h2->rdh_ttl)
768                 return (ISC_TRUE);
769         return (ISC_FALSE);
770 }
771
772 static isc_boolean_t
773 resign_sooner(void *v1, void *v2) {
774         rdatasetheader_t *h1 = v1;
775         rdatasetheader_t *h2 = v2;
776
777         if (h1->resign < h2->resign)
778                 return (ISC_TRUE);
779         return (ISC_FALSE);
780 }
781
782 /*%
783  * This function sets the heap index into the header.
784  */
785 static void
786 set_index(void *what, unsigned int index) {
787         rdatasetheader_t *h = what;
788
789         h->heap_index = index;
790 }
791
792 /*%
793  * Work out how many nodes can be deleted in the time between two
794  * requests to the nameserver.  Smooth the resulting number and use it
795  * as a estimate for the number of nodes to be deleted in the next
796  * iteration.
797  */
798 static unsigned int
799 adjust_quantum(unsigned int old, isc_time_t *start) {
800         unsigned int pps = dns_pps;     /* packets per second */
801         unsigned int interval;
802         isc_uint64_t usecs;
803         isc_time_t end;
804         unsigned int new;
805
806         if (pps < 100)
807                 pps = 100;
808         isc_time_now(&end);
809
810         interval = 1000000 / pps;       /* interval in usec */
811         if (interval == 0)
812                 interval = 1;
813         usecs = isc_time_microdiff(&end, start);
814         if (usecs == 0) {
815                 /*
816                  * We were unable to measure the amount of time taken.
817                  * Double the nodes deleted next time.
818                  */
819                 old *= 2;
820                 if (old > 1000)
821                         old = 1000;
822                 return (old);
823         }
824         new = old * interval;
825         new /= (unsigned int)usecs;
826         if (new == 0)
827                 new = 1;
828         else if (new > 1000)
829                 new = 1000;
830
831         /* Smooth */
832         new = (new + old * 3) / 4;
833
834         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
835                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
836
837         return (new);
838 }
839
/*%
 * Destroy 'rbtdb' and release everything it owns.
 *
 * The three trees (tree, nsec, nsec3) are torn down one after another with
 * dns_rbt_destroy2(), passing rbtdb->quantum.  If that call returns
 * ISC_R_QUOTA, an event whose action is free_rbtdb_callback is sent to
 * rbtdb->task and this function returns early, leaving the remaining
 * cleanup undone.  NOTE(review): presumably the callback re-enters this
 * function with that event to continue -- confirm against
 * free_rbtdb_callback.
 *
 * 'log'   - when true, a debug message is written once the trees are gone.
 * 'event' - NULL on a direct call; otherwise an event that is re-sent if
 *           the work must be deferred again, or freed once the trees are
 *           gone.
 */
static void
free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
        unsigned int i;
        isc_ondestroy_t ondest;
        isc_result_t result;
        char buf[DNS_NAME_FORMATSIZE];
        dns_rbt_t **treep;
        isc_time_t start;

        /*
         * NOTE(review): the meaning of passing (isc_boolean_t)-1 to
         * overmem() is not visible in this function -- confirm.
         */
        if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
                overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);

        REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
        REQUIRE(rbtdb->future_version == NULL);

        /*
         * Drop what must be the last reference to the current version
         * (the INSIST checks the count reaches zero) and free it.
         */
        if (rbtdb->current_version != NULL) {
                unsigned int refs;

                isc_refcount_decrement(&rbtdb->current_version->references,
                                       &refs);
                INSIST(refs == 0);
                UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
                isc_refcount_destroy(&rbtdb->current_version->references);
                isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
                            sizeof(rbtdb_version_t));
        }

        /*
         * We assume the number of remaining dead nodes is reasonably small;
         * the overhead of unlinking all nodes here should be negligible.
         */
        for (i = 0; i < rbtdb->node_lock_count; i++) {
                dns_rbtnode_t *node;

                node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
                while (node != NULL) {
                        ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
                        node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
                }
        }

        /*
         * Direct (event-less) entry: pick the initial quantum -- 100 when
         * a task is available, 0 otherwise.  NOTE(review): 0 presumably
         * tells dns_rbt_destroy2() not to limit its work -- confirm.
         */
        if (event == NULL)
                rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;

        for (;;) {
                /*
                 * pick the next tree to (start to) destroy
                 */
                treep = &rbtdb->tree;
                if (*treep == NULL) {
                        treep = &rbtdb->nsec;
                        if (*treep == NULL) {
                                treep = &rbtdb->nsec3;
                                /*
                                 * we're finished after clear cutting
                                 */
                                if (*treep == NULL)
                                        break;
                        }
                }

                isc_time_now(&start);
                result = dns_rbt_destroy2(treep, rbtdb->quantum);
                if (result == ISC_R_QUOTA) {
                        /*
                         * The tree was not fully destroyed in this call.
                         * Re-tune the quantum from the time just spent and
                         * hand the rest of the work to the task.
                         */
                        INSIST(rbtdb->task != NULL);
                        if (rbtdb->quantum != 0)
                                rbtdb->quantum = adjust_quantum(rbtdb->quantum,
                                                                &start);
                        if (event == NULL)
                                event = isc_event_allocate(rbtdb->common.mctx,
                                                           NULL,
                                                         DNS_EVENT_FREESTORAGE,
                                                           free_rbtdb_callback,
                                                           rbtdb,
                                                           sizeof(isc_event_t));
                        /*
                         * No event could be allocated: keep destroying from
                         * within this call instead of deferring.
                         */
                        if (event == NULL)
                                continue;
                        isc_task_send(rbtdb->task, &event);
                        return;
                }
                INSIST(result == ISC_R_SUCCESS && *treep == NULL);
        }

        /*
         * All three trees are gone; release everything else.
         */
        if (event != NULL)
                isc_event_free(&event);
        if (log) {
                if (dns_name_dynamic(&rbtdb->common.origin))
                        dns_name_format(&rbtdb->common.origin, buf,
                                        sizeof(buf));
                else
                        strcpy(buf, "<UNKNOWN>");
                isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
                              DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
                              "done free_rbtdb(%s)", buf);
        }
        if (dns_name_dynamic(&rbtdb->common.origin))
                dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
        for (i = 0; i < rbtdb->node_lock_count; i++) {
                isc_refcount_destroy(&rbtdb->node_locks[i].references);
                NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
        }

        /*
         * Clean up LRU / re-signing order lists.
         */
        if (rbtdb->rdatasets != NULL) {
                for (i = 0; i < rbtdb->node_lock_count; i++)
                        INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
                isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
                            rbtdb->node_lock_count *
                            sizeof(rdatasetheaderlist_t));
        }
        /*
         * Clean up dead node buckets.
         */
        if (rbtdb->deadnodes != NULL) {
                for (i = 0; i < rbtdb->node_lock_count; i++)
                        INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
                isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
                    rbtdb->node_lock_count * sizeof(rbtnodelist_t));
        }
        /*
         * Clean up heap objects.  Note the heap array is returned to
         * rbtdb->hmctx, not to rbtdb->common.mctx.
         */
        if (rbtdb->heaps != NULL) {
                for (i = 0; i < rbtdb->node_lock_count; i++)
                        isc_heap_destroy(&rbtdb->heaps[i]);
                isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
                            rbtdb->node_lock_count * sizeof(isc_heap_t *));
        }

        if (rbtdb->rrsetstats != NULL)
                dns_stats_detach(&rbtdb->rrsetstats);

#ifdef BIND9
        if (rbtdb->rpz_cidr != NULL)
                dns_rpz_cidr_free(&rbtdb->rpz_cidr);
#endif

        isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
                    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
        isc_rwlock_destroy(&rbtdb->tree_lock);
        isc_refcount_destroy(&rbtdb->references);
        if (rbtdb->task != NULL)
                isc_task_detach(&rbtdb->task);

        RBTDB_DESTROYLOCK(&rbtdb->lock);
        /* Invalidate the magic numbers before the structure is released. */
        rbtdb->common.magic = 0;
        rbtdb->common.impmagic = 0;
        /*
         * Copy the ondestroy state out of the structure first, because the
         * structure's memory is returned before the notification is sent.
         * NOTE(review): if isc_mem_putanddetach() clears its pointer
         * argument, 'rbtdb' passed to isc_ondestroy_notify() is NULL --
         * confirm this is intended.
         */
        ondest = rbtdb->common.ondest;
        isc_mem_detach(&rbtdb->hmctx);
        isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
        isc_ondestroy_notify(&ondest, rbtdb);
}
994
995 static inline void
996 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
997         isc_boolean_t want_free = ISC_FALSE;
998         unsigned int i;
999         unsigned int inactive = 0;
1000
1001         /* XXX check for open versions here */
1002
1003         if (rbtdb->soanode != NULL)
1004                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
1005         if (rbtdb->nsnode != NULL)
1006                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
1007
1008         /*
1009          * Even though there are no external direct references, there still
1010          * may be nodes in use.
1011          */
1012         for (i = 0; i < rbtdb->node_lock_count; i++) {
1013                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1014                 rbtdb->node_locks[i].exiting = ISC_TRUE;
1015                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1016                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1017                     == 0) {
1018                         inactive++;
1019                 }
1020         }
1021
1022         if (inactive != 0) {
1023                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1024                 rbtdb->active -= inactive;
1025                 if (rbtdb->active == 0)
1026                         want_free = ISC_TRUE;
1027                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1028                 if (want_free) {
1029                         char buf[DNS_NAME_FORMATSIZE];
1030                         if (dns_name_dynamic(&rbtdb->common.origin))
1031                                 dns_name_format(&rbtdb->common.origin, buf,
1032                                                 sizeof(buf));
1033                         else
1034                                 strcpy(buf, "<UNKNOWN>");
1035                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1036                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1037                                       "calling free_rbtdb(%s)", buf);
1038                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1039                 }
1040         }
1041 }
1042
1043 static void
1044 detach(dns_db_t **dbp) {
1045         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1046         unsigned int refs;
1047
1048         REQUIRE(VALID_RBTDB(rbtdb));
1049
1050         isc_refcount_decrement(&rbtdb->references, &refs);
1051
1052         if (refs == 0)
1053                 maybe_free_rbtdb(rbtdb);
1054
1055         *dbp = NULL;
1056 }
1057
1058 static void
1059 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1060         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1061         rbtdb_version_t *version;
1062         unsigned int refs;
1063
1064         REQUIRE(VALID_RBTDB(rbtdb));
1065
1066         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1067         version = rbtdb->current_version;
1068         isc_refcount_increment(&version->references, &refs);
1069         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1070
1071         *versionp = (dns_dbversion_t *)version;
1072 }
1073
1074 static inline rbtdb_version_t *
1075 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1076                  unsigned int references, isc_boolean_t writer)
1077 {
1078         isc_result_t result;
1079         rbtdb_version_t *version;
1080
1081         version = isc_mem_get(mctx, sizeof(*version));
1082         if (version == NULL)
1083                 return (NULL);
1084         version->serial = serial;
1085         result = isc_refcount_init(&version->references, references);
1086         if (result != ISC_R_SUCCESS) {
1087                 isc_mem_put(mctx, version, sizeof(*version));
1088                 return (NULL);
1089         }
1090         version->writer = writer;
1091         version->commit_ok = ISC_FALSE;
1092         ISC_LIST_INIT(version->changed_list);
1093         ISC_LIST_INIT(version->resigned_list);
1094         ISC_LINK_INIT(version, link);
1095
1096         return (version);
1097 }
1098
1099 static isc_result_t
1100 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1101         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1102         rbtdb_version_t *version;
1103
1104         REQUIRE(VALID_RBTDB(rbtdb));
1105         REQUIRE(versionp != NULL && *versionp == NULL);
1106         REQUIRE(rbtdb->future_version == NULL);
1107
1108         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1109         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1110         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1111                                    ISC_TRUE);
1112         if (version != NULL) {
1113                 version->rbtdb = rbtdb;
1114                 version->commit_ok = ISC_TRUE;
1115                 version->secure = rbtdb->current_version->secure;
1116                 version->havensec3 = rbtdb->current_version->havensec3;
1117                 if (version->havensec3) {
1118                         version->flags = rbtdb->current_version->flags;
1119                         version->iterations =
1120                                 rbtdb->current_version->iterations;
1121                         version->hash = rbtdb->current_version->hash;
1122                         version->salt_length =
1123                                 rbtdb->current_version->salt_length;
1124                         memcpy(version->salt, rbtdb->current_version->salt,
1125                                version->salt_length);
1126                 } else {
1127                         version->flags = 0;
1128                         version->iterations = 0;
1129                         version->hash = 0;
1130                         version->salt_length = 0;
1131                         memset(version->salt, 0, sizeof(version->salt));
1132                 }
1133                 rbtdb->next_serial++;
1134                 rbtdb->future_version = version;
1135         }
1136         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1137
1138         if (version == NULL)
1139                 return (ISC_R_NOMEMORY);
1140
1141         *versionp = version;
1142
1143         return (ISC_R_SUCCESS);
1144 }
1145
1146 static void
1147 attachversion(dns_db_t *db, dns_dbversion_t *source,
1148               dns_dbversion_t **targetp)
1149 {
1150         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1151         rbtdb_version_t *rbtversion = source;
1152         unsigned int refs;
1153
1154         REQUIRE(VALID_RBTDB(rbtdb));
1155         INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
1156
1157         isc_refcount_increment(&rbtversion->references, &refs);
1158         INSIST(refs > 1);
1159
1160         *targetp = rbtversion;
1161 }
1162
1163 static rbtdb_changed_t *
1164 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1165             dns_rbtnode_t *node)
1166 {
1167         rbtdb_changed_t *changed;
1168         unsigned int refs;
1169
1170         /*
1171          * Caller must be holding the node lock if its reference must be
1172          * protected by the lock.
1173          */
1174
1175         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1176
1177         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1178
1179         REQUIRE(version->writer);
1180
1181         if (changed != NULL) {
1182                 dns_rbtnode_refincrement(node, &refs);
1183                 INSIST(refs != 0);
1184                 changed->node = node;
1185                 changed->dirty = ISC_FALSE;
1186                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1187         } else
1188                 version->commit_ok = ISC_FALSE;
1189
1190         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1191
1192         return (changed);
1193 }
1194
1195 static void
1196 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1197                  acachectl_t *array)
1198 {
1199         unsigned int count;
1200         unsigned int i;
1201         unsigned char *raw;     /* RDATASLAB */
1202
1203         /*
1204          * The caller must be holding the corresponding node lock.
1205          */
1206
1207         if (array == NULL)
1208                 return;
1209
1210         raw = (unsigned char *)header + sizeof(*header);
1211         count = raw[0] * 256 + raw[1];
1212
1213         /*
1214          * Sanity check: since an additional cache entry has a reference to
1215          * the original DB node (in the callback arg), there should be no
1216          * acache entries when the node can be freed.
1217          */
1218         for (i = 0; i < count; i++)
1219                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1220
1221         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1222 }
1223
1224 static inline void
1225 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1226
1227         if (dns_name_dynamic(&(*noqname)->name))
1228                 dns_name_free(&(*noqname)->name, mctx);
1229         if ((*noqname)->neg != NULL)
1230                 isc_mem_put(mctx, (*noqname)->neg,
1231                             dns_rdataslab_size((*noqname)->neg, 0));
1232         if ((*noqname)->negsig != NULL)
1233                 isc_mem_put(mctx, (*noqname)->negsig,
1234                             dns_rdataslab_size((*noqname)->negsig, 0));
1235         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1236         *noqname = NULL;
1237 }
1238
1239 static inline void
1240 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1241 {
1242         ISC_LINK_INIT(h, link);
1243         h->heap_index = 0;
1244
1245 #if TRACE_HEADER
1246         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1247                 fprintf(stderr, "initialized header: %p\n", h);
1248 #else
1249         UNUSED(rbtdb);
1250 #endif
1251 }
1252
1253 static inline rdatasetheader_t *
1254 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1255 {
1256         rdatasetheader_t *h;
1257
1258         h = isc_mem_get(mctx, sizeof(*h));
1259         if (h == NULL)
1260                 return (NULL);
1261
1262 #if TRACE_HEADER
1263         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1264                 fprintf(stderr, "allocated header: %p\n", h);
1265 #endif
1266         init_rdataset(rbtdb, h);
1267         return (h);
1268 }
1269
1270 static inline void
1271 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1272 {
1273         unsigned int size;
1274         int idx;
1275
1276         if (EXISTS(rdataset) &&
1277             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1278                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1279         }
1280
1281         idx = rdataset->node->locknum;
1282         if (ISC_LINK_LINKED(rdataset, link)) {
1283                 INSIST(IS_CACHE(rbtdb));
1284                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1285         }
1286         if (rdataset->heap_index != 0)
1287                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1288         rdataset->heap_index = 0;
1289
1290         if (rdataset->noqname != NULL)
1291                 free_noqname(mctx, &rdataset->noqname);
1292         if (rdataset->closest != NULL)
1293                 free_noqname(mctx, &rdataset->closest);
1294
1295         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1296         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1297
1298         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1299                 size = sizeof(*rdataset);
1300         else
1301                 size = dns_rdataslab_size((unsigned char *)rdataset,
1302                                           sizeof(*rdataset));
1303         isc_mem_put(mctx, rdataset, size);
1304 }
1305
1306 static inline void
1307 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1308         rdatasetheader_t *header, *dcurrent;
1309         isc_boolean_t make_dirty = ISC_FALSE;
1310
1311         /*
1312          * Caller must hold the node lock.
1313          */
1314
1315         /*
1316          * We set the IGNORE attribute on rdatasets with serial number
1317          * 'serial'.  When the reference count goes to zero, these rdatasets
1318          * will be cleaned up; until that time, they will be ignored.
1319          */
1320         for (header = node->data; header != NULL; header = header->next) {
1321                 if (header->serial == serial) {
1322                         header->attributes |= RDATASET_ATTR_IGNORE;
1323                         make_dirty = ISC_TRUE;
1324                 }
1325                 for (dcurrent = header->down;
1326                      dcurrent != NULL;
1327                      dcurrent = dcurrent->down) {
1328                         if (dcurrent->serial == serial) {
1329                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1330                                 make_dirty = ISC_TRUE;
1331                         }
1332                 }
1333         }
1334         if (make_dirty)
1335                 node->dirty = 1;
1336 }
1337
1338 static inline void
1339 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1340 {
1341         rdatasetheader_t *d, *down_next;
1342
1343         for (d = top->down; d != NULL; d = down_next) {
1344                 down_next = d->down;
1345                 free_rdataset(rbtdb, mctx, d);
1346         }
1347         top->down = NULL;
1348 }
1349
1350 static inline void
1351 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1352         rdatasetheader_t *current, *top_prev, *top_next;
1353         isc_mem_t *mctx = rbtdb->common.mctx;
1354
1355         /*
1356          * Caller must be holding the node lock.
1357          */
1358
1359         top_prev = NULL;
1360         for (current = node->data; current != NULL; current = top_next) {
1361                 top_next = current->next;
1362                 clean_stale_headers(rbtdb, mctx, current);
1363                 /*
1364                  * If current is nonexistent or stale, we can clean it up.
1365                  */
1366                 if ((current->attributes &
1367                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1368                         if (top_prev != NULL)
1369                                 top_prev->next = current->next;
1370                         else
1371                                 node->data = current->next;
1372                         free_rdataset(rbtdb, mctx, current);
1373                 } else
1374                         top_prev = current;
1375         }
1376         node->dirty = 0;
1377 }
1378
/*%
 * Prune the rdataset headers of a zone node.
 *
 * For each top-level header in node->data (linked by 'next'), its chain of
 * older headers (linked by 'down') is processed in four steps:
 *   1. down headers with the same serial as their predecessor, or with the
 *      IGNORE attribute, are freed;
 *   2. a top-level header with IGNORE set is removed, or replaced by its
 *      first down header if it has one;
 *   3. the first down header with a serial below 'least_serial' and all
 *      headers below it are freed;
 *   4. a top-level header left without a down chain is removed if it is
 *      NONEXISTENT.
 *
 * node->dirty is cleared only when no top-level header still has a down
 * chain.  'least_serial' must not be 0.
 */
static inline void
clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
                rbtdb_serial_t least_serial)
{
        rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
        rdatasetheader_t *top_prev, *top_next;
        isc_mem_t *mctx = rbtdb->common.mctx;
        isc_boolean_t still_dirty = ISC_FALSE;

        /*
         * Caller must be holding the node lock.
         */
        REQUIRE(least_serial != 0);

        /* top_prev tracks the last top-level header kept on the list. */
        top_prev = NULL;
        for (current = node->data; current != NULL; current = top_next) {
                /* Saved up front: 'current' may be freed or replaced below. */
                top_next = current->next;

                /*
                 * First, we clean up any instances of multiple rdatasets
                 * with the same serial number, or that have the IGNORE
                 * attribute.
                 */
                dparent = current;
                for (dcurrent = current->down;
                     dcurrent != NULL;
                     dcurrent = down_next) {
                        down_next = dcurrent->down;
                        INSIST(dcurrent->serial <= dparent->serial);
                        if (dcurrent->serial == dparent->serial ||
                            IGNORE(dcurrent)) {
                                /*
                                 * Splice dcurrent out of the down chain;
                                 * the successor's 'next' is pointed back
                                 * at dparent.
                                 */
                                if (down_next != NULL)
                                        down_next->next = dparent;
                                dparent->down = down_next;
                                free_rdataset(rbtdb, mctx, dcurrent);
                        } else
                                dparent = dcurrent;
                }

                /*
                 * We've now eliminated all IGNORE datasets with the possible
                 * exception of current, which we now check.
                 */
                if (IGNORE(current)) {
                        down_next = current->down;
                        if (down_next == NULL) {
                                if (top_prev != NULL)
                                        top_prev->next = current->next;
                                else
                                        node->data = current->next;
                                free_rdataset(rbtdb, mctx, current);
                                /*
                                 * current no longer exists, so we can
                                 * just continue with the loop.
                                 */
                                continue;
                        } else {
                                /*
                                 * Pull up current->down, making it the new
                                 * current.
                                 */
                                if (top_prev != NULL)
                                        top_prev->next = down_next;
                                else
                                        node->data = down_next;
                                down_next->next = top_next;
                                free_rdataset(rbtdb, mctx, current);
                                current = down_next;
                        }
                }

                /*
                 * We now try to find the first down node less than the
                 * least serial.
                 */
                dparent = current;
                for (dcurrent = current->down;
                     dcurrent != NULL;
                     dcurrent = down_next) {
                        down_next = dcurrent->down;
                        if (dcurrent->serial < least_serial)
                                break;
                        dparent = dcurrent;
                }

                /*
                 * If there is a such an rdataset, delete it and any older
                 * versions.
                 */
                if (dcurrent != NULL) {
                        do {
                                down_next = dcurrent->down;
                                INSIST(dcurrent->serial <= least_serial);
                                free_rdataset(rbtdb, mctx, dcurrent);
                                dcurrent = down_next;
                        } while (dcurrent != NULL);
                        /* dparent is now the last header left on the chain. */
                        dparent->down = NULL;
                }

                /*
                 * Note.  The serial number of 'current' might be less than
                 * least_serial too, but we cannot delete it because it is
                 * the most recent version, unless it is a NONEXISTENT
                 * rdataset.
                 */
                if (current->down != NULL) {
                        /* Older versions remain, so the node stays dirty. */
                        still_dirty = ISC_TRUE;
                        top_prev = current;
                } else {
                        /*
                         * If this is a NONEXISTENT rdataset, we can delete it.
                         */
                        if (NONEXISTENT(current)) {
                                if (top_prev != NULL)
                                        top_prev->next = current->next;
                                else
                                        node->data = current->next;
                                free_rdataset(rbtdb, mctx, current);
                        } else
                                top_prev = current;
                }
        }
        if (!still_dirty)
                node->dirty = 0;
}
1504
1505 static void
1506 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1507 {
1508         dns_rbtnode_t *nsecnode;
1509         dns_fixedname_t fname;
1510         dns_name_t *name;
1511         isc_result_t result = ISC_R_UNEXPECTED;
1512
1513         INSIST(!ISC_LINK_LINKED(node, deadlink));
1514
1515         switch (node->nsec) {
1516         case DNS_RBT_NSEC_NORMAL:
1517 #ifdef BIND9
1518                 if (rbtdb->rpz_cidr != NULL) {
1519                         dns_fixedname_init(&fname);
1520                         name = dns_fixedname_name(&fname);
1521                         dns_rbt_fullnamefromnode(node, name);
1522                         dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1523                 }
1524 #endif
1525                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1526                 break;
1527         case DNS_RBT_NSEC_HAS_NSEC:
1528                 dns_fixedname_init(&fname);
1529                 name = dns_fixedname_name(&fname);
1530                 dns_rbt_fullnamefromnode(node, name);
1531                 /*
1532                  * Delete the corresponding node from the auxiliary NSEC
1533                  * tree before deleting from the main tree.
1534                  */
1535                 nsecnode = NULL;
1536                 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1537                                           NULL, DNS_RBTFIND_EMPTYDATA,
1538                                           NULL, NULL);
1539                 if (result != ISC_R_SUCCESS) {
1540                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1541                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1542                                       "delete_node: "
1543                                       "dns_rbt_findnode(nsec): %s",
1544                                       isc_result_totext(result));
1545                 } else {
1546                         result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1547                                                     ISC_FALSE);
1548                         if (result != ISC_R_SUCCESS) {
1549                                 isc_log_write(dns_lctx,
1550                                               DNS_LOGCATEGORY_DATABASE,
1551                                               DNS_LOGMODULE_CACHE,
1552                                               ISC_LOG_WARNING,
1553                                               "delete_nsecnode(): "
1554                                               "dns_rbt_deletenode(nsecnode): %s",
1555                                               isc_result_totext(result));
1556                         }
1557                 }
1558                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1559 #ifdef BIND9
1560                 dns_rpz_cidr_deleteip(rbtdb->rpz_cidr, name);
1561 #endif
1562                 break;
1563         case DNS_RBT_NSEC_NSEC:
1564                 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1565                 break;
1566         case DNS_RBT_NSEC_NSEC3:
1567                 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1568                 break;
1569         }
1570         if (result != ISC_R_SUCCESS) {
1571                 isc_log_write(dns_lctx,
1572                               DNS_LOGCATEGORY_DATABASE,
1573                               DNS_LOGMODULE_CACHE,
1574                               ISC_LOG_WARNING,
1575                               "delete_nsecnode(): "
1576                               "dns_rbt_deletenode: %s",
1577                               isc_result_totext(result));
1578         }
1579 }
1580
1581 /*%
1582  * Clean up dead nodes.  These are nodes which have no references, and
1583  * have no data.  They are dead but we could not or chose not to delete
1584  * them when we deleted all the data at that node because we did not want
1585  * to wait for the tree write lock.
1586  *
1587  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1588  */
1589 static void
1590 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1591         dns_rbtnode_t *node;
1592         int count = 10;         /* XXXJT: should be adjustable */
1593
1594         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1595         while (node != NULL && count > 0) {
1596                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1597
1598                 /*
1599                  * Since we're holding a tree write lock, it should be
1600                  * impossible for this node to be referenced by others.
1601                  */
1602                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1603                        node->data == NULL);
1604
1605                 delete_node(rbtdb, node);
1606
1607                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1608                 count--;
1609         }
1610 }
1611
1612 /*
1613  * Caller must be holding the node lock.
1614  */
1615 static inline void
1616 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1617         unsigned int lockrefs, noderefs;
1618         isc_refcount_t *lockref;
1619
1620         INSIST(!ISC_LINK_LINKED(node, deadlink));
1621         dns_rbtnode_refincrement0(node, &noderefs);
1622         if (noderefs == 1) {    /* this is the first reference to the node */
1623                 lockref = &rbtdb->node_locks[node->locknum].references;
1624                 isc_refcount_increment0(lockref, &lockrefs);
1625                 INSIST(lockrefs != 0);
1626         }
1627         INSIST(noderefs != 0);
1628 }
1629
1630 /*
1631  * This function is assumed to be called when a node is newly referenced
1632  * and can be in the deadnode list.  In that case the node must be retrieved
1633  * from the list because it is going to be used.  In addition, if the caller
1634  * happens to hold a write lock on the tree, it's a good chance to purge dead
1635  * nodes.
1636  * Note: while a new reference is gained in multiple places, there are only very
1637  * few cases where the node can be in the deadnode list (only empty nodes can
1638  * have been added to the list).
1639  */
1640 static inline void
1641 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1642                 isc_rwlocktype_t treelocktype)
1643 {
1644         isc_rwlocktype_t locktype = isc_rwlocktype_read;
1645         nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
1646         isc_boolean_t maybe_cleanup = ISC_FALSE;
1647
1648         POST(locktype);
1649
1650         NODE_STRONGLOCK(nodelock);
1651         NODE_WEAKLOCK(nodelock, locktype);
1652
1653         /*
1654          * Check if we can possibly cleanup the dead node.  If so, upgrade
1655          * the node lock below to perform the cleanup.
1656          */
1657         if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1658             treelocktype == isc_rwlocktype_write) {
1659                 maybe_cleanup = ISC_TRUE;
1660         }
1661
1662         if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
1663                 /*
1664                  * Upgrade the lock and test if we still need to unlink.
1665                  */
1666                 NODE_WEAKUNLOCK(nodelock, locktype);
1667                 locktype = isc_rwlocktype_write;
1668                 POST(locktype);
1669                 NODE_WEAKLOCK(nodelock, locktype);
1670                 if (ISC_LINK_LINKED(node, deadlink))
1671                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1672                                         node, deadlink);
1673                 if (maybe_cleanup)
1674                         cleanup_dead_nodes(rbtdb, node->locknum);
1675         }
1676
1677         new_reference(rbtdb, node);
1678
1679         NODE_WEAKUNLOCK(nodelock, locktype);
1680         NODE_STRONGUNLOCK(nodelock);
1681 }
1682
1683 /*
      * Release one reference to 'node'.  When the count drops to zero the
      * node is cleaned if it is dirty and, if it then has neither data nor a
      * subtree ('data' and 'down' both NULL), it is deleted from its tree,
      * queued on the bucket's deadnodes list, or handed to prune_tree() via
      * a task event.
      *
1684  * Caller must be holding the node lock; either the "strong", read or write
1685  * lock.  Note that the lock must be held even when node references are
1686  * atomically modified; in that case the decrement operation itself does not
1687  * have to be protected, but we must avoid a race condition where multiple
1688  * threads are decreasing the reference to zero simultaneously and at least
1689  * one of them is going to free the node.
      *
      * 'least_serial': passed to clean_zone_node() for non-cache databases;
      *      0 means the caller does not know it, in which case
      *      rbtdb->least_serial is read here under the database lock.
      * 'nlock': node lock type held by the caller.  A read lock is exchanged
      *      for a write lock on the slow path and downgraded again before
      *      returning.
      * 'tlock': tree lock type held by the caller.  Unless it is already a
      *      write lock, a non-blocking upgrade (or trylock) is attempted; if
      *      it succeeds it is undone before returning.
      * 'pruning': ISC_TRUE when called from prune_tree(); suppresses the
      *      dispatch of another pruning event.
      *
1690  * This function returns ISC_TRUE if and only if the node reference decreases
1691  * to zero.
      * Note: when a pruning event is dispatched, a new reference is taken on
      * the node on behalf of that event and ISC_FALSE is returned.
1692  */
1693 static isc_boolean_t
1694 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1695                     rbtdb_serial_t least_serial,
1696                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1697                     isc_boolean_t pruning)
1698 {
1699         isc_result_t result;
1700         isc_boolean_t write_locked;
1701         rbtdb_nodelock_t *nodelock;
1702         unsigned int refs, nrefs;
1703         int bucket = node->locknum;
1704         isc_boolean_t no_reference = ISC_TRUE;
1705
1706         nodelock = &rbtdb->node_locks[bucket];
1707
1708         /* Handle easy and typical case first. */
             /*
              * A clean node that still has data or a subtree is never deleted
              * here, so only the counters need updating.
              */
1709         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1710                 dns_rbtnode_refdecrement(node, &nrefs);
1711                 INSIST((int)nrefs >= 0);
1712                 if (nrefs == 0) {
1713                         isc_refcount_decrement(&nodelock->references, &refs);
1714                         INSIST((int)refs >= 0);
1715                 }
1716                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1717         }
1718
1719         /* Upgrade the lock? */
1720         if (nlock == isc_rwlocktype_read) {
1721                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1722                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1723         }
1724
1725         dns_rbtnode_refdecrement(node, &nrefs);
1726         INSIST((int)nrefs >= 0);
1727         if (nrefs > 0) {
1728                 /* Restore the lock? */
1729                 if (nlock == isc_rwlocktype_read)
1730                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1731                 return (ISC_FALSE);
1732         }
1733
             /* Last reference is gone: clean up a dirty node first. */
1734         if (node->dirty) {
1735                 if (IS_CACHE(rbtdb))
1736                         clean_cache_node(rbtdb, node);
1737                 else {
1738                         if (least_serial == 0) {
1739                                 /*
1740                                  * Caller doesn't know the least serial.
1741                                  * Get it.
1742                                  */
1743                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1744                                 least_serial = rbtdb->least_serial;
1745                                 RBTDB_UNLOCK(&rbtdb->lock,
1746                                              isc_rwlocktype_read);
1747                         }
1748                         clean_zone_node(rbtdb, node, least_serial);
1749                 }
1750         }
1751
1752         /*
1753          * Attempt to switch to a write lock on the tree.  If this fails,
1754          * we will add this node to a linked list of nodes in this locking
1755          * bucket which we will free later.
1756          */
1757         if (tlock != isc_rwlocktype_write) {
1758                 /*
1759                  * Locking hierarchy notwithstanding, we don't need to free
1760                  * the node lock before acquiring the tree write lock because
1761                  * we only do a trylock.
1762                  */
1763                 if (tlock == isc_rwlocktype_read)
1764                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1765                 else
1766                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1767                                                     isc_rwlocktype_write);
1768                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1769                               result == ISC_R_LOCKBUSY);
1770
1771                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1772         } else
1773                 write_locked = ISC_TRUE;
1774
             /* The node no longer counts towards the bucket's references. */
1775         isc_refcount_decrement(&nodelock->references, &refs);
1776         INSIST((int)refs >= 0);
1777
1778         /*
1779          * XXXDCL should this only be done for cache zones?
1780          */
             /* A node that still has data or a subtree stays in the tree. */
1781         if (node->data != NULL || node->down != NULL)
1782                 goto restore_locks;
1783
1784         if (write_locked) {
1785                 /*
1786                  * We can now delete the node.
1787                  */
1788
1789                 /*
1790                  * If this node is the only one in the level it's in, deleting
1791                  * this node may recursively make its parent the only node in
1792                  * the parent level; if so, and if no one is currently using
1793                  * the parent node, this is almost the only opportunity to
1794                  * clean it up.  But the recursive cleanup is not that trivial
1795                  * since the child and parent may be in different lock buckets,
1796                  * which would cause a lock order reversal problem.  To avoid
1797                  * the trouble, we'll dispatch a separate event for batch
1798                  * cleaning.  We need to check whether we're deleting the node
1799                  * as a result of pruning to avoid infinite dispatching.
1800                  * Note: pruning happens only when a task has been set for the
1801                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1802                  * it's their responsibility to purge stale leaves (e.g. by
1803                  * periodic walk-through).
1804                  */
1805                 if (!pruning && node->parent != NULL &&
1806                     node->parent->down == node && node->left == NULL &&
1807                     node->right == NULL && rbtdb->task != NULL) {
1808                         isc_event_t *ev;
1809                         dns_db_t *db;
1810
1811                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1812                                                 DNS_EVENT_RBTPRUNE,
1813                                                 prune_tree, node,
1814                                                 sizeof(isc_event_t));
1815                         if (ev != NULL) {
                                     /*
                                      * The event holds its own reference to
                                      * the node and to the database.
                                      */
1816                                 new_reference(rbtdb, node);
1817                                 db = NULL;
1818                                 attach((dns_db_t *)rbtdb, &db);
1819                                 ev->ev_sender = db;
1820                                 isc_task_send(rbtdb->task, &ev);
1821                                 no_reference = ISC_FALSE;
1822                         } else {
1823                                 /*
1824                                  * XXX: this is a weird situation.  We could
1825                                  * ignore this error case, but then the stale
1826                                  * node will unlikely be purged except via a
1827                                  * rare condition such as manual cleanup.  So
1828                                  * we queue it in the deadnodes list, hoping
1829                                  * the memory shortage is temporary and the node
1830                                  * will be deleted later.
1831                                  */
1832                                 isc_log_write(dns_lctx,
1833                                               DNS_LOGCATEGORY_DATABASE,
1834                                               DNS_LOGMODULE_CACHE,
1835                                               ISC_LOG_INFO,
1836                                               "decrement_reference: failed to "
1837                                               "allocate pruning event");
1838                                 INSIST(node->data == NULL);
1839                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1840                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1841                                                 deadlink);
1842                         }
1843                 } else {
1844                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1845                                 char printname[DNS_NAME_FORMATSIZE];
1846
1847                                 isc_log_write(dns_lctx,
1848                                               DNS_LOGCATEGORY_DATABASE,
1849                                               DNS_LOGMODULE_CACHE,
1850                                               ISC_LOG_DEBUG(1),
1851                                               "decrement_reference: "
1852                                               "delete from rbt: %p %s",
1853                                               node,
1854                                               dns_rbt_formatnodename(node,
1855                                                         printname,
1856                                                         sizeof(printname)));
1857                         }
1858
1859                         delete_node(rbtdb, node);
1860                 }
1861         } else {
                     /*
                      * No tree write lock: defer the deletion by queueing the
                      * node on this bucket's deadnodes list.
                      */
1862                 INSIST(node->data == NULL);
1863                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1864                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1865         }
1866
1867  restore_locks:
1868         /* Restore the lock? */
1869         if (nlock == isc_rwlocktype_read)
1870                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1871
1872         /*
1873          * Relock a read lock, or unlock the write lock if no lock was held.
1874          */
1875         if (tlock == isc_rwlocktype_none)
1876                 if (write_locked)
1877                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1878
1879         if (tlock == isc_rwlocktype_read)
1880                 if (write_locked)
1881                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1882
1883         return (no_reference);
1884 }
1885
1886 /*
1887  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1888  * case, the number of iteration is the number of tree levels, which is at
1889  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1890  * should be much smaller (only a few times), and even the worst case would be
1891  * acceptable for a single event.
1892  */
1893 static void
1894 prune_tree(isc_task_t *task, isc_event_t *event) {
1895         dns_rbtdb_t *rbtdb = event->ev_sender;
1896         dns_rbtnode_t *node = event->ev_arg;
1897         dns_rbtnode_t *parent;
1898         unsigned int locknum;
1899
1900         UNUSED(task);
1901
1902         isc_event_free(&event);
1903
1904         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1905         locknum = node->locknum;
1906         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1907         do {
1908                 parent = node->parent;
1909                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1910                                     isc_rwlocktype_write, ISC_TRUE);
1911
1912                 if (parent != NULL && parent->down == NULL) {
1913                         /*
1914                          * node was the only down child of the parent and has
1915                          * just been removed.  We'll then need to examine the
1916                          * parent.  Keep the lock if possible; otherwise,
1917                          * release the old lock and acquire one for the parent.
1918                          */
1919                         if (parent->locknum != locknum) {
1920                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1921                                             isc_rwlocktype_write);
1922                                 locknum = parent->locknum;
1923                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1924                                           isc_rwlocktype_write);
1925                         }
1926
1927                         /*
1928                          * We need to gain a reference to the node before
1929                          * decrementing it in the next iteration.  In addition,
1930                          * if the node is in the dead-nodes list, extract it
1931                          * from the list beforehand as we do in
1932                          * reactivate_node().
1933                          */
1934                         if (ISC_LINK_LINKED(parent, deadlink))
1935                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1936                                                 parent, deadlink);
1937                         new_reference(rbtdb, parent);
1938                 } else
1939                         parent = NULL;
1940
1941                 node = parent;
1942         } while (node != NULL);
1943         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1944         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1945
1946         detach((dns_db_t **)&rbtdb);
1947 }
1948
1949 static inline void
1950 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1951                    rbtdb_changedlist_t *cleanup_list)
1952 {
1953         /*
1954          * Caller must be holding the database lock.
1955          */
1956
1957         rbtdb->least_serial = version->serial;
1958         *cleanup_list = version->changed_list;
1959         ISC_LIST_INIT(version->changed_list);
1960 }
1961
1962 static inline void
1963 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1964         rbtdb_changed_t *changed, *next_changed;
1965
1966         /*
1967          * If the changed record is dirty, then
1968          * an update created multiple versions of
1969          * a given rdataset.  We keep this list
1970          * until we're the least open version, at
1971          * which point it's safe to get rid of any
1972          * older versions.
1973          *
1974          * If the changed record isn't dirty, then
1975          * we don't need it anymore since we're
1976          * committing and not rolling back.
1977          *
1978          * The caller must be holding the database lock.
1979          */
1980         for (changed = HEAD(version->changed_list);
1981              changed != NULL;
1982              changed = next_changed) {
1983                 next_changed = NEXT(changed, link);
1984                 if (!changed->dirty) {
1985                         UNLINK(version->changed_list,
1986                                changed, link);
1987                         APPEND(*cleanup_list,
1988                                changed, link);
1989                 }
1990         }
1991 }
1992
1993 static void
1994 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1995 #ifndef BIND9
1996         UNUSED(db);
1997         UNUSED(version);
1998         UNUSED(origin);
1999
2000         return;
2001 #else
2002         dns_rdataset_t keyset;
2003         dns_rdataset_t nsecset, signsecset;
2004         isc_boolean_t haszonekey = ISC_FALSE;
2005         isc_boolean_t hasnsec = ISC_FALSE;
2006         isc_result_t result;
2007
2008         dns_rdataset_init(&keyset);
2009         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
2010                                      0, 0, &keyset, NULL);
2011         if (result == ISC_R_SUCCESS) {
2012                 result = dns_rdataset_first(&keyset);
2013                 while (result == ISC_R_SUCCESS) {
2014                         dns_rdata_t keyrdata = DNS_RDATA_INIT;
2015                         dns_rdataset_current(&keyset, &keyrdata);
2016                         if (dns_zonekey_iszonekey(&keyrdata)) {
2017                                 haszonekey = ISC_TRUE;
2018                                 break;
2019                         }
2020                         result = dns_rdataset_next(&keyset);
2021                 }
2022                 dns_rdataset_disassociate(&keyset);
2023         }
2024         if (!haszonekey) {
2025                 version->secure = dns_db_insecure;
2026                 version->havensec3 = ISC_FALSE;
2027                 return;
2028         }
2029
2030         dns_rdataset_init(&nsecset);
2031         dns_rdataset_init(&signsecset);
2032         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
2033                                      0, 0, &nsecset, &signsecset);
2034         if (result == ISC_R_SUCCESS) {
2035                 if (dns_rdataset_isassociated(&signsecset)) {
2036                         hasnsec = ISC_TRUE;
2037                         dns_rdataset_disassociate(&signsecset);
2038                 }
2039                 dns_rdataset_disassociate(&nsecset);
2040         }
2041
2042         setnsec3parameters(db, version);
2043
2044         /*
2045          * Do we have a valid NSEC/NSEC3 chain?
2046          */
2047         if (version->havensec3 || hasnsec)
2048                 version->secure = dns_db_secure;
2049         else
2050                 version->secure = dns_db_insecure;
2051 #endif
2052 }
2053
/*%<
 * Walk the rdatasets at the origin node looking for NSEC3PARAM records
 * visible in 'version', and cache the parameters (hash, salt, iterations,
 * flags) of a usable one in 'version'.  version->havensec3 tells whether
 * one was found.
 */
#ifdef BIND9
static void
setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
        dns_rbtnode_t *node;
        dns_rdata_nsec3param_t nsec3param;
        dns_rdata_t rdata = DNS_RDATA_INIT;
        isc_region_t region;
        isc_result_t result;
        rdatasetheader_t *header, *header_next;
        unsigned char *raw;             /* RDATASLAB */
        unsigned int count, length;

        RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
        version->havensec3 = ISC_FALSE;
        node = rbtdb->origin_node;
        NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
                  isc_rwlocktype_read);
        for (header = node->data; header != NULL; header = header_next) {
                header_next = header->next;

                /*
                 * Follow the 'down' chain to the first header that is
                 * visible in this version and not marked to be ignored.
                 */
                while (header != NULL &&
                       (header->serial > version->serial || IGNORE(header)))
                        header = header->down;

                if (header == NULL || NONEXISTENT(header) ||
                    header->type != dns_rdatatype_nsec3param)
                        continue;

                /*
                 * Find A NSEC3PARAM with a supported algorithm.
                 */
                raw = (unsigned char *)header + sizeof(*header);
                count = raw[0] * 256 + raw[1]; /* count */
#if DNS_RDATASET_FIXED
                raw += count * 4 + 2;
#else
                raw += 2;
#endif
                for (; count > 0U; count--) {
                        length = raw[0] * 256 + raw[1];
#if DNS_RDATASET_FIXED
                        raw += 4;
#else
                        raw += 2;
#endif
                        region.base = raw;
                        region.length = length;
                        raw += length;
                        dns_rdata_fromregion(&rdata,
                                             rbtdb->common.rdclass,
                                             dns_rdatatype_nsec3param,
                                             &region);
                        result = dns_rdata_tostruct(&rdata,
                                                    &nsec3param,
                                                    NULL);
                        INSIST(result == ISC_R_SUCCESS);
                        dns_rdata_reset(&rdata);

                        /*
                         * Skip records with an unsupported hash or with
                         * any flag set.
                         */
                        if ((nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
                             !dns_nsec3_supportedhash(nsec3param.hash)) ||
                            nsec3param.flags != 0)
                                continue;

                        memcpy(version->salt, nsec3param.salt,
                               nsec3param.salt_length);
                        version->salt_length = nsec3param.salt_length;
                        version->hash = nsec3param.hash;
                        version->iterations = nsec3param.iterations;
                        version->flags = nsec3param.flags;
                        version->havensec3 = ISC_TRUE;

                        /*
                         * Keep looking only if this was the unknown test
                         * algorithm; anything else is good enough.
                         */
                        if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
                                goto unlock;
                }
        }
 unlock:
        NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
                    isc_rwlocktype_read);
        RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
}
#endif
2151
2152 static void
2153 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2154         dns_rbtdb_t *rbtdb = event->ev_arg;
2155         isc_boolean_t again = ISC_FALSE;
2156         unsigned int locknum;
2157         unsigned int refs;
2158
2159         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2160         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2161                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2162                           isc_rwlocktype_write);
2163                 cleanup_dead_nodes(rbtdb, locknum);
2164                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2165                         again = ISC_TRUE;
2166                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2167                             isc_rwlocktype_write);
2168         }
2169         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2170         if (again)
2171                 isc_task_send(task, &event);
2172         else {
2173                 isc_event_free(&event);
2174                 isc_refcount_decrement(&rbtdb->references, &refs);
2175                 if (refs == 0)
2176                         maybe_free_rbtdb(rbtdb);
2177         }
2178 }
2179
2180 static void
2181 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2182         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2183         rbtdb_version_t *version, *cleanup_version, *least_greater;
2184         isc_boolean_t rollback = ISC_FALSE;
2185         rbtdb_changedlist_t cleanup_list;
2186         rdatasetheaderlist_t resigned_list;
2187         rbtdb_changed_t *changed, *next_changed;
2188         rbtdb_serial_t serial, least_serial;
2189         dns_rbtnode_t *rbtnode;
2190         unsigned int refs;
2191         rdatasetheader_t *header;
2192         isc_boolean_t writer;
2193
2194         REQUIRE(VALID_RBTDB(rbtdb));
2195         version = (rbtdb_version_t *)*versionp;
2196         INSIST(version->rbtdb == rbtdb);
2197
2198         cleanup_version = NULL;
2199         ISC_LIST_INIT(cleanup_list);
2200         ISC_LIST_INIT(resigned_list);
2201
2202         isc_refcount_decrement(&version->references, &refs);
2203         if (refs > 0) {         /* typical and easy case first */
2204                 if (commit) {
2205                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2206                         INSIST(!version->writer);
2207                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2208                 }
2209                 goto end;
2210         }
2211
2212         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2213         serial = version->serial;
2214         writer = version->writer;
2215         if (version->writer) {
2216                 if (commit) {
2217                         unsigned cur_ref;
2218                         rbtdb_version_t *cur_version;
2219
2220                         INSIST(version->commit_ok);
2221                         INSIST(version == rbtdb->future_version);
2222                         /*
2223                          * The current version is going to be replaced.
2224                          * Release the (likely last) reference to it from the
2225                          * DB itself and unlink it from the open list.
2226                          */
2227                         cur_version = rbtdb->current_version;
2228                         isc_refcount_decrement(&cur_version->references,
2229                                                &cur_ref);
2230                         if (cur_ref == 0) {
2231                                 if (cur_version->serial == rbtdb->least_serial)
2232                                         INSIST(EMPTY(cur_version->changed_list));
2233                                 UNLINK(rbtdb->open_versions,
2234                                        cur_version, link);
2235                         }
2236                         if (EMPTY(rbtdb->open_versions)) {
2237                                 /*
2238                                  * We're going to become the least open
2239                                  * version.
2240                                  */
2241                                 make_least_version(rbtdb, version,
2242                                                    &cleanup_list);
2243                         } else {
2244                                 /*
2245                                  * Some other open version is the
2246                                  * least version.  We can't cleanup
2247                                  * records that were changed in this
2248                                  * version because the older versions
2249                                  * may still be in use by an open
2250                                  * version.
2251                                  *
2252                                  * We can, however, discard the
2253                                  * changed records for things that
2254                                  * we've added that didn't exist in
2255                                  * prior versions.
2256                                  */
2257                                 cleanup_nondirty(version, &cleanup_list);
2258                         }
2259                         /*
2260                          * If the (soon to be former) current version
2261                          * isn't being used by anyone, we can clean
2262                          * it up.
2263                          */
2264                         if (cur_ref == 0) {
2265                                 cleanup_version = cur_version;
2266                                 APPENDLIST(version->changed_list,
2267                                            cleanup_version->changed_list,
2268                                            link);
2269                         }
2270                         /*
2271                          * Become the current version.
2272                          */
2273                         version->writer = ISC_FALSE;
2274                         rbtdb->current_version = version;
2275                         rbtdb->current_serial = version->serial;
2276                         rbtdb->future_version = NULL;
2277
2278                         /*
2279                          * Keep the current version in the open list, and
2280                          * gain a reference for the DB itself (see the DB
2281                          * creation function below).  This must be the only
2282                          * case where we need to increment the counter from
2283                          * zero and need to use isc_refcount_increment0().
2284                          */
2285                         isc_refcount_increment0(&version->references,
2286                                                 &cur_ref);
2287                         INSIST(cur_ref == 1);
2288                         PREPEND(rbtdb->open_versions,
2289                                 rbtdb->current_version, link);
2290                         resigned_list = version->resigned_list;
2291                         ISC_LIST_INIT(version->resigned_list);
2292                 } else {
2293                         /*
2294                          * We're rolling back this transaction.
2295                          */
2296                         cleanup_list = version->changed_list;
2297                         ISC_LIST_INIT(version->changed_list);
2298                         resigned_list = version->resigned_list;
2299                         ISC_LIST_INIT(version->resigned_list);
2300                         rollback = ISC_TRUE;
2301                         cleanup_version = version;
2302                         rbtdb->future_version = NULL;
2303                 }
2304         } else {
2305                 if (version != rbtdb->current_version) {
2306                         /*
2307                          * There are no external or internal references
2308                          * to this version and it can be cleaned up.
2309                          */
2310                         cleanup_version = version;
2311
2312                         /*
2313                          * Find the version with the least serial
2314                          * number greater than ours.
2315                          */
2316                         least_greater = PREV(version, link);
2317                         if (least_greater == NULL)
2318                                 least_greater = rbtdb->current_version;
2319
2320                         INSIST(version->serial < least_greater->serial);
2321                         /*
2322                          * Is this the least open version?
2323                          */
2324                         if (version->serial == rbtdb->least_serial) {
2325                                 /*
2326                                  * Yes.  Install the new least open
2327                                  * version.
2328                                  */
2329                                 make_least_version(rbtdb,
2330                                                    least_greater,
2331                                                    &cleanup_list);
2332                         } else {
2333                                 /*
2334                                  * Add any unexecuted cleanups to
2335                                  * those of the least greater version.
2336                                  */
2337                                 APPENDLIST(least_greater->changed_list,
2338                                            version->changed_list,
2339                                            link);
2340                         }
2341                 } else if (version->serial == rbtdb->least_serial)
2342                         INSIST(EMPTY(version->changed_list));
2343                 UNLINK(rbtdb->open_versions, version, link);
2344         }
2345         least_serial = rbtdb->least_serial;
2346         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2347
2348         /*
2349          * Update the zone's secure status.
2350          */
2351         if (writer && commit && !IS_CACHE(rbtdb))
2352                 iszonesecure(db, version, rbtdb->origin_node);
2353
2354         if (cleanup_version != NULL) {
2355                 INSIST(EMPTY(cleanup_version->changed_list));
2356                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2357                             sizeof(*cleanup_version));
2358         }
2359
2360         /*
2361          * Commit/rollback re-signed headers.
2362          */
2363         for (header = HEAD(resigned_list);
2364              header != NULL;
2365              header = HEAD(resigned_list)) {
2366                 nodelock_t *lock;
2367
2368                 ISC_LIST_UNLINK(resigned_list, header, link);
2369
2370                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2371                 NODE_LOCK(lock, isc_rwlocktype_write);
2372                 if (rollback)
2373                         resign_insert(rbtdb, header->node->locknum, header);
2374                 decrement_reference(rbtdb, header->node, least_serial,
2375                                     isc_rwlocktype_write, isc_rwlocktype_none,
2376                                     ISC_FALSE);
2377                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2378         }
2379
2380         if (!EMPTY(cleanup_list)) {
2381                 isc_event_t *event = NULL;
2382                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2383
2384                 if (rbtdb->task != NULL)
2385                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
2386                                                    DNS_EVENT_RBTDEADNODES,
2387                                                    cleanup_dead_nodes_callback,
2388                                                    rbtdb, sizeof(isc_event_t));
2389                 if (event == NULL) {
2390                         /*
2391                          * We acquire a tree write lock here in order to make
2392                          * sure that stale nodes will be removed in
2393                          * decrement_reference().  If we didn't have the lock,
2394                          * those nodes could miss the chance to be removed
2395                          * until the server stops.  The write lock is
2396                          * expensive, but this event should be rare enough
2397                          * to justify the cost.
2398                          */
2399                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2400                         tlock = isc_rwlocktype_write;
2401                 }
2402
2403                 for (changed = HEAD(cleanup_list);
2404                      changed != NULL;
2405                      changed = next_changed) {
2406                         nodelock_t *lock;
2407
2408                         next_changed = NEXT(changed, link);
2409                         rbtnode = changed->node;
2410                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2411
2412                         NODE_LOCK(lock, isc_rwlocktype_write);
2413                         /*
2414                          * This is a good opportunity to purge any dead nodes,
2415                          * so use it.
2416                          */
2417                         if (event == NULL)
2418                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2419
2420                         if (rollback)
2421                                 rollback_node(rbtnode, serial);
2422                         decrement_reference(rbtdb, rbtnode, least_serial,
2423                                             isc_rwlocktype_write, tlock,
2424                                             ISC_FALSE);
2425
2426                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2427
2428                         isc_mem_put(rbtdb->common.mctx, changed,
2429                                     sizeof(*changed));
2430                 }
2431                 if (event != NULL) {
2432                         isc_refcount_increment(&rbtdb->references, NULL);
2433                         isc_task_send(rbtdb->task, &event);
2434                 } else
2435                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2436         }
2437
2438  end:
2439         *versionp = NULL;
2440 }
2441
2442 /*
2443  * Add the necessary magic for the wildcard name 'name'
2444  * to be found in 'rbtdb'.
2445  *
2446  * In order for wildcard matching to work correctly in
2447  * zone_find(), we must ensure that a node for the wildcarding
2448  * level exists in the database, and has its 'find_callback'
2449  * and 'wild' bits set.
2450  *
2451  * E.g. if the wildcard name is "*.sub.example." then we
2452  * must ensure that "sub.example." exists and is marked as
2453  * a wildcard level.
2454  */
2455 static isc_result_t
2456 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2457         isc_result_t result;
2458         dns_name_t foundname;
2459         dns_offsets_t offsets;
2460         unsigned int n;
2461         dns_rbtnode_t *node = NULL;
2462
2463         dns_name_init(&foundname, offsets);
2464         n = dns_name_countlabels(name);
2465         INSIST(n >= 2);
2466         n--;
2467         dns_name_getlabelsequence(name, 1, n, &foundname);
2468         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2469         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2470                 return (result);
2471         if (result == ISC_R_SUCCESS)
2472                 node->nsec = DNS_RBT_NSEC_NORMAL;
2473         node->find_callback = 1;
2474         node->wild = 1;
2475         return (ISC_R_SUCCESS);
2476 }
2477
2478 static isc_result_t
2479 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2480         isc_result_t result;
2481         dns_name_t foundname;
2482         dns_offsets_t offsets;
2483         unsigned int n, l, i;
2484
2485         dns_name_init(&foundname, offsets);
2486         n = dns_name_countlabels(name);
2487         l = dns_name_countlabels(&rbtdb->common.origin);
2488         i = l + 1;
2489         while (i < n) {
2490                 dns_rbtnode_t *node = NULL;     /* dummy */
2491                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2492                 if (dns_name_iswildcard(&foundname)) {
2493                         result = add_wildcard_magic(rbtdb, &foundname);
2494                         if (result != ISC_R_SUCCESS)
2495                                 return (result);
2496                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2497                                                  &node);
2498                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2499                                 return (result);
2500                         if (result == ISC_R_SUCCESS)
2501                                 node->nsec = DNS_RBT_NSEC_NORMAL;
2502                 }
2503                 i++;
2504         }
2505         return (ISC_R_SUCCESS);
2506 }
2507
2508 static isc_result_t
2509 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
2510                isc_boolean_t create, dns_dbnode_t **nodep)
2511 {
2512         dns_rbtnode_t *node = NULL;
2513         dns_name_t nodename;
2514         isc_result_t result;
2515         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2516
2517         INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
2518
2519         dns_name_init(&nodename, NULL);
2520         RWLOCK(&rbtdb->tree_lock, locktype);
2521         result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
2522                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2523         if (result != ISC_R_SUCCESS) {
2524                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2525                 if (!create) {
2526                         if (result == DNS_R_PARTIALMATCH)
2527                                 result = ISC_R_NOTFOUND;
2528                         return (result);
2529                 }
2530                 /*
2531                  * It would be nice to try to upgrade the lock instead of
2532                  * unlocking then relocking.
2533                  */
2534                 locktype = isc_rwlocktype_write;
2535                 RWLOCK(&rbtdb->tree_lock, locktype);
2536                 node = NULL;
2537                 result = dns_rbt_addnode(tree, name, &node);
2538                 if (result == ISC_R_SUCCESS) {
2539 #ifdef BIND9
2540                         if (tree == rbtdb->tree && rbtdb->rpz_cidr != NULL) {
2541                                 dns_fixedname_t fnamef;
2542                                 dns_name_t *fname;
2543
2544                                 dns_fixedname_init(&fnamef);
2545                                 fname = dns_fixedname_name(&fnamef);
2546                                 dns_rbt_fullnamefromnode(node, fname);
2547                                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, fname);
2548                         }
2549 #endif
2550                         dns_rbt_namefromnode(node, &nodename);
2551 #ifdef DNS_RBT_USEHASH
2552                         node->locknum = node->hashval % rbtdb->node_lock_count;
2553 #else
2554                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2555                                 rbtdb->node_lock_count;
2556 #endif
2557                         if (tree == rbtdb->tree) {
2558                                 add_empty_wildcards(rbtdb, name);
2559
2560                                 if (dns_name_iswildcard(name)) {
2561                                         result = add_wildcard_magic(rbtdb, name);
2562                                         if (result != ISC_R_SUCCESS) {
2563                                                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2564                                                 return (result);
2565                                         }
2566                                 }
2567                         }
2568                         if (tree == rbtdb->nsec3)
2569                                 node->nsec = DNS_RBT_NSEC_NSEC3;
2570                 } else if (result != ISC_R_EXISTS) {
2571                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2572                         return (result);
2573                 }
2574         }
2575
2576         if (tree == rbtdb->nsec3)
2577                 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2578
2579         reactivate_node(rbtdb, node, locktype);
2580         RWUNLOCK(&rbtdb->tree_lock, locktype);
2581
2582         *nodep = (dns_dbnode_t *)node;
2583
2584         return (ISC_R_SUCCESS);
2585 }
2586
2587 static isc_result_t
2588 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2589          dns_dbnode_t **nodep)
2590 {
2591         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2592
2593         REQUIRE(VALID_RBTDB(rbtdb));
2594
2595         return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
2596 }
2597
2598 static isc_result_t
2599 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2600               dns_dbnode_t **nodep)
2601 {
2602         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2603
2604         REQUIRE(VALID_RBTDB(rbtdb));
2605
2606         return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
2607 }
2608
2609 static isc_result_t
2610 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2611         rbtdb_search_t *search = arg;
2612         rdatasetheader_t *header, *header_next;
2613         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2614         rdatasetheader_t *found;
2615         isc_result_t result;
2616         dns_rbtnode_t *onode;
2617
2618         /*
2619          * We only want to remember the topmost zone cut, since it's the one
2620          * that counts, so we'll just continue if we've already found a
2621          * zonecut.
2622          */
2623         if (search->zonecut != NULL)
2624                 return (DNS_R_CONTINUE);
2625
2626         found = NULL;
2627         result = DNS_R_CONTINUE;
2628         onode = search->rbtdb->origin_node;
2629
2630         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2631                   isc_rwlocktype_read);
2632
2633         /*
2634          * Look for an NS or DNAME rdataset active in our version.
2635          */
2636         ns_header = NULL;
2637         dname_header = NULL;
2638         sigdname_header = NULL;
2639         for (header = node->data; header != NULL; header = header_next) {
2640                 header_next = header->next;
2641                 if (header->type == dns_rdatatype_ns ||
2642                     header->type == dns_rdatatype_dname ||
2643                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2644                         do {
2645                                 if (header->serial <= search->serial &&
2646                                     !IGNORE(header)) {
2647                                         /*
2648                                          * Is this a "this rdataset doesn't
2649                                          * exist" record?
2650                                          */
2651                                         if (NONEXISTENT(header))
2652                                                 header = NULL;
2653                                         break;
2654                                 } else
2655                                         header = header->down;
2656                         } while (header != NULL);
2657                         if (header != NULL) {
2658                                 if (header->type == dns_rdatatype_dname)
2659                                         dname_header = header;
2660                                 else if (header->type ==
2661                                            RBTDB_RDATATYPE_SIGDNAME)
2662                                         sigdname_header = header;
2663                                 else if (node != onode ||
2664                                          IS_STUB(search->rbtdb)) {
2665                                         /*
2666                                          * We've found an NS rdataset that
2667                                          * isn't at the origin node.  We check
2668                                          * that they're not at the origin node,
2669                                          * because otherwise we'd erroneously
2670                                          * treat the zone top as if it were
2671                                          * a delegation.
2672                                          */
2673                                         ns_header = header;
2674                                 }
2675                         }
2676                 }
2677         }
2678
2679         /*
2680          * Did we find anything?
2681          */
2682         if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
2683             ns_header != NULL) {
2684                 /*
2685                  * Note that NS has precedence over DNAME if both exist
2686                  * in a zone.  Otherwise DNAME take precedence over NS.
2687                  */
2688                 found = ns_header;
2689                 search->zonecut_sigrdataset = NULL;
2690         } else if (dname_header != NULL) {
2691                 found = dname_header;
2692                 search->zonecut_sigrdataset = sigdname_header;
2693         } else if (ns_header != NULL) {
2694                 found = ns_header;
2695                 search->zonecut_sigrdataset = NULL;
2696         }
2697
2698         if (found != NULL) {
2699                 /*
2700                  * We increment the reference count on node to ensure that
2701                  * search->zonecut_rdataset will still be valid later.
2702                  */
2703                 new_reference(search->rbtdb, node);
2704                 search->zonecut = node;
2705                 search->zonecut_rdataset = found;
2706                 search->need_cleanup = ISC_TRUE;
2707                 /*
2708                  * Since we've found a zonecut, anything beneath it is
2709                  * glue and is not subject to wildcard matching, so we
2710                  * may clear search->wild.
2711                  */
2712                 search->wild = ISC_FALSE;
2713                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2714                         /*
2715                          * If the caller does not want to find glue, then
2716                          * this is the best answer and the search should
2717                          * stop now.
2718                          */
2719                         result = DNS_R_PARTIALMATCH;
2720                 } else {
2721                         dns_name_t *zcname;
2722
2723                         /*
2724                          * The search will continue beneath the zone cut.
2725                          * This may or may not be the best match.  In case it
2726                          * is, we need to remember the node name.
2727                          */
2728                         zcname = dns_fixedname_name(&search->zonecut_name);
2729                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2730                                       ISC_R_SUCCESS);
2731                         search->copy_name = ISC_TRUE;
2732                 }
2733         } else {
2734                 /*
2735                  * There is no zonecut at this node which is active in this
2736                  * version.
2737                  *
2738                  * If this is a "wild" node and the caller hasn't disabled
2739                  * wildcard matching, remember that we've seen a wild node
2740                  * in case we need to go searching for wildcard matches
2741                  * later on.
2742                  */
2743                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2744                         search->wild = ISC_TRUE;
2745         }
2746
2747         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2748                     isc_rwlocktype_read);
2749
2750         return (result);
2751 }
2752
2753 static inline void
2754 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2755               rdatasetheader_t *header, isc_stdtime_t now,
2756               dns_rdataset_t *rdataset)
2757 {
2758         unsigned char *raw;     /* RDATASLAB */
2759
2760         /*
2761          * Caller must be holding the node reader lock.
2762          * XXXJT: technically, we need a writer lock, since we'll increment
2763          * the header count below.  However, since the actual counter value
2764          * doesn't matter, we prioritize performance here.  (We may want to
2765          * use atomic increment when available).
2766          */
2767
2768         if (rdataset == NULL)
2769                 return;
2770
2771         new_reference(rbtdb, node);
2772
2773         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2774
2775         rdataset->methods = &rdataset_methods;
2776         rdataset->rdclass = rbtdb->common.rdclass;
2777         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2778         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2779         rdataset->ttl = header->rdh_ttl - now;
2780         rdataset->trust = header->trust;
2781         if (NEGATIVE(header))
2782                 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
2783         if (NXDOMAIN(header))
2784                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2785         if (OPTOUT(header))
2786                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2787         rdataset->private1 = rbtdb;
2788         rdataset->private2 = node;
2789         raw = (unsigned char *)header + sizeof(*header);
2790         rdataset->private3 = raw;
2791         rdataset->count = header->count++;
2792         if (rdataset->count == ISC_UINT32_MAX)
2793                 rdataset->count = 0;
2794
2795         /*
2796          * Reset iterator state.
2797          */
2798         rdataset->privateuint4 = 0;
2799         rdataset->private5 = NULL;
2800
2801         /*
2802          * Add noqname proof.
2803          */
2804         rdataset->private6 = header->noqname;
2805         if (rdataset->private6 != NULL)
2806                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2807         rdataset->private7 = header->closest;
2808         if (rdataset->private7 != NULL)
2809                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2810
2811         /*
2812          * Copy out re-signing information.
2813          */
2814         if (RESIGN(header)) {
2815                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2816                 rdataset->resign = header->resign;
2817         } else
2818                 rdataset->resign = 0;
2819 }
2820
2821 static inline isc_result_t
2822 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2823                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2824                  dns_rdataset_t *sigrdataset)
2825 {
2826         isc_result_t result;
2827         dns_name_t *zcname;
2828         rbtdb_rdatatype_t type;
2829         dns_rbtnode_t *node;
2830
2831         /*
2832          * The caller MUST NOT be holding any node locks.
2833          */
2834
2835         node = search->zonecut;
2836         type = search->zonecut_rdataset->type;
2837
2838         /*
2839          * If we have to set foundname, we do it before anything else.
2840          * If we were to set foundname after we had set nodep or bound the
2841          * rdataset, then we'd have to undo that work if dns_name_copy()
2842          * failed.  By setting foundname first, there's nothing to undo if
2843          * we have trouble.
2844          */
2845         if (foundname != NULL && search->copy_name) {
2846                 zcname = dns_fixedname_name(&search->zonecut_name);
2847                 result = dns_name_copy(zcname, foundname, NULL);
2848                 if (result != ISC_R_SUCCESS)
2849                         return (result);
2850         }
2851         if (nodep != NULL) {
2852                 /*
2853                  * Note that we don't have to increment the node's reference
2854                  * count here because we're going to use the reference we
2855                  * already have in the search block.
2856                  */
2857                 *nodep = node;
2858                 search->need_cleanup = ISC_FALSE;
2859         }
2860         if (rdataset != NULL) {
2861                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2862                           isc_rwlocktype_read);
2863                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2864                               search->now, rdataset);
2865                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2866                         bind_rdataset(search->rbtdb, node,
2867                                       search->zonecut_sigrdataset,
2868                                       search->now, sigrdataset);
2869                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2870                             isc_rwlocktype_read);
2871         }
2872
2873         if (type == dns_rdatatype_dname)
2874                 return (DNS_R_DNAME);
2875         return (DNS_R_DELEGATION);
2876 }
2877
2878 static inline isc_boolean_t
2879 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2880            dns_rbtnode_t *node)
2881 {
2882         unsigned char *raw;     /* RDATASLAB */
2883         unsigned int count, size;
2884         dns_name_t ns_name;
2885         isc_boolean_t valid = ISC_FALSE;
2886         dns_offsets_t offsets;
2887         isc_region_t region;
2888         rdatasetheader_t *header;
2889
2890         /*
2891          * No additional locking is required.
2892          */
2893
2894         /*
2895          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2896          * if it occurs at a zone cut, but is not valid below it.
2897          */
2898         if (type == dns_rdatatype_ns) {
2899                 if (node != search->zonecut) {
2900                         return (ISC_FALSE);
2901                 }
2902         } else if (type != dns_rdatatype_a &&
2903                    type != dns_rdatatype_aaaa &&
2904                    type != dns_rdatatype_a6) {
2905                 return (ISC_FALSE);
2906         }
2907
2908         header = search->zonecut_rdataset;
2909         raw = (unsigned char *)header + sizeof(*header);
2910         count = raw[0] * 256 + raw[1];
2911 #if DNS_RDATASET_FIXED
2912         raw += 2 + (4 * count);
2913 #else
2914         raw += 2;
2915 #endif
2916
2917         while (count > 0) {
2918                 count--;
2919                 size = raw[0] * 256 + raw[1];
2920 #if DNS_RDATASET_FIXED
2921                 raw += 4;
2922 #else
2923                 raw += 2;
2924 #endif
2925                 region.base = raw;
2926                 region.length = size;
2927                 raw += size;
2928                 /*
2929                  * XXX Until we have rdata structures, we have no choice but
2930                  * to directly access the rdata format.
2931                  */
2932                 dns_name_init(&ns_name, offsets);
2933                 dns_name_fromregion(&ns_name, &region);
2934                 if (dns_name_compare(&ns_name, name) == 0) {
2935                         valid = ISC_TRUE;
2936                         break;
2937                 }
2938         }
2939
2940         return (valid);
2941 }
2942
/*
 * activeempty: decide whether an active node exists beneath 'name'.
 *
 * Starting with the node after the current position of 'chain', walk
 * forward (dns_rbtnodechain_next) until a node is reached that is active
 * in the search's version, i.e. it carries at least one header with
 * serial <= search->serial for which IGNORE() is false and EXISTS() is
 * true.
 *
 * Returns ISC_TRUE only when such a node is found, its full name can be
 * built from prefix + origin, and dns_name_issubdomain() reports that
 * name as a subdomain of 'name'.  Every other outcome (the walk stops
 * with a non-success result, the concatenation fails, or the name is not
 * a subdomain) yields ISC_FALSE.
 *
 * Side effect: the caller's 'chain' is advanced.  Each node's header list
 * is scanned while holding that node's read lock, which is released
 * before moving on.
 */
2943 static inline isc_boolean_t
2944 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2945             dns_name_t *name)
2946 {
2947         dns_fixedname_t fnext;
2948         dns_fixedname_t forigin;
2949         dns_name_t *next;
2950         dns_name_t *origin;
2951         dns_name_t prefix;
2952         dns_rbtdb_t *rbtdb;
2953         dns_rbtnode_t *node;
2954         isc_result_t result;
2955         isc_boolean_t answer = ISC_FALSE;
2956         rdatasetheader_t *header;
2957
2958         rbtdb = search->rbtdb;
2959
2960         dns_name_init(&prefix, NULL);
2961         dns_fixedname_init(&fnext);
2962         next = dns_fixedname_name(&fnext);
2963         dns_fixedname_init(&forigin);
2964         origin = dns_fixedname_name(&forigin);
2965
             /* Step past the chain's current node before examining anything. */
2966         result = dns_rbtnodechain_next(chain, NULL, NULL);
2967         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2968                 node = NULL;
2969                 result = dns_rbtnodechain_current(chain, &prefix,
2970                                                   origin, &node);
2971                 if (result != ISC_R_SUCCESS)
2972                         break;
2973                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2974                           isc_rwlocktype_read);
                     /* Look for any header that is active in this version. */
2975                 for (header = node->data;
2976                      header != NULL;
2977                      header = header->next) {
2978                         if (header->serial <= search->serial &&
2979                             !IGNORE(header) && EXISTS(header))
2980                                 break;
2981                 }
2982                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2983                             isc_rwlocktype_read);
                     /* Active node found; 'result' is still ISC_R_SUCCESS. */
2984                 if (header != NULL)
2985                         break;
2986                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2987         }
             /*
              * Only when the walk stopped on an active node do we build its
              * full name and compare it against 'name'.
              */
2988         if (result == ISC_R_SUCCESS)
2989                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2990         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2991                 answer = ISC_TRUE;
2992         return (answer);
2993 }
2994
/*
 * activeemtpynode: decide whether 'qname' is at or below an empty node.
 * (The identifier is spelled as the caller in this file uses it.)
 *
 * search - the lookup in progress; supplies the chain position, the
 *          version serial and the database.
 * qname  - the name being looked up.
 * wname  - the wildcard name under consideration; its first label is
 *          removed to obtain the terminal name.
 *
 * Working on a local copy of search->chain, the function
 *   1. walks backwards (dns_rbtnodechain_prev) from the chain's current
 *      node until it reaches a node that is active in the search's
 *      version, and stores that node's full name in 'prev';
 *   2. walks forwards (dns_rbtnodechain_next) from wherever step 1
 *      stopped until it reaches an active node, and stores that node's
 *      full name in 'next';
 *   3. tests 'qname' and then each successive parent of it, stopping
 *      before the terminal name: if 'prev' or 'next' is a subdomain of
 *      the tested name, the answer is ISC_TRUE.
 *
 * A neighbour whose walk or name construction did not end in
 * ISC_R_SUCCESS is not consulted (check_prev / check_next are cleared).
 * Returns ISC_FALSE when no tested name has such a neighbour.
 *
 * Node header lists are read under the per-node read lock.
 */
2995 static inline isc_boolean_t
2996 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2997         dns_fixedname_t fnext;
2998         dns_fixedname_t forigin;
2999         dns_fixedname_t fprev;
3000         dns_name_t *next;
3001         dns_name_t *origin;
3002         dns_name_t *prev;
3003         dns_name_t name;
3004         dns_name_t rname;
3005         dns_name_t tname;
3006         dns_rbtdb_t *rbtdb;
3007         dns_rbtnode_t *node;
3008         dns_rbtnodechain_t chain;
3009         isc_boolean_t check_next = ISC_TRUE;
3010         isc_boolean_t check_prev = ISC_TRUE;
3011         isc_boolean_t answer = ISC_FALSE;
3012         isc_result_t result;
3013         rdatasetheader_t *header;
3014         unsigned int n;
3015
3016         rbtdb = search->rbtdb;
3017
3018         dns_name_init(&name, NULL);
3019         dns_name_init(&tname, NULL);
3020         dns_name_init(&rname, NULL);
3021         dns_fixedname_init(&fnext);
3022         next = dns_fixedname_name(&fnext);
3023         dns_fixedname_init(&fprev);
3024         prev = dns_fixedname_name(&fprev);
3025         dns_fixedname_init(&forigin);
3026         origin = dns_fixedname_name(&forigin);
3027
3028         /*
3029          * Find if qname is at or below an empty node.
3030          * Use our own copy of the chain.
3031          */
3032
3033         chain = search->chain;
             /* Backward walk: find the nearest active node at or before here. */
3034         do {
3035                 node = NULL;
3036                 result = dns_rbtnodechain_current(&chain, &name,
3037                                                   origin, &node);
3038                 if (result != ISC_R_SUCCESS)
3039                         break;
3040                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3041                           isc_rwlocktype_read);
3042                 for (header = node->data;
3043                      header != NULL;
3044                      header = header->next) {
3045                         if (header->serial <= search->serial &&
3046                             !IGNORE(header) && EXISTS(header))
3047                                 break;
3048                 }
3049                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3050                             isc_rwlocktype_read);
3051                 if (header != NULL)
3052                         break;
3053                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3054         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3055         if (result == ISC_R_SUCCESS)
3056                 result = dns_name_concatenate(&name, origin, prev, NULL);
3057         if (result != ISC_R_SUCCESS)
3058                 check_prev = ISC_FALSE;
3059
             /*
              * Forward walk, continuing with the same chain copy from the
              * position where the backward walk stopped.
              */
3060         result = dns_rbtnodechain_next(&chain, NULL, NULL);
3061         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3062                 node = NULL;
3063                 result = dns_rbtnodechain_current(&chain, &name,
3064                                                   origin, &node);
3065                 if (result != ISC_R_SUCCESS)
3066                         break;
3067                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3068                           isc_rwlocktype_read);
3069                 for (header = node->data;
3070                      header != NULL;
3071                      header = header->next) {
3072                         if (header->serial <= search->serial &&
3073                             !IGNORE(header) && EXISTS(header))
3074                                 break;
3075                 }
3076                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3077                             isc_rwlocktype_read);
3078                 if (header != NULL)
3079                         break;
3080                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3081         }
3082         if (result == ISC_R_SUCCESS)
3083                 result = dns_name_concatenate(&name, origin, next, NULL);
3084         if (result != ISC_R_SUCCESS)
3085                 check_next = ISC_FALSE;
3086
3087         dns_name_clone(qname, &rname);
3088
3089         /*
3090          * Remove the wildcard label to find the terminal name.
3091          */
3092         n = dns_name_countlabels(wname);
3093         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3094
             /*
              * Test qname, then each parent in turn, until the terminal name
              * is reached (the terminal name itself is not tested).
              * NOTE(review): termination relies on 'qname' being a proper
              * subdomain of the terminal name -- confirm against callers.
              */
3095         do {
3096                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3097                     (check_next && dns_name_issubdomain(next, &rname))) {
3098                         answer = ISC_TRUE;
3099                         break;
3100                 }
3101                 /*
3102                  * Remove the left hand label.
3103                  */
3104                 n = dns_name_countlabels(&rname);
3105                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3106         } while (!dns_name_equal(&rname, &tname));
3107         return (answer);
3108 }
3109
/*
 * find_wildcard: look for a wildcard node that can answer 'qname'.
 *
 * search - the lookup in progress; search->chain (levels[] and
 *          level_matches) supplies the ancestor nodes to examine and
 *          search->serial selects the version.
 * nodep  - on entry *nodep is the node at which the examination starts;
 *          on success it is replaced by the wildcard node.
 * qname  - the name being looked up.
 *
 * Returns:
 *   ISC_R_SUCCESS   a wildcard node was found that is either active
 *                   itself or has an active node below it according to
 *                   activeempty(), and activeemtpynode() did not report
 *                   'qname' as being at or below an empty node; *nodep
 *                   is set.
 *   ISC_R_NOTFOUND  activeemtpynode() returned true, or an active
 *                   ancestor was reached first.
 *   other           the failing result of dns_name_concatenate() or
 *                   dns_rbt_findnode().
 *
 * NOTE(review): if the loop ends through 'done' (level 0 reached and not
 * active), 'result' keeps whatever the last wildcard lookup left in it.
 * Confirm that the top level node is always active so that this path is
 * not taken in practice.
 */
3110 static inline isc_result_t
3111 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3112               dns_name_t *qname)
3113 {
3114         unsigned int i, j;
3115         dns_rbtnode_t *node, *level_node, *wnode;
3116         rdatasetheader_t *header;
3117         isc_result_t result = ISC_R_NOTFOUND;
3118         dns_name_t name;
3119         dns_name_t *wname;
3120         dns_fixedname_t fwname;
3121         dns_rbtdb_t *rbtdb;
3122         isc_boolean_t done, wild, active;
3123         dns_rbtnodechain_t wchain;
3124
3125         /*
3126          * Caller must be holding the tree lock and MUST NOT be holding
3127          * any node locks.
3128          */
3129
3130         /*
3131          * Examine each ancestor level.  If the level's wild bit
3132          * is set, then construct the corresponding wildcard name and
3133          * search for it.  If the wildcard node exists, and is active in
3134          * this version, we're done.  If not, then we next check to see
3135          * if the ancestor is active in this version.  If so, then there
3136          * can be no possible wildcard match and again we're done.  If not,
3137          * continue the search.
3138          */
3139
3140         rbtdb = search->rbtdb;
3141         i = search->chain.level_matches;
3142         done = ISC_FALSE;
3143         node = *nodep;
3144         do {
3145                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3146                           isc_rwlocktype_read);
3147
3148                 /*
3149                  * First we try to figure out if this node is active in
3150                  * the search's version.  We do this now, even though we
3151                  * may not need the information, because it simplifies the
3152                  * locking and code flow.
3153                  */
3154                 for (header = node->data;
3155                      header != NULL;
3156                      header = header->next) {
3157                         if (header->serial <= search->serial &&
3158                             !IGNORE(header) && EXISTS(header))
3159                                 break;
3160                 }
3161                 if (header != NULL)
3162                         active = ISC_TRUE;
3163                 else
3164                         active = ISC_FALSE;
3165
3166                 if (node->wild)
3167                         wild = ISC_TRUE;
3168                 else
3169                         wild = ISC_FALSE;
3170
3171                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3172                             isc_rwlocktype_read);
3173
3174                 if (wild) {
3175                         /*
3176                          * Construct the wildcard name for this level.
3177                          */
3178                         dns_name_init(&name, NULL);
3179                         dns_rbt_namefromnode(node, &name);
3180                         dns_fixedname_init(&fwname);
3181                         wname = dns_fixedname_name(&fwname);
3182                         result = dns_name_concatenate(dns_wildcardname, &name,
3183                                                       wname, NULL);
                             /*
                              * Append the names of the levels above this one,
                              * from level i-1 down to level 0.
                              */
3184                         j = i;
3185                         while (result == ISC_R_SUCCESS && j != 0) {
3186                                 j--;
3187                                 level_node = search->chain.levels[j];
3188                                 dns_name_init(&name, NULL);
3189                                 dns_rbt_namefromnode(level_node, &name);
3190                                 result = dns_name_concatenate(wname,
3191                                                               &name,
3192                                                               wname,
3193                                                               NULL);
3194                         }
3195                         if (result != ISC_R_SUCCESS)
3196                                 break;
3197
                             /*
                              * Look the wildcard name up with a separate chain
                              * (wchain) so that search->chain is left alone.
                              */
3198                         wnode = NULL;
3199                         dns_rbtnodechain_init(&wchain, NULL);
3200                         result = dns_rbt_findnode(rbtdb->tree, wname,
3201                                                   NULL, &wnode, &wchain,
3202                                                   DNS_RBTFIND_EMPTYDATA,
3203                                                   NULL, NULL);
3204                         if (result == ISC_R_SUCCESS) {
3205                                 nodelock_t *lock;
3206
3207                                 /*
3208                                  * We have found the wildcard node.  If it
3209                                  * is active in the search's version, we're
3210                                  * done.
3211                                  */
3212                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3213                                 NODE_LOCK(lock, isc_rwlocktype_read);
3214                                 for (header = wnode->data;
3215                                      header != NULL;
3216                                      header = header->next) {
3217                                         if (header->serial <= search->serial &&
3218                                             !IGNORE(header) && EXISTS(header))
3219                                                 break;
3220                                 }
3221                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
                                     /*
                                      * Accept the wildcard node if it is active
                                      * itself or activeempty() finds an active
                                      * node below it -- unless qname is at or
                                      * below an empty node.
                                      */
3222                                 if (header != NULL ||
3223                                     activeempty(search, &wchain, wname)) {
3224                                         if (activeemtpynode(search, qname,
3225                                                             wname)) {
3226                                                 return (ISC_R_NOTFOUND);
3227                                         }
3228                                         /*
3229                                          * The wildcard node is active!
3230                                          *
3231                                          * Note: result is still ISC_R_SUCCESS
3232                                          * so we don't have to set it.
3233                                          */
3234                                         *nodep = wnode;
3235                                         break;
3236                                 }
3237                         } else if (result != ISC_R_NOTFOUND &&
3238                                    result != DNS_R_PARTIALMATCH) {
3239                                 /*
3240                                  * An error has occurred.  Bail out.
3241                                  */
3242                                 break;
3243                         }
3244                 }
3245
3246                 if (active) {
3247                         /*
3248                          * The level node is active.  Any wildcarding
3249                          * present at higher levels has no
3250                          * effect and we're done.
3251                          */
3252                         result = ISC_R_NOTFOUND;
3253                         break;
3254                 }
3255
                     /* Move up one level, or finish after level 0. */
3256                 if (i > 0) {
3257                         i--;
3258                         node = search->chain.levels[i];
3259                 } else
3260                         done = ISC_TRUE;
3261         } while (!done);
3262
3263         return (result);
3264 }
3265
/*
 * matchparams: test whether an NSEC3 rdataset uses the NSEC3 parameters
 * recorded in the search's version.
 *
 * header - an rdataset header whose type must be dns_rdatatype_nsec3
 *          (enforced with REQUIRE); the raw rdata slab is read from the
 *          bytes immediately following the header structure.
 * search - supplies the rdata class (search->rbtdb->common.rdclass) and
 *          the reference parameters (search->rbtversion).
 *
 * Returns ISC_TRUE as soon as one record in the slab has the same hash,
 * iterations, salt length and salt bytes as the version; ISC_FALSE when
 * no record matches.
 *
 * Slab layout as read here: a two-byte big-endian record count, then
 * each record as a two-byte big-endian length followed by the rdata.
 * When DNS_RDATASET_FIXED is set, an extra 4 * count bytes are skipped
 * after the count and two extra bytes after each length (presumably
 * offset/order information -- confirm against the slab writer).
 */
3266 static isc_boolean_t
3267 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3268 {
3269         dns_rdata_t rdata = DNS_RDATA_INIT;
3270         dns_rdata_nsec3_t nsec3;
3271         unsigned char *raw;                     /* RDATASLAB */
3272         unsigned int rdlen, count;
3273         isc_region_t region;
3274         isc_result_t result;
3275
3276         REQUIRE(header->type == dns_rdatatype_nsec3);
3277
3278         raw = (unsigned char *)header + sizeof(*header);
3279         count = raw[0] * 256 + raw[1]; /* number of records, big-endian */
3280 #if DNS_RDATASET_FIXED
3281         raw += count * 4 + 2;
3282 #else
3283         raw += 2;
3284 #endif
3285         while (count-- > 0) {
3286                 rdlen = raw[0] * 256 + raw[1];
3287 #if DNS_RDATASET_FIXED
3288                 raw += 4;
3289 #else
3290                 raw += 2;
3291 #endif
3292                 region.base = raw;
3293                 region.length = rdlen;
3294                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3295                                      dns_rdatatype_nsec3, &region);
                     /* Advance to the next record before decoding this one. */
3296                 raw += rdlen;
3297                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3298                 INSIST(result == ISC_R_SUCCESS);
3299                 if (nsec3.hash == search->rbtversion->hash &&
3300                     nsec3.iterations == search->rbtversion->iterations &&
3301                     nsec3.salt_length == search->rbtversion->salt_length &&
3302                     memcmp(nsec3.salt, search->rbtversion->salt,
3303                            nsec3.salt_length) == 0)
3304                         return (ISC_TRUE);
                     /* Reset so 'rdata' can be reused for the next record. */
3305                 dns_rdata_reset(&rdata);
3306         }
3307         return (ISC_FALSE);
3308 }
3309
3310 /*
3311  * Step back to the previous candidate node for an NSEC/NSEC3 search.
      *
      * type      - dns_rdatatype_nsec3 selects the simple path; any other
      *             value takes the auxiliary-NSEC-tree path.
      * search    - the lookup in progress; search->chain is repositioned.
      * name,
      * origin    - in/out: the relative name and origin of the current
      *             position; updated to describe the node stepped to.
      * nodep     - must point to a NULL pointer on entry (REQUIRE); receives
      *             the node found in the main tree.
      * nsecchain - chain over the auxiliary NSEC tree (search->rbtdb->nsec);
      *             initialized here on the first call.  Not used for NSEC3.
      * firstp    - in/out flag: true on the first call for this search, and
      *             cleared here.  Not used for NSEC3.
      *
      * NSEC3 path: move search->chain to the previous node and return the
      * result of dns_rbtnodechain_current() for it.
      *
      * NSEC path: obtain the previous name from the auxiliary NSEC tree, then
      * look that name up in the main tree (search->rbtdb->tree), which also
      * repositions search->chain.  If the main tree has no such node
      * (ISC_R_NOTFOUND / DNS_R_PARTIALMATCH) the next earlier name from the
      * auxiliary tree is tried.
      *
      * Returns ISC_R_SUCCESS with *nodep set, DNS_R_BADDB (after logging) when
      * the main-tree lookup fails in any other way, or the non-success result
      * of the chain / name operation that stopped the walk (ISC_R_NOTFOUND
      * from the first auxiliary-chain position is reported as ISC_R_NOMORE).
3312  */
3313 static inline isc_result_t
3314 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3315                     dns_name_t *name, dns_name_t *origin,
3316                     dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3317                     isc_boolean_t *firstp)
3318 {
3319         dns_fixedname_t ftarget;
3320         dns_name_t *target;
3321         dns_rbtnode_t *nsecnode;
3322         isc_result_t result;
3323
3324         REQUIRE(nodep != NULL && *nodep == NULL);
3325
             /* NSEC3: the search chain itself is walked; no auxiliary tree. */
3326         if (type == dns_rdatatype_nsec3) {
3327                 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3328                 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3329                         return (result);
3330                 result = dns_rbtnodechain_current(&search->chain, name, origin,
3331                                                   nodep);
3332                 return (result);
3333         }
3334
3335         dns_fixedname_init(&ftarget);
3336         target = dns_fixedname_name(&ftarget);
3337
3338         for (;;) {
3339                 if (*firstp) {
3340                         /*
3341                          * Construct the name of the second node to check.
3342                          * It is the first node sought in the NSEC tree.
3343                          */
3344                         *firstp = ISC_FALSE;
3345                         dns_rbtnodechain_init(nsecchain, NULL);
3346                         result = dns_name_concatenate(name, origin,
3347                                                       target, NULL);
3348                         if (result != ISC_R_SUCCESS)
3349                                 return (result);
3350                         nsecnode = NULL;
3351                         result = dns_rbt_findnode(search->rbtdb->nsec,
3352                                                   target, NULL,
3353                                                   &nsecnode, nsecchain,
3354                                                   DNS_RBTFIND_NOOPTIONS,
3355                                                   NULL, NULL);
3356                         if (result == ISC_R_SUCCESS) {
3357                                 /*
3358                                  * Since this was the first loop, finding the
3359                                  * name in the NSEC tree implies that the first
3360                                  * node checked in the main tree had an
3361                                  * unacceptable NSEC record.
3362                                  * Try the previous node in the NSEC tree.
3363                                  */
3364                                 result = dns_rbtnodechain_prev(nsecchain,
3365                                                                name, origin);
3366                                 if (result == DNS_R_NEWORIGIN)
3367                                         result = ISC_R_SUCCESS;
3368                         } else if (result == ISC_R_NOTFOUND ||
3369                                    result == DNS_R_PARTIALMATCH) {
                                     /*
                                      * No exact match: take the name at the
                                      * chain's current position instead.
                                      */
3370                                 result = dns_rbtnodechain_current(nsecchain,
3371                                                         name, origin, NULL);
3372                                 if (result == ISC_R_NOTFOUND)
3373                                         result = ISC_R_NOMORE;
3374                         }
3375                 } else {
3376                         /*
3377                          * This is a second or later trip through the auxiliary
3378                          * tree for the name of a third or earlier NSEC node in
3379                          * the main tree.  Previous trips through the NSEC tree
3380                          * must have found nodes in the main tree with NSEC
3381                          * records.  Perhaps they lacked signature records.
3382                          */
3383                         result = dns_rbtnodechain_prev(nsecchain, name, origin);
3384                         if (result == DNS_R_NEWORIGIN)
3385                                 result = ISC_R_SUCCESS;
3386                 }
3387                 if (result != ISC_R_SUCCESS)
3388                         return (result);
3389
3390                 /*
3391                  * Construct the name to seek in the main tree.
3392                  */
3393                 result = dns_name_concatenate(name, origin, target, NULL);
3394                 if (result != ISC_R_SUCCESS)
3395                         return (result);
3396
3397                 *nodep = NULL;
3398                 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3399                                           nodep, &search->chain,
3400                                           DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3401                 if (result == ISC_R_SUCCESS)
3402                         return (result);
3403
3404                 /*
3405                  * There should always be a node in the main tree with the
3406                  * same name as the node in the auxiliary NSEC tree, except for
3407                  * nodes in the auxiliary tree that are awaiting deletion.
3408                  */
3409                 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
3410                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3411                                       DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3412                                       "previous_closest_nsec(): %s",
3413                                       isc_result_totext(result));
3414                         return (DNS_R_BADDB);
3415                 }
3416         }
3417 }
3418
3419 /*
3420  * Find the NSEC/NSEC3 record that is at or before the current point on
3421  * the search chain.  For NSEC3, only records whose parameters match the
3422  * current NSEC3PARAM record are considered.
      *
      * search      - the lookup in progress; search->chain gives the starting
      *               point and is repositioned while walking backwards.
      * nodep       - if not NULL, receives a new reference to the node found.
      * foundname   - receives the full name of the node found.
      * rdataset    - bound to the NSEC/NSEC3 rdataset found.
      * sigrdataset - bound to the covering signature rdataset when one was
      *               found.
      * tree        - the tree being searched; if it is search->rbtdb->nsec3
      *               the search is for NSEC3 and may wrap once to the last
      *               node of the tree, otherwise it is for NSEC.
      * secure      - a signature is required only when this equals
      *               dns_db_secure.
      *
      * Returns ISC_R_SUCCESS when a suitable record was found and bound.
      * Returns DNS_R_BADDB when a node has only one of the record and its
      * signature in a combination that is not acceptable, or when the walk
      * ran out of nodes (ISC_R_NOMORE) without finding anything.  Other
      * results come from the chain, name or previous_closest_nsec() calls.
3423  */
3424 static inline isc_result_t
3425 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3426                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3427                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3428                   dns_db_secure_t secure)
3429 {
3430         dns_rbtnode_t *node, *prevnode;
3431         rdatasetheader_t *header, *header_next, *found, *foundsig;
3432         dns_rbtnodechain_t nsecchain;
3433         isc_boolean_t empty_node;
3434         isc_result_t result;
3435         dns_fixedname_t fname, forigin;
3436         dns_name_t *name, *origin;
3437         dns_rdatatype_t type;
3438         rbtdb_rdatatype_t sigtype;
3439         isc_boolean_t wraps;
3440         isc_boolean_t first = ISC_TRUE;
3441         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3442
3443         if (tree == search->rbtdb->nsec3) {
3444                 type = dns_rdatatype_nsec3;
3445                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3446                 wraps = ISC_TRUE;
3447         } else {
3448                 type = dns_rdatatype_nsec;
3449                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3450                 wraps = ISC_FALSE;
3451         }
3452
3453         /*
3454          * Use the auxiliary tree only starting with the second node in the
3455          * hope that the original node will be right much of the time.
3456          */
3457         dns_fixedname_init(&fname);
3458         name = dns_fixedname_name(&fname);
3459         dns_fixedname_init(&forigin);
3460         origin = dns_fixedname_name(&forigin);
3461  again:
3462         node = NULL;
3463         prevnode = NULL;
3464         result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3465         if (result != ISC_R_SUCCESS)
3466                 return (result);
3467         do {
                     /*
                      * The node lock taken here is held across the calls to
                      * previous_closest_nsec() below and released at the
                      * bottom of the loop body.
                      */
3468                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3469                           isc_rwlocktype_read);
3470                 found = NULL;
3471                 foundsig = NULL;
3472                 empty_node = ISC_TRUE;
3473                 for (header = node->data;
3474                      header != NULL;
3475                      header = header_next) {
                             /* Saved first: 'header' is moved down the list below. */
3476                         header_next = header->next;
3477                         /*
3478                          * Look for an active, extant NSEC or RRSIG NSEC.
3479                          */
3480                         do {
3481                                 if (header->serial <= search->serial &&
3482                                     !IGNORE(header)) {
3483                                         /*
3484                                          * Is this a "this rdataset doesn't
3485                                          * exist" record?
3486                                          */
3487                                         if (NONEXISTENT(header))
3488                                                 header = NULL;
3489                                         break;
3490                                 } else
3491                                         header = header->down;
3492                         } while (header != NULL);
3493                         if (header != NULL) {
3494                                 /*
3495                                  * We now know that there is at least one
3496                                  * active rdataset at this node.
3497                                  */
3498                                 empty_node = ISC_FALSE;
3499                                 if (header->type == type) {
3500                                         found = header;
3501                                         if (foundsig != NULL)
3502                                                 break;
3503                                 } else if (header->type == sigtype) {
3504                                         foundsig = header;
3505                                         if (found != NULL)
3506                                                 break;
3507                                 }
3508                         }
3509                 }
3510                 if (!empty_node) {
3511                         if (found != NULL && search->rbtversion->havensec3 &&
3512                             found->type == dns_rdatatype_nsec3 &&
3513                             !matchparams(found, search)) {
                                     /*
                                      * NSEC3 with other parameters: treat the
                                      * node as empty and step back.  The NULL
                                      * chain/flag arguments are not touched on
                                      * the NSEC3 path of
                                      * previous_closest_nsec().
                                      */
3514                                 empty_node = ISC_TRUE;
3515                                 found = NULL;
3516                                 foundsig = NULL;
3517                                 result = previous_closest_nsec(type, search,
3518                                                                name, origin,
3519                                                                &prevnode, NULL,
3520                                                                NULL);
3521                         } else if (found != NULL &&
3522                                    (foundsig != NULL || !need_sig)) {
3523                                 /*
3524                                  * We've found the right NSEC/NSEC3 record.
3525                                  *
3526                                  * Note: for this to really be the right
3527                                  * NSEC record, it's essential that the NSEC
3528                                  * records of any nodes obscured by a zone
3529                                  * cut have been removed; we assume this is
3530                                  * the case.
3531                                  */
3532                                 result = dns_name_concatenate(name, origin,
3533                                                               foundname, NULL);
3534                                 if (result == ISC_R_SUCCESS) {
3535                                         if (nodep != NULL) {
3536                                                 new_reference(search->rbtdb,
3537                                                               node);
3538                                                 *nodep = node;
3539                                         }
3540                                         bind_rdataset(search->rbtdb, node,
3541                                                       found, search->now,
3542                                                       rdataset);
3543                                         if (foundsig != NULL)
3544                                                 bind_rdataset(search->rbtdb,
3545                                                               node,
3546                                                               foundsig,
3547                                                               search->now,
3548                                                               sigrdataset);
3549                                 }
3550                         } else if (found == NULL && foundsig == NULL) {
3551                                 /*
3552                                  * This node is active, but has no NSEC or
3553                                  * RRSIG NSEC.  That means it's glue or
3554                                  * other obscured zone data that isn't
3555                                  * relevant for our search.  Treat the
3556                                  * node as if it were empty and keep looking.
3557                                  */
3558                                 empty_node = ISC_TRUE;
3559                                 result = previous_closest_nsec(type, search,
3560                                                                name, origin,
3561                                                                &prevnode,
3562                                                                &nsecchain,
3563                                                                &first);
3564                         } else {
3565                                 /*
3566                                  * We found an active node, but either the
3567                                  * NSEC or the RRSIG NSEC is missing.  This
3568                                  * shouldn't happen.
3569                                  */
3570                                 result = DNS_R_BADDB;
3571                         }
3572                 } else {
3573                         /*
3574                          * This node isn't active.  We've got to keep
3575                          * looking.
3576                          */
3577                         result = previous_closest_nsec(type, search,
3578                                                        name, origin, &prevnode,
3579                                                        &nsecchain, &first);
3580                 }
3581                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3582                             isc_rwlocktype_read);
                     /* Continue from the node previous_closest_nsec() returned. */
3583                 node = prevnode;
3584                 prevnode = NULL;
3585         } while (empty_node && result == ISC_R_SUCCESS);
3586
             /* 'first' is cleared only once nsecchain has been initialized. */
3587         if (!first)
3588                 dns_rbtnodechain_invalidate(&nsecchain);
3589
             /*
              * For the NSEC3 tree, running off the front restarts the walk
              * once from the last node of the tree.
              */
3590         if (result == ISC_R_NOMORE && wraps) {
3591                 result = dns_rbtnodechain_last(&search->chain, tree,
3592                                                NULL, NULL);
3593                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3594                         wraps = ISC_FALSE;
3595                         goto again;
3596                 }
3597         }
3598
3599         /*
3600          * If the result is ISC_R_NOMORE, then we got to the beginning of
3601          * the database and didn't find a NSEC record.  This shouldn't
3602          * happen.
3603          */
3604         if (result == ISC_R_NOMORE)
3605                 result = DNS_R_BADDB;
3606
3607         return (result);
3608 }
3609
/*
 * Find the rdataset of 'type' at 'name' in a zone database.
 *
 * 'version' selects the database version to search; when it is NULL the
 * current version is attached for the duration of the call and closed
 * again before returning.  'now' is not used.
 *
 * The lookup runs in the NSEC3 tree when DNS_DBFIND_FORCENSEC3 is set in
 * 'options' and in the main tree otherwise.  The tree lock is held for
 * reading from before the dns_rbt_findnode() call until 'tree_exit', and
 * the matched node's lock is held for reading while its rdatasets are
 * examined.
 *
 * On the paths handled directly here, '*nodep' (if 'nodep' is non-NULL)
 * receives a referenced node and 'rdataset'/'sigrdataset' are bound to the
 * answer or, for a negative answer that calls for one, to the NSEC and
 * its signature.  Nothing is bound for a successful ANY query.
 * Delegations and closest-NSEC answers are filled in by
 * setup_delegation() and find_closest_nsec() respectively.
 *
 * Result codes assigned directly by this function:
 *   ISC_R_SUCCESS     the requested type was found
 *   DNS_R_CNAME       a CNAME was found instead of 'type'
 *   DNS_R_GLUE        data was found at or beneath a zone cut
 *   DNS_R_ZONECUT     ANY query answered at the zone cut node itself
 *   DNS_R_NXRRSET     the name matched, but no rdataset of 'type' exists
 *   DNS_R_NXDOMAIN /
 *   DNS_R_EMPTYNAME   no usable node; the choice follows activeempty()
 *   DNS_R_EMPTYWILD   wildcard match without NSEC data in a secure zone,
 *                     after find_closest_nsec() succeeded
 *   DNS_R_BADDB       a required NSEC (or its RRSIG) is missing
 * Any other value is passed through from a helper.
 */
3610 static isc_result_t
3611 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3612           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3613           dns_dbnode_t **nodep, dns_name_t *foundname,
3614           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3615 {
3616         dns_rbtnode_t *node = NULL;
3617         isc_result_t result;
3618         rbtdb_search_t search;
3619         isc_boolean_t cname_ok = ISC_TRUE;
3620         isc_boolean_t close_version = ISC_FALSE;
3621         isc_boolean_t maybe_zonecut = ISC_FALSE;
3622         isc_boolean_t at_zonecut = ISC_FALSE;
3623         isc_boolean_t wild;
3624         isc_boolean_t empty_node;
3625         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3626         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3627         rbtdb_rdatatype_t sigtype;
3628         isc_boolean_t active;
3629         dns_rbtnodechain_t chain;
3630         nodelock_t *lock;
3631         dns_rbt_t *tree;
3632
3633         search.rbtdb = (dns_rbtdb_t *)db;
3634
3635         REQUIRE(VALID_RBTDB(search.rbtdb));
3636         INSIST(version == NULL ||
3637                ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
3638
3639         /*
3640          * We don't care about 'now'.
3641          */
3642         UNUSED(now);
3643
3644         /*
3645          * If the caller didn't supply a version, attach to the current
3646          * version.
3647          */
3648         if (version == NULL) {
3649                 currentversion(db, &version);
3650                 close_version = ISC_TRUE;
3651         }
3652
3653         search.rbtversion = version;
3654         search.serial = search.rbtversion->serial;
3655         search.options = options;
3656         search.copy_name = ISC_FALSE;
3657         search.need_cleanup = ISC_FALSE;
3658         search.wild = ISC_FALSE;
3659         search.zonecut = NULL;
3660         dns_fixedname_init(&search.zonecut_name);
3661         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3662         search.now = 0;
3663
3664         /*
3665          * 'wild' will be true iff. we've matched a wildcard.
3666          */
3667         wild = ISC_FALSE;
3668
3669         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3670
3671         /*
3672          * Search down from the root of the tree.  If, while going down, we
3673          * encounter a callback node, zone_zonecut_callback() will search the
3674          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3675          */
3676         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3677                                                          search.rbtdb->tree;
3678         result = dns_rbt_findnode(tree, name, foundname, &node,
3679                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3680                                   zone_zonecut_callback, &search);
3681
3682         if (result == DNS_R_PARTIALMATCH) {
3683         partial_match:
3684                 if (search.zonecut != NULL) {
3685                     result = setup_delegation(&search, nodep, foundname,
3686                                               rdataset, sigrdataset);
3687                     goto tree_exit;
3688                 }
3689
3690                 if (search.wild) {
3691                         /*
3692                          * At least one of the levels in the search chain
3693                          * potentially has a wildcard.  For each such level,
3694                          * we must see if there's a matching wildcard active
3695                          * in the current version.
3696                          */
3697                         result = find_wildcard(&search, &node, name);
3698                         if (result == ISC_R_SUCCESS) {
3699                                 result = dns_name_copy(name, foundname, NULL);
3700                                 if (result != ISC_R_SUCCESS)
3701                                         goto tree_exit;
3702                                 wild = ISC_TRUE;
3703                                 goto found;
3704                         }
3705                         else if (result != ISC_R_NOTFOUND)
3706                                 goto tree_exit;
3707                 }
3708
                     /*
                      * activeempty() is handed a copy of the chain; presumably
                      * this keeps search.chain intact for find_closest_nsec()
                      * below -- TODO confirm.
                      */
3709                 chain = search.chain;
3710                 active = activeempty(&search, &chain, name);
3711
3712                 /*
3713                  * If we're here, then the name does not exist, is not
3714                  * beneath a zonecut, and there's no matching wildcard.
3715                  */
3716                 if ((search.rbtversion->secure == dns_db_secure &&
3717                      !search.rbtversion->havensec3) ||
3718                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3719                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3720                 {
3721                         result = find_closest_nsec(&search, nodep, foundname,
3722                                                    rdataset, sigrdataset, tree,
3723                                                    search.rbtversion->secure);
3724                         if (result == ISC_R_SUCCESS)
3725                                 result = active ? DNS_R_EMPTYNAME :
3726                                                   DNS_R_NXDOMAIN;
3727                 } else
3728                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3729                 goto tree_exit;
3730         } else if (result != ISC_R_SUCCESS)
3731                 goto tree_exit;
3732
3733  found:
3734         /*
3735          * We have found a node whose name is the desired name, or we
3736          * have matched a wildcard.
3737          */
3738
3739         if (search.zonecut != NULL) {
3740                 /*
3741                  * If we're beneath a zone cut, we don't want to look for
3742                  * CNAMEs because they're not legitimate zone glue.
3743                  */
3744                 cname_ok = ISC_FALSE;
3745         } else {
3746                 /*
3747                  * The node may be a zone cut itself.  If it might be one,
3748                  * make sure we check for it later.
3749                  *
3750                  * DS records live above the zone cut in ordinary zone so
3751                  * we want to ignore any referral.
3752                  *
3753                  * Stub zones don't have anything "above" the delegation so
3754                  * we always return a referral.
3755                  */
3756                 if (node->find_callback &&
3757                     ((node != search.rbtdb->origin_node &&
3758                       !dns_rdatatype_atparent(type)) ||
3759                      IS_STUB(search.rbtdb)))
3760                         maybe_zonecut = ISC_TRUE;
3761         }
3762
3763         /*
3764          * Certain DNSSEC types are not subject to CNAME matching
3765          * (RFC4035, section 2.5 and RFC3007).
3766          *
3767          * We don't check for RRSIG, because we don't store RRSIG records
3768          * directly.
3769          */
3770         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3771                 cname_ok = ISC_FALSE;
3772
3773         /*
3774          * We now go looking for rdata...
3775          */
3776
3777         lock = &search.rbtdb->node_locks[node->locknum].lock;
3778         NODE_LOCK(lock, isc_rwlocktype_read);
3779
3780         found = NULL;
3781         foundsig = NULL;
3782         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3783         nsecheader = NULL;
3784         nsecsig = NULL;
3785         cnamesig = NULL;
3786         empty_node = ISC_TRUE;
3787         for (header = node->data; header != NULL; header = header_next) {
3788                 header_next = header->next;
3789                 /*
3790                  * Look for an active, extant rdataset.
3791                  */
3792                 do {
3793                         if (header->serial <= search.serial &&
3794                             !IGNORE(header)) {
3795                                 /*
3796                                  * Is this a "this rdataset doesn't
3797                                  * exist" record?
3798                                  */
3799                                 if (NONEXISTENT(header))
3800                                         header = NULL;
3801                                 break;
3802                         } else
3803                                 header = header->down;
3804                 } while (header != NULL);
3805                 if (header != NULL) {
3806                         /*
3807                          * We now know that there is at least one active
3808                          * rdataset at this node.
3809                          */
3810                         empty_node = ISC_FALSE;
3811
3812                         /*
3813                          * Do special zone cut handling, if requested.
3814                          */
3815                         if (maybe_zonecut &&
3816                             header->type == dns_rdatatype_ns) {
3817                                 /*
3818                                  * We increment the reference count on node to
3819                                  * ensure that search->zonecut_rdataset will
3820                                  * still be valid later.
3821                                  */
3822                                 new_reference(search.rbtdb, node);
3823                                 search.zonecut = node;
3824                                 search.zonecut_rdataset = header;
3825                                 search.zonecut_sigrdataset = NULL;
3826                                 search.need_cleanup = ISC_TRUE;
3827                                 maybe_zonecut = ISC_FALSE;
3828                                 at_zonecut = ISC_TRUE;
3829                                 /*
3830                                  * It is not clear if KEY should still be
3831                                  * allowed at the parent side of the zone
3832                                  * cut or not.  It is needed for RFC3007
3833                                  * validated updates.
3834                                  */
3835                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3836                                     && type != dns_rdatatype_nsec
3837                                     && type != dns_rdatatype_key) {
3838                                         /*
3839                                          * Glue is not OK, but any answer we
3840                                          * could return would be glue.  Return
3841                                          * the delegation.
3842                                          */
3843                                         found = NULL;
3844                                         break;
3845                                 }
3846                                 if (found != NULL && foundsig != NULL)
3847                                         break;
3848                         }
3849
3850
3851                         /*
3852                          * If the NSEC3 record doesn't match the chain
3853                          * we are using behave as if it isn't here.
3854                          */
3855                         if (header->type == dns_rdatatype_nsec3 &&
3856                            !matchparams(header, &search)) {
3857                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3858                                 goto partial_match;
3859                         }
3860                         /*
3861                          * If we found a type we were looking for,
3862                          * remember it.
3863                          */
3864                         if (header->type == type ||
3865                             type == dns_rdatatype_any ||
3866                             (header->type == dns_rdatatype_cname &&
3867                              cname_ok)) {
3868                                 /*
3869                                  * We've found the answer!
3870                                  */
3871                                 found = header;
3872                                 if (header->type == dns_rdatatype_cname &&
3873                                     cname_ok) {
3874                                         /*
3875                                          * We may be finding a CNAME instead
3876                                          * of the desired type.
3877                                          *
3878                                          * If we've already got the CNAME RRSIG,
3879                                          * use it, otherwise change sigtype
3880                                          * so that we find it.
3881                                          */
3882                                         if (cnamesig != NULL)
3883                                                 foundsig = cnamesig;
3884                                         else
3885                                                 sigtype =
3886                                                     RBTDB_RDATATYPE_SIGCNAME;
3887                                 }
3888                                 /*
3889                                  * If we've got all we need, end the search.
3890                                  */
3891                                 if (!maybe_zonecut && foundsig != NULL)
3892                                         break;
3893                         } else if (header->type == sigtype) {
3894                                 /*
3895                                  * We've found the RRSIG rdataset for our
3896                                  * target type.  Remember it.
3897                                  */
3898                                 foundsig = header;
3899                                 /*
3900                                  * If we've got all we need, end the search.
3901                                  */
3902                                 if (!maybe_zonecut && found != NULL)
3903                                         break;
3904                         } else if (header->type == dns_rdatatype_nsec &&
3905                                    !search.rbtversion->havensec3) {
3906                                 /*
3907                                  * Remember a NSEC rdataset even if we're
3908                                  * not specifically looking for it, because
3909                                  * we might need it later.
3910                                  */
3911                                 nsecheader = header;
3912                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3913                                    !search.rbtversion->havensec3) {
3914                                 /*
3915                                  * If we need the NSEC rdataset, we'll also
3916                                  * need its signature.
3917                                  */
3918                                 nsecsig = header;
3919                         } else if (cname_ok &&
3920                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3921                                 /*
3922                                  * If we get a CNAME match, we'll also need
3923                                  * its signature.
3924                                  */
3925                                 cnamesig = header;
3926                         }
3927                 }
3928         }
3929
3930         if (empty_node) {
3931                 /*
3932                  * We have an exact match for the name, but there are no
3933                  * active rdatasets in the desired version.  That means that
3934                  * this node doesn't exist in the desired version, and that
3935                  * we really have a partial match.
3936                  */
3937                 if (!wild) {
3938                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3939                         goto partial_match;
3940                 }
3941         }
3942
3943         /*
3944          * If we didn't find what we were looking for...
3945          */
3946         if (found == NULL) {
3947                 if (search.zonecut != NULL) {
3948                         /*
3949                          * We were trying to find glue at a node beneath a
3950                          * zone cut, but didn't.
3951                          *
3952                          * Return the delegation.
3953                          */
3954                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3955                         result = setup_delegation(&search, nodep, foundname,
3956                                                   rdataset, sigrdataset);
3957                         goto tree_exit;
3958                 }
3959                 /*
3960                  * The desired type doesn't exist.
3961                  */
3962                 result = DNS_R_NXRRSET;
3963                 if (search.rbtversion->secure == dns_db_secure &&
3964                     !search.rbtversion->havensec3 &&
3965                     (nsecheader == NULL || nsecsig == NULL)) {
3966                         /*
3967                          * The zone is secure but there's no NSEC,
3968                          * or the NSEC has no signature!
3969                          */
3970                         if (!wild) {
3971                                 result = DNS_R_BADDB;
3972                                 goto node_exit;
3973                         }
3974
3975                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3976                         result = find_closest_nsec(&search, nodep, foundname,
3977                                                    rdataset, sigrdataset,
3978                                                    search.rbtdb->tree,
3979                                                    search.rbtversion->secure);
3980                         if (result == ISC_R_SUCCESS)
3981                                 result = DNS_R_EMPTYWILD;
3982                         goto tree_exit;
3983                 }
3984                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3985                     nsecheader == NULL)
3986                 {
3987                         /*
3988                          * There's no NSEC record, and we were told
3989                          * to find one.
3990                          */
3991                         result = DNS_R_BADDB;
3992                         goto node_exit;
3993                 }
3994                 if (nodep != NULL) {
3995                         new_reference(search.rbtdb, node);
3996                         *nodep = node;
3997                 }
3998                 if ((search.rbtversion->secure == dns_db_secure &&
3999                      !search.rbtversion->havensec3) ||
4000                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
4001                 {
4002                         bind_rdataset(search.rbtdb, node, nsecheader,
4003                                       0, rdataset);
4004                         if (nsecsig != NULL)
4005                                 bind_rdataset(search.rbtdb, node,
4006                                               nsecsig, 0, sigrdataset);
4007                 }
4008                 if (wild)
4009                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4010                 goto node_exit;
4011         }
4012
4013         /*
4014          * We found what we were looking for, or we found a CNAME.
4015          */
4016
4017         if (type != found->type &&
4018             type != dns_rdatatype_any &&
4019             found->type == dns_rdatatype_cname) {
4020                 /*
4021                  * We weren't doing an ANY query and we found a CNAME instead
4022                  * of the type we were looking for, so we need to indicate
4023                  * that result to the caller.
4024                  */
4025                 result = DNS_R_CNAME;
4026         } else if (search.zonecut != NULL) {
4027                 /*
4028                  * If we're beneath a zone cut, we must indicate that the
4029                  * result is glue, unless we're actually at the zone cut
4030                  * and the type is NSEC or KEY.
4031                  */
4032                 if (search.zonecut == node) {
4033                         /*
4034                          * It is not clear if KEY should still be
4035                          * allowed at the parent side of the zone
4036                          * cut or not.  It is needed for RFC3007
4037                          * validated updates.
4038                          */
4039                         if (type == dns_rdatatype_nsec ||
4040                             type == dns_rdatatype_nsec3 ||
4041                             type == dns_rdatatype_key)
4042                                 result = ISC_R_SUCCESS;
4043                         else if (type == dns_rdatatype_any)
4044                                 result = DNS_R_ZONECUT;
4045                         else
4046                                 result = DNS_R_GLUE;
4047                 } else
4048                         result = DNS_R_GLUE;
4049                 /*
4050                  * We might have found data that isn't glue, but was occluded
4051                  * by a dynamic update.  If the caller cares about this, they
4052                  * will have told us to validate glue.
4053                  *
4054                  * XXX We should cache the glue validity state!
4055                  */
4056                 if (result == DNS_R_GLUE &&
4057                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4058                     !valid_glue(&search, foundname, type, node)) {
4059                         NODE_UNLOCK(lock, isc_rwlocktype_read);
4060                         result = setup_delegation(&search, nodep, foundname,
4061                                                   rdataset, sigrdataset);
4062                     goto tree_exit;
4063                 }
4064         } else {
4065                 /*
4066                  * An ordinary successful query!
4067                  */
4068                 result = ISC_R_SUCCESS;
4069         }
4070
             /*
              * Hand a node reference to the caller.  If this node was
              * recorded as the zone cut during the scan above, a reference
              * was already taken at that point; it is passed on to *nodep
              * and need_cleanup is cleared so that it is not released
              * again at tree_exit.
              */
4071         if (nodep != NULL) {
4072                 if (!at_zonecut)
4073                         new_reference(search.rbtdb, node);
4074                 else
4075                         search.need_cleanup = ISC_FALSE;
4076                 *nodep = node;
4077         }
4078
4079         if (type != dns_rdatatype_any) {
4080                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4081                 if (foundsig != NULL)
4082                         bind_rdataset(search.rbtdb, node, foundsig, 0,
4083                                       sigrdataset);
4084         }
4085
4086         if (wild)
4087                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4088
4089  node_exit:
4090         NODE_UNLOCK(lock, isc_rwlocktype_read);
4091
4092  tree_exit:
4093         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4094
4095         /*
4096          * If we found a zonecut but aren't going to use it, we have to
4097          * let go of it.
4098          */
4099         if (search.need_cleanup) {
4100                 node = search.zonecut;
4101                 INSIST(node != NULL);
4102                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4103
4104                 NODE_LOCK(lock, isc_rwlocktype_read);
4105                 decrement_reference(search.rbtdb, node, 0,
4106                                     isc_rwlocktype_read, isc_rwlocktype_none,
4107                                     ISC_FALSE);
4108                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4109         }
4110
4111         if (close_version)
4112                 closeversion(db, &version, ISC_FALSE);
4113
4114         dns_rbtnodechain_reset(&search.chain);
4115
4116         return (result);
4117 }
4118
4119 static isc_result_t
4120 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4121                  isc_stdtime_t now, dns_dbnode_t **nodep,
4122                  dns_name_t *foundname,
4123                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4124 {
4125         UNUSED(db);
4126         UNUSED(name);
4127         UNUSED(options);
4128         UNUSED(now);
4129         UNUSED(nodep);
4130         UNUSED(foundname);
4131         UNUSED(rdataset);
4132         UNUSED(sigrdataset);
4133
4134         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4135
4136         /* NOTREACHED */
4137         return (ISC_R_NOTIMPLEMENTED);
4138 }
4139
4140 static isc_result_t
4141 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4142         rbtdb_search_t *search = arg;
4143         rdatasetheader_t *header, *header_prev, *header_next;
4144         rdatasetheader_t *dname_header, *sigdname_header;
4145         isc_result_t result;
4146         nodelock_t *lock;
4147         isc_rwlocktype_t locktype;
4148
4149         /* XXX comment */
4150
4151         REQUIRE(search->zonecut == NULL);
4152
4153         /*
4154          * Keep compiler silent.
4155          */
4156         UNUSED(name);
4157
4158         lock = &(search->rbtdb->node_locks[node->locknum].lock);
4159         locktype = isc_rwlocktype_read;
4160         NODE_LOCK(lock, locktype);
4161
4162         /*
4163          * Look for a DNAME or RRSIG DNAME rdataset.
4164          */
4165         dname_header = NULL;
4166         sigdname_header = NULL;
4167         header_prev = NULL;
4168         for (header = node->data; header != NULL; header = header_next) {
4169                 header_next = header->next;
4170                 if (header->rdh_ttl <= search->now) {
4171                         /*
4172                          * This rdataset is stale.  If no one else is
4173                          * using the node, we can clean it up right
4174                          * now, otherwise we mark it as stale, and
4175                          * the node as dirty, so it will get cleaned
4176                          * up later.
4177                          */
4178                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4179                             (locktype == isc_rwlocktype_write ||
4180                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4181                                 /*
4182                                  * We update the node's status only when we
4183                                  * can get write access; otherwise, we leave
4184                                  * others to this work.  Periodical cleaning
4185                                  * will eventually take the job as the last
4186                                  * resort.
4187                                  * We won't downgrade the lock, since other
4188                                  * rdatasets are probably stale, too.
4189                                  */
4190                                 locktype = isc_rwlocktype_write;
4191
4192                                 if (dns_rbtnode_refcurrent(node) == 0) {
4193                                         isc_mem_t *mctx;
4194
4195                                         /*
4196                                          * header->down can be non-NULL if the
4197                                          * refcount has just decremented to 0
4198                                          * but decrement_reference() has not
4199                                          * performed clean_cache_node(), in
4200                                          * which case we need to purge the
4201                                          * stale headers first.
4202                                          */
4203                                         mctx = search->rbtdb->common.mctx;
4204                                         clean_stale_headers(search->rbtdb,
4205                                                             mctx,
4206                                                             header);
4207                                         if (header_prev != NULL)
4208                                                 header_prev->next =
4209                                                         header->next;
4210                                         else
4211                                                 node->data = header->next;
4212                                         free_rdataset(search->rbtdb, mctx,
4213                                                       header);
4214                                 } else {
4215                                         header->attributes |=
4216                                                 RDATASET_ATTR_STALE;
4217                                         node->dirty = 1;
4218                                         header_prev = header;
4219                                 }
4220                         } else
4221                                 header_prev = header;
4222                 } else if (header->type == dns_rdatatype_dname &&
4223                            EXISTS(header)) {
4224                         dname_header = header;
4225                         header_prev = header;
4226                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4227                          EXISTS(header)) {
4228                         sigdname_header = header;
4229                         header_prev = header;
4230                 } else
4231                         header_prev = header;
4232         }
4233
4234         if (dname_header != NULL &&
4235             (!DNS_TRUST_PENDING(dname_header->trust) ||
4236              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4237                 /*
4238                  * We increment the reference count on node to ensure that
4239                  * search->zonecut_rdataset will still be valid later.
4240                  */
4241                 new_reference(search->rbtdb, node);
4242                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4243                 search->zonecut = node;
4244                 search->zonecut_rdataset = dname_header;
4245                 search->zonecut_sigrdataset = sigdname_header;
4246                 search->need_cleanup = ISC_TRUE;
4247                 result = DNS_R_PARTIALMATCH;
4248         } else
4249                 result = DNS_R_CONTINUE;
4250
4251         NODE_UNLOCK(lock, locktype);
4252
4253         return (result);
4254 }
4255
4256 static inline isc_result_t
4257 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4258                      dns_dbnode_t **nodep, dns_name_t *foundname,
4259                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4260 {
4261         unsigned int i;
4262         dns_rbtnode_t *level_node;
4263         rdatasetheader_t *header, *header_prev, *header_next;
4264         rdatasetheader_t *found, *foundsig;
4265         isc_result_t result = ISC_R_NOTFOUND;
4266         dns_name_t name;
4267         dns_rbtdb_t *rbtdb;
4268         isc_boolean_t done;
4269         nodelock_t *lock;
4270         isc_rwlocktype_t locktype;
4271
4272         /*
4273          * Caller must be holding the tree lock.
4274          */
4275
4276         rbtdb = search->rbtdb;
4277         i = search->chain.level_matches;
4278         done = ISC_FALSE;
4279         do {
4280                 locktype = isc_rwlocktype_read;
4281                 lock = &rbtdb->node_locks[node->locknum].lock;
4282                 NODE_LOCK(lock, locktype);
4283
4284                 /*
4285                  * Look for NS and RRSIG NS rdatasets.
4286                  */
4287                 found = NULL;
4288                 foundsig = NULL;
4289                 header_prev = NULL;
4290                 for (header = node->data;
4291                      header != NULL;
4292                      header = header_next) {
4293                         header_next = header->next;
4294                         if (header->rdh_ttl <= search->now) {
4295                                 /*
4296                                  * This rdataset is stale.  If no one else is
4297                                  * using the node, we can clean it up right
4298                                  * now, otherwise we mark it as stale, and
4299                                  * the node as dirty, so it will get cleaned
4300                                  * up later.
4301                                  */
4302                                 if ((header->rdh_ttl <= search->now -
4303                                                     RBTDB_VIRTUAL) &&
4304                                     (locktype == isc_rwlocktype_write ||
4305                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4306                                         /*
4307                                          * We update the node's status only
4308                                          * when we can get write access.
4309                                          */
4310                                         locktype = isc_rwlocktype_write;
4311
4312                                         if (dns_rbtnode_refcurrent(node)
4313                                             == 0) {
4314                                                 isc_mem_t *m;
4315
4316                                                 m = search->rbtdb->common.mctx;
4317                                                 clean_stale_headers(
4318                                                         search->rbtdb,
4319                                                         m, header);
4320                                                 if (header_prev != NULL)
4321                                                         header_prev->next =
4322                                                                 header->next;
4323                                                 else
4324                                                         node->data =
4325                                                                 header->next;
4326                                                 free_rdataset(rbtdb, m,
4327                                                               header);
4328                                         } else {
4329                                                 header->attributes |=
4330                                                         RDATASET_ATTR_STALE;
4331                                                 node->dirty = 1;
4332                                                 header_prev = header;
4333                                         }
4334                                 } else
4335                                         header_prev = header;
4336                         } else if (EXISTS(header)) {
4337                                 /*
4338                                  * We've found an extant rdataset.  See if
4339                                  * we're interested in it.
4340                                  */
4341                                 if (header->type == dns_rdatatype_ns) {
4342                                         found = header;
4343                                         if (foundsig != NULL)
4344                                                 break;
4345                                 } else if (header->type ==
4346                                            RBTDB_RDATATYPE_SIGNS) {
4347                                         foundsig = header;
4348                                         if (found != NULL)
4349                                                 break;
4350                                 }
4351                                 header_prev = header;
4352                         } else
4353                                 header_prev = header;
4354                 }
4355
4356                 if (found != NULL) {
4357                         /*
4358                          * If we have to set foundname, we do it before
4359                          * anything else.  If we were to set foundname after
4360                          * we had set nodep or bound the rdataset, then we'd
4361                          * have to undo that work if dns_name_concatenate()
4362                          * failed.  By setting foundname first, there's
4363                          * nothing to undo if we have trouble.
4364                          */
4365                         if (foundname != NULL) {
4366                                 dns_name_init(&name, NULL);
4367                                 dns_rbt_namefromnode(node, &name);
4368                                 result = dns_name_copy(&name, foundname, NULL);
4369                                 while (result == ISC_R_SUCCESS && i > 0) {
4370                                         i--;
4371                                         level_node = search->chain.levels[i];
4372                                         dns_name_init(&name, NULL);
4373                                         dns_rbt_namefromnode(level_node,
4374                                                              &name);
4375                                         result =
4376                                                 dns_name_concatenate(foundname,
4377                                                                      &name,
4378                                                                      foundname,
4379                                                                      NULL);
4380                                 }
4381                                 if (result != ISC_R_SUCCESS) {
4382                                         *nodep = NULL;
4383                                         goto node_exit;
4384                                 }
4385                         }
4386                         result = DNS_R_DELEGATION;
4387                         if (nodep != NULL) {
4388                                 new_reference(search->rbtdb, node);
4389                                 *nodep = node;
4390                         }
4391                         bind_rdataset(search->rbtdb, node, found, search->now,
4392                                       rdataset);
4393                         if (foundsig != NULL)
4394                                 bind_rdataset(search->rbtdb, node, foundsig,
4395                                               search->now, sigrdataset);
4396                         if (need_headerupdate(found, search->now) ||
4397                             (foundsig != NULL &&
4398                              need_headerupdate(foundsig, search->now))) {
4399                                 if (locktype != isc_rwlocktype_write) {
4400                                         NODE_UNLOCK(lock, locktype);
4401                                         NODE_LOCK(lock, isc_rwlocktype_write);
4402                                         locktype = isc_rwlocktype_write;
4403                                         POST(locktype);
4404                                 }
4405                                 if (need_headerupdate(found, search->now))
4406                                         update_header(search->rbtdb, found,
4407                                                       search->now);
4408                                 if (foundsig != NULL &&
4409                                     need_headerupdate(foundsig, search->now)) {
4410                                         update_header(search->rbtdb, foundsig,
4411                                                       search->now);
4412                                 }
4413                         }
4414                 }
4415
4416         node_exit:
4417                 NODE_UNLOCK(lock, locktype);
4418
4419                 if (found == NULL && i > 0) {
4420                         i--;
4421                         node = search->chain.levels[i];
4422                 } else
4423                         done = ISC_TRUE;
4424
4425         } while (!done);
4426
4427         return (result);
4428 }
4429
4430 static isc_result_t
4431 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4432                   isc_stdtime_t now, dns_name_t *foundname,
4433                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4434 {
4435         dns_rbtnode_t *node;
4436         rdatasetheader_t *header, *header_next, *header_prev;
4437         rdatasetheader_t *found, *foundsig;
4438         isc_boolean_t empty_node;
4439         isc_result_t result;
4440         dns_fixedname_t fname, forigin;
4441         dns_name_t *name, *origin;
4442         rbtdb_rdatatype_t matchtype, sigmatchtype;
4443         nodelock_t *lock;
4444         isc_rwlocktype_t locktype;
4445
4446         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4447         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4448                                              dns_rdatatype_nsec);
4449
4450         do {
4451                 node = NULL;
4452                 dns_fixedname_init(&fname);
4453                 name = dns_fixedname_name(&fname);
4454                 dns_fixedname_init(&forigin);
4455                 origin = dns_fixedname_name(&forigin);
4456                 result = dns_rbtnodechain_current(&search->chain, name,
4457                                                   origin, &node);
4458                 if (result != ISC_R_SUCCESS)
4459                         return (result);
4460                 locktype = isc_rwlocktype_read;
4461                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4462                 NODE_LOCK(lock, locktype);
4463                 found = NULL;
4464                 foundsig = NULL;
4465                 empty_node = ISC_TRUE;
4466                 header_prev = NULL;
4467                 for (header = node->data;
4468                      header != NULL;
4469                      header = header_next) {
4470                         header_next = header->next;
4471                         if (header->rdh_ttl <= now) {
4472                                 /*
4473                                  * This rdataset is stale.  If no one else is
4474                                  * using the node, we can clean it up right
4475                                  * now, otherwise we mark it as stale, and the
4476                                  * node as dirty, so it will get cleaned up
4477                                  * later.
4478                                  */
4479                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4480                                     (locktype == isc_rwlocktype_write ||
4481                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4482                                         /*
4483                                          * We update the node's status only
4484                                          * when we can get write access.
4485                                          */
4486                                         locktype = isc_rwlocktype_write;
4487
4488                                         if (dns_rbtnode_refcurrent(node)
4489                                             == 0) {
4490                                                 isc_mem_t *m;
4491
4492                                                 m = search->rbtdb->common.mctx;
4493                                                 clean_stale_headers(
4494                                                         search->rbtdb,
4495                                                         m, header);
4496                                                 if (header_prev != NULL)
4497                                                         header_prev->next =
4498                                                                 header->next;
4499                                                 else
4500                                                         node->data = header->next;
4501                                                 free_rdataset(search->rbtdb, m,
4502                                                               header);
4503                                         } else {
4504                                                 header->attributes |=
4505                                                         RDATASET_ATTR_STALE;
4506                                                 node->dirty = 1;
4507                                                 header_prev = header;
4508                                         }
4509                                 } else
4510                                         header_prev = header;
4511                                 continue;
4512                         }
4513                         if (NONEXISTENT(header) ||
4514                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4515                                 header_prev = header;
4516                                 continue;
4517                         }
4518                         empty_node = ISC_FALSE;
4519                         if (header->type == matchtype)
4520                                 found = header;
4521                         else if (header->type == sigmatchtype)
4522                                 foundsig = header;
4523                         header_prev = header;
4524                 }
4525                 if (found != NULL) {
4526                         result = dns_name_concatenate(name, origin,
4527                                                       foundname, NULL);
4528                         if (result != ISC_R_SUCCESS)
4529                                 goto unlock_node;
4530                         bind_rdataset(search->rbtdb, node, found,
4531                                       now, rdataset);
4532                         if (foundsig != NULL)
4533                                 bind_rdataset(search->rbtdb, node, foundsig,
4534                                               now, sigrdataset);
4535                         new_reference(search->rbtdb, node);
4536                         *nodep = node;
4537                         result = DNS_R_COVERINGNSEC;
4538                 } else if (!empty_node) {
4539                         result = ISC_R_NOTFOUND;
4540                 } else
4541                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4542                                                        NULL);
4543  unlock_node:
4544                 NODE_UNLOCK(lock, locktype);
4545         } while (empty_node && result == ISC_R_SUCCESS);
4546         return (result);
4547 }
4548
4549 /*
4550  * Mark a database for response policy rewriting.
4551  */
4552 #ifdef BIND9
4553 static void
4554 get_rpz_enabled(dns_db_t *db, dns_rpz_st_t *st)
4555 {
4556         dns_rbtdb_t *rbtdb;
4557
4558         rbtdb = (dns_rbtdb_t *)db;
4559         REQUIRE(VALID_RBTDB(rbtdb));
4560         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4561         dns_rpz_enabled(rbtdb->rpz_cidr, st);
4562         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4563 }
4564
4565 /*
4566  * Search the CDIR block tree of a response policy tree of trees for all of
4567  * the IP addresses in an A or AAAA rdataset.
4568  * Among the policies for all IPv4 and IPv6 addresses for a name, choose
4569  *      the earliest configured policy,
4570  *      QNAME over IP over NSDNAME over NSIP,
4571  *      the longest prefix,
4572  *      the lexically smallest address.
4573  * The caller must have already checked that any existing policy was not
4574  * configured earlier than this policy zone and does not have a higher
4575  * precedence type.
4576  */
4577 static void
4578 rpz_findips(dns_rpz_zone_t *rpz, dns_rpz_type_t rpz_type,
4579             dns_zone_t *zone, dns_db_t *db, dns_dbversion_t *version,
4580             dns_rdataset_t *ardataset, dns_rpz_st_t *st,
4581             dns_name_t *query_qname)
4582 {
4583         dns_rbtdb_t *rbtdb;
4584         struct in_addr ina;
4585         struct in6_addr in6a;
4586         isc_netaddr_t netaddr;
4587         dns_fixedname_t selfnamef, qnamef;
4588         dns_name_t *selfname, *qname;
4589         dns_rbtnode_t *node;
4590         dns_rdataset_t zrdataset;
4591         dns_rpz_cidr_bits_t prefix;
4592         isc_result_t result;
4593         dns_rpz_policy_t rpz_policy;
4594         dns_ttl_t ttl;
4595
4596         rbtdb = (dns_rbtdb_t *)db;
4597         REQUIRE(VALID_RBTDB(rbtdb));
4598         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4599
4600         if (rbtdb->rpz_cidr == NULL) {
4601                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4602                 return;
4603         }
4604
4605         dns_fixedname_init(&selfnamef);
4606         dns_fixedname_init(&qnamef);
4607         selfname = dns_fixedname_name(&selfnamef);
4608         qname = dns_fixedname_name(&qnamef);
4609
4610         for (result = dns_rdataset_first(ardataset);
4611              result == ISC_R_SUCCESS;
4612              result = dns_rdataset_next(ardataset)) {
4613                 dns_rdata_t rdata = DNS_RDATA_INIT;
4614                 dns_rdataset_current(ardataset, &rdata);
4615                 switch (rdata.type) {
4616                 case dns_rdatatype_a:
4617                         INSIST(rdata.length == 4);
4618                         memcpy(&ina.s_addr, rdata.data, 4);
4619                         isc_netaddr_fromin(&netaddr, &ina);
4620                         break;
4621                 case dns_rdatatype_aaaa:
4622                         INSIST(rdata.length == 16);
4623                         memcpy(in6a.s6_addr, rdata.data, 16);
4624                         isc_netaddr_fromin6(&netaddr, &in6a);
4625                         break;
4626                 default:
4627                         continue;
4628                 }
4629
4630                 result = dns_rpz_cidr_find(rbtdb->rpz_cidr, &netaddr, rpz_type,
4631                                            selfname, qname, &prefix);
4632                 if (result != ISC_R_SUCCESS)
4633                         continue;
4634
4635                 /*
4636                  * If we already have a rule, discard this new rule if
4637                  * is not better.
4638                  * The caller has checked that st->m.rpz->num > rpz->num
4639                  * or st->m.rpz->num == rpz->num and st->m.type >= rpz_type
4640                  */
4641                 if (st->m.policy != DNS_RPZ_POLICY_MISS &&
4642                     st->m.rpz->num == rpz->num &&
4643                     (st->m.type < rpz_type ||
4644                      (st->m.type == rpz_type &&
4645                       (st->m.prefix > prefix ||
4646                        (st->m.prefix == prefix &&
4647                         0 > dns_name_rdatacompare(st->qname, qname))))))
4648                         continue;
4649
4650                 /*
4651                  * We have rpz_st an entry with a prefix at least as long as
4652                  * the prefix of the entry we had before.  Find the node
4653                  * corresponding to CDIR tree entry.
4654                  */
4655                 node = NULL;
4656                 result = dns_rbt_findnode(rbtdb->tree, qname, NULL,
4657                                           &node, NULL, 0, NULL, NULL);
4658                 if (result != ISC_R_SUCCESS) {
4659                         char namebuf[DNS_NAME_FORMATSIZE];
4660
4661                         dns_name_format(qname, namebuf, sizeof(namebuf));
4662                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RPZ,
4663                                       DNS_LOGMODULE_RBTDB, DNS_RPZ_ERROR_LEVEL,
4664                                       "rpz_findips findnode(%s) failed: %s",
4665                                       namebuf, isc_result_totext(result));
4666                         continue;
4667                 }
4668                 /*
4669                  * First look for a simple rewrite of the IP address.
4670                  * If that fails, look for a CNAME.  If we cannot find
4671                  * a CNAME or the CNAME is neither of the special forms
4672                  * "*" or ".", treat it like a real CNAME.
4673                  */
4674                 dns_rdataset_init(&zrdataset);
4675                 result = dns_db_findrdataset(db, node, version, ardataset->type,
4676                                              0, 0, &zrdataset, NULL);
4677                 if (result != ISC_R_SUCCESS)
4678                         result = dns_db_findrdataset(db, node, version,
4679                                                      dns_rdatatype_cname,
4680                                                      0, 0, &zrdataset, NULL);
4681                 if (result == ISC_R_SUCCESS) {
4682                         if (zrdataset.type != dns_rdatatype_cname) {
4683                                 rpz_policy = DNS_RPZ_POLICY_RECORD;
4684                         } else {
4685                                 rpz_policy = dns_rpz_decode_cname(rpz,
4686                                                                   &zrdataset,
4687                                                                   selfname);
4688                                 if (rpz_policy == DNS_RPZ_POLICY_RECORD ||
4689                                     rpz_policy == DNS_RPZ_POLICY_WILDCNAME)
4690                                         result = DNS_R_CNAME;
4691                         }
4692                         ttl = zrdataset.ttl;
4693                 } else {
4694                         rpz_policy = DNS_RPZ_POLICY_RECORD;
4695                         result = DNS_R_NXRRSET;
4696                         ttl = DNS_RPZ_TTL_DEFAULT;
4697                 }
4698
4699                 /*
4700                  * Use an overriding action specified in the configuration file
4701                  */
4702                 if (rpz->policy != DNS_RPZ_POLICY_GIVEN) {
4703                         /*
4704                          * only log DNS_RPZ_POLICY_DISABLED hits
4705                          */
4706                         if (rpz->policy == DNS_RPZ_POLICY_DISABLED) {
4707                                 if (isc_log_wouldlog(dns_lctx,
4708                                                      DNS_RPZ_INFO_LEVEL)) {
4709                                         char qname_buf[DNS_NAME_FORMATSIZE];
4710                                         char rpz_qname_buf[DNS_NAME_FORMATSIZE];
4711                                         dns_name_format(query_qname, qname_buf,
4712                                                         sizeof(qname_buf));
4713                                         dns_name_format(qname, rpz_qname_buf,
4714                                                         sizeof(rpz_qname_buf));
4715
4716                                         isc_log_write(dns_lctx,
4717                                                 DNS_LOGCATEGORY_RPZ,
4718                                                 DNS_LOGMODULE_RBTDB,
4719                                                 DNS_RPZ_INFO_LEVEL,
4720                                                 "disabled rpz %s %s rewrite"
4721                                                 " %s via %s",
4722                                                 dns_rpz_type2str(rpz_type),
4723                                                 dns_rpz_policy2str(rpz_policy),
4724                                                 qname_buf, rpz_qname_buf);
4725                                 }
4726                                 continue;
4727                         }
4728
4729                         rpz_policy = rpz->policy;
4730                 }
4731
4732                 if (dns_rdataset_isassociated(st->m.rdataset))
4733                         dns_rdataset_disassociate(st->m.rdataset);
4734                 if (st->m.node != NULL)
4735                         dns_db_detachnode(st->m.db, &st->m.node);
4736                 if (st->m.db != NULL)
4737                         dns_db_detach(&st->m.db);
4738                 if (st->m.zone != NULL)
4739                         dns_zone_detach(&st->m.zone);
4740                 st->m.rpz = rpz;
4741                 st->m.type = rpz_type;
4742                 st->m.prefix = prefix;
4743                 st->m.policy = rpz_policy;
4744                 st->m.ttl = ISC_MIN(ttl, rpz->max_policy_ttl);
4745                 st->m.result = result;
4746                 dns_name_copy(qname, st->qname, NULL);
4747                 if ((rpz_policy == DNS_RPZ_POLICY_RECORD ||
4748                     rpz_policy == DNS_RPZ_POLICY_WILDCNAME) &&
4749                     result != DNS_R_NXRRSET) {
4750                         dns_rdataset_clone(&zrdataset,st->m.rdataset);
4751                         dns_db_attachnode(db, node, &st->m.node);
4752                 }
4753                 dns_db_attach(db, &st->m.db);
4754                 st->m.version = version;
4755                 dns_zone_attach(zone, &st->m.zone);
4756                 if (dns_rdataset_isassociated(&zrdataset))
4757                         dns_rdataset_disassociate(&zrdataset);
4758         }
4759
4760         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
4761 }
4762 #endif
4763
4764 static isc_result_t
4765 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4766            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4767            dns_dbnode_t **nodep, dns_name_t *foundname,
4768            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4769 {
4770         dns_rbtnode_t *node = NULL;
4771         isc_result_t result;
4772         rbtdb_search_t search;
4773         isc_boolean_t cname_ok = ISC_TRUE;
4774         isc_boolean_t empty_node;
4775         nodelock_t *lock;
4776         isc_rwlocktype_t locktype;
4777         rdatasetheader_t *header, *header_prev, *header_next;
4778         rdatasetheader_t *found, *nsheader;
4779         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4780         rdatasetheader_t *update, *updatesig;
4781         rbtdb_rdatatype_t sigtype, negtype;
4782
4783         UNUSED(version);
4784
4785         search.rbtdb = (dns_rbtdb_t *)db;
4786
4787         REQUIRE(VALID_RBTDB(search.rbtdb));
4788         REQUIRE(version == NULL);
4789
4790         if (now == 0)
4791                 isc_stdtime_get(&now);
4792
4793         search.rbtversion = NULL;
4794         search.serial = 1;
4795         search.options = options;
4796         search.copy_name = ISC_FALSE;
4797         search.need_cleanup = ISC_FALSE;
4798         search.wild = ISC_FALSE;
4799         search.zonecut = NULL;
4800         dns_fixedname_init(&search.zonecut_name);
4801         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4802         search.now = now;
4803         update = NULL;
4804         updatesig = NULL;
4805
4806         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4807
4808         /*
4809          * Search down from the root of the tree.  If, while going down, we
4810          * encounter a callback node, cache_zonecut_callback() will search the
4811          * rdatasets at the zone cut for a DNAME rdataset.
4812          */
4813         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4814                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4815                                   cache_zonecut_callback, &search);
4816
4817         if (result == DNS_R_PARTIALMATCH) {
4818                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4819                         result = find_coveringnsec(&search, nodep, now,
4820                                                    foundname, rdataset,
4821                                                    sigrdataset);
4822                         if (result == DNS_R_COVERINGNSEC)
4823                                 goto tree_exit;
4824                 }
4825                 if (search.zonecut != NULL) {
4826                     result = setup_delegation(&search, nodep, foundname,
4827                                               rdataset, sigrdataset);
4828                     goto tree_exit;
4829                 } else {
4830                 find_ns:
4831                         result = find_deepest_zonecut(&search, node, nodep,
4832                                                       foundname, rdataset,
4833                                                       sigrdataset);
4834                         goto tree_exit;
4835                 }
4836         } else if (result != ISC_R_SUCCESS)
4837                 goto tree_exit;
4838
4839         /*
4840          * Certain DNSSEC types are not subject to CNAME matching
4841          * (RFC4035, section 2.5 and RFC3007).
4842          *
4843          * We don't check for RRSIG, because we don't store RRSIG records
4844          * directly.
4845          */
4846         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4847                 cname_ok = ISC_FALSE;
4848
4849         /*
4850          * We now go looking for rdata...
4851          */
4852
4853         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4854         locktype = isc_rwlocktype_read;
4855         NODE_LOCK(lock, locktype);
4856
4857         found = NULL;
4858         foundsig = NULL;
4859         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4860         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4861         nsheader = NULL;
4862         nssig = NULL;
4863         cnamesig = NULL;
4864         empty_node = ISC_TRUE;
4865         header_prev = NULL;
4866         for (header = node->data; header != NULL; header = header_next) {
4867                 header_next = header->next;
4868                 if (header->rdh_ttl <= now) {
4869                         /*
4870                          * This rdataset is stale.  If no one else is using the
4871                          * node, we can clean it up right now, otherwise we
4872                          * mark it as stale, and the node as dirty, so it will
4873                          * get cleaned up later.
4874                          */
4875                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4876                             (locktype == isc_rwlocktype_write ||
4877                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4878                                 /*
4879                                  * We update the node's status only when we
4880                                  * can get write access.
4881                                  */
4882                                 locktype = isc_rwlocktype_write;
4883
4884                                 if (dns_rbtnode_refcurrent(node) == 0) {
4885                                         isc_mem_t *mctx;
4886
4887                                         mctx = search.rbtdb->common.mctx;
4888                                         clean_stale_headers(search.rbtdb, mctx,
4889                                                             header);
4890                                         if (header_prev != NULL)
4891                                                 header_prev->next =
4892                                                         header->next;
4893                                         else
4894                                                 node->data = header->next;
4895                                         free_rdataset(search.rbtdb, mctx,
4896                                                       header);
4897                                 } else {
4898                                         header->attributes |=
4899                                                 RDATASET_ATTR_STALE;
4900                                         node->dirty = 1;
4901                                         header_prev = header;
4902                                 }
4903                         } else
4904                                 header_prev = header;
4905                 } else if (EXISTS(header)) {
4906                         /*
4907                          * We now know that there is at least one active
4908                          * non-stale rdataset at this node.
4909                          */
4910                         empty_node = ISC_FALSE;
4911
4912                         /*
4913                          * If we found a type we were looking for, remember
4914                          * it.
4915                          */
4916                         if (header->type == type ||
4917                             (type == dns_rdatatype_any &&
4918                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4919                             (cname_ok && header->type ==
4920                              dns_rdatatype_cname)) {
4921                                 /*
4922                                  * We've found the answer.
4923                                  */
4924                                 found = header;
4925                                 if (header->type == dns_rdatatype_cname &&
4926                                     cname_ok &&
4927                                     cnamesig != NULL) {
4928                                         /*
4929                                          * If we've already got the
4930                                          * CNAME RRSIG, use it.
4931                                          */
4932                                         foundsig = cnamesig;
4933                                 }
4934                         } else if (header->type == sigtype) {
4935                                 /*
4936                                  * We've found the RRSIG rdataset for our
4937                                  * target type.  Remember it.
4938                                  */
4939                                 foundsig = header;
4940                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4941                                    header->type == negtype) {
4942                                 /*
4943                                  * We've found a negative cache entry.
4944                                  */
4945                                 found = header;
4946                         } else if (header->type == dns_rdatatype_ns) {
4947                                 /*
4948                                  * Remember a NS rdataset even if we're
4949                                  * not specifically looking for it, because
4950                                  * we might need it later.
4951                                  */
4952                                 nsheader = header;
4953                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4954                                 /*
4955                                  * If we need the NS rdataset, we'll also
4956                                  * need its signature.
4957                                  */
4958                                 nssig = header;
4959                         } else if (cname_ok &&
4960                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4961                                 /*
4962                                  * If we get a CNAME match, we'll also need
4963                                  * its signature.
4964                                  */
4965                                 cnamesig = header;
4966                         }
4967                         header_prev = header;
4968                 } else
4969                         header_prev = header;
4970         }
4971
4972         if (empty_node) {
4973                 /*
4974                  * We have an exact match for the name, but there are no
4975                  * extant rdatasets.  That means that this node doesn't
4976                  * meaningfully exist, and that we really have a partial match.
4977                  */
4978                 NODE_UNLOCK(lock, locktype);
4979                 goto find_ns;
4980         }
4981
4982         /*
4983          * If we didn't find what we were looking for...
4984          */
4985         if (found == NULL ||
4986             (DNS_TRUST_ADDITIONAL(found->trust) &&
4987              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4988             (found->trust == dns_trust_glue &&
4989              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4990             (DNS_TRUST_PENDING(found->trust) &&
4991              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4992                 /*
4993                  * If there is an NS rdataset at this node, then this is the
4994                  * deepest zone cut.
4995                  */
4996                 if (nsheader != NULL) {
4997                         if (nodep != NULL) {
4998                                 new_reference(search.rbtdb, node);
4999                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5000                                 *nodep = node;
5001                         }
5002                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
5003                                       rdataset);
5004                         if (need_headerupdate(nsheader, search.now))
5005                                 update = nsheader;
5006                         if (nssig != NULL) {
5007                                 bind_rdataset(search.rbtdb, node, nssig,
5008                                               search.now, sigrdataset);
5009                                 if (need_headerupdate(nssig, search.now))
5010                                         updatesig = nssig;
5011                         }
5012                         result = DNS_R_DELEGATION;
5013                         goto node_exit;
5014                 }
5015
5016                 /*
5017                  * Go find the deepest zone cut.
5018                  */
5019                 NODE_UNLOCK(lock, locktype);
5020                 goto find_ns;
5021         }
5022
5023         /*
5024          * We found what we were looking for, or we found a CNAME.
5025          */
5026
5027         if (nodep != NULL) {
5028                 new_reference(search.rbtdb, node);
5029                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5030                 *nodep = node;
5031         }
5032
5033         if (NEGATIVE(found)) {
5034                 /*
5035                  * We found a negative cache entry.
5036                  */
5037                 if (NXDOMAIN(found))
5038                         result = DNS_R_NCACHENXDOMAIN;
5039                 else
5040                         result = DNS_R_NCACHENXRRSET;
5041         } else if (type != found->type &&
5042                    type != dns_rdatatype_any &&
5043                    found->type == dns_rdatatype_cname) {
5044                 /*
5045                  * We weren't doing an ANY query and we found a CNAME instead
5046                  * of the type we were looking for, so we need to indicate
5047                  * that result to the caller.
5048                  */
5049                 result = DNS_R_CNAME;
5050         } else {
5051                 /*
5052                  * An ordinary successful query!
5053                  */
5054                 result = ISC_R_SUCCESS;
5055         }
5056
5057         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
5058             result == DNS_R_NCACHENXRRSET) {
5059                 bind_rdataset(search.rbtdb, node, found, search.now,
5060                               rdataset);
5061                 if (need_headerupdate(found, search.now))
5062                         update = found;
5063                 if (!NEGATIVE(found) && foundsig != NULL) {
5064                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
5065                                       sigrdataset);
5066                         if (need_headerupdate(foundsig, search.now))
5067                                 updatesig = foundsig;
5068                 }
5069         }
5070
5071  node_exit:
5072         if ((update != NULL || updatesig != NULL) &&
5073             locktype != isc_rwlocktype_write) {
5074                 NODE_UNLOCK(lock, locktype);
5075                 NODE_LOCK(lock, isc_rwlocktype_write);
5076                 locktype = isc_rwlocktype_write;
5077                 POST(locktype);
5078         }
5079         if (update != NULL && need_headerupdate(update, search.now))
5080                 update_header(search.rbtdb, update, search.now);
5081         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
5082                 update_header(search.rbtdb, updatesig, search.now);
5083
5084         NODE_UNLOCK(lock, locktype);
5085
5086  tree_exit:
5087         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5088
5089         /*
5090          * If we found a zonecut but aren't going to use it, we have to
5091          * let go of it.
5092          */
5093         if (search.need_cleanup) {
5094                 node = search.zonecut;
5095                 INSIST(node != NULL);
5096                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
5097
5098                 NODE_LOCK(lock, isc_rwlocktype_read);
5099                 decrement_reference(search.rbtdb, node, 0,
5100                                     isc_rwlocktype_read, isc_rwlocktype_none,
5101                                     ISC_FALSE);
5102                 NODE_UNLOCK(lock, isc_rwlocktype_read);
5103         }
5104
5105         dns_rbtnodechain_reset(&search.chain);
5106
5107         return (result);
5108 }
5109
/*
 * Look up 'name' in the cache tree and bind the NS rdataset found at the
 * matching node (plus its RRSIG rdataset, if one is present) to 'rdataset'
 * and 'sigrdataset'.
 *
 * The tree is searched under a read lock.  On a partial match, or when the
 * exactly matching node has no live NS rdataset, the search is handed to
 * find_deepest_zonecut().  Any other non-success result from
 * dns_rbt_findnode() is returned as is.
 *
 * While scanning the node, headers whose TTL is <= 'now' are never selected.
 * If such a header is also at least RBTDB_VIRTUAL seconds past its TTL and
 * write access to the node lock can be obtained, it is either freed (node
 * reference count is 0) or marked RDATASET_ATTR_STALE.
 *
 * If 'now' is 0 the current time is used.  If 'nodep' is not NULL and an NS
 * rdataset is found at the node, a new reference to the node is stored in
 * '*nodep'.  A DNS_R_DELEGATION result is reported as ISC_R_SUCCESS.
 */
5110 static isc_result_t
5111 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
5112                   isc_stdtime_t now, dns_dbnode_t **nodep,
5113                   dns_name_t *foundname,
5114                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
5115 {
5116         dns_rbtnode_t *node = NULL;
5117         nodelock_t *lock;
5118         isc_result_t result;
5119         rbtdb_search_t search;
5120         rdatasetheader_t *header, *header_prev, *header_next;
5121         rdatasetheader_t *found, *foundsig;
5122         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
5123         isc_rwlocktype_t locktype;
5124
5125         search.rbtdb = (dns_rbtdb_t *)db;
5126
5127         REQUIRE(VALID_RBTDB(search.rbtdb));
5128
5129         if (now == 0)
5130                 isc_stdtime_get(&now);
5131
5132         search.rbtversion = NULL;
5133         search.serial = 1;
5134         search.options = options;
5135         search.copy_name = ISC_FALSE;
5136         search.need_cleanup = ISC_FALSE;
5137         search.wild = ISC_FALSE;
5138         search.zonecut = NULL;
5139         dns_fixedname_init(&search.zonecut_name);
5140         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
5141         search.now = now;
5142
5143         if ((options & DNS_DBFIND_NOEXACT) != 0)
5144                 rbtoptions |= DNS_RBTFIND_NOEXACT;
5145
5146         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5147
5148         /*
5149          * Search down from the root of the tree.
5150          */
5151         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
5152                                   &search.chain, rbtoptions, NULL, &search);
5153
5154         if (result == DNS_R_PARTIALMATCH) {
                     /*
                      * Also entered by 'goto find_ns' from further down,
                      * after the node lock has been released, when the
                      * exactly matching node holds no live NS rdataset.
                      */
5155         find_ns:
5156                 result = find_deepest_zonecut(&search, node, nodep, foundname,
5157                                               rdataset, sigrdataset);
5158                 goto tree_exit;
5159         } else if (result != ISC_R_SUCCESS)
5160                 goto tree_exit;
5161
5162         /*
5163          * We now go looking for an NS rdataset at the node.
5164          */
5165
5166         lock = &(search.rbtdb->node_locks[node->locknum].lock);
5167         locktype = isc_rwlocktype_read;
5168         NODE_LOCK(lock, locktype);
5169
5170         found = NULL;
5171         foundsig = NULL;
5172         header_prev = NULL;
             /*
              * 'header_next' is saved before the body runs because 'header'
              * may be freed in it; 'header_prev' tracks the last header left
              * on the list so that a freed header can be unlinked.
              */
5173         for (header = node->data; header != NULL; header = header_next) {
5174                 header_next = header->next;
5175                 if (header->rdh_ttl <= now) {
5176                         /*
5177                          * This rdataset is stale.  If no one else is using the
5178                          * node, we can clean it up right now, otherwise we
5179                          * mark it as stale, and the node as dirty, so it will
5180                          * get cleaned up later.
5181                          */
5182                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5183                             (locktype == isc_rwlocktype_write ||
5184                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5185                                 /*
5186                                  * We update the node's status only when we
5187                                  * can get write access.
5188                                  */
5189                                 locktype = isc_rwlocktype_write;
5190
5191                                 if (dns_rbtnode_refcurrent(node) == 0) {
5192                                         isc_mem_t *mctx;
5193
5194                                         mctx = search.rbtdb->common.mctx;
5195                                         clean_stale_headers(search.rbtdb, mctx,
5196                                                             header);
5197                                         if (header_prev != NULL)
5198                                                 header_prev->next =
5199                                                         header->next;
5200                                         else
5201                                                 node->data = header->next;
5202                                         free_rdataset(search.rbtdb, mctx,
5203                                                       header);
5204                                 } else {
5205                                         header->attributes |=
5206                                                 RDATASET_ATTR_STALE;
5207                                         node->dirty = 1;
5208                                         header_prev = header;
5209                                 }
5210                         } else
5211                                 header_prev = header;
5212                 } else if (EXISTS(header)) {
5213                         /*
5214                          * If we found a type we were looking for, remember
5215                          * it.
5216                          */
5217                         if (header->type == dns_rdatatype_ns) {
5218                                 /*
5219                                  * This is the NS rdataset we are
5220                                  * looking for.  The loop keeps going so
5221                                  * that its RRSIG can be picked up too.
5222                                  */
5223                                 found = header;
5224                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
5225                                 /*
5226                                  * If we need the NS rdataset, we'll also
5227                                  * need its signature.
5228                                  */
5229                                 foundsig = header;
5230                         }
5231                         header_prev = header;
5232                 } else
5233                         header_prev = header;
5234         }
5235
5236         if (found == NULL) {
5237                 /*
5238                  * No NS records here.
5239                  */
5240                 NODE_UNLOCK(lock, locktype);
5241                 goto find_ns;
5242         }
5243
5244         if (nodep != NULL) {
5245                 new_reference(search.rbtdb, node);
5246                 INSIST(!ISC_LINK_LINKED(node, deadlink));
5247                 *nodep = node;
5248         }
5249
5250         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
5251         if (foundsig != NULL)
5252                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5253                               sigrdataset);
5254
             /*
              * When only a read lock is held it is released and a write lock
              * is taken in its place, so need_headerupdate() is evaluated
              * again for each header once the write lock is held.
              */
5255         if (need_headerupdate(found, search.now) ||
5256             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
5257                 if (locktype != isc_rwlocktype_write) {
5258                         NODE_UNLOCK(lock, locktype);
5259                         NODE_LOCK(lock, isc_rwlocktype_write);
5260                         locktype = isc_rwlocktype_write;
5261                         POST(locktype);
5262                 }
5263                 if (need_headerupdate(found, search.now))
5264                         update_header(search.rbtdb, found, search.now);
5265                 if (foundsig != NULL &&
5266                     need_headerupdate(foundsig, search.now)) {
5267                         update_header(search.rbtdb, foundsig, search.now);
5268                 }
5269         }
5270
5271         NODE_UNLOCK(lock, locktype);
5272
5273  tree_exit:
5274         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5275
             /*
              * need_cleanup was initialized to ISC_FALSE above and is
              * required to still be false on every exit path.
              */
5276         INSIST(!search.need_cleanup);
5277
5278         dns_rbtnodechain_reset(&search.chain);
5279
             /*
              * A delegation result is reported to the caller as success.
              */
5280         if (result == DNS_R_DELEGATION)
5281                 result = ISC_R_SUCCESS;
5282
5283         return (result);
5284 }
5285
/*
 * Take an additional reference on the node 'source' and store it in
 * '*targetp'.
 *
 * 'targetp' must be non-NULL and '*targetp' must be NULL on entry.  The
 * reference count is incremented between NODE_STRONGLOCK and
 * NODE_STRONGUNLOCK on the node's lock bucket.
 */
5286 static void
5287 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5288         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5289         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5290         unsigned int refs;
5291
5292         REQUIRE(VALID_RBTDB(rbtdb));
5293         REQUIRE(targetp != NULL && *targetp == NULL);
5294
5295         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5296         dns_rbtnode_refincrement(node, &refs);
             /* The count reported back by the increment must be non-zero. */
5297         INSIST(refs != 0);
5298         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
5299
5300         *targetp = source;
5301 }
5302
/*
 * Release the node reference held in '*targetp' and set '*targetp' to NULL.
 *
 * The reference is dropped with decrement_reference() under a read lock on
 * the node's lock bucket.  If that call returns true, the bucket's own
 * reference count is 0 and the bucket is flagged 'exiting', the database's
 * 'active' counter is decremented under the database write lock.  When
 * 'active' reaches 0 the database is freed with free_rbtdb().
 *
 * 'targetp' and '*targetp' must both be non-NULL on entry.
 */
5303 static void
5304 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5305         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5306         dns_rbtnode_t *node;
5307         isc_boolean_t want_free = ISC_FALSE;
5308         isc_boolean_t inactive = ISC_FALSE;
5309         rbtdb_nodelock_t *nodelock;
5310
5311         REQUIRE(VALID_RBTDB(rbtdb));
5312         REQUIRE(targetp != NULL && *targetp != NULL);
5313
5314         node = (dns_rbtnode_t *)(*targetp);
5315         nodelock = &rbtdb->node_locks[node->locknum];
5316
5317         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5318
5319         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5320                                 isc_rwlocktype_none, ISC_FALSE)) {
5321                 if (isc_refcount_current(&nodelock->references) == 0 &&
5322                     nodelock->exiting) {
5323                         inactive = ISC_TRUE;
5324                 }
5325         }
5326
5327         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5328
5329         *targetp = NULL;
5330
5331         if (inactive) {
5332                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5333                 rbtdb->active--;
5334                 if (rbtdb->active == 0)
5335                         want_free = ISC_TRUE;
5336                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
                     /*
                      * free_rbtdb() is called only after the database lock
                      * has been released.
                      */
5337                 if (want_free) {
5338                         char buf[DNS_NAME_FORMATSIZE];
                             /*
                              * The origin is formatted for the log message
                              * only when dns_name_dynamic() is true for it;
                              * otherwise a fixed placeholder is logged.
                              */
5339                         if (dns_name_dynamic(&rbtdb->common.origin))
5340                                 dns_name_format(&rbtdb->common.origin, buf,
5341                                                 sizeof(buf));
5342                         else
5343                                 strcpy(buf, "<UNKNOWN>");
5344                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5345                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5346                                       "calling free_rbtdb(%s)", buf);
5347                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
5348                 }
5349         }
5350 }
5351
/*
 * Mark expired rdatasets at 'node' as stale and, when the memory context is
 * over its limit, possibly force live ones to expire.
 *
 * Every header on the node's top-level list is visited under the node's
 * write lock:
 *   - a header at least RBTDB_VIRTUAL seconds past its TTL (relative to
 *     'now') gets RDATASET_ATTR_STALE and the node is marked dirty;
 *   - otherwise, if forced expiry was selected for this call and the header
 *     is not RETAIN()ed, its TTL is set to 0 and it is marked stale as well.
 *
 * Forced expiry is selected only when isc_mem_isovermem() is true, the node
 * has no 'down' pointer, and a random value is a multiple of 4.
 *
 * If 'now' is 0 the current time is used.  Always returns ISC_R_SUCCESS.
 */
5352 static isc_result_t
5353 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5354         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5355         dns_rbtnode_t *rbtnode = node;
5356         rdatasetheader_t *header;
5357         isc_boolean_t force_expire = ISC_FALSE;
5358         /*
5359          * These are the category and module used by the cache cleaner.
5360          */
5361         isc_boolean_t log = ISC_FALSE;
5362         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5363         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5364         int level = ISC_LOG_DEBUG(2);
             /*
              * 'printname' is filled in only on the path that sets 'log' to
              * true, and is read only when 'log' is true.
              */
5365         char printname[DNS_NAME_FORMATSIZE];
5366
5367         REQUIRE(VALID_RBTDB(rbtdb));
5368
5369         /*
5370          * Caller must hold a tree lock.
5371          */
5372
5373         if (now == 0)
5374                 isc_stdtime_get(&now);
5375
5376         if (isc_mem_isovermem(rbtdb->common.mctx)) {
5377                 isc_uint32_t val;
5378
5379                 isc_random_get(&val);
5380                 /*
5381                  * XXXDCL Could stand to have a better policy, like LRU.
5382                  */
5383                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5384
5385                 /*
5386                  * Note that 'log' can be true IFF overmem is also true.
5387                  * overmem can currently only be true for cache
5388                  * databases -- hence all of the "overmem cache" log strings.
5389                  */
5390                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5391                 if (log)
5392                         isc_log_write(dns_lctx, category, module, level,
5393                                       "overmem cache: %s %s",
5394                                       force_expire ? "FORCE" : "check",
5395                                       dns_rbt_formatnodename(rbtnode,
5396                                                            printname,
5397                                                            sizeof(printname)));
5398         }
5399
5400         /*
5401          * We may not need write access, but this code path is not performance
5402          * sensitive, so it should be okay to always lock as a writer.
5403          */
5404         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5405                   isc_rwlocktype_write);
5406
             /*
              * Only the top-level list (header->next) is walked here; the
              * 'down' chains are not visited.
              */
5407         for (header = rbtnode->data; header != NULL; header = header->next)
5408                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5409                         /*
5410                          * We don't check if refcurrent(rbtnode) == 0 and try
5411                          * to free like we do in cache_find(), because
5412                          * refcurrent(rbtnode) must be non-zero.  This is so
5413                          * because 'node' is an argument to the function.
5414                          */
5415                         header->attributes |= RDATASET_ATTR_STALE;
5416                         rbtnode->dirty = 1;
5417                         if (log)
5418                                 isc_log_write(dns_lctx, category, module,
5419                                               level, "overmem cache: stale %s",
5420                                               printname);
5421                 } else if (force_expire) {
5422                         if (! RETAIN(header)) {
5423                                 set_ttl(rbtdb, header, 0);
5424                                 header->attributes |= RDATASET_ATTR_STALE;
5425                                 rbtnode->dirty = 1;
5426                         } else if (log) {
5427                                 isc_log_write(dns_lctx, category, module,
5428                                               level, "overmem cache: "
5429                                               "reprieve by RETAIN() %s",
5430                                               printname);
5431                         }
5432                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
5433                         isc_log_write(dns_lctx, category, module, level,
5434                                       "overmem cache: saved %s", printname);
5435
5436         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5437                     isc_rwlocktype_write);
5438
5439         return (ISC_R_SUCCESS);
5440 }
5441
/*
 * Callback taking the database and an over-memory flag.  Both arguments
 * are deliberately ignored and nothing is done.
 */
5442 static void
5443 overmem(dns_db_t *db, isc_boolean_t overmem) {
5444         /* This is an empty callback.  See adb.c:water() */
5445
5446         UNUSED(db);
5447         UNUSED(overmem);
5448
5449         return;
5450 }
5451
/*
 * Write a human-readable dump of 'node' to 'out'.
 *
 * The first line gives the node pointer, its current reference count and
 * its lock bucket number.  Then, for every entry on the node's top-level
 * header list, the type is printed followed by one line per header on that
 * entry's 'down' chain (serial, ttl, trust, attributes, resign).  A node
 * with no data prints "(empty)".  The node's lock bucket is held for
 * reading while printing.
 */
5452 static void
5453 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5454         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5455         dns_rbtnode_t *rbtnode = node;
5456         isc_boolean_t first;
5457
5458         REQUIRE(VALID_RBTDB(rbtdb));
5459
5460         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5461                   isc_rwlocktype_read);
5462
5463         fprintf(out, "node %p, %u references, locknum = %u\n",
5464                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5465                 rbtnode->locknum);
5466         if (rbtnode->data != NULL) {
5467                 rdatasetheader_t *current, *top_next;
5468
5469                 for (current = rbtnode->data; current != NULL;
5470                      current = top_next) {
                             /*
                              * The inner loop leaves 'current' NULL, so the
                              * next top-level entry is saved up front.
                              */
5471                         top_next = current->next;
5472                         first = ISC_TRUE;
5473                         fprintf(out, "\ttype %u", current->type);
5474                         do {
                                     /*
                                      * Lines after the first get an extra
                                      * leading tab because no "type" prefix
                                      * is printed on them.
                                      */
5475                                 if (!first)
5476                                         fprintf(out, "\t");
5477                                 first = ISC_FALSE;
5478                                 fprintf(out,
5479                                         "\tserial = %lu, ttl = %u, "
5480                                         "trust = %u, attributes = %u, "
5481                                         "resign = %u\n",
5482                                         (unsigned long)current->serial,
5483                                         current->rdh_ttl,
5484                                         current->trust,
5485                                         current->attributes,
5486                                         current->resign);
5487                                 current = current->down;
5488                         } while (current != NULL);
5489                 }
5490         } else
5491                 fprintf(out, "(empty)\n");
5492
5493         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5494                     isc_rwlocktype_read);
5495 }
5496
/*
 * Allocate and initialize a database iterator for 'db' and store it in
 * '*iteratorp'.
 *
 * The iterator is allocated from the database's memory context and holds
 * its own attachment to 'db'.  It starts out paused, with no tree lock
 * held, no current node and an ISC_R_SUCCESS result.  'options' bits
 * DNS_DB_RELATIVENAMES, DNS_DB_NSEC3ONLY and DNS_DB_NONSEC3 are recorded in
 * the iterator.  With DNS_DB_NSEC3ONLY the NSEC3 node chain is made the
 * current chain; otherwise the main chain is.
 *
 * Returns ISC_R_NOMEMORY if the allocation fails, ISC_R_SUCCESS otherwise.
 */
5497 static isc_result_t
5498 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5499 {
5500         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5501         rbtdb_dbiterator_t *rbtdbiter;
5502
5503         REQUIRE(VALID_RBTDB(rbtdb));
5504
5505         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5506         if (rbtdbiter == NULL)
5507                 return (ISC_R_NOMEMORY);
5508
5509         rbtdbiter->common.methods = &dbiterator_methods;
             /* common.db is set to NULL first, then attached to 'db'. */
5510         rbtdbiter->common.db = NULL;
5511         dns_db_attach(db, &rbtdbiter->common.db);
5512         rbtdbiter->common.relative_names =
5513                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5514         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5515         rbtdbiter->common.cleaning = ISC_FALSE;
5516         rbtdbiter->paused = ISC_TRUE;
5517         rbtdbiter->tree_locked = isc_rwlocktype_none;
5518         rbtdbiter->result = ISC_R_SUCCESS;
5519         dns_fixedname_init(&rbtdbiter->name);
5520         dns_fixedname_init(&rbtdbiter->origin);
5521         rbtdbiter->node = NULL;
5522         rbtdbiter->delete = 0;
5523         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5524         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5525         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5526         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5527         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5528         if (rbtdbiter->nsec3only)
5529                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5530         else
5531                 rbtdbiter->current = &rbtdbiter->chain;
5532
5533         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5534
5535         return (ISC_R_SUCCESS);
5536 }
5537
/*
 * Find the rdataset of type 'type' (with 'covers') at 'node' as seen by
 * 'version', and bind it to 'rdataset'.
 *
 * If 'version' is NULL the current version is opened for the duration of
 * the call and closed again before returning.  When 'covers' is 0 the
 * RRSIG rdataset covering 'type' is looked for as well and, if found
 * together with the data, bound to 'sigrdataset'.  'type' must not be
 * dns_rdatatype_any.  The caller's 'now' is ignored.
 *
 * Returns ISC_R_SUCCESS if the data rdataset was found, ISC_R_NOTFOUND
 * otherwise.
 */
5538 static isc_result_t
5539 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5540                   dns_rdatatype_t type, dns_rdatatype_t covers,
5541                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5542                   dns_rdataset_t *sigrdataset)
5543 {
5544         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5545         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5546         rdatasetheader_t *header, *header_next, *found, *foundsig;
5547         rbtdb_serial_t serial;
5548         rbtdb_version_t *rbtversion = version;
5549         isc_boolean_t close_version = ISC_FALSE;
5550         rbtdb_rdatatype_t matchtype, sigmatchtype;
5551
5552         REQUIRE(VALID_RBTDB(rbtdb));
5553         REQUIRE(type != dns_rdatatype_any);
5554         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
5555
5556         if (rbtversion == NULL) {
5557                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5558                 close_version = ISC_TRUE;
5559         }
5560         serial = rbtversion->serial;
             /*
              * The caller's value is discarded; 0 is what gets passed to
              * bind_rdataset() below.
              */
5561         now = 0;
5562
5563         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5564                   isc_rwlocktype_read);
5565
5566         found = NULL;
5567         foundsig = NULL;
5568         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5569         if (covers == 0)
5570                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5571         else
5572                 sigmatchtype = 0;
5573
5574         for (header = rbtnode->data; header != NULL; header = header_next) {
5575                 header_next = header->next;
                     /*
                      * Follow the 'down' chain to the first header whose
                      * serial is <= ours and which is not IGNOREd.  If that
                      * header is NONEXISTENT, or no such header is found,
                      * 'header' ends up NULL and this entry is skipped.
                      */
5576                 do {
5577                         if (header->serial <= serial &&
5578                             !IGNORE(header)) {
5579                                 /*
5580                                  * Is this a "this rdataset doesn't
5581                                  * exist" record?
5582                                  */
5583                                 if (NONEXISTENT(header))
5584                                         header = NULL;
5585                                 break;
5586                         } else
5587                                 header = header->down;
5588                 } while (header != NULL);
5589                 if (header != NULL) {
5590                         /*
5591                          * We have an active, extant rdataset.  If it's a
5592                          * type we're looking for, remember it.
5593                          */
                             /*
                              * The scan stops early once both the data and
                              * the signature header have been seen.
                              */
5594                         if (header->type == matchtype) {
5595                                 found = header;
5596                                 if (foundsig != NULL)
5597                                         break;
5598                         } else if (header->type == sigmatchtype) {
5599                                 foundsig = header;
5600                                 if (found != NULL)
5601                                         break;
5602                         }
5603                 }
5604         }
5605         if (found != NULL) {
5606                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5607                 if (foundsig != NULL)
5608                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5609                                       sigrdataset);
5610         }
5611
5612         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5613                     isc_rwlocktype_read);
5614
5615         if (close_version)
5616                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5617                              ISC_FALSE);
5618
5619         if (found == NULL)
5620                 return (ISC_R_NOTFOUND);
5621
5622         return (ISC_R_SUCCESS);
5623 }
5624
5625 static isc_result_t
5626 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5627                    dns_rdatatype_t type, dns_rdatatype_t covers,
5628                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5629                    dns_rdataset_t *sigrdataset)
5630 {
5631         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5632         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5633         rdatasetheader_t *header, *header_next, *found, *foundsig;
5634         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5635         isc_result_t result;
5636         nodelock_t *lock;
5637         isc_rwlocktype_t locktype;
5638
5639         REQUIRE(VALID_RBTDB(rbtdb));
5640         REQUIRE(type != dns_rdatatype_any);
5641
5642         UNUSED(version);
5643
5644         result = ISC_R_SUCCESS;
5645
5646         if (now == 0)
5647                 isc_stdtime_get(&now);
5648
5649         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5650         locktype = isc_rwlocktype_read;
5651         NODE_LOCK(lock, locktype);
5652
5653         found = NULL;
5654         foundsig = NULL;
5655         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5656         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5657         if (covers == 0)
5658                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5659         else
5660                 sigmatchtype = 0;
5661
5662         for (header = rbtnode->data; header != NULL; header = header_next) {
5663                 header_next = header->next;
5664                 if (header->rdh_ttl <= now) {
5665                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5666                             (locktype == isc_rwlocktype_write ||
5667                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5668                                 /*
5669                                  * We update the node's status only when we
5670                                  * can get write access.
5671                                  */
5672                                 locktype = isc_rwlocktype_write;
5673
5674                                 /*
5675                                  * We don't check if refcurrent(rbtnode) == 0
5676                                  * and try to free like we do in cache_find(),
5677                                  * because refcurrent(rbtnode) must be
5678                                  * non-zero.  This is so because 'node' is an
5679                                  * argument to the function.
5680                                  */
5681                                 header->attributes |= RDATASET_ATTR_STALE;
5682                                 rbtnode->dirty = 1;
5683                         }
5684                 } else if (EXISTS(header)) {
5685                         if (header->type == matchtype)
5686                                 found = header;
5687                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5688                                  header->type == negtype)
5689                                 found = header;
5690                         else if (header->type == sigmatchtype)
5691                                 foundsig = header;
5692                 }
5693         }
5694         if (found != NULL) {
5695                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5696                 if (!NEGATIVE(found) && foundsig != NULL)
5697                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5698                                       sigrdataset);
5699         }
5700
5701         NODE_UNLOCK(lock, locktype);
5702
5703         if (found == NULL)
5704                 return (ISC_R_NOTFOUND);
5705
5706         if (NEGATIVE(found)) {
5707                 /*
5708                  * We found a negative cache entry.
5709                  */
5710                 if (NXDOMAIN(found))
5711                         result = DNS_R_NCACHENXDOMAIN;
5712                 else
5713                         result = DNS_R_NCACHENXRRSET;
5714         }
5715
5716         return (result);
5717 }
5718
5719 static isc_result_t
5720 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5721              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5722 {
5723         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5724         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5725         rbtdb_version_t *rbtversion = version;
5726         rbtdb_rdatasetiter_t *iterator;
5727         unsigned int refs;
5728
5729         REQUIRE(VALID_RBTDB(rbtdb));
5730
5731         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5732         if (iterator == NULL)
5733                 return (ISC_R_NOMEMORY);
5734
5735         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5736                 now = 0;
5737                 if (rbtversion == NULL)
5738                         currentversion(db,
5739                                  (dns_dbversion_t **) (void *)(&rbtversion));
5740                 else {
5741                         unsigned int refs;
5742
5743                         INSIST(rbtversion->rbtdb == rbtdb);
5744
5745                         isc_refcount_increment(&rbtversion->references,
5746                                                &refs);
5747                         INSIST(refs > 1);
5748                 }
5749         } else {
5750                 if (now == 0)
5751                         isc_stdtime_get(&now);
5752                 rbtversion = NULL;
5753         }
5754
5755         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5756         iterator->common.methods = &rdatasetiter_methods;
5757         iterator->common.db = db;
5758         iterator->common.node = node;
5759         iterator->common.version = (dns_dbversion_t *)rbtversion;
5760         iterator->common.now = now;
5761
5762         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5763
5764         dns_rbtnode_refincrement(rbtnode, &refs);
5765         INSIST(refs != 0);
5766
5767         iterator->current = NULL;
5768
5769         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5770
5771         *iteratorp = (dns_rdatasetiter_t *)iterator;
5772
5773         return (ISC_R_SUCCESS);
5774 }
5775
5776 static isc_boolean_t
5777 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5778         rdatasetheader_t *header, *header_next;
5779         isc_boolean_t cname, other_data;
5780         dns_rdatatype_t rdtype;
5781
5782         /*
5783          * The caller must hold the node lock.
5784          */
5785
5786         /*
5787          * Look for CNAME and "other data" rdatasets active in our version.
5788          */
5789         cname = ISC_FALSE;
5790         other_data = ISC_FALSE;
5791         for (header = node->data; header != NULL; header = header_next) {
5792                 header_next = header->next;
5793                 if (header->type == dns_rdatatype_cname) {
5794                         /*
5795                          * Look for an active extant CNAME.
5796                          */
5797                         do {
5798                                 if (header->serial <= serial &&
5799                                     !IGNORE(header)) {
5800                                         /*
5801                                          * Is this a "this rdataset doesn't
5802                                          * exist" record?
5803                                          */
5804                                         if (NONEXISTENT(header))
5805                                                 header = NULL;
5806                                         break;
5807                                 } else
5808                                         header = header->down;
5809                         } while (header != NULL);
5810                         if (header != NULL)
5811                                 cname = ISC_TRUE;
5812                 } else {
5813                         /*
5814                          * Look for active extant "other data".
5815                          *
5816                          * "Other data" is any rdataset whose type is not
5817                          * KEY, NSEC, SIG or RRSIG.
5818                          */
5819                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5820                         if (rdtype != dns_rdatatype_key &&
5821                             rdtype != dns_rdatatype_sig &&
5822                             rdtype != dns_rdatatype_nsec &&
5823                             rdtype != dns_rdatatype_rrsig) {
5824                                 /*
5825                                  * Is it active and extant?
5826                                  */
5827                                 do {
5828                                         if (header->serial <= serial &&
5829                                             !IGNORE(header)) {
5830                                                 /*
5831                                                  * Is this a "this rdataset
5832                                                  * doesn't exist" record?
5833                                                  */
5834                                                 if (NONEXISTENT(header))
5835                                                         header = NULL;
5836                                                 break;
5837                                         } else
5838                                                 header = header->down;
5839                                 } while (header != NULL);
5840                                 if (header != NULL)
5841                                         other_data = ISC_TRUE;
5842                         }
5843                 }
5844         }
5845
5846         if (cname && other_data)
5847                 return (ISC_TRUE);
5848
5849         return (ISC_FALSE);
5850 }
5851
5852 static isc_result_t
5853 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5854         isc_result_t result;
5855
5856         INSIST(!IS_CACHE(rbtdb));
5857         INSIST(newheader->heap_index == 0);
5858         INSIST(!ISC_LINK_LINKED(newheader, link));
5859
5860         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5861         return (result);
5862 }
5863
5864 static isc_result_t
5865 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5866     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5867     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5868 {
5869         rbtdb_changed_t *changed = NULL;
5870         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
5871         unsigned char *merged;
5872         isc_result_t result;
5873         isc_boolean_t header_nx;
5874         isc_boolean_t newheader_nx;
5875         isc_boolean_t merge;
5876         dns_rdatatype_t rdtype, covers;
5877         rbtdb_rdatatype_t negtype, sigtype;
5878         dns_trust_t trust;
5879         int idx;
5880
5881         /*
5882          * Add an rdatasetheader_t to a node.
5883          */
5884
5885         /*
5886          * Caller must be holding the node lock.
5887          */
5888
5889         if ((options & DNS_DBADD_MERGE) != 0) {
5890                 REQUIRE(rbtversion != NULL);
5891                 merge = ISC_TRUE;
5892         } else
5893                 merge = ISC_FALSE;
5894
5895         if ((options & DNS_DBADD_FORCE) != 0)
5896                 trust = dns_trust_ultimate;
5897         else
5898                 trust = newheader->trust;
5899
5900         if (rbtversion != NULL && !loading) {
5901                 /*
5902                  * We always add a changed record, even if no changes end up
5903                  * being made to this node, because it's harmless and
5904                  * simplifies the code.
5905                  */
5906                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5907                 if (changed == NULL) {
5908                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5909                         return (ISC_R_NOMEMORY);
5910                 }
5911         }
5912
5913         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5914         topheader_prev = NULL;
5915         sigheader = NULL;
5916         negtype = 0;
5917         if (rbtversion == NULL && !newheader_nx) {
5918                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5919                 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5920                 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
5921                 if (NEGATIVE(newheader)) {
5922                         /*
5923                          * We're adding a negative cache entry.
5924                          */
5925                         for (topheader = rbtnode->data;
5926                              topheader != NULL;
5927                              topheader = topheader->next) {
5928                                 /*
5929                                  * If we're adding an negative cache entry
5930                                  * which covers all types (NXDOMAIN,
5931                                  * NODATA(QTYPE=ANY)).
5932                                  *
5933                                  * We make all other data stale so that the
5934                                  * only rdataset that can be found at this
5935                                  * node is the negative cache entry.
5936                                  *
5937                                  * Otherwise look for any RRSIGs of the
5938                                  * given type so they can be marked stale
5939                                  * later.
5940                                  */
5941                                 if (covers == dns_rdatatype_any) {
5942                                         set_ttl(rbtdb, topheader, 0);
5943                                         topheader->attributes |=
5944                                                 RDATASET_ATTR_STALE;
5945                                         rbtnode->dirty = 1;
5946                                 } else if (topheader->type == sigtype)
5947                                         sigheader = topheader;
5948                         }
5949                         if (covers == dns_rdatatype_any)
5950                                 goto find_header;
5951                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5952                 } else {
5953                         /*
5954                          * We're adding something that isn't a
5955                          * negative cache entry.  Look for an extant
5956                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5957                          * cache entry.  If we're adding an RRSIG, also
5958                          * check for an extant non-stale NODATA ncache
5959                          * entry which covers the same type as the RRSIG.
5960                          */
5961                         for (topheader = rbtnode->data;
5962                              topheader != NULL;
5963                              topheader = topheader->next) {
5964                                 if ((topheader->type ==
5965                                         RBTDB_RDATATYPE_NCACHEANY) ||
5966                                         (newheader->type == sigtype &&
5967                                         topheader->type ==
5968                                         RBTDB_RDATATYPE_VALUE(0, covers))) {
5969                                                 break;
5970                                         }
5971                         }
5972                         if (topheader != NULL && EXISTS(topheader) &&
5973                             topheader->rdh_ttl > now) {
5974                                 /*
5975                                  * Found one.
5976                                  */
5977                                 if (trust < topheader->trust) {
5978                                         /*
5979                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5980                                          * is more trusted.
5981                                          */
5982                                         free_rdataset(rbtdb,
5983                                                       rbtdb->common.mctx,
5984                                                       newheader);
5985                                         if (addedrdataset != NULL)
5986                                                 bind_rdataset(rbtdb, rbtnode,
5987                                                               topheader, now,
5988                                                               addedrdataset);
5989                                         return (DNS_R_UNCHANGED);
5990                                 }
5991                                 /*
5992                                  * The new rdataset is better.  Expire the
5993                                  * ncache entry.
5994                                  */
5995                                 set_ttl(rbtdb, topheader, 0);
5996                                 topheader->attributes |= RDATASET_ATTR_STALE;
5997                                 rbtnode->dirty = 1;
5998                                 topheader = NULL;
5999                                 goto find_header;
6000                         }
6001                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
6002                 }
6003         }
6004
6005         for (topheader = rbtnode->data;
6006              topheader != NULL;
6007              topheader = topheader->next) {
6008                 if (topheader->type == newheader->type ||
6009                     topheader->type == negtype)
6010                         break;
6011                 topheader_prev = topheader;
6012         }
6013
6014  find_header:
6015         /*
6016          * If header isn't NULL, we've found the right type.  There may be
6017          * IGNORE rdatasets between the top of the chain and the first real
6018          * data.  We skip over them.
6019          */
6020         header = topheader;
6021         while (header != NULL && IGNORE(header))
6022                 header = header->down;
6023         if (header != NULL) {
6024                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
6025
6026                 /*
6027                  * Deleting an already non-existent rdataset has no effect.
6028                  */
6029                 if (header_nx && newheader_nx) {
6030                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6031                         return (DNS_R_UNCHANGED);
6032                 }
6033
6034                 /*
6035                  * Trying to add an rdataset with lower trust to a cache DB
6036                  * has no effect, provided that the cache data isn't stale.
6037                  */
6038                 if (rbtversion == NULL && trust < header->trust &&
6039                     (header->rdh_ttl > now || header_nx)) {
6040                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6041                         if (addedrdataset != NULL)
6042                                 bind_rdataset(rbtdb, rbtnode, header, now,
6043                                               addedrdataset);
6044                         return (DNS_R_UNCHANGED);
6045                 }
6046
6047                 /*
6048                  * Don't merge if a nonexistent rdataset is involved.
6049                  */
6050                 if (merge && (header_nx || newheader_nx))
6051                         merge = ISC_FALSE;
6052
6053                 /*
6054                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
6055                  * that is the union of 'newheader' and 'header'.
6056                  */
6057                 if (merge) {
6058                         unsigned int flags = 0;
6059                         INSIST(rbtversion->serial >= header->serial);
6060                         merged = NULL;
6061                         result = ISC_R_SUCCESS;
6062
6063                         if ((options & DNS_DBADD_EXACT) != 0)
6064                                 flags |= DNS_RDATASLAB_EXACT;
6065                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
6066                              newheader->rdh_ttl != header->rdh_ttl)
6067                                         result = DNS_R_NOTEXACT;
6068                         else if (newheader->rdh_ttl != header->rdh_ttl)
6069                                 flags |= DNS_RDATASLAB_FORCE;
6070                         if (result == ISC_R_SUCCESS)
6071                                 result = dns_rdataslab_merge(
6072                                              (unsigned char *)header,
6073                                              (unsigned char *)newheader,
6074                                              (unsigned int)(sizeof(*newheader)),
6075                                              rbtdb->common.mctx,
6076                                              rbtdb->common.rdclass,
6077                                              (dns_rdatatype_t)header->type,
6078                                              flags, &merged);
6079                         if (result == ISC_R_SUCCESS) {
6080                                 /*
6081                                  * If 'header' has the same serial number as
6082                                  * we do, we could clean it up now if we knew
6083                                  * that our caller had no references to it.
6084                                  * We don't know this, however, so we leave it
6085                                  * alone.  It will get cleaned up when
6086                                  * clean_zone_node() runs.
6087                                  */
6088                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6089                                               newheader);
6090                                 newheader = (rdatasetheader_t *)merged;
6091                                 init_rdataset(rbtdb, newheader);
6092                                 if (loading && RESIGN(newheader) &&
6093                                     RESIGN(header) &&
6094                                     header->resign < newheader->resign)
6095                                         newheader->resign = header->resign;
6096                         } else {
6097                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6098                                               newheader);
6099                                 return (result);
6100                         }
6101                 }
6102                 /*
6103                  * Don't replace existing NS, A and AAAA RRsets
6104                  * in the cache if they are already exist.  This
6105                  * prevents named being locked to old servers.
6106                  * Don't lower trust of existing record if the
6107                  * update is forced.
6108                  */
6109                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6110                     header->type == dns_rdatatype_ns &&
6111                     !header_nx && !newheader_nx &&
6112                     header->trust >= newheader->trust &&
6113                     dns_rdataslab_equalx((unsigned char *)header,
6114                                          (unsigned char *)newheader,
6115                                          (unsigned int)(sizeof(*newheader)),
6116                                          rbtdb->common.rdclass,
6117                                          (dns_rdatatype_t)header->type)) {
6118                         /*
6119                          * Honour the new ttl if it is less than the
6120                          * older one.
6121                          */
6122                         if (header->rdh_ttl > newheader->rdh_ttl)
6123                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
6124                         if (header->noqname == NULL &&
6125                             newheader->noqname != NULL) {
6126                                 header->noqname = newheader->noqname;
6127                                 newheader->noqname = NULL;
6128                         }
6129                         if (header->closest == NULL &&
6130                             newheader->closest != NULL) {
6131                                 header->closest = newheader->closest;
6132                                 newheader->closest = NULL;
6133                         }
6134                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6135                         if (addedrdataset != NULL)
6136                                 bind_rdataset(rbtdb, rbtnode, header, now,
6137                                               addedrdataset);
6138                         return (ISC_R_SUCCESS);
6139                 }
6140                 /*
6141                  * If we have will be replacing a NS RRset force its TTL
6142                  * to be no more than the current NS RRset's TTL.  This
6143                  * ensures the delegations that are withdrawn are honoured.
6144                  */
6145                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6146                     header->type == dns_rdatatype_ns &&
6147                     !header_nx && !newheader_nx &&
6148                     header->trust <= newheader->trust) {
6149                         if (newheader->rdh_ttl > header->rdh_ttl) {
6150                                 newheader->rdh_ttl = header->rdh_ttl;
6151                         }
6152                 }
6153                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
6154                     (header->type == dns_rdatatype_a ||
6155                      header->type == dns_rdatatype_aaaa ||
6156                      header->type == dns_rdatatype_ds ||
6157                      header->type == RBTDB_RDATATYPE_SIGDDS) &&
6158                     !header_nx && !newheader_nx &&
6159                     header->trust >= newheader->trust &&
6160                     dns_rdataslab_equal((unsigned char *)header,
6161                                         (unsigned char *)newheader,
6162                                         (unsigned int)(sizeof(*newheader)))) {
6163                         /*
6164                          * Honour the new ttl if it is less than the
6165                          * older one.
6166                          */
6167                         if (header->rdh_ttl > newheader->rdh_ttl)
6168                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
6169                         if (header->noqname == NULL &&
6170                             newheader->noqname != NULL) {
6171                                 header->noqname = newheader->noqname;
6172                                 newheader->noqname = NULL;
6173                         }
6174                         if (header->closest == NULL &&
6175                             newheader->closest != NULL) {
6176                                 header->closest = newheader->closest;
6177                                 newheader->closest = NULL;
6178                         }
6179                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6180                         if (addedrdataset != NULL)
6181                                 bind_rdataset(rbtdb, rbtnode, header, now,
6182                                               addedrdataset);
6183                         return (ISC_R_SUCCESS);
6184                 }
6185                 INSIST(rbtversion == NULL ||
6186                        rbtversion->serial >= topheader->serial);
6187                 if (topheader_prev != NULL)
6188                         topheader_prev->next = newheader;
6189                 else
6190                         rbtnode->data = newheader;
6191                 newheader->next = topheader->next;
6192                 if (loading) {
6193                         /*
6194                          * There are no other references to 'header' when
6195                          * loading, so we MAY clean up 'header' now.
6196                          * Since we don't generate changed records when
6197                          * loading, we MUST clean up 'header' now.
6198                          */
6199                         newheader->down = NULL;
6200                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
6201                 } else {
6202                         newheader->down = topheader;
6203                         topheader->next = newheader;
6204                         rbtnode->dirty = 1;
6205                         if (changed != NULL)
6206                                 changed->dirty = ISC_TRUE;
6207                         if (rbtversion == NULL) {
6208                                 set_ttl(rbtdb, header, 0);
6209                                 header->attributes |= RDATASET_ATTR_STALE;
6210                                 if (sigheader != NULL) {
6211                                         set_ttl(rbtdb, sigheader, 0);
6212                                         sigheader->attributes |=
6213                                                  RDATASET_ATTR_STALE;
6214                                 }
6215                         }
6216                         idx = newheader->node->locknum;
6217                         if (IS_CACHE(rbtdb)) {
6218                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6219                                                  newheader, link);
6220                                 /*
6221                                  * XXXMLG We don't check the return value
6222                                  * here.  If it fails, we will not do TTL
6223                                  * based expiry on this node.  However, we
6224                                  * will do it on the LRU side, so memory
6225                                  * will not leak... for long.
6226                                  */
6227                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
6228                         } else if (RESIGN(newheader))
6229                                 resign_insert(rbtdb, idx, newheader);
6230                 }
6231         } else {
6232                 /*
6233                  * No non-IGNORED rdatasets of the given type exist at
6234                  * this node.
6235                  */
6236
6237                 /*
6238                  * If we're trying to delete the type, don't bother.
6239                  */
6240                 if (newheader_nx) {
6241                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6242                         return (DNS_R_UNCHANGED);
6243                 }
6244
6245                 if (topheader != NULL) {
6246                         /*
6247                          * We have a list of rdatasets of the given type,
6248                          * but they're all marked IGNORE.  We simply insert
6249                          * the new rdataset at the head of the list.
6250                          *
6251                          * Ignored rdatasets cannot occur during loading, so
6252                          * we INSIST on it.
6253                          */
6254                         INSIST(!loading);
6255                         INSIST(rbtversion == NULL ||
6256                                rbtversion->serial >= topheader->serial);
6257                         if (topheader_prev != NULL)
6258                                 topheader_prev->next = newheader;
6259                         else
6260                                 rbtnode->data = newheader;
6261                         newheader->next = topheader->next;
6262                         newheader->down = topheader;
6263                         topheader->next = newheader;
6264                         rbtnode->dirty = 1;
6265                         if (changed != NULL)
6266                                 changed->dirty = ISC_TRUE;
6267                 } else {
6268                         /*
6269                          * No rdatasets of the given type exist at the node.
6270                          */
6271                         newheader->next = rbtnode->data;
6272                         newheader->down = NULL;
6273                         rbtnode->data = newheader;
6274                 }
6275                 idx = newheader->node->locknum;
6276                 if (IS_CACHE(rbtdb)) {
6277                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
6278                                          newheader, link);
6279                         isc_heap_insert(rbtdb->heaps[idx], newheader);
6280                 } else if (RESIGN(newheader)) {
6281                         resign_insert(rbtdb, idx, newheader);
6282                 }
6283         }
6284
6285         /*
6286          * Check if the node now contains CNAME and other data.
6287          */
6288         if (rbtversion != NULL &&
6289             cname_and_other_data(rbtnode, rbtversion->serial))
6290                 return (DNS_R_CNAMEANDOTHER);
6291
6292         if (addedrdataset != NULL)
6293                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6294
6295         return (ISC_R_SUCCESS);
6296 }
6297
6298 static inline isc_boolean_t
6299 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6300                 rbtdb_rdatatype_t type)
6301 {
6302         if (IS_CACHE(rbtdb)) {
6303                 if (type == dns_rdatatype_dname)
6304                         return (ISC_TRUE);
6305                 else
6306                         return (ISC_FALSE);
6307         } else if (type == dns_rdatatype_dname ||
6308                    (type == dns_rdatatype_ns &&
6309                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
6310                 return (ISC_TRUE);
6311         return (ISC_FALSE);
6312 }
6313
6314 static inline isc_result_t
6315 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6316            dns_rdataset_t *rdataset)
6317 {
6318         struct noqname *noqname;
6319         isc_mem_t *mctx = rbtdb->common.mctx;
6320         dns_name_t name;
6321         dns_rdataset_t neg, negsig;
6322         isc_result_t result;
6323         isc_region_t r;
6324
6325         dns_name_init(&name, NULL);
6326         dns_rdataset_init(&neg);
6327         dns_rdataset_init(&negsig);
6328
6329         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6330         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6331
6332         noqname = isc_mem_get(mctx, sizeof(*noqname));
6333         if (noqname == NULL) {
6334                 result = ISC_R_NOMEMORY;
6335                 goto cleanup;
6336         }
6337         dns_name_init(&noqname->name, NULL);
6338         noqname->neg = NULL;
6339         noqname->negsig = NULL;
6340         noqname->type = neg.type;
6341         result = dns_name_dup(&name, mctx, &noqname->name);
6342         if (result != ISC_R_SUCCESS)
6343                 goto cleanup;
6344         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6345         if (result != ISC_R_SUCCESS)
6346                 goto cleanup;
6347         noqname->neg = r.base;
6348         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6349         if (result != ISC_R_SUCCESS)
6350                 goto cleanup;
6351         noqname->negsig = r.base;
6352         dns_rdataset_disassociate(&neg);
6353         dns_rdataset_disassociate(&negsig);
6354         newheader->noqname = noqname;
6355         return (ISC_R_SUCCESS);
6356
6357 cleanup:
6358         dns_rdataset_disassociate(&neg);
6359         dns_rdataset_disassociate(&negsig);
6360         free_noqname(mctx, &noqname);
6361         return(result);
6362 }
6363
6364 static inline isc_result_t
6365 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6366            dns_rdataset_t *rdataset)
6367 {
6368         struct noqname *closest;
6369         isc_mem_t *mctx = rbtdb->common.mctx;
6370         dns_name_t name;
6371         dns_rdataset_t neg, negsig;
6372         isc_result_t result;
6373         isc_region_t r;
6374
6375         dns_name_init(&name, NULL);
6376         dns_rdataset_init(&neg);
6377         dns_rdataset_init(&negsig);
6378
6379         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6380         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6381
6382         closest = isc_mem_get(mctx, sizeof(*closest));
6383         if (closest == NULL) {
6384                 result = ISC_R_NOMEMORY;
6385                 goto cleanup;
6386         }
6387         dns_name_init(&closest->name, NULL);
6388         closest->neg = NULL;
6389         closest->negsig = NULL;
6390         closest->type = neg.type;
6391         result = dns_name_dup(&name, mctx, &closest->name);
6392         if (result != ISC_R_SUCCESS)
6393                 goto cleanup;
6394         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6395         if (result != ISC_R_SUCCESS)
6396                 goto cleanup;
6397         closest->neg = r.base;
6398         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6399         if (result != ISC_R_SUCCESS)
6400                 goto cleanup;
6401         closest->negsig = r.base;
6402         dns_rdataset_disassociate(&neg);
6403         dns_rdataset_disassociate(&negsig);
6404         newheader->closest = closest;
6405         return (ISC_R_SUCCESS);
6406
6407  cleanup:
6408         dns_rdataset_disassociate(&neg);
6409         dns_rdataset_disassociate(&negsig);
6410         free_noqname(mctx, &closest);
6411         return(result);
6412 }
6413
6414 static dns_dbmethods_t zone_methods;
6415
6416 static isc_result_t
6417 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6418             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6419             dns_rdataset_t *addedrdataset)
6420 {
6421         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6422         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6423         rbtdb_version_t *rbtversion = version;
6424         isc_region_t region;
6425         rdatasetheader_t *newheader;
6426         rdatasetheader_t *header;
6427         isc_result_t result;
6428         isc_boolean_t delegating;
6429         isc_boolean_t newnsec;
6430         isc_boolean_t tree_locked = ISC_FALSE;
6431         isc_boolean_t cache_is_overmem = ISC_FALSE;
6432
6433         REQUIRE(VALID_RBTDB(rbtdb));
6434         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6435
6436         if (rbtdb->common.methods == &zone_methods)
6437                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6438                           (rdataset->type == dns_rdatatype_nsec3 ||
6439                            rdataset->covers == dns_rdatatype_nsec3)) ||
6440                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6441                            rdataset->type != dns_rdatatype_nsec3 &&
6442                            rdataset->covers != dns_rdatatype_nsec3)));
6443
6444         if (rbtversion == NULL) {
6445                 if (now == 0)
6446                         isc_stdtime_get(&now);
6447         } else
6448                 now = 0;
6449
6450         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6451                                             &region, sizeof(rdatasetheader_t));
6452         if (result != ISC_R_SUCCESS)
6453                 return (result);
6454
6455         newheader = (rdatasetheader_t *)region.base;
6456         init_rdataset(rbtdb, newheader);
6457         set_ttl(rbtdb, newheader, rdataset->ttl + now);
6458         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6459                                                 rdataset->covers);
6460         newheader->attributes = 0;
6461         newheader->noqname = NULL;
6462         newheader->closest = NULL;
6463         newheader->count = init_count++;
6464         newheader->trust = rdataset->trust;
6465         newheader->additional_auth = NULL;
6466         newheader->additional_glue = NULL;
6467         newheader->last_used = now;
6468         newheader->node = rbtnode;
6469         if (rbtversion != NULL) {
6470                 newheader->serial = rbtversion->serial;
6471                 now = 0;
6472
6473                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6474                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6475                         newheader->resign = rdataset->resign;
6476                 } else
6477                         newheader->resign = 0;
6478         } else {
6479                 newheader->serial = 1;
6480                 newheader->resign = 0;
6481                 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
6482                         newheader->attributes |= RDATASET_ATTR_NEGATIVE;
6483                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6484                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6485                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6486                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6487                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6488                         result = addnoqname(rbtdb, newheader, rdataset);
6489                         if (result != ISC_R_SUCCESS) {
6490                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6491                                               newheader);
6492                                 return (result);
6493                         }
6494                 }
6495                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6496                         result = addclosest(rbtdb, newheader, rdataset);
6497                         if (result != ISC_R_SUCCESS) {
6498                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6499                                               newheader);
6500                                 return (result);
6501                         }
6502                 }
6503         }
6504
6505         /*
6506          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6507          * just DNAME for the cache), then we need to set the callback bit
6508          * on the node.
6509          */
6510         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6511                 delegating = ISC_TRUE;
6512         else
6513                 delegating = ISC_FALSE;
6514
6515         /*
6516          * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6517          */
6518         if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6519             rdataset->type == dns_rdatatype_nsec)
6520                 newnsec = ISC_TRUE;
6521         else
6522                 newnsec = ISC_FALSE;
6523
6524         /*
6525          * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6526          * or the DB is a cache in an overmem state, hold an exclusive lock on
6527          * the tree.  In the latter case the lock does not necessarily have to
6528          * be acquired but it will help purge stale entries more effectively.
6529          */
6530         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6531                 cache_is_overmem = ISC_TRUE;
6532         if (delegating || newnsec || cache_is_overmem) {
6533                 tree_locked = ISC_TRUE;
6534                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6535         }
6536
6537         if (cache_is_overmem)
6538                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6539
6540         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6541                   isc_rwlocktype_write);
6542
6543         if (rbtdb->rrsetstats != NULL) {
6544                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6545                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6546         }
6547
6548         if (IS_CACHE(rbtdb)) {
6549                 if (tree_locked)
6550                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6551
6552                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6553                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6554                         expire_header(rbtdb, header, tree_locked);
6555
6556                 /*
6557                  * If we've been holding a write lock on the tree just for
6558                  * cleaning, we can release it now.  However, we still need the
6559                  * node lock.
6560                  */
6561                 if (tree_locked && !delegating && !newnsec) {
6562                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6563                         tree_locked = ISC_FALSE;
6564                 }
6565         }
6566
6567         result = ISC_R_SUCCESS;
6568         if (newnsec) {
6569                 dns_fixedname_t fname;
6570                 dns_name_t *name;
6571                 dns_rbtnode_t *nsecnode;
6572
6573                 dns_fixedname_init(&fname);
6574                 name = dns_fixedname_name(&fname);
6575                 dns_rbt_fullnamefromnode(rbtnode, name);
6576                 nsecnode = NULL;
6577                 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6578                 if (result == ISC_R_SUCCESS) {
6579                         nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6580                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6581                 } else if (result == ISC_R_EXISTS) {
6582                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6583                         result = ISC_R_SUCCESS;
6584                 }
6585         }
6586
6587         if (result == ISC_R_SUCCESS)
6588                 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6589                              ISC_FALSE, addedrdataset, now);
6590         if (result == ISC_R_SUCCESS && delegating)
6591                 rbtnode->find_callback = 1;
6592
6593         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6594                     isc_rwlocktype_write);
6595
6596         if (tree_locked)
6597                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6598
6599         /*
6600          * Update the zone's secure status.  If version is non-NULL
6601          * this is deferred until closeversion() is called.
6602          */
6603         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6604                 iszonesecure(db, version, rbtdb->origin_node);
6605
6606         return (result);
6607 }
6608
6609 static isc_result_t
6610 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6611                  dns_rdataset_t *rdataset, unsigned int options,
6612                  dns_rdataset_t *newrdataset)
6613 {
6614         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6615         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6616         rbtdb_version_t *rbtversion = version;
6617         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6618         unsigned char *subresult;
6619         isc_region_t region;
6620         isc_result_t result;
6621         rbtdb_changed_t *changed;
6622
6623         REQUIRE(VALID_RBTDB(rbtdb));
6624         REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
6625
6626         if (rbtdb->common.methods == &zone_methods)
6627                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6628                           (rdataset->type == dns_rdatatype_nsec3 ||
6629                            rdataset->covers == dns_rdatatype_nsec3)) ||
6630                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6631                            rdataset->type != dns_rdatatype_nsec3 &&
6632                            rdataset->covers != dns_rdatatype_nsec3)));
6633
6634         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6635                                             &region,
6636                                             sizeof(rdatasetheader_t));
6637         if (result != ISC_R_SUCCESS)
6638                 return (result);
6639         newheader = (rdatasetheader_t *)region.base;
6640         init_rdataset(rbtdb, newheader);
6641         set_ttl(rbtdb, newheader, rdataset->ttl);
6642         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6643                                                 rdataset->covers);
6644         newheader->attributes = 0;
6645         newheader->serial = rbtversion->serial;
6646         newheader->trust = 0;
6647         newheader->noqname = NULL;
6648         newheader->closest = NULL;
6649         newheader->count = init_count++;
6650         newheader->additional_auth = NULL;
6651         newheader->additional_glue = NULL;
6652         newheader->last_used = 0;
6653         newheader->node = rbtnode;
6654         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6655                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6656                 newheader->resign = rdataset->resign;
6657         } else
6658                 newheader->resign = 0;
6659
6660         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6661                   isc_rwlocktype_write);
6662
6663         changed = add_changed(rbtdb, rbtversion, rbtnode);
6664         if (changed == NULL) {
6665                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6666                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6667                             isc_rwlocktype_write);
6668                 return (ISC_R_NOMEMORY);
6669         }
6670
6671         topheader_prev = NULL;
6672         for (topheader = rbtnode->data;
6673              topheader != NULL;
6674              topheader = topheader->next) {
6675                 if (topheader->type == newheader->type)
6676                         break;
6677                 topheader_prev = topheader;
6678         }
6679         /*
6680          * If header isn't NULL, we've found the right type.  There may be
6681          * IGNORE rdatasets between the top of the chain and the first real
6682          * data.  We skip over them.
6683          */
6684         header = topheader;
6685         while (header != NULL && IGNORE(header))
6686                 header = header->down;
6687         if (header != NULL && EXISTS(header)) {
6688                 unsigned int flags = 0;
6689                 subresult = NULL;
6690                 result = ISC_R_SUCCESS;
6691                 if ((options & DNS_DBSUB_EXACT) != 0) {
6692                         flags |= DNS_RDATASLAB_EXACT;
6693                         if (newheader->rdh_ttl != header->rdh_ttl)
6694                                 result = DNS_R_NOTEXACT;
6695                 }
6696                 if (result == ISC_R_SUCCESS)
6697                         result = dns_rdataslab_subtract(
6698                                         (unsigned char *)header,
6699                                         (unsigned char *)newheader,
6700                                         (unsigned int)(sizeof(*newheader)),
6701                                         rbtdb->common.mctx,
6702                                         rbtdb->common.rdclass,
6703                                         (dns_rdatatype_t)header->type,
6704                                         flags, &subresult);
6705                 if (result == ISC_R_SUCCESS) {
6706                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6707                         newheader = (rdatasetheader_t *)subresult;
6708                         init_rdataset(rbtdb, newheader);
6709                         /*
6710                          * We have to set the serial since the rdataslab
6711                          * subtraction routine copies the reserved portion of
6712                          * header, not newheader.
6713                          */
6714                         newheader->serial = rbtversion->serial;
6715                         /*
6716                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6717                          * to additional info.  We need to clear these fields
6718                          * to avoid having duplicated references.
6719                          */
6720                         newheader->additional_auth = NULL;
6721                         newheader->additional_glue = NULL;
6722                 } else if (result == DNS_R_NXRRSET) {
6723                         /*
6724                          * This subtraction would remove all of the rdata;
6725                          * add a nonexistent header instead.
6726                          */
6727                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6728                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6729                         if (newheader == NULL) {
6730                                 result = ISC_R_NOMEMORY;
6731                                 goto unlock;
6732                         }
6733                         set_ttl(rbtdb, newheader, 0);
6734                         newheader->type = topheader->type;
6735                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6736                         newheader->trust = 0;
6737                         newheader->serial = rbtversion->serial;
6738                         newheader->noqname = NULL;
6739                         newheader->closest = NULL;
6740                         newheader->count = 0;
6741                         newheader->additional_auth = NULL;
6742                         newheader->additional_glue = NULL;
6743                         newheader->node = rbtnode;
6744                         newheader->resign = 0;
6745                         newheader->last_used = 0;
6746                 } else {
6747                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6748                         goto unlock;
6749                 }
6750
6751                 /*
6752                  * If we're here, we want to link newheader in front of
6753                  * topheader.
6754                  */
6755                 INSIST(rbtversion->serial >= topheader->serial);
6756                 if (topheader_prev != NULL)
6757                         topheader_prev->next = newheader;
6758                 else
6759                         rbtnode->data = newheader;
6760                 newheader->next = topheader->next;
6761                 newheader->down = topheader;
6762                 topheader->next = newheader;
6763                 rbtnode->dirty = 1;
6764                 changed->dirty = ISC_TRUE;
6765         } else {
6766                 /*
6767                  * The rdataset doesn't exist, so we don't need to do anything
6768                  * to satisfy the deletion request.
6769                  */
6770                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6771                 if ((options & DNS_DBSUB_EXACT) != 0)
6772                         result = DNS_R_NOTEXACT;
6773                 else
6774                         result = DNS_R_UNCHANGED;
6775         }
6776
6777         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6778                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6779
6780  unlock:
6781         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6782                     isc_rwlocktype_write);
6783
6784         /*
6785          * Update the zone's secure status.  If version is non-NULL
6786          * this is deferred until closeversion() is called.
6787          */
6788         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6789                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6790
6791         return (result);
6792 }
6793
6794 static isc_result_t
6795 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6796                dns_rdatatype_t type, dns_rdatatype_t covers)
6797 {
6798         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6799         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6800         rbtdb_version_t *rbtversion = version;
6801         isc_result_t result;
6802         rdatasetheader_t *newheader;
6803
6804         REQUIRE(VALID_RBTDB(rbtdb));
6805         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
6806
6807         if (type == dns_rdatatype_any)
6808                 return (ISC_R_NOTIMPLEMENTED);
6809         if (type == dns_rdatatype_rrsig && covers == 0)
6810                 return (ISC_R_NOTIMPLEMENTED);
6811
6812         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6813         if (newheader == NULL)
6814                 return (ISC_R_NOMEMORY);
6815         set_ttl(rbtdb, newheader, 0);
6816         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6817         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6818         newheader->trust = 0;
6819         newheader->noqname = NULL;
6820         newheader->closest = NULL;
6821         newheader->additional_auth = NULL;
6822         newheader->additional_glue = NULL;
6823         if (rbtversion != NULL)
6824                 newheader->serial = rbtversion->serial;
6825         else
6826                 newheader->serial = 0;
6827         newheader->count = 0;
6828         newheader->last_used = 0;
6829         newheader->node = rbtnode;
6830
6831         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6832                   isc_rwlocktype_write);
6833
6834         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6835                      ISC_FALSE, NULL, 0);
6836
6837         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6838                     isc_rwlocktype_write);
6839
6840         /*
6841          * Update the zone's secure status.  If version is non-NULL
6842          * this is deferred until closeversion() is called.
6843          */
6844         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6845                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6846
6847         return (result);
6848 }
6849
6850 /*
6851  * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
6852  */
6853 static isc_result_t
6854 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6855          isc_boolean_t hasnsec)
6856 {
6857         isc_result_t noderesult, nsecresult;
6858         dns_rbtnode_t *nsecnode;
6859
6860         noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
6861
6862 #ifdef BIND9
6863         if (noderesult == ISC_R_SUCCESS)
6864                 dns_rpz_cidr_addip(rbtdb->rpz_cidr, name);
6865 #endif
6866
6867         if (!hasnsec)
6868                 return (noderesult);
6869         if (noderesult == ISC_R_EXISTS) {
6870                 /*
6871                  * Add a node to the auxiliary NSEC tree for an old node
6872                  * just now getting an NSEC record.
6873                  */
6874                 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6875                         return (noderesult);
6876         } else if (noderesult != ISC_R_SUCCESS) {
6877                 return (noderesult);
6878         }
6879
6880         /*
6881          * Build the auxiliary tree for NSECs as we go.
6882          * This tree speeds searches for closest NSECs that would otherwise
6883          * need to examine many irrelevant nodes in large TLDs.
6884          *
6885          * Add nodes to the auxiliary tree after corresponding nodes have
6886          * been added to the main tree.
6887          */
6888         nsecnode = NULL;
6889         nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6890         if (nsecresult == ISC_R_SUCCESS) {
6891                 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6892                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6893                 return (noderesult);
6894         }
6895
6896         if (nsecresult == ISC_R_EXISTS) {
6897 #if 1 /* 0 */
6898                 isc_log_write(dns_lctx,
6899                               DNS_LOGCATEGORY_DATABASE,
6900                               DNS_LOGMODULE_CACHE,
6901                               ISC_LOG_WARNING,
6902                               "addnode: NSEC node already exists");
6903 #endif
6904                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6905                 return (noderesult);
6906         }
6907
6908         nsecresult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6909         if (nsecresult != ISC_R_SUCCESS)
6910                 isc_log_write(dns_lctx,
6911                               DNS_LOGCATEGORY_DATABASE,
6912                               DNS_LOGMODULE_CACHE,
6913                               ISC_LOG_WARNING,
6914                               "loading_addrdataset: "
6915                               "dns_rbt_deletenode: %s after "
6916                               "dns_rbt_addnode(NSEC): %s",
6917                               isc_result_totext(nsecresult),
6918                               isc_result_totext(noderesult));
6919         return (noderesult);
6920 }
6921
6922 static isc_result_t
6923 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6924         rbtdb_load_t *loadctx = arg;
6925         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6926         dns_rbtnode_t *node;
6927         isc_result_t result;
6928         isc_region_t region;
6929         rdatasetheader_t *newheader;
6930
6931         /*
6932          * This routine does no node locking.  See comments in
6933          * 'load' below for more information on loading and
6934          * locking.
6935          */
6936
6937
6938         /*
6939          * SOA records are only allowed at top of zone.
6940          */
6941         if (rdataset->type == dns_rdatatype_soa &&
6942             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6943                 return (DNS_R_NOTZONETOP);
6944
6945         if (rdataset->type != dns_rdatatype_nsec3 &&
6946             rdataset->covers != dns_rdatatype_nsec3)
6947                 add_empty_wildcards(rbtdb, name);
6948
6949         if (dns_name_iswildcard(name)) {
6950                 /*
6951                  * NS record owners cannot legally be wild cards.
6952                  */
6953                 if (rdataset->type == dns_rdatatype_ns)
6954                         return (DNS_R_INVALIDNS);
6955                 /*
6956                  * NSEC3 record owners cannot legally be wild cards.
6957                  */
6958                 if (rdataset->type == dns_rdatatype_nsec3)
6959                         return (DNS_R_INVALIDNSEC3);
6960                 result = add_wildcard_magic(rbtdb, name);
6961                 if (result != ISC_R_SUCCESS)
6962                         return (result);
6963         }
6964
6965         node = NULL;
6966         if (rdataset->type == dns_rdatatype_nsec3 ||
6967             rdataset->covers == dns_rdatatype_nsec3) {
6968                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6969                 if (result == ISC_R_SUCCESS)
6970                         node->nsec = DNS_RBT_NSEC_NSEC3;
6971         } else if (rdataset->type == dns_rdatatype_nsec) {
6972                 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6973         } else {
6974                 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6975         }
6976         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6977                 return (result);
6978         if (result == ISC_R_SUCCESS) {
6979                 dns_name_t foundname;
6980                 dns_name_init(&foundname, NULL);
6981                 dns_rbt_namefromnode(node, &foundname);
6982 #ifdef DNS_RBT_USEHASH
6983                 node->locknum = node->hashval % rbtdb->node_lock_count;
6984 #else
6985                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6986                         rbtdb->node_lock_count;
6987 #endif
6988         }
6989
6990         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6991                                             &region,
6992                                             sizeof(rdatasetheader_t));
6993         if (result != ISC_R_SUCCESS)
6994                 return (result);
6995         newheader = (rdatasetheader_t *)region.base;
6996         init_rdataset(rbtdb, newheader);
6997         set_ttl(rbtdb, newheader,
6998                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6999         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
7000                                                 rdataset->covers);
7001         newheader->attributes = 0;
7002         newheader->trust = rdataset->trust;
7003         newheader->serial = 1;
7004         newheader->noqname = NULL;
7005         newheader->closest = NULL;
7006         newheader->count = init_count++;
7007         newheader->additional_auth = NULL;
7008         newheader->additional_glue = NULL;
7009         newheader->last_used = 0;
7010         newheader->node = node;
7011         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
7012                 newheader->attributes |= RDATASET_ATTR_RESIGN;
7013                 newheader->resign = rdataset->resign;
7014         } else
7015                 newheader->resign = 0;
7016
7017         result = add(rbtdb, node, rbtdb->current_version, newheader,
7018                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
7019         if (result == ISC_R_SUCCESS &&
7020             delegating_type(rbtdb, node, rdataset->type))
7021                 node->find_callback = 1;
7022         else if (result == DNS_R_UNCHANGED)
7023                 result = ISC_R_SUCCESS;
7024
7025         return (result);
7026 }
7027
7028 static isc_result_t
7029 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
7030         rbtdb_load_t *loadctx;
7031         dns_rbtdb_t *rbtdb;
7032
7033         rbtdb = (dns_rbtdb_t *)db;
7034
7035         REQUIRE(VALID_RBTDB(rbtdb));
7036
7037         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
7038         if (loadctx == NULL)
7039                 return (ISC_R_NOMEMORY);
7040
7041         loadctx->rbtdb = rbtdb;
7042         if (IS_CACHE(rbtdb))
7043                 isc_stdtime_get(&loadctx->now);
7044         else
7045                 loadctx->now = 0;
7046
7047         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7048
7049         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
7050                 == 0);
7051         rbtdb->attributes |= RBTDB_ATTR_LOADING;
7052
7053         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7054
7055         *addp = loading_addrdataset;
7056         *dbloadp = loadctx;
7057
7058         return (ISC_R_SUCCESS);
7059 }
7060
7061 static isc_result_t
7062 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
7063         rbtdb_load_t *loadctx;
7064         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7065
7066         REQUIRE(VALID_RBTDB(rbtdb));
7067         REQUIRE(dbloadp != NULL);
7068         loadctx = *dbloadp;
7069         REQUIRE(loadctx->rbtdb == rbtdb);
7070
7071         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7072
7073         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
7074         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
7075
7076         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
7077         rbtdb->attributes |= RBTDB_ATTR_LOADED;
7078
7079         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7080
7081         /*
7082          * If there's a KEY rdataset at the zone origin containing a
7083          * zone key, we consider the zone secure.
7084          */
7085         if (! IS_CACHE(rbtdb))
7086                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
7087
7088         *dbloadp = NULL;
7089
7090         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
7091
7092         return (ISC_R_SUCCESS);
7093 }
7094
7095 static isc_result_t
7096 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
7097      dns_masterformat_t masterformat) {
7098         dns_rbtdb_t *rbtdb;
7099         rbtdb_version_t *rbtversion = version;
7100
7101         rbtdb = (dns_rbtdb_t *)db;
7102
7103         REQUIRE(VALID_RBTDB(rbtdb));
7104         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7105
7106 #ifdef BIND9
7107         return (dns_master_dump2(rbtdb->common.mctx, db, version,
7108                                  &dns_master_style_default,
7109                                  filename, masterformat));
7110 #else
7111         UNUSED(version);
7112         UNUSED(filename);
7113         UNUSED(masterformat);
7114
7115         return (ISC_R_NOTIMPLEMENTED);
7116 #endif /* BIND9 */
7117 }
7118
7119 static void
7120 delete_callback(void *data, void *arg) {
7121         dns_rbtdb_t *rbtdb = arg;
7122         rdatasetheader_t *current, *next;
7123         unsigned int locknum;
7124
7125         current = data;
7126         locknum = current->node->locknum;
7127         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7128         while (current != NULL) {
7129                 next = current->next;
7130                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
7131                 current = next;
7132         }
7133         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
7134 }
7135
7136 static isc_boolean_t
7137 issecure(dns_db_t *db) {
7138         dns_rbtdb_t *rbtdb;
7139         isc_boolean_t secure;
7140
7141         rbtdb = (dns_rbtdb_t *)db;
7142
7143         REQUIRE(VALID_RBTDB(rbtdb));
7144
7145         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7146         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
7147         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7148
7149         return (secure);
7150 }
7151
7152 static isc_boolean_t
7153 isdnssec(dns_db_t *db) {
7154         dns_rbtdb_t *rbtdb;
7155         isc_boolean_t dnssec;
7156
7157         rbtdb = (dns_rbtdb_t *)db;
7158
7159         REQUIRE(VALID_RBTDB(rbtdb));
7160
7161         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7162         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
7163         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7164
7165         return (dnssec);
7166 }
7167
7168 static unsigned int
7169 nodecount(dns_db_t *db) {
7170         dns_rbtdb_t *rbtdb;
7171         unsigned int count;
7172
7173         rbtdb = (dns_rbtdb_t *)db;
7174
7175         REQUIRE(VALID_RBTDB(rbtdb));
7176
7177         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7178         count = dns_rbt_nodecount(rbtdb->tree);
7179         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7180
7181         return (count);
7182 }
7183
7184 static void
7185 settask(dns_db_t *db, isc_task_t *task) {
7186         dns_rbtdb_t *rbtdb;
7187
7188         rbtdb = (dns_rbtdb_t *)db;
7189
7190         REQUIRE(VALID_RBTDB(rbtdb));
7191
7192         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7193         if (rbtdb->task != NULL)
7194                 isc_task_detach(&rbtdb->task);
7195         if (task != NULL)
7196                 isc_task_attach(task, &rbtdb->task);
7197         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7198 }
7199
7200 static isc_boolean_t
7201 ispersistent(dns_db_t *db) {
7202         UNUSED(db);
7203         return (ISC_FALSE);
7204 }
7205
7206 static isc_result_t
7207 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
7208         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7209         dns_rbtnode_t *onode;
7210         isc_result_t result = ISC_R_SUCCESS;
7211
7212         REQUIRE(VALID_RBTDB(rbtdb));
7213         REQUIRE(nodep != NULL && *nodep == NULL);
7214
7215         /* Note that the access to origin_node doesn't require a DB lock */
7216         onode = (dns_rbtnode_t *)rbtdb->origin_node;
7217         if (onode != NULL) {
7218                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
7219                 new_reference(rbtdb, onode);
7220                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
7221
7222                 *nodep = rbtdb->origin_node;
7223         } else {
7224                 INSIST(IS_CACHE(rbtdb));
7225                 result = ISC_R_NOTFOUND;
7226         }
7227
7228         return (result);
7229 }
7230
7231 static isc_result_t
7232 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
7233                    isc_uint8_t *flags, isc_uint16_t *iterations,
7234                    unsigned char *salt, size_t *salt_length)
7235 {
7236         dns_rbtdb_t *rbtdb;
7237         isc_result_t result = ISC_R_NOTFOUND;
7238         rbtdb_version_t *rbtversion = version;
7239
7240         rbtdb = (dns_rbtdb_t *)db;
7241
7242         REQUIRE(VALID_RBTDB(rbtdb));
7243         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
7244
7245         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7246
7247         if (rbtversion == NULL)
7248                 rbtversion = rbtdb->current_version;
7249
7250         if (rbtversion->havensec3) {
7251                 if (hash != NULL)
7252                         *hash = rbtversion->hash;
7253                 if (salt != NULL && salt_length != NULL) {
7254                         REQUIRE(*salt_length >= rbtversion->salt_length);
7255                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
7256                 }
7257                 if (salt_length != NULL)
7258                         *salt_length = rbtversion->salt_length;
7259                 if (iterations != NULL)
7260                         *iterations = rbtversion->iterations;
7261                 if (flags != NULL)
7262                         *flags = rbtversion->flags;
7263                 result = ISC_R_SUCCESS;
7264         }
7265         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7266
7267         return (result);
7268 }
7269
7270 static isc_result_t
7271 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
7272         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7273         isc_stdtime_t oldresign;
7274         isc_result_t result = ISC_R_SUCCESS;
7275         rdatasetheader_t *header;
7276
7277         REQUIRE(VALID_RBTDB(rbtdb));
7278         REQUIRE(!IS_CACHE(rbtdb));
7279         REQUIRE(rdataset != NULL);
7280
7281         header = rdataset->private3;
7282         header--;
7283
7284         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
7285                   isc_rwlocktype_write);
7286
7287         oldresign = header->resign;
7288         header->resign = resign;
7289         if (header->heap_index != 0) {
7290                 INSIST(RESIGN(header));
7291                 if (resign == 0) {
7292                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
7293                                         header->heap_index);
7294                         header->heap_index = 0;
7295                 } else if (resign < oldresign)
7296                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
7297                                            header->heap_index);
7298                 else if (resign > oldresign)
7299                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
7300                                            header->heap_index);
7301         } else if (resign && header->heap_index == 0) {
7302                 header->attributes |= RDATASET_ATTR_RESIGN;
7303                 result = resign_insert(rbtdb, header->node->locknum, header);
7304         }
7305         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7306                     isc_rwlocktype_write);
7307         return (result);
7308 }
7309
7310 static isc_result_t
7311 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7312                dns_name_t *foundname)
7313 {
7314         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7315         rdatasetheader_t *header = NULL, *this;
7316         unsigned int i;
7317         isc_result_t result = ISC_R_NOTFOUND;
7318         unsigned int locknum;
7319
7320         REQUIRE(VALID_RBTDB(rbtdb));
7321
7322         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7323
7324         for (i = 0; i < rbtdb->node_lock_count; i++) {
7325                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7326                 this = isc_heap_element(rbtdb->heaps[i], 1);
7327                 if (this == NULL) {
7328                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7329                                     isc_rwlocktype_read);
7330                         continue;
7331                 }
7332                 if (header == NULL)
7333                         header = this;
7334                 else if (isc_serial_lt(this->resign, header->resign)) {
7335                         locknum = header->node->locknum;
7336                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7337                                     isc_rwlocktype_read);
7338                         header = this;
7339                 } else
7340                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7341                                     isc_rwlocktype_read);
7342         }
7343
7344         if (header == NULL)
7345                 goto unlock;
7346
7347         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7348
7349         if (foundname != NULL)
7350                 dns_rbt_fullnamefromnode(header->node, foundname);
7351
7352         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7353                     isc_rwlocktype_read);
7354
7355         result = ISC_R_SUCCESS;
7356
7357  unlock:
7358         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7359
7360         return (result);
7361 }
7362
7363 static void
7364 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7365 {
7366         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7367         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7368         dns_rbtnode_t *node;
7369         rdatasetheader_t *header;
7370
7371         REQUIRE(VALID_RBTDB(rbtdb));
7372         REQUIRE(rdataset != NULL);
7373         REQUIRE(rdataset->methods == &rdataset_methods);
7374         REQUIRE(rbtdb->future_version == rbtversion);
7375         REQUIRE(rbtversion != NULL);
7376         REQUIRE(rbtversion->writer);
7377         REQUIRE(rbtversion->rbtdb == rbtdb);
7378
7379         node = rdataset->private2;
7380         INSIST(node != NULL);
7381         header = rdataset->private3;
7382         INSIST(header != NULL);
7383         header--;
7384
7385         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7386         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7387                   isc_rwlocktype_write);
7388         /*
7389          * Delete from heap and save to re-signed list so that it can
7390          * be restored if we backout of this change.
7391          */
7392         new_reference(rbtdb, node);
7393         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7394         header->heap_index = 0;
7395         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7396
7397         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7398                     isc_rwlocktype_write);
7399         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7400 }
7401
7402 static dns_stats_t *
7403 getrrsetstats(dns_db_t *db) {
7404         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7405
7406         REQUIRE(VALID_RBTDB(rbtdb));
7407         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7408
7409         return (rbtdb->rrsetstats);
7410 }
7411
7412 static dns_dbmethods_t zone_methods = {
7413         attach,
7414         detach,
7415         beginload,
7416         endload,
7417         dump,
7418         currentversion,
7419         newversion,
7420         attachversion,
7421         closeversion,
7422         findnode,
7423         zone_find,
7424         zone_findzonecut,
7425         attachnode,
7426         detachnode,
7427         expirenode,
7428         printnode,
7429         createiterator,
7430         zone_findrdataset,
7431         allrdatasets,
7432         addrdataset,
7433         subtractrdataset,
7434         deleterdataset,
7435         issecure,
7436         nodecount,
7437         ispersistent,
7438         overmem,
7439         settask,
7440         getoriginnode,
7441         NULL,
7442         getnsec3parameters,
7443         findnsec3node,
7444         setsigningtime,
7445         getsigningtime,
7446         resigned,
7447         isdnssec,
7448         NULL,
7449 #ifdef BIND9
7450         get_rpz_enabled,
7451         rpz_findips
7452 #else
7453         NULL,
7454         NULL
7455 #endif
7456 };
7457
7458 static dns_dbmethods_t cache_methods = {
7459         attach,
7460         detach,
7461         beginload,
7462         endload,
7463         dump,
7464         currentversion,
7465         newversion,
7466         attachversion,
7467         closeversion,
7468         findnode,
7469         cache_find,
7470         cache_findzonecut,
7471         attachnode,
7472         detachnode,
7473         expirenode,
7474         printnode,
7475         createiterator,
7476         cache_findrdataset,
7477         allrdatasets,
7478         addrdataset,
7479         subtractrdataset,
7480         deleterdataset,
7481         issecure,
7482         nodecount,
7483         ispersistent,
7484         overmem,
7485         settask,
7486         getoriginnode,
7487         NULL,
7488         NULL,
7489         NULL,
7490         NULL,
7491         NULL,
7492         NULL,
7493         isdnssec,
7494         getrrsetstats,
7495         NULL,
7496         NULL
7497 };
7498
7499 isc_result_t
7500 #ifdef DNS_RBTDB_VERSION64
7501 dns_rbtdb64_create
7502 #else
7503 dns_rbtdb_create
7504 #endif
7505                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7506                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7507                  void *driverarg, dns_db_t **dbp)
7508 {
7509         dns_rbtdb_t *rbtdb;
7510         isc_result_t result;
7511         int i;
7512         dns_name_t name;
7513         isc_boolean_t (*sooner)(void *, void *);
7514         isc_mem_t *hmctx = mctx;
7515
7516         /* Keep the compiler happy. */
7517         UNUSED(driverarg);
7518
7519         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7520         if (rbtdb == NULL)
7521                 return (ISC_R_NOMEMORY);
7522
7523         /*
7524          * If argv[0] exists, it points to a memory context to use for heap
7525          */
7526         if (argc != 0)
7527                 hmctx = (isc_mem_t *) argv[0];
7528
7529         memset(rbtdb, '\0', sizeof(*rbtdb));
7530         dns_name_init(&rbtdb->common.origin, NULL);
7531         rbtdb->common.attributes = 0;
7532         if (type == dns_dbtype_cache) {
7533                 rbtdb->common.methods = &cache_methods;
7534                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7535         } else if (type == dns_dbtype_stub) {
7536                 rbtdb->common.methods = &zone_methods;
7537                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7538         } else
7539                 rbtdb->common.methods = &zone_methods;
7540         rbtdb->common.rdclass = rdclass;
7541         rbtdb->common.mctx = NULL;
7542
7543         result = RBTDB_INITLOCK(&rbtdb->lock);
7544         if (result != ISC_R_SUCCESS)
7545                 goto cleanup_rbtdb;
7546
7547         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7548         if (result != ISC_R_SUCCESS)
7549                 goto cleanup_lock;
7550
7551         /*
7552          * Initialize node_lock_count in a generic way to support future
7553          * extension which allows the user to specify this value on creation.
7554          * Note that when specified for a cache DB it must be larger than 1
7555          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7556          */
7557         if (rbtdb->node_lock_count == 0) {
7558                 if (IS_CACHE(rbtdb))
7559                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7560                 else
7561                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7562         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7563                 result = ISC_R_RANGE;
7564                 goto cleanup_tree_lock;
7565         }
7566         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7567         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7568                                         sizeof(rbtdb_nodelock_t));
7569         if (rbtdb->node_locks == NULL) {
7570                 result = ISC_R_NOMEMORY;
7571                 goto cleanup_tree_lock;
7572         }
7573
7574         rbtdb->rrsetstats = NULL;
7575         if (IS_CACHE(rbtdb)) {
7576                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7577                 if (result != ISC_R_SUCCESS)
7578                         goto cleanup_node_locks;
7579                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7580                                                sizeof(rdatasetheaderlist_t));
7581                 if (rbtdb->rdatasets == NULL) {
7582                         result = ISC_R_NOMEMORY;
7583                         goto cleanup_rrsetstats;
7584                 }
7585                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7586                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
7587         } else
7588                 rbtdb->rdatasets = NULL;
7589
7590         /*
7591          * Create the heaps.
7592          */
7593         rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
7594                                    sizeof(isc_heap_t *));
7595         if (rbtdb->heaps == NULL) {
7596                 result = ISC_R_NOMEMORY;
7597                 goto cleanup_rdatasets;
7598         }
7599         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7600                 rbtdb->heaps[i] = NULL;
7601         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7602         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7603                 result = isc_heap_create(hmctx, sooner, set_index, 0,
7604                                          &rbtdb->heaps[i]);
7605                 if (result != ISC_R_SUCCESS)
7606                         goto cleanup_heaps;
7607         }
7608
7609         /*
7610          * Create deadnode lists.
7611          */
7612         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7613                                        sizeof(rbtnodelist_t));
7614         if (rbtdb->deadnodes == NULL) {
7615                 result = ISC_R_NOMEMORY;
7616                 goto cleanup_heaps;
7617         }
7618         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7619                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7620
7621         rbtdb->active = rbtdb->node_lock_count;
7622
7623         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7624                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7625                 if (result == ISC_R_SUCCESS) {
7626                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7627                         if (result != ISC_R_SUCCESS)
7628                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7629                 }
7630                 if (result != ISC_R_SUCCESS) {
7631                         while (i-- > 0) {
7632                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7633                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7634                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7635                         }
7636                         goto cleanup_deadnodes;
7637                 }
7638                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7639         }
7640
7641         /*
7642          * Attach to the mctx.  The database will persist so long as there
7643          * are references to it, and attaching to the mctx ensures that our
7644          * mctx won't disappear out from under us.
7645          */
7646         isc_mem_attach(mctx, &rbtdb->common.mctx);
7647         isc_mem_attach(hmctx, &rbtdb->hmctx);
7648
7649         /*
7650          * Must be initialized before free_rbtdb() is called.
7651          */
7652         isc_ondestroy_init(&rbtdb->common.ondest);
7653
7654         /*
7655          * Make a copy of the origin name.
7656          */
7657         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7658         if (result != ISC_R_SUCCESS) {
7659                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7660                 return (result);
7661         }
7662
7663         /*
7664          * Make the Red-Black Trees.
7665          */
7666         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7667         if (result != ISC_R_SUCCESS) {
7668                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7669                 return (result);
7670         }
7671
7672         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7673         if (result != ISC_R_SUCCESS) {
7674                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7675                 return (result);
7676         }
7677
7678         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7679         if (result != ISC_R_SUCCESS) {
7680                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7681                 return (result);
7682         }
7683
7684 #ifdef BIND9
7685         /*
7686          * Get ready for response policy IP address searching if at least one
7687          * zone has been configured as a response policy zone and this
7688          * is not a cache zone.
7689          * It would be better to know that this database is for a policy
7690          * zone named for a view, but that would require knowledge from
7691          * above such as an argv[] set from data in the zone.
7692          */
7693         if (type == dns_dbtype_zone && !dns_name_equal(origin, dns_rootname)) {
7694                 result = dns_rpz_new_cidr(mctx, origin, &rbtdb->rpz_cidr);
7695                 if (result != ISC_R_SUCCESS) {
7696                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7697                         return (result);
7698                 }
7699         }
7700 #endif
7701
7702         /*
7703          * In order to set the node callback bit correctly in zone databases,
7704          * we need to know if the node has the origin name of the zone.
7705          * In loading_addrdataset() we could simply compare the new name
7706          * to the origin name, but this is expensive.  Also, we don't know the
7707          * node name in addrdataset(), so we need another way of knowing the
7708          * zone's top.
7709          *
7710          * We now explicitly create a node for the zone's origin, and then
7711          * we simply remember the node's address.  This is safe, because
7712          * the top-of-zone node can never be deleted, nor can its address
7713          * change.
7714          */
7715         if (!IS_CACHE(rbtdb)) {
7716                 dns_rbtnode_t *nsec3node;
7717
7718                 rbtdb->origin_node = NULL;
7719                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7720                                          &rbtdb->origin_node);
7721                 if (result != ISC_R_SUCCESS) {
7722                         INSIST(result != ISC_R_EXISTS);
7723                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7724                         return (result);
7725                 }
7726                 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7727                 /*
7728                  * We need to give the origin node the right locknum.
7729                  */
7730                 dns_name_init(&name, NULL);
7731                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7732 #ifdef DNS_RBT_USEHASH
7733                 rbtdb->origin_node->locknum =
7734                         rbtdb->origin_node->hashval %
7735                         rbtdb->node_lock_count;
7736 #else
7737                 rbtdb->origin_node->locknum =
7738                         dns_name_hash(&name, ISC_TRUE) %
7739                         rbtdb->node_lock_count;
7740 #endif
7741                 /*
7742                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
7743                  * return partial matches when there is only a single NSEC3
7744                  * record in the tree.
7745                  */
7746                 nsec3node = NULL;
7747                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
7748                                          &nsec3node);
7749                 if (result != ISC_R_SUCCESS) {
7750                         INSIST(result != ISC_R_EXISTS);
7751                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7752                         return (result);
7753                 }
7754                 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
7755                 /*
7756                  * We need to give the nsec3 origin node the right locknum.
7757                  */
7758                 dns_name_init(&name, NULL);
7759                 dns_rbt_namefromnode(nsec3node, &name);
7760 #ifdef DNS_RBT_USEHASH
7761                 nsec3node->locknum = nsec3node->hashval %
7762                         rbtdb->node_lock_count;
7763 #else
7764                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
7765                         rbtdb->node_lock_count;
7766 #endif
7767         }
7768
7769         /*
7770          * Misc. Initialization.
7771          */
7772         result = isc_refcount_init(&rbtdb->references, 1);
7773         if (result != ISC_R_SUCCESS) {
7774                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7775                 return (result);
7776         }
7777         rbtdb->attributes = 0;
7778         rbtdb->task = NULL;
7779
7780         /*
7781          * Version Initialization.
7782          */
7783         rbtdb->current_serial = 1;
7784         rbtdb->least_serial = 1;
7785         rbtdb->next_serial = 2;
7786         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7787         if (rbtdb->current_version == NULL) {
7788                 isc_refcount_decrement(&rbtdb->references, NULL);
7789                 isc_refcount_destroy(&rbtdb->references);
7790                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7791                 return (ISC_R_NOMEMORY);
7792         }
7793         rbtdb->current_version->rbtdb = rbtdb;
7794         rbtdb->current_version->secure = dns_db_insecure;
7795         rbtdb->current_version->havensec3 = ISC_FALSE;
7796         rbtdb->current_version->flags = 0;
7797         rbtdb->current_version->iterations = 0;
7798         rbtdb->current_version->hash = 0;
7799         rbtdb->current_version->salt_length = 0;
7800         memset(rbtdb->current_version->salt, 0,
7801                sizeof(rbtdb->current_version->salt));
7802         rbtdb->future_version = NULL;
7803         ISC_LIST_INIT(rbtdb->open_versions);
7804         /*
7805          * Keep the current version in the open list so that list operation
7806          * won't happen in normal lookup operations.
7807          */
7808         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7809
7810         rbtdb->common.magic = DNS_DB_MAGIC;
7811         rbtdb->common.impmagic = RBTDB_MAGIC;
7812
7813         *dbp = (dns_db_t *)rbtdb;
7814
7815         return (ISC_R_SUCCESS);
7816
7817  cleanup_deadnodes:
7818         isc_mem_put(mctx, rbtdb->deadnodes,
7819                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7820
7821  cleanup_heaps:
7822         if (rbtdb->heaps != NULL) {
7823                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7824                         if (rbtdb->heaps[i] != NULL)
7825                                 isc_heap_destroy(&rbtdb->heaps[i]);
7826                 isc_mem_put(hmctx, rbtdb->heaps,
7827                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7828         }
7829
7830  cleanup_rdatasets:
7831         if (rbtdb->rdatasets != NULL)
7832                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7833                             sizeof(rdatasetheaderlist_t));
7834  cleanup_rrsetstats:
7835         if (rbtdb->rrsetstats != NULL)
7836                 dns_stats_detach(&rbtdb->rrsetstats);
7837
7838  cleanup_node_locks:
7839         isc_mem_put(mctx, rbtdb->node_locks,
7840                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7841
7842  cleanup_tree_lock:
7843         isc_rwlock_destroy(&rbtdb->tree_lock);
7844
7845  cleanup_lock:
7846         RBTDB_DESTROYLOCK(&rbtdb->lock);
7847
7848  cleanup_rbtdb:
7849         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7850         return (result);
7851 }
7852
7853
7854 /*
7855  * Slabbed Rdataset Methods
7856  */
7857
7858 static void
7859 rdataset_disassociate(dns_rdataset_t *rdataset) {
7860         dns_db_t *db = rdataset->private1;
7861         dns_dbnode_t *node = rdataset->private2;
7862
7863         detachnode(db, &node);
7864 }
7865
7866 static isc_result_t
7867 rdataset_first(dns_rdataset_t *rdataset) {
7868         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7869         unsigned int count;
7870
7871         count = raw[0] * 256 + raw[1];
7872         if (count == 0) {
7873                 rdataset->private5 = NULL;
7874                 return (ISC_R_NOMORE);
7875         }
7876
7877 #if DNS_RDATASET_FIXED
7878         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7879                 raw += 2 + (4 * count);
7880         else
7881 #endif
7882                 raw += 2;
7883
7884         /*
7885          * The privateuint4 field is the number of rdata beyond the
7886          * cursor position, so we decrement the total count by one
7887          * before storing it.
7888          *
7889          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7890          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7891          * to the first entry in the offset table.
7892          */
7893         count--;
7894         rdataset->privateuint4 = count;
7895         rdataset->private5 = raw;
7896
7897         return (ISC_R_SUCCESS);
7898 }
7899
7900 static isc_result_t
7901 rdataset_next(dns_rdataset_t *rdataset) {
7902         unsigned int count;
7903         unsigned int length;
7904         unsigned char *raw;     /* RDATASLAB */
7905
7906         count = rdataset->privateuint4;
7907         if (count == 0)
7908                 return (ISC_R_NOMORE);
7909         count--;
7910         rdataset->privateuint4 = count;
7911
7912         /*
7913          * Skip forward one record (length + 4) or one offset (4).
7914          */
7915         raw = rdataset->private5;
7916 #if DNS_RDATASET_FIXED
7917         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7918 #endif
7919                 length = raw[0] * 256 + raw[1];
7920                 raw += length;
7921 #if DNS_RDATASET_FIXED
7922         }
7923         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7924 #else
7925         rdataset->private5 = raw + 2;           /* length(2) */
7926 #endif
7927
7928         return (ISC_R_SUCCESS);
7929 }
7930
7931 static void
7932 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7933         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7934 #if DNS_RDATASET_FIXED
7935         unsigned int offset;
7936 #endif
7937         unsigned int length;
7938         isc_region_t r;
7939         unsigned int flags = 0;
7940
7941         REQUIRE(raw != NULL);
7942
7943         /*
7944          * Find the start of the record if not already in private5
7945          * then skip the length and order fields.
7946          */
7947 #if DNS_RDATASET_FIXED
7948         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7949                 offset = (raw[0] << 24) + (raw[1] << 16) +
7950                          (raw[2] << 8) + raw[3];
7951                 raw = rdataset->private3;
7952                 raw += offset;
7953         }
7954 #endif
7955         length = raw[0] * 256 + raw[1];
7956 #if DNS_RDATASET_FIXED
7957         raw += 4;
7958 #else
7959         raw += 2;
7960 #endif
7961         if (rdataset->type == dns_rdatatype_rrsig) {
7962                 if (*raw & DNS_RDATASLAB_OFFLINE)
7963                         flags |= DNS_RDATA_OFFLINE;
7964                 length--;
7965                 raw++;
7966         }
7967         r.length = length;
7968         r.base = raw;
7969         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7970         rdata->flags |= flags;
7971 }
7972
7973 static void
7974 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7975         dns_db_t *db = source->private1;
7976         dns_dbnode_t *node = source->private2;
7977         dns_dbnode_t *cloned_node = NULL;
7978
7979         attachnode(db, node, &cloned_node);
7980         *target = *source;
7981
7982         /*
7983          * Reset iterator state.
7984          */
7985         target->privateuint4 = 0;
7986         target->private5 = NULL;
7987 }
7988
7989 static unsigned int
7990 rdataset_count(dns_rdataset_t *rdataset) {
7991         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7992         unsigned int count;
7993
7994         count = raw[0] * 256 + raw[1];
7995
7996         return (count);
7997 }
7998
7999 static isc_result_t
8000 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
8001                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8002 {
8003         dns_db_t *db = rdataset->private1;
8004         dns_dbnode_t *node = rdataset->private2;
8005         dns_dbnode_t *cloned_node;
8006         struct noqname *noqname = rdataset->private6;
8007
8008         cloned_node = NULL;
8009         attachnode(db, node, &cloned_node);
8010         nsec->methods = &rdataset_methods;
8011         nsec->rdclass = db->rdclass;
8012         nsec->type = noqname->type;
8013         nsec->covers = 0;
8014         nsec->ttl = rdataset->ttl;
8015         nsec->trust = rdataset->trust;
8016         nsec->private1 = rdataset->private1;
8017         nsec->private2 = rdataset->private2;
8018         nsec->private3 = noqname->neg;
8019         nsec->privateuint4 = 0;
8020         nsec->private5 = NULL;
8021         nsec->private6 = NULL;
8022         nsec->private7 = NULL;
8023
8024         cloned_node = NULL;
8025         attachnode(db, node, &cloned_node);
8026         nsecsig->methods = &rdataset_methods;
8027         nsecsig->rdclass = db->rdclass;
8028         nsecsig->type = dns_rdatatype_rrsig;
8029         nsecsig->covers = noqname->type;
8030         nsecsig->ttl = rdataset->ttl;
8031         nsecsig->trust = rdataset->trust;
8032         nsecsig->private1 = rdataset->private1;
8033         nsecsig->private2 = rdataset->private2;
8034         nsecsig->private3 = noqname->negsig;
8035         nsecsig->privateuint4 = 0;
8036         nsecsig->private5 = NULL;
8037         nsec->private6 = NULL;
8038         nsec->private7 = NULL;
8039
8040         dns_name_clone(&noqname->name, name);
8041
8042         return (ISC_R_SUCCESS);
8043 }
8044
8045 static isc_result_t
8046 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
8047                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
8048 {
8049         dns_db_t *db = rdataset->private1;
8050         dns_dbnode_t *node = rdataset->private2;
8051         dns_dbnode_t *cloned_node;
8052         struct noqname *closest = rdataset->private7;
8053
8054         cloned_node = NULL;
8055         attachnode(db, node, &cloned_node);
8056         nsec->methods = &rdataset_methods;
8057         nsec->rdclass = db->rdclass;
8058         nsec->type = closest->type;
8059         nsec->covers = 0;
8060         nsec->ttl = rdataset->ttl;
8061         nsec->trust = rdataset->trust;
8062         nsec->private1 = rdataset->private1;
8063         nsec->private2 = rdataset->private2;
8064         nsec->private3 = closest->neg;
8065         nsec->privateuint4 = 0;
8066         nsec->private5 = NULL;
8067         nsec->private6 = NULL;
8068         nsec->private7 = NULL;
8069
8070         cloned_node = NULL;
8071         attachnode(db, node, &cloned_node);
8072         nsecsig->methods = &rdataset_methods;
8073         nsecsig->rdclass = db->rdclass;
8074         nsecsig->type = dns_rdatatype_rrsig;
8075         nsecsig->covers = closest->type;
8076         nsecsig->ttl = rdataset->ttl;
8077         nsecsig->trust = rdataset->trust;
8078         nsecsig->private1 = rdataset->private1;
8079         nsecsig->private2 = rdataset->private2;
8080         nsecsig->private3 = closest->negsig;
8081         nsecsig->privateuint4 = 0;
8082         nsecsig->private5 = NULL;
8083         nsec->private6 = NULL;
8084         nsec->private7 = NULL;
8085
8086         dns_name_clone(&closest->name, name);
8087
8088         return (ISC_R_SUCCESS);
8089 }
8090
8091 static void
8092 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
8093         dns_rbtdb_t *rbtdb = rdataset->private1;
8094         dns_rbtnode_t *rbtnode = rdataset->private2;
8095         rdatasetheader_t *header = rdataset->private3;
8096
8097         header--;
8098         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8099                   isc_rwlocktype_write);
8100         header->trust = rdataset->trust = trust;
8101         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8102                   isc_rwlocktype_write);
8103 }
8104
8105 static void
8106 rdataset_expire(dns_rdataset_t *rdataset) {
8107         dns_rbtdb_t *rbtdb = rdataset->private1;
8108         dns_rbtnode_t *rbtnode = rdataset->private2;
8109         rdatasetheader_t *header = rdataset->private3;
8110
8111         header--;
8112         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8113                   isc_rwlocktype_write);
8114         expire_header(rbtdb, header, ISC_FALSE);
8115         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8116                   isc_rwlocktype_write);
8117 }
8118
8119 /*
8120  * Rdataset Iterator Methods
8121  */
8122
8123 static void
8124 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
8125         rbtdb_rdatasetiter_t *rbtiterator;
8126
8127         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
8128
8129         if (rbtiterator->common.version != NULL)
8130                 closeversion(rbtiterator->common.db,
8131                              &rbtiterator->common.version, ISC_FALSE);
8132         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
8133         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
8134                     sizeof(*rbtiterator));
8135
8136         *iteratorp = NULL;
8137 }
8138
8139 static isc_result_t
8140 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
8141         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8142         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8143         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8144         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8145         rdatasetheader_t *header, *top_next;
8146         rbtdb_serial_t serial;
8147         isc_stdtime_t now;
8148
8149         if (IS_CACHE(rbtdb)) {
8150                 serial = 1;
8151                 now = rbtiterator->common.now;
8152         } else {
8153                 serial = rbtversion->serial;
8154                 now = 0;
8155         }
8156
8157         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8158                   isc_rwlocktype_read);
8159
8160         for (header = rbtnode->data; header != NULL; header = top_next) {
8161                 top_next = header->next;
8162                 do {
8163                         if (header->serial <= serial && !IGNORE(header)) {
8164                                 /*
8165                                  * Is this a "this rdataset doesn't exist"
8166                                  * record?  Or is it too old in the cache?
8167                                  *
8168                                  * Note: unlike everywhere else, we
8169                                  * check for now > header->rdh_ttl instead
8170                                  * of now >= header->rdh_ttl.  This allows
8171                                  * ANY and RRSIG queries for 0 TTL
8172                                  * rdatasets to work.
8173                                  */
8174                                 if (NONEXISTENT(header) ||
8175                                     (now != 0 && now > header->rdh_ttl))
8176                                         header = NULL;
8177                                 break;
8178                         } else
8179                                 header = header->down;
8180                 } while (header != NULL);
8181                 if (header != NULL)
8182                         break;
8183         }
8184
8185         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8186                     isc_rwlocktype_read);
8187
8188         rbtiterator->current = header;
8189
8190         if (header == NULL)
8191                 return (ISC_R_NOMORE);
8192
8193         return (ISC_R_SUCCESS);
8194 }
8195
8196 static isc_result_t
8197 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
8198         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8199         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8200         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8201         rbtdb_version_t *rbtversion = rbtiterator->common.version;
8202         rdatasetheader_t *header, *top_next;
8203         rbtdb_serial_t serial;
8204         isc_stdtime_t now;
8205         rbtdb_rdatatype_t type, negtype;
8206         dns_rdatatype_t rdtype, covers;
8207
8208         header = rbtiterator->current;
8209         if (header == NULL)
8210                 return (ISC_R_NOMORE);
8211
8212         if (IS_CACHE(rbtdb)) {
8213                 serial = 1;
8214                 now = rbtiterator->common.now;
8215         } else {
8216                 serial = rbtversion->serial;
8217                 now = 0;
8218         }
8219
8220         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8221                   isc_rwlocktype_read);
8222
8223         type = header->type;
8224         rdtype = RBTDB_RDATATYPE_BASE(header->type);
8225         if (NEGATIVE(header)) {
8226                 covers = RBTDB_RDATATYPE_EXT(header->type);
8227                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
8228         } else
8229                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
8230         for (header = header->next; header != NULL; header = top_next) {
8231                 top_next = header->next;
8232                 /*
8233                  * If not walking back up the down list.
8234                  */
8235                 if (header->type != type && header->type != negtype) {
8236                         do {
8237                                 if (header->serial <= serial &&
8238                                     !IGNORE(header)) {
8239                                         /*
8240                                          * Is this a "this rdataset doesn't
8241                                          * exist" record?
8242                                          *
8243                                          * Note: unlike everywhere else, we
8244                                          * check for now > header->ttl instead
8245                                          * of now >= header->ttl.  This allows
8246                                          * ANY and RRSIG queries for 0 TTL
8247                                          * rdatasets to work.
8248                                          */
8249                                         if ((header->attributes &
8250                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
8251                                             (now != 0 && now > header->rdh_ttl))
8252                                                 header = NULL;
8253                                         break;
8254                                 } else
8255                                         header = header->down;
8256                         } while (header != NULL);
8257                         if (header != NULL)
8258                                 break;
8259                 }
8260         }
8261
8262         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8263                     isc_rwlocktype_read);
8264
8265         rbtiterator->current = header;
8266
8267         if (header == NULL)
8268                 return (ISC_R_NOMORE);
8269
8270         return (ISC_R_SUCCESS);
8271 }
8272
8273 static void
8274 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
8275         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
8276         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
8277         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
8278         rdatasetheader_t *header;
8279
8280         header = rbtiterator->current;
8281         REQUIRE(header != NULL);
8282
8283         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8284                   isc_rwlocktype_read);
8285
8286         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
8287                       rdataset);
8288
8289         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
8290                     isc_rwlocktype_read);
8291 }
8292
8293
8294 /*
8295  * Database Iterator Methods
8296  */
8297
8298 static inline void
8299 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8300         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8301         dns_rbtnode_t *node = rbtdbiter->node;
8302
8303         if (node == NULL)
8304                 return;
8305
8306         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
8307         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
8308 }
8309
8310 static inline void
8311 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
8312         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8313         dns_rbtnode_t *node = rbtdbiter->node;
8314         nodelock_t *lock;
8315
8316         if (node == NULL)
8317                 return;
8318
8319         lock = &rbtdb->node_locks[node->locknum].lock;
8320         NODE_LOCK(lock, isc_rwlocktype_read);
8321         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
8322                             rbtdbiter->tree_locked, ISC_FALSE);
8323         NODE_UNLOCK(lock, isc_rwlocktype_read);
8324
8325         rbtdbiter->node = NULL;
8326 }
8327
8328 static void
8329 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
8330         dns_rbtnode_t *node;
8331         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8332         isc_boolean_t was_read_locked = ISC_FALSE;
8333         nodelock_t *lock;
8334         int i;
8335
8336         if (rbtdbiter->delete != 0) {
8337                 /*
8338                  * Note that "%d node of %d in tree" can report things like
8339                  * "flush_deletions: 59 nodes of 41 in tree".  This means
8340                  * That some nodes appear on the deletions list more than
8341                  * once.  Only the last occurence will actually be deleted.
8342                  */
8343                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
8344                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
8345                               "flush_deletions: %d nodes of %d in tree",
8346                               rbtdbiter->delete,
8347                               dns_rbt_nodecount(rbtdb->tree));
8348
8349                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8350                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8351                         was_read_locked = ISC_TRUE;
8352                 }
8353                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8354                 rbtdbiter->tree_locked = isc_rwlocktype_write;
8355
8356                 for (i = 0; i < rbtdbiter->delete; i++) {
8357                         node = rbtdbiter->deletions[i];
8358                         lock = &rbtdb->node_locks[node->locknum].lock;
8359
8360                         NODE_LOCK(lock, isc_rwlocktype_read);
8361                         decrement_reference(rbtdb, node, 0,
8362                                             isc_rwlocktype_read,
8363                                             rbtdbiter->tree_locked, ISC_FALSE);
8364                         NODE_UNLOCK(lock, isc_rwlocktype_read);
8365                 }
8366
8367                 rbtdbiter->delete = 0;
8368
8369                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
8370                 if (was_read_locked) {
8371                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8372                         rbtdbiter->tree_locked = isc_rwlocktype_read;
8373
8374                 } else {
8375                         rbtdbiter->tree_locked = isc_rwlocktype_none;
8376                 }
8377         }
8378 }
8379
8380 static inline void
8381 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8382         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8383
8384         REQUIRE(rbtdbiter->paused);
8385         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8386
8387         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8388         rbtdbiter->tree_locked = isc_rwlocktype_read;
8389
8390         rbtdbiter->paused = ISC_FALSE;
8391 }
8392
8393 static void
8394 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8395         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8396         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8397         dns_db_t *db = NULL;
8398
8399         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8400                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8401                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8402         } else
8403                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8404
8405         dereference_iter_node(rbtdbiter);
8406
8407         flush_deletions(rbtdbiter);
8408
8409         dns_db_attach(rbtdbiter->common.db, &db);
8410         dns_db_detach(&rbtdbiter->common.db);
8411
8412         dns_rbtnodechain_reset(&rbtdbiter->chain);
8413         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8414         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
8415         dns_db_detach(&db);
8416
8417         *iteratorp = NULL;
8418 }
8419
8420 static isc_result_t
8421 dbiterator_first(dns_dbiterator_t *iterator) {
8422         isc_result_t result;
8423         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8424         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8425         dns_name_t *name, *origin;
8426
8427         if (rbtdbiter->result != ISC_R_SUCCESS &&
8428             rbtdbiter->result != ISC_R_NOMORE)
8429                 return (rbtdbiter->result);
8430
8431         if (rbtdbiter->paused)
8432                 resume_iteration(rbtdbiter);
8433
8434         dereference_iter_node(rbtdbiter);
8435
8436         name = dns_fixedname_name(&rbtdbiter->name);
8437         origin = dns_fixedname_name(&rbtdbiter->origin);
8438         dns_rbtnodechain_reset(&rbtdbiter->chain);
8439         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8440
8441         if (rbtdbiter->nsec3only) {
8442                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8443                 result = dns_rbtnodechain_first(rbtdbiter->current,
8444                                                 rbtdb->nsec3, name, origin);
8445         } else {
8446                 rbtdbiter->current = &rbtdbiter->chain;
8447                 result = dns_rbtnodechain_first(rbtdbiter->current,
8448                                                 rbtdb->tree, name, origin);
8449                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8450                         rbtdbiter->current = &rbtdbiter->nsec3chain;
8451                         result = dns_rbtnodechain_first(rbtdbiter->current,
8452                                                         rbtdb->nsec3, name,
8453                                                         origin);
8454                 }
8455         }
8456         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8457                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8458                                                   NULL, &rbtdbiter->node);
8459                 if (result == ISC_R_SUCCESS) {
8460                         rbtdbiter->new_origin = ISC_TRUE;
8461                         reference_iter_node(rbtdbiter);
8462                 }
8463         } else {
8464                 INSIST(result == ISC_R_NOTFOUND);
8465                 result = ISC_R_NOMORE; /* The tree is empty. */
8466         }
8467
8468         rbtdbiter->result = result;
8469
8470         return (result);
8471 }
8472
8473 static isc_result_t
8474 dbiterator_last(dns_dbiterator_t *iterator) {
8475         isc_result_t result;
8476         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8477         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8478         dns_name_t *name, *origin;
8479
8480         if (rbtdbiter->result != ISC_R_SUCCESS &&
8481             rbtdbiter->result != ISC_R_NOMORE)
8482                 return (rbtdbiter->result);
8483
8484         if (rbtdbiter->paused)
8485                 resume_iteration(rbtdbiter);
8486
8487         dereference_iter_node(rbtdbiter);
8488
8489         name = dns_fixedname_name(&rbtdbiter->name);
8490         origin = dns_fixedname_name(&rbtdbiter->origin);
8491         dns_rbtnodechain_reset(&rbtdbiter->chain);
8492         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8493
8494         result = ISC_R_NOTFOUND;
8495         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8496                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8497                 result = dns_rbtnodechain_last(rbtdbiter->current,
8498                                                rbtdb->nsec3, name, origin);
8499         }
8500         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8501                 rbtdbiter->current = &rbtdbiter->chain;
8502                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8503                                                name, origin);
8504         }
8505         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8506                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8507                                                   NULL, &rbtdbiter->node);
8508                 if (result == ISC_R_SUCCESS) {
8509                         rbtdbiter->new_origin = ISC_TRUE;
8510                         reference_iter_node(rbtdbiter);
8511                 }
8512         } else {
8513                 INSIST(result == ISC_R_NOTFOUND);
8514                 result = ISC_R_NOMORE; /* The tree is empty. */
8515         }
8516
8517         rbtdbiter->result = result;
8518
8519         return (result);
8520 }
8521
8522 static isc_result_t
8523 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8524         isc_result_t result, tresult;
8525         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8526         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8527         dns_name_t *iname, *origin;
8528
8529         if (rbtdbiter->result != ISC_R_SUCCESS &&
8530             rbtdbiter->result != ISC_R_NOTFOUND &&
8531             rbtdbiter->result != ISC_R_NOMORE)
8532                 return (rbtdbiter->result);
8533
8534         if (rbtdbiter->paused)
8535                 resume_iteration(rbtdbiter);
8536
8537         dereference_iter_node(rbtdbiter);
8538
8539         iname = dns_fixedname_name(&rbtdbiter->name);
8540         origin = dns_fixedname_name(&rbtdbiter->origin);
8541         dns_rbtnodechain_reset(&rbtdbiter->chain);
8542         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8543
8544         if (rbtdbiter->nsec3only) {
8545                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8546                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8547                                           &rbtdbiter->node,
8548                                           rbtdbiter->current,
8549                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8550         } else if (rbtdbiter->nonsec3) {
8551                 rbtdbiter->current = &rbtdbiter->chain;
8552                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8553                                           &rbtdbiter->node,
8554                                           rbtdbiter->current,
8555                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8556         } else {
8557                 /*
8558                  * Stay on main chain if not found on either chain.
8559                  */
8560                 rbtdbiter->current = &rbtdbiter->chain;
8561                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8562                                           &rbtdbiter->node,
8563                                           rbtdbiter->current,
8564                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8565                 if (result == DNS_R_PARTIALMATCH) {
8566                         dns_rbtnode_t *node = NULL;
8567                         tresult = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8568                                                   &node, &rbtdbiter->nsec3chain,
8569                                                   DNS_RBTFIND_EMPTYDATA,
8570                                                   NULL, NULL);
8571                         if (tresult == ISC_R_SUCCESS) {
8572                                 rbtdbiter->node = node;
8573                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8574                                 result = tresult;
8575                         }
8576                 }
8577         }
8578
8579 #if 1
8580         if (result == ISC_R_SUCCESS) {
8581                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8582                                                   origin, NULL);
8583                 if (result == ISC_R_SUCCESS) {
8584                         rbtdbiter->new_origin = ISC_TRUE;
8585                         reference_iter_node(rbtdbiter);
8586                 }
8587         } else if (result == DNS_R_PARTIALMATCH) {
8588                 result = ISC_R_NOTFOUND;
8589                 rbtdbiter->node = NULL;
8590         }
8591
8592         rbtdbiter->result = result;
8593 #else
8594         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8595                 isc_result_t tresult;
8596                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8597                                                    origin, NULL);
8598                 if (tresult == ISC_R_SUCCESS) {
8599                         rbtdbiter->new_origin = ISC_TRUE;
8600                         reference_iter_node(rbtdbiter);
8601                 } else {
8602                         result = tresult;
8603                         rbtdbiter->node = NULL;
8604                 }
8605         } else
8606                 rbtdbiter->node = NULL;
8607
8608         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8609                             ISC_R_SUCCESS : result;
8610 #endif
8611
8612         return (result);
8613 }
8614
8615 static isc_result_t
8616 dbiterator_prev(dns_dbiterator_t *iterator) {
8617         isc_result_t result;
8618         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8619         dns_name_t *name, *origin;
8620         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8621
8622         REQUIRE(rbtdbiter->node != NULL);
8623
8624         if (rbtdbiter->result != ISC_R_SUCCESS)
8625                 return (rbtdbiter->result);
8626
8627         if (rbtdbiter->paused)
8628                 resume_iteration(rbtdbiter);
8629
8630         name = dns_fixedname_name(&rbtdbiter->name);
8631         origin = dns_fixedname_name(&rbtdbiter->origin);
8632         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8633         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8634             !rbtdbiter->nonsec3 &&
8635             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8636                 rbtdbiter->current = &rbtdbiter->chain;
8637                 dns_rbtnodechain_reset(rbtdbiter->current);
8638                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8639                                                name, origin);
8640                 if (result == ISC_R_NOTFOUND)
8641                         result = ISC_R_NOMORE;
8642         }
8643
8644         dereference_iter_node(rbtdbiter);
8645
8646         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8647                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8648                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8649                                                   NULL, &rbtdbiter->node);
8650         }
8651
8652         if (result == ISC_R_SUCCESS)
8653                 reference_iter_node(rbtdbiter);
8654
8655         rbtdbiter->result = result;
8656
8657         return (result);
8658 }
8659
8660 static isc_result_t
8661 dbiterator_next(dns_dbiterator_t *iterator) {
8662         isc_result_t result;
8663         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8664         dns_name_t *name, *origin;
8665         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8666
8667         REQUIRE(rbtdbiter->node != NULL);
8668
8669         if (rbtdbiter->result != ISC_R_SUCCESS)
8670                 return (rbtdbiter->result);
8671
8672         if (rbtdbiter->paused)
8673                 resume_iteration(rbtdbiter);
8674
8675         name = dns_fixedname_name(&rbtdbiter->name);
8676         origin = dns_fixedname_name(&rbtdbiter->origin);
8677         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8678         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8679             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8680                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8681                 dns_rbtnodechain_reset(rbtdbiter->current);
8682                 result = dns_rbtnodechain_first(rbtdbiter->current,
8683                                                 rbtdb->nsec3, name, origin);
8684                 if (result == ISC_R_NOTFOUND)
8685                         result = ISC_R_NOMORE;
8686         }
8687
8688         dereference_iter_node(rbtdbiter);
8689
8690         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8691                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8692                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8693                                                   NULL, &rbtdbiter->node);
8694         }
8695         if (result == ISC_R_SUCCESS)
8696                 reference_iter_node(rbtdbiter);
8697
8698         rbtdbiter->result = result;
8699
8700         return (result);
8701 }
8702
8703 static isc_result_t
8704 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8705                    dns_name_t *name)
8706 {
8707         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8708         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8709         dns_rbtnode_t *node = rbtdbiter->node;
8710         isc_result_t result;
8711         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8712         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8713
8714         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8715         REQUIRE(rbtdbiter->node != NULL);
8716
8717         if (rbtdbiter->paused)
8718                 resume_iteration(rbtdbiter);
8719
8720         if (name != NULL) {
8721                 if (rbtdbiter->common.relative_names)
8722                         origin = NULL;
8723                 result = dns_name_concatenate(nodename, origin, name, NULL);
8724                 if (result != ISC_R_SUCCESS)
8725                         return (result);
8726                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8727                         result = DNS_R_NEWORIGIN;
8728         } else
8729                 result = ISC_R_SUCCESS;
8730
8731         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8732         new_reference(rbtdb, node);
8733         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8734
8735         *nodep = rbtdbiter->node;
8736
8737         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8738                 isc_result_t expire_result;
8739
8740                 /*
8741                  * If the deletion array is full, flush it before trying
8742                  * to expire the current node.  The current node can't
8743                  * fully deleted while the iteration cursor is still on it.
8744                  */
8745                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8746                         flush_deletions(rbtdbiter);
8747
8748                 expire_result = expirenode(iterator->db, *nodep, 0);
8749
8750                 /*
8751                  * expirenode() currently always returns success.
8752                  */
8753                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8754                         unsigned int refs;
8755
8756                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8757                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8758                         dns_rbtnode_refincrement(node, &refs);
8759                         INSIST(refs != 0);
8760                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8761                 }
8762         }
8763
8764         return (result);
8765 }
8766
8767 static isc_result_t
8768 dbiterator_pause(dns_dbiterator_t *iterator) {
8769         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8770         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8771
8772         if (rbtdbiter->result != ISC_R_SUCCESS &&
8773             rbtdbiter->result != ISC_R_NOMORE)
8774                 return (rbtdbiter->result);
8775
8776         if (rbtdbiter->paused)
8777                 return (ISC_R_SUCCESS);
8778
8779         rbtdbiter->paused = ISC_TRUE;
8780
8781         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8782                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8783                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8784                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8785         }
8786
8787         flush_deletions(rbtdbiter);
8788
8789         return (ISC_R_SUCCESS);
8790 }
8791
8792 static isc_result_t
8793 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8794         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8795         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8796
8797         if (rbtdbiter->result != ISC_R_SUCCESS)
8798                 return (rbtdbiter->result);
8799
8800         return (dns_name_copy(origin, name, NULL));
8801 }
8802
8803 /*%
8804  * Additional cache routines.
8805  */
8806 static isc_result_t
8807 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8808                        dns_rdatatype_t qtype, dns_acache_t *acache,
8809                        dns_zone_t **zonep, dns_db_t **dbp,
8810                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8811                        dns_name_t *fname, dns_message_t *msg,
8812                        isc_stdtime_t now)
8813 {
8814 #ifndef BIND9
8815         UNUSED(rdataset);
8816         UNUSED(type);
8817         UNUSED(qtype);
8818         UNUSED(acache);
8819         UNUSED(zonep);
8820         UNUSED(dbp);
8821         UNUSED(versionp);
8822         UNUSED(nodep);
8823         UNUSED(fname);
8824         UNUSED(msg);
8825         UNUSED(now);
8826
8827         return (ISC_R_NOTIMPLEMENTED);
8828 #else
8829         dns_rbtdb_t *rbtdb = rdataset->private1;
8830         dns_rbtnode_t *rbtnode = rdataset->private2;
8831         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8832         unsigned int current_count = rdataset->privateuint4;
8833         unsigned int count;
8834         rdatasetheader_t *header;
8835         nodelock_t *nodelock;
8836         unsigned int total_count;
8837         acachectl_t *acarray;
8838         dns_acacheentry_t *entry;
8839         isc_result_t result;
8840
8841         UNUSED(qtype); /* we do not use this value at least for now */
8842         UNUSED(acache);
8843
8844         header = (struct rdatasetheader *)(raw - sizeof(*header));
8845
8846         total_count = raw[0] * 256 + raw[1];
8847         INSIST(total_count > current_count);
8848         count = total_count - current_count - 1;
8849
8850         acarray = NULL;
8851
8852         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8853         NODE_LOCK(nodelock, isc_rwlocktype_read);
8854
8855         switch (type) {
8856         case dns_rdatasetadditional_fromauth:
8857                 acarray = header->additional_auth;
8858                 break;
8859         case dns_rdatasetadditional_fromcache:
8860                 acarray = NULL;
8861                 break;
8862         case dns_rdatasetadditional_fromglue:
8863                 acarray = header->additional_glue;
8864                 break;
8865         default:
8866                 INSIST(0);
8867         }
8868
8869         if (acarray == NULL) {
8870                 if (type != dns_rdatasetadditional_fromcache)
8871                         dns_acache_countquerymiss(acache);
8872                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8873                 return (ISC_R_NOTFOUND);
8874         }
8875
8876         if (acarray[count].entry == NULL) {
8877                 dns_acache_countquerymiss(acache);
8878                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8879                 return (ISC_R_NOTFOUND);
8880         }
8881
8882         entry = NULL;
8883         dns_acache_attachentry(acarray[count].entry, &entry);
8884
8885         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8886
8887         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8888                                      nodep, fname, msg, now);
8889
8890         dns_acache_detachentry(&entry);
8891
8892         return (result);
8893 }
8894
8895 static void
8896 acache_callback(dns_acacheentry_t *entry, void **arg) {
8897         dns_rbtdb_t *rbtdb;
8898         dns_rbtnode_t *rbtnode;
8899         nodelock_t *nodelock;
8900         acachectl_t *acarray = NULL;
8901         acache_cbarg_t *cbarg;
8902         unsigned int count;
8903
8904         REQUIRE(arg != NULL);
8905         cbarg = *arg;
8906
8907         /*
8908          * The caller must hold the entry lock.
8909          */
8910
8911         rbtdb = (dns_rbtdb_t *)cbarg->db;
8912         rbtnode = (dns_rbtnode_t *)cbarg->node;
8913
8914         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8915         NODE_LOCK(nodelock, isc_rwlocktype_write);
8916
8917         switch (cbarg->type) {
8918         case dns_rdatasetadditional_fromauth:
8919                 acarray = cbarg->header->additional_auth;
8920                 break;
8921         case dns_rdatasetadditional_fromglue:
8922                 acarray = cbarg->header->additional_glue;
8923                 break;
8924         default:
8925                 INSIST(0);
8926         }
8927
8928         count = cbarg->count;
8929         if (acarray != NULL && acarray[count].entry == entry) {
8930                 acarray[count].entry = NULL;
8931                 INSIST(acarray[count].cbarg == cbarg);
8932                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8933                 acarray[count].cbarg = NULL;
8934         } else
8935                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8936
8937         dns_acache_detachentry(&entry);
8938
8939         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8940
8941         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8942         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8943
8944         *arg = NULL;
8945 #endif /* BIND9 */
8946 }
8947
#ifdef BIND9
/*%
 * Cancel 'entry' in the acache and dispose of its callback argument:
 * the node and database references held by '*cbargp' are released, the
 * argument itself is returned to 'mctx', and '*cbargp' is set to NULL.
 *
 * The caller remains responsible for detaching 'entry'.
 */
static void
acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
                      acache_cbarg_t **cbargp)
{
        acache_cbarg_t *victim;

        REQUIRE(mctx != NULL);
        REQUIRE(entry != NULL);
        REQUIRE(cbargp != NULL && *cbargp != NULL);

        victim = *cbargp;

        dns_acache_cancelentry(entry);

        /* Release what the callback argument was keeping alive. */
        dns_db_detachnode(victim->db, &victim->node);
        dns_db_detach(&victim->db);

        isc_mem_put(mctx, victim, sizeof(*victim));

        *cbargp = NULL;
}
#endif /* BIND9 */
8970
8971 static isc_result_t
8972 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8973                        dns_rdatatype_t qtype, dns_acache_t *acache,
8974                        dns_zone_t *zone, dns_db_t *db,
8975                        dns_dbversion_t *version, dns_dbnode_t *node,
8976                        dns_name_t *fname)
8977 {
8978 #ifndef BIND9
8979         UNUSED(rdataset);
8980         UNUSED(type);
8981         UNUSED(qtype);
8982         UNUSED(acache);
8983         UNUSED(zone);
8984         UNUSED(db);
8985         UNUSED(version);
8986         UNUSED(node);
8987         UNUSED(fname);
8988
8989         return (ISC_R_NOTIMPLEMENTED);
8990 #else
8991         dns_rbtdb_t *rbtdb = rdataset->private1;
8992         dns_rbtnode_t *rbtnode = rdataset->private2;
8993         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8994         unsigned int current_count = rdataset->privateuint4;
8995         rdatasetheader_t *header;
8996         unsigned int total_count, count;
8997         nodelock_t *nodelock;
8998         isc_result_t result;
8999         acachectl_t *acarray;
9000         dns_acacheentry_t *newentry, *oldentry = NULL;
9001         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
9002
9003         UNUSED(qtype);
9004
9005         if (type == dns_rdatasetadditional_fromcache)
9006                 return (ISC_R_SUCCESS);
9007
9008         header = (struct rdatasetheader *)(raw - sizeof(*header));
9009
9010         total_count = raw[0] * 256 + raw[1];
9011         INSIST(total_count > current_count);
9012         count = total_count - current_count - 1; /* should be private data */
9013
9014         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
9015         if (newcbarg == NULL)
9016                 return (ISC_R_NOMEMORY);
9017         newcbarg->type = type;
9018         newcbarg->count = count;
9019         newcbarg->header = header;
9020         newcbarg->db = NULL;
9021         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
9022         newcbarg->node = NULL;
9023         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
9024                           &newcbarg->node);
9025         newentry = NULL;
9026         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
9027                                         acache_callback, newcbarg, &newentry);
9028         if (result != ISC_R_SUCCESS)
9029                 goto fail;
9030         /* Set cache data in the new entry. */
9031         result = dns_acache_setentry(acache, newentry, zone, db,
9032                                      version, node, fname);
9033         if (result != ISC_R_SUCCESS)
9034                 goto fail;
9035
9036         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9037         NODE_LOCK(nodelock, isc_rwlocktype_write);
9038
9039         acarray = NULL;
9040         switch (type) {
9041         case dns_rdatasetadditional_fromauth:
9042                 acarray = header->additional_auth;
9043                 break;
9044         case dns_rdatasetadditional_fromglue:
9045                 acarray = header->additional_glue;
9046                 break;
9047         default:
9048                 INSIST(0);
9049         }
9050
9051         if (acarray == NULL) {
9052                 unsigned int i;
9053
9054                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
9055                                       sizeof(acachectl_t));
9056
9057                 if (acarray == NULL) {
9058                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9059                         goto fail;
9060                 }
9061
9062                 for (i = 0; i < total_count; i++) {
9063                         acarray[i].entry = NULL;
9064                         acarray[i].cbarg = NULL;
9065                 }
9066         }
9067         switch (type) {
9068         case dns_rdatasetadditional_fromauth:
9069                 header->additional_auth = acarray;
9070                 break;
9071         case dns_rdatasetadditional_fromglue:
9072                 header->additional_glue = acarray;
9073                 break;
9074         default:
9075                 INSIST(0);
9076         }
9077
9078         if (acarray[count].entry != NULL) {
9079                 /*
9080                  * Swap the entry.  Delay cleaning-up the old entry since
9081                  * it would require a node lock.
9082                  */
9083                 oldentry = acarray[count].entry;
9084                 INSIST(acarray[count].cbarg != NULL);
9085                 oldcbarg = acarray[count].cbarg;
9086         }
9087         acarray[count].entry = newentry;
9088         acarray[count].cbarg = newcbarg;
9089
9090         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9091
9092         if (oldentry != NULL) {
9093                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
9094                 dns_acache_detachentry(&oldentry);
9095         }
9096
9097         return (ISC_R_SUCCESS);
9098
9099  fail:
9100         if (newcbarg != NULL) {
9101                 if (newentry != NULL) {
9102                         acache_cancelentry(rbtdb->common.mctx, newentry,
9103                                            &newcbarg);
9104                         dns_acache_detachentry(&newentry);
9105                 } else {
9106                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
9107                         dns_db_detach(&newcbarg->db);
9108                         isc_mem_put(rbtdb->common.mctx, newcbarg,
9109                             sizeof(*newcbarg));
9110                 }
9111         }
9112
9113         return (result);
9114 #endif
9115 }
9116
9117 static isc_result_t
9118 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
9119                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
9120 {
9121 #ifndef BIND9
9122         UNUSED(acache);
9123         UNUSED(rdataset);
9124         UNUSED(type);
9125         UNUSED(qtype);
9126
9127         return (ISC_R_NOTIMPLEMENTED);
9128 #else
9129         dns_rbtdb_t *rbtdb = rdataset->private1;
9130         dns_rbtnode_t *rbtnode = rdataset->private2;
9131         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
9132         unsigned int current_count = rdataset->privateuint4;
9133         rdatasetheader_t *header;
9134         nodelock_t *nodelock;
9135         unsigned int total_count, count;
9136         acachectl_t *acarray;
9137         dns_acacheentry_t *entry;
9138         acache_cbarg_t *cbarg;
9139
9140         UNUSED(qtype);          /* we do not use this value at least for now */
9141         UNUSED(acache);
9142
9143         if (type == dns_rdatasetadditional_fromcache)
9144                 return (ISC_R_SUCCESS);
9145
9146         header = (struct rdatasetheader *)(raw - sizeof(*header));
9147
9148         total_count = raw[0] * 256 + raw[1];
9149         INSIST(total_count > current_count);
9150         count = total_count - current_count - 1;
9151
9152         acarray = NULL;
9153         entry = NULL;
9154
9155         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
9156         NODE_LOCK(nodelock, isc_rwlocktype_write);
9157
9158         switch (type) {
9159         case dns_rdatasetadditional_fromauth:
9160                 acarray = header->additional_auth;
9161                 break;
9162         case dns_rdatasetadditional_fromglue:
9163                 acarray = header->additional_glue;
9164                 break;
9165         default:
9166                 INSIST(0);
9167         }
9168
9169         if (acarray == NULL) {
9170                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9171                 return (ISC_R_NOTFOUND);
9172         }
9173
9174         entry = acarray[count].entry;
9175         if (entry == NULL) {
9176                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9177                 return (ISC_R_NOTFOUND);
9178         }
9179
9180         acarray[count].entry = NULL;
9181         cbarg = acarray[count].cbarg;
9182         acarray[count].cbarg = NULL;
9183
9184         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
9185
9186         if (entry != NULL) {
9187                 if (cbarg != NULL)
9188                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
9189                 dns_acache_detachentry(&entry);
9190         }
9191
9192         return (ISC_R_SUCCESS);
9193 #endif
9194 }
9195
9196 /*%
9197  * Routines for LRU-based cache management.
9198  */
9199
9200 /*%
9201  * See if a given cache entry that is being reused needs to be updated
9202  * in the LRU-list.  From the LRU management point of view, this function is
9203  * expected to return true for almost all cases.  When used with threads,
9204  * however, this may cause a non-negligible performance penalty because a
9205  * writer lock will have to be acquired before updating the list.
9206  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
9207  * function returns true if the entry has not been updated for some period of
9208  * time.  We differentiate the NS or glue address case and the others since
9209  * experiments have shown that the former tends to be accessed relatively
9210  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
9211  * may cause external queries at a higher level zone, involving more
9212  * transactions).
9213  *
9214  * Caller must hold the node (read or write) lock.
9215  */
9216 static inline isc_boolean_t
9217 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
9218         if ((header->attributes &
9219              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
9220                 return (ISC_FALSE);
9221
9222 #if DNS_RBTDB_LIMITLRUUPDATE
9223         if (header->type == dns_rdatatype_ns ||
9224             (header->trust == dns_trust_glue &&
9225              (header->type == dns_rdatatype_a ||
9226               header->type == dns_rdatatype_aaaa))) {
9227                 /*
9228                  * Glue records are updated if at least 60 seconds have passed
9229                  * since the previous update time.
9230                  */
9231                 return (header->last_used + 60 <= now);
9232         }
9233
9234         /* Other records are updated if 5 minutes have passed. */
9235         return (header->last_used + 300 <= now);
9236 #else
9237         UNUSED(now);
9238
9239         return (ISC_TRUE);
9240 #endif
9241 }
9242
9243 /*%
9244  * Update the timestamp of a given cache entry and move it to the head
9245  * of the corresponding LRU list.
9246  *
9247  * Caller must hold the node (write) lock.
9248  *
9249  * Note that the we do NOT touch the heap here, as the TTL has not changed.
9250  */
9251 static void
9252 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9253               isc_stdtime_t now)
9254 {
9255         INSIST(IS_CACHE(rbtdb));
9256
9257         /* To be checked: can we really assume this? XXXMLG */
9258         INSIST(ISC_LINK_LINKED(header, link));
9259
9260         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
9261         header->last_used = now;
9262         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
9263 }
9264
9265 /*%
9266  * Purge some expired and/or stale (i.e. unused for some period) cache entries
9267  * under an overmem condition.  To recover from this condition quickly, up to
9268  * 2 entries will be purged.  This process is triggered while adding a new
9269  * entry, and we specifically avoid purging entries in the same LRU bucket as
9270  * the one to which the new entry will belong.  Otherwise, we might purge
9271  * entries of the same name of different RR types while adding RRsets from a
9272  * single response (consider the case where we're adding A and AAAA glue records
9273  * of the same NS name).
9274  */
9275 static void
9276 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
9277               isc_stdtime_t now, isc_boolean_t tree_locked)
9278 {
9279         rdatasetheader_t *header, *header_prev;
9280         unsigned int locknum;
9281         int purgecount = 2;
9282
9283         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
9284              locknum != locknum_start && purgecount > 0;
9285              locknum = (locknum + 1) % rbtdb->node_lock_count) {
9286                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
9287                           isc_rwlocktype_write);
9288
9289                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
9290                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
9291                         expire_header(rbtdb, header, tree_locked);
9292                         purgecount--;
9293                 }
9294
9295                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
9296                      header != NULL && purgecount > 0;
9297                      header = header_prev) {
9298                         header_prev = ISC_LIST_PREV(header, link);
9299                         /*
9300                          * Unlink the entry at this point to avoid checking it
9301                          * again even if it's currently used someone else and
9302                          * cannot be purged at this moment.  This entry won't be
9303                          * referenced any more (so unlinking is safe) since the
9304                          * TTL was reset to 0.
9305                          */
9306                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
9307                                         link);
9308                         expire_header(rbtdb, header, tree_locked);
9309                         purgecount--;
9310                 }
9311
9312                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
9313                                     isc_rwlocktype_write);
9314         }
9315 }
9316
9317 static void
9318 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
9319               isc_boolean_t tree_locked)
9320 {
9321         set_ttl(rbtdb, header, 0);
9322         header->attributes |= RDATASET_ATTR_STALE;
9323         header->node->dirty = 1;
9324
9325         /*
9326          * Caller must hold the node (write) lock.
9327          */
9328
9329         if (dns_rbtnode_refcurrent(header->node) == 0) {
9330                 /*
9331                  * If no one else is using the node, we can clean it up now.
9332                  * We first need to gain a new reference to the node to meet a
9333                  * requirement of decrement_reference().
9334                  */
9335                 new_reference(rbtdb, header->node);
9336                 decrement_reference(rbtdb, header->node, 0,
9337                                     isc_rwlocktype_write,
9338                                     tree_locked ? isc_rwlocktype_write :
9339                                     isc_rwlocktype_none, ISC_FALSE);
9340         }
9341 }