]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - contrib/bind9/lib/dns/rbtdb.c
Fix a problem whereby a corrupt DNS record can cause named to crash. [11:06]
[FreeBSD/releng/8.1.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2010  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.16.8.3 2010/02/26 00:24:39 marka Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t packs two rdata type values into one word:
 * RBTDB_RDATATYPE_BASE() reads the low 16 bits and RBTDB_RDATATYPE_EXT()
 * reads the bits above them.  RBTDB_RDATATYPE_VALUE(b, e) builds such a
 * word with 'b' in the low half and 'e' in the high half.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
/*
 * 'e' is widened to rbtdb_rdatatype_t *before* the shift.  If it were
 * shifted first, an operand narrower than int (dns_rdatatype_t is
 * presumably 16 bits wide) would be promoted to signed int, and any value
 * of 0x8000 or above would overflow that int when shifted left by 16,
 * which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((((rbtdb_rdatatype_t)(e)) << 16) | ((rbtdb_rdatatype_t)(b)))
104
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
128 #else
129 #define DNS_RBTDB_USERWLOCK 0
130 #endif
131
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137 #else
138 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t)        LOCK(l)
141 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 #endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
202 /*
203  * Allow clients with a virtual time of up to 5 minutes in the past to see
204  * records that would otherwise have expired.
205  */
206 #define RBTDB_VIRTUAL 300
207
208 struct noqname {
209         dns_name_t      name;
210         void *          neg;
211         void *          negsig;
212         dns_rdatatype_t type;
213 };
214
215 typedef struct acachectl acachectl_t;
216
217 typedef struct rdatasetheader {
218         /*%
219          * Locked by the owning node's lock.
220          */
221         rbtdb_serial_t                  serial;
222         dns_ttl_t                       rdh_ttl;
223         rbtdb_rdatatype_t               type;
224         isc_uint16_t                    attributes;
225         dns_trust_t                     trust;
226         struct noqname                  *noqname;
227         struct noqname                  *closest;
228         /*%<
229          * We don't use the LIST macros, because the LIST structure has
230          * both head and tail pointers, and is doubly linked.
231          */
232
233         struct rdatasetheader           *next;
234         /*%<
235          * If this is the top header for an rdataset, 'next' points
236          * to the top header for the next rdataset (i.e., the next type).
237          * Otherwise, it points up to the header whose down pointer points
238          * at this header.
239          */
240
241         struct rdatasetheader           *down;
242         /*%<
243          * Points to the header for the next older version of
244          * this rdataset.
245          */
246
247         isc_uint32_t                    count;
248         /*%<
249          * Monotonously increased every time this rdataset is bound so that
250          * it is used as the base of the starting point in DNS responses
251          * when the "cyclic" rrset-order is required.  Since the ordering
252          * should not be so crucial, no lock is set for the counter for
253          * performance reasons.
254          */
255
256         acachectl_t                     *additional_auth;
257         acachectl_t                     *additional_glue;
258
259         dns_rbtnode_t                   *node;
260         isc_stdtime_t                   last_used;
261         ISC_LINK(struct rdatasetheader) link;
262
263         unsigned int                    heap_index;
264         /*%<
265          * Used for TTL-based cache cleaning.
266          */
267         isc_stdtime_t                   resign;
268 } rdatasetheader_t;
269
270 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
272
273 #define RDATASET_ATTR_NONEXISTENT       0x0001
274 #define RDATASET_ATTR_STALE             0x0002
275 #define RDATASET_ATTR_IGNORE            0x0004
276 #define RDATASET_ATTR_RETAIN            0x0008
277 #define RDATASET_ATTR_NXDOMAIN          0x0010
278 #define RDATASET_ATTR_RESIGN            0x0020
279 #define RDATASET_ATTR_STATCOUNT         0x0040
280 #define RDATASET_ATTR_OPTOUT            0x0080
281 #define RDATASET_ATTR_NEGATIVE          0x0100
282
283 typedef struct acache_cbarg {
284         dns_rdatasetadditional_t        type;
285         unsigned int                    count;
286         dns_db_t                        *db;
287         dns_dbnode_t                    *node;
288         rdatasetheader_t                *header;
289 } acache_cbarg_t;
290
291 struct acachectl {
292         dns_acacheentry_t               *entry;
293         acache_cbarg_t                  *cbarg;
294 };
295
296 /*
297  * XXX
298  * When the cache will pre-expire data (due to memory low or other
299  * situations) before the rdataset's TTL has expired, it MUST
300  * respect the RETAIN bit and not expire the data until its TTL is
301  * expired.
302  */
303
304 #undef IGNORE                   /* WIN32 winbase.h defines this. */
305
306 #define EXISTS(header) \
307         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
308 #define NONEXISTENT(header) \
309         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
310 #define IGNORE(header) \
311         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
312 #define RETAIN(header) \
313         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
314 #define NXDOMAIN(header) \
315         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
316 #define RESIGN(header) \
317         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
318 #define OPTOUT(header) \
319         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
320 #define NEGATIVE(header) \
321         (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
322
323 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
324
325 /*%
326  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
327  * There is a tradeoff issue about configuring this value: if this is too
328  * small, it may cause heavier contention between threads; if this is too large,
329  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
330  * The default value should work well for most environments, but this can
331  * also be configurable at compilation time via the
332  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
333  * 1 due to the assumption of overmem_purge().
334  */
335 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
336 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
337 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
338 #else
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
340 #endif
341 #else
342 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
343 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
344
345 typedef struct {
346         nodelock_t                      lock;
347         /* Protected in the refcount routines. */
348         isc_refcount_t                  references;
349         /* Locked by lock. */
350         isc_boolean_t                   exiting;
351 } rbtdb_nodelock_t;
352
353 typedef struct rbtdb_changed {
354         dns_rbtnode_t *                 node;
355         isc_boolean_t                   dirty;
356         ISC_LINK(struct rbtdb_changed)  link;
357 } rbtdb_changed_t;
358
359 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
360
/*
 * Value stored in rbtdb_version_t's 'secure' member.  The enumerators
 * are numbered explicitly; the values match the implicit numbering.
 */
typedef enum {
        dns_db_insecure = 0,
        dns_db_partial  = 1,
        dns_db_secure   = 2
} dns_db_secure_t;
366
367 typedef struct rbtdb_version {
368         /* Not locked */
369         rbtdb_serial_t                  serial;
370         /*
371          * Protected in the refcount routines.
372          * XXXJT: should we change the lock policy based on the refcount
373          * performance?
374          */
375         isc_refcount_t                  references;
376         /* Locked by database lock. */
377         isc_boolean_t                   writer;
378         isc_boolean_t                   commit_ok;
379         rbtdb_changedlist_t             changed_list;
380         rdatasetheaderlist_t            resigned_list;
381         ISC_LINK(struct rbtdb_version)  link;
382         dns_db_secure_t                 secure;
383         isc_boolean_t                   havensec3;
384         /* NSEC3 parameters */
385         dns_hash_t                      hash;
386         isc_uint8_t                     flags;
387         isc_uint16_t                    iterations;
388         isc_uint8_t                     salt_length;
389         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
390 } rbtdb_version_t;
391
392 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
393
394 typedef struct {
395         /* Unlocked. */
396         dns_db_t                        common;
397 #if DNS_RBTDB_USERWLOCK
398         isc_rwlock_t                    lock;
399 #else
400         isc_mutex_t                     lock;
401 #endif
402         isc_rwlock_t                    tree_lock;
403         unsigned int                    node_lock_count;
404         rbtdb_nodelock_t *              node_locks;
405         dns_rbtnode_t *                 origin_node;
406         dns_stats_t *                   rrsetstats; /* cache DB only */
407         /* Locked by lock. */
408         unsigned int                    active;
409         isc_refcount_t                  references;
410         unsigned int                    attributes;
411         rbtdb_serial_t                  current_serial;
412         rbtdb_serial_t                  least_serial;
413         rbtdb_serial_t                  next_serial;
414         rbtdb_version_t *               current_version;
415         rbtdb_version_t *               future_version;
416         rbtdb_versionlist_t             open_versions;
417         isc_boolean_t                   overmem;
418         isc_task_t *                    task;
419         dns_dbnode_t                    *soanode;
420         dns_dbnode_t                    *nsnode;
421
422         /*
423          * This is a linked list used to implement the LRU cache.  There will
424          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
425          * placed on the linked list rdatasets[1].
426          */
427         rdatasetheaderlist_t            *rdatasets;
428
429         /*%
430          * Temporary storage for stale cache nodes and dynamically deleted
431          * nodes that await being cleaned up.
432          */
433         rbtnodelist_t                   *deadnodes;
434
435         /*
436          * Heaps.  Each of these is used for TTL based expiry.
437          */
438         isc_heap_t                      **heaps;
439
440         /* Locked by tree_lock. */
441         dns_rbt_t *                     tree;
442         dns_rbt_t *                     nsec3;
443
444         /* Unlocked */
445         unsigned int                    quantum;
446 } dns_rbtdb_t;
447
448 #define RBTDB_ATTR_LOADED               0x01
449 #define RBTDB_ATTR_LOADING              0x02
450
451 /*%
452  * Search Context
453  */
454 typedef struct {
455         dns_rbtdb_t *           rbtdb;
456         rbtdb_version_t *       rbtversion;
457         rbtdb_serial_t          serial;
458         unsigned int            options;
459         dns_rbtnodechain_t      chain;
460         isc_boolean_t           copy_name;
461         isc_boolean_t           need_cleanup;
462         isc_boolean_t           wild;
463         dns_rbtnode_t *         zonecut;
464         rdatasetheader_t *      zonecut_rdataset;
465         rdatasetheader_t *      zonecut_sigrdataset;
466         dns_fixedname_t         zonecut_name;
467         isc_stdtime_t           now;
468 } rbtdb_search_t;
469
470 /*%
471  * Load Context
472  */
473 typedef struct {
474         dns_rbtdb_t *           rbtdb;
475         isc_stdtime_t           now;
476 } rbtdb_load_t;
477
478 static void rdataset_disassociate(dns_rdataset_t *rdataset);
479 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
480 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
481 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
482 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
483 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
484 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
485                                         dns_name_t *name,
486                                         dns_rdataset_t *neg,
487                                         dns_rdataset_t *negsig);
488 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
489                                         dns_name_t *name,
490                                         dns_rdataset_t *neg,
491                                         dns_rdataset_t *negsig);
492 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
493                                            dns_rdatasetadditional_t type,
494                                            dns_rdatatype_t qtype,
495                                            dns_acache_t *acache,
496                                            dns_zone_t **zonep,
497                                            dns_db_t **dbp,
498                                            dns_dbversion_t **versionp,
499                                            dns_dbnode_t **nodep,
500                                            dns_name_t *fname,
501                                            dns_message_t *msg,
502                                            isc_stdtime_t now);
503 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
504                                            dns_rdatasetadditional_t type,
505                                            dns_rdatatype_t qtype,
506                                            dns_acache_t *acache,
507                                            dns_zone_t *zone,
508                                            dns_db_t *db,
509                                            dns_dbversion_t *version,
510                                            dns_dbnode_t *node,
511                                            dns_name_t *fname);
512 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
513                                            dns_rdataset_t *rdataset,
514                                            dns_rdatasetadditional_t type,
515                                            dns_rdatatype_t qtype);
516 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
517                                               isc_stdtime_t now);
518 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
519                           isc_stdtime_t now);
520 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
521                           isc_boolean_t tree_locked);
522 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
523                           isc_stdtime_t now, isc_boolean_t tree_locked);
524 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
525                                   rdatasetheader_t *newheader);
526 static void prune_tree(isc_task_t *task, isc_event_t *event);
527 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
528 static void rdataset_expire(dns_rdataset_t *rdataset);
529
530 static dns_rdatasetmethods_t rdataset_methods = {
531         rdataset_disassociate,
532         rdataset_first,
533         rdataset_next,
534         rdataset_current,
535         rdataset_clone,
536         rdataset_count,
537         NULL,
538         rdataset_getnoqname,
539         NULL,
540         rdataset_getclosest,
541         rdataset_getadditional,
542         rdataset_setadditional,
543         rdataset_putadditional,
544         rdataset_settrust,
545         rdataset_expire
546 };
547
548 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
549 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
550 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
551 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
552                                  dns_rdataset_t *rdataset);
553
554 static dns_rdatasetitermethods_t rdatasetiter_methods = {
555         rdatasetiter_destroy,
556         rdatasetiter_first,
557         rdatasetiter_next,
558         rdatasetiter_current
559 };
560
561 typedef struct rbtdb_rdatasetiter {
562         dns_rdatasetiter_t              common;
563         rdatasetheader_t *              current;
564 } rbtdb_rdatasetiter_t;
565
566 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
567 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
568 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
569 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
570                                         dns_name_t *name);
571 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
572 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
573 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
574                                            dns_dbnode_t **nodep,
575                                            dns_name_t *name);
576 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
577 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
578                                           dns_name_t *name);
579
580 static dns_dbiteratormethods_t dbiterator_methods = {
581         dbiterator_destroy,
582         dbiterator_first,
583         dbiterator_last,
584         dbiterator_seek,
585         dbiterator_prev,
586         dbiterator_next,
587         dbiterator_current,
588         dbiterator_pause,
589         dbiterator_origin
590 };
591
592 #define DELETION_BATCH_MAX 64
593
594 /*
595  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
596  */
597 typedef struct rbtdb_dbiterator {
598         dns_dbiterator_t                common;
599         isc_boolean_t                   paused;
600         isc_boolean_t                   new_origin;
601         isc_rwlocktype_t                tree_locked;
602         isc_result_t                    result;
603         dns_fixedname_t                 name;
604         dns_fixedname_t                 origin;
605         dns_rbtnodechain_t              chain;
606         dns_rbtnodechain_t              nsec3chain;
607         dns_rbtnodechain_t              *current;
608         dns_rbtnode_t                   *node;
609         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
610         int                             delete;
611         isc_boolean_t                   nsec3only;
612         isc_boolean_t                   nonsec3;
613 } rbtdb_dbiterator_t;
614
615
616 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
617 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
618
619 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
620                        isc_event_t *event);
621 static void overmem(dns_db_t *db, isc_boolean_t overmem);
622 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
623                                isc_boolean_t *nsec3createflag);
624
625 /*%
626  * 'init_count' is used to initialize 'newheader->count' which in turn
627  * is used to determine where in the cycle rrset-order cyclic starts.
628  * We don't lock this as we don't care about simultaneous updates.
629  *
630  * Note:
631  *      Both init_count and header->count can be ISC_UINT32_MAX.
632  *      The count on the returned rdataset however can't be as
633  *      that indicates that the database does not implement cyclic
634  *      processing.
635  */
636 static unsigned int init_count;
637
638 /*
639  * Locking
640  *
641  * If a routine is going to lock more than one lock in this module, then
642  * the locking must be done in the following order:
643  *
644  *      Tree Lock
645  *
646  *      Node Lock       (Only one from the set may be locked at one time by
647  *                       any caller)
648  *
649  *      Database Lock
650  *
651  * Failure to follow this hierarchy can result in deadlock.
652  */
653
654 /*
655  * Deleting Nodes
656  *
657  * For zone databases the node for the origin of the zone MUST NOT be deleted.
658  */
659
660
661 /*
662  * DB Routines
663  */
664
665 static void
666 attach(dns_db_t *source, dns_db_t **targetp) {
667         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
668
669         REQUIRE(VALID_RBTDB(rbtdb));
670
671         isc_refcount_increment(&rbtdb->references, NULL);
672
673         *targetp = source;
674 }
675
676 static void
677 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
678         dns_rbtdb_t *rbtdb = event->ev_arg;
679
680         UNUSED(task);
681
682         free_rbtdb(rbtdb, ISC_TRUE, event);
683 }
684
685 static void
686 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
687                   isc_boolean_t increment)
688 {
689         dns_rdatastatstype_t statattributes = 0;
690         dns_rdatastatstype_t base = 0;
691         dns_rdatastatstype_t type;
692
693         /* At the moment we count statistics only for cache DB */
694         INSIST(IS_CACHE(rbtdb));
695
696         if (NXDOMAIN(header))
697                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
698         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
699                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
700                 base = RBTDB_RDATATYPE_EXT(header->type);
701         } else
702                 base = RBTDB_RDATATYPE_BASE(header->type);
703
704         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
705         if (increment)
706                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
707         else
708                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
709 }
710
711 static void
712 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
713         int idx;
714         isc_heap_t *heap;
715         dns_ttl_t oldttl;
716
717         oldttl = header->rdh_ttl;
718         header->rdh_ttl = newttl;
719
720         if (!IS_CACHE(rbtdb))
721                 return;
722
723         /*
724          * It's possible the rbtdb is not a cache.  If this is the case,
725          * we will not have a heap, and we move on.  If we do, though,
726          * we might need to adjust things.
727          */
728         if (header->heap_index == 0 || newttl == oldttl)
729                 return;
730         idx = header->node->locknum;
731         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
732             return;
733         heap = rbtdb->heaps[idx];
734
735         if (newttl < oldttl)
736                 isc_heap_increased(heap, header->heap_index);
737         else
738                 isc_heap_decreased(heap, header->heap_index);
739 }
740
741 /*%
742  * These functions allow the heap code to rank the priority of each
743  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
744  */
745 static isc_boolean_t
746 ttl_sooner(void *v1, void *v2) {
747         rdatasetheader_t *h1 = v1;
748         rdatasetheader_t *h2 = v2;
749
750         if (h1->rdh_ttl < h2->rdh_ttl)
751                 return (ISC_TRUE);
752         return (ISC_FALSE);
753 }
754
755 static isc_boolean_t
756 resign_sooner(void *v1, void *v2) {
757         rdatasetheader_t *h1 = v1;
758         rdatasetheader_t *h2 = v2;
759
760         if (h1->resign < h2->resign)
761                 return (ISC_TRUE);
762         return (ISC_FALSE);
763 }
764
765 /*%
766  * This function sets the heap index into the header.
767  */
768 static void
769 set_index(void *what, unsigned int index) {
770         rdatasetheader_t *h = what;
771
772         h->heap_index = index;
773 }
774
775 /*%
776  * Work out how many nodes can be deleted in the time between two
777  * requests to the nameserver.  Smooth the resulting number and use it
778  * as a estimate for the number of nodes to be deleted in the next
779  * iteration.
780  */
781 static unsigned int
782 adjust_quantum(unsigned int old, isc_time_t *start) {
783         unsigned int pps = dns_pps;     /* packets per second */
784         unsigned int interval;
785         isc_uint64_t usecs;
786         isc_time_t end;
787         unsigned int new;
788
789         if (pps < 100)
790                 pps = 100;
791         isc_time_now(&end);
792
793         interval = 1000000 / pps;       /* interval in usec */
794         if (interval == 0)
795                 interval = 1;
796         usecs = isc_time_microdiff(&end, start);
797         if (usecs == 0) {
798                 /*
799                  * We were unable to measure the amount of time taken.
800                  * Double the nodes deleted next time.
801                  */
802                 old *= 2;
803                 if (old > 1000)
804                         old = 1000;
805                 return (old);
806         }
807         new = old * interval;
808         new /= (unsigned int)usecs;
809         if (new == 0)
810                 new = 1;
811         else if (new > 1000)
812                 new = 1000;
813
814         /* Smooth */
815         new = (new + old * 3) / 4;
816
817         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
818                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
819
820         return (new);
821 }
822
823 static void
824 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
825         unsigned int i;
826         isc_ondestroy_t ondest;
827         isc_result_t result;
828         char buf[DNS_NAME_FORMATSIZE];
829         isc_time_t start;
830
831         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
832                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
833
834         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
835         REQUIRE(rbtdb->future_version == NULL);
836
837         if (rbtdb->current_version != NULL) {
838                 unsigned int refs;
839
840                 isc_refcount_decrement(&rbtdb->current_version->references,
841                                        &refs);
842                 INSIST(refs == 0);
843                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
844                 isc_refcount_destroy(&rbtdb->current_version->references);
845                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
846                             sizeof(rbtdb_version_t));
847         }
848
849         /*
850          * We assume the number of remaining dead nodes is reasonably small;
851          * the overhead of unlinking all nodes here should be negligible.
852          */
853         for (i = 0; i < rbtdb->node_lock_count; i++) {
854                 dns_rbtnode_t *node;
855
856                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
857                 while (node != NULL) {
858                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
859                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
860                 }
861         }
862
863         if (event == NULL)
864                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
865  again:
866         if (rbtdb->tree != NULL) {
867                 isc_time_now(&start);
868                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
869                 if (result == ISC_R_QUOTA) {
870                         INSIST(rbtdb->task != NULL);
871                         if (rbtdb->quantum != 0)
872                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
873                                                                 &start);
874                         if (event == NULL)
875                                 event = isc_event_allocate(rbtdb->common.mctx,
876                                                            NULL,
877                                                          DNS_EVENT_FREESTORAGE,
878                                                            free_rbtdb_callback,
879                                                            rbtdb,
880                                                            sizeof(isc_event_t));
881                         if (event == NULL)
882                                 goto again;
883                         isc_task_send(rbtdb->task, &event);
884                         return;
885                 }
886                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
887         }
888
889         if (rbtdb->nsec3 != NULL) {
890                 isc_time_now(&start);
891                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
892                 if (result == ISC_R_QUOTA) {
893                         INSIST(rbtdb->task != NULL);
894                         if (rbtdb->quantum != 0)
895                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
896                                                                 &start);
897                         if (event == NULL)
898                                 event = isc_event_allocate(rbtdb->common.mctx,
899                                                            NULL,
900                                                          DNS_EVENT_FREESTORAGE,
901                                                            free_rbtdb_callback,
902                                                            rbtdb,
903                                                            sizeof(isc_event_t));
904                         if (event == NULL)
905                                 goto again;
906                         isc_task_send(rbtdb->task, &event);
907                         return;
908                 }
909                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
910         }
911
912         if (event != NULL)
913                 isc_event_free(&event);
914         if (log) {
915                 if (dns_name_dynamic(&rbtdb->common.origin))
916                         dns_name_format(&rbtdb->common.origin, buf,
917                                         sizeof(buf));
918                 else
919                         strcpy(buf, "<UNKNOWN>");
920                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
921                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
922                               "done free_rbtdb(%s)", buf);
923         }
924         if (dns_name_dynamic(&rbtdb->common.origin))
925                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
926         for (i = 0; i < rbtdb->node_lock_count; i++) {
927                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
928                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
929         }
930
931         /*
932          * Clean up LRU / re-signing order lists.
933          */
934         if (rbtdb->rdatasets != NULL) {
935                 for (i = 0; i < rbtdb->node_lock_count; i++)
936                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
937                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
938                             rbtdb->node_lock_count *
939                             sizeof(rdatasetheaderlist_t));
940         }
941         /*
942          * Clean up dead node buckets.
943          */
944         if (rbtdb->deadnodes != NULL) {
945                 for (i = 0; i < rbtdb->node_lock_count; i++)
946                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
947                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
948                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
949         }
950         /*
951          * Clean up heap objects.
952          */
953         if (rbtdb->heaps != NULL) {
954                 for (i = 0; i < rbtdb->node_lock_count; i++)
955                         isc_heap_destroy(&rbtdb->heaps[i]);
956                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
957                             rbtdb->node_lock_count *
958                             sizeof(isc_heap_t *));
959         }
960
961         if (rbtdb->rrsetstats != NULL)
962                 dns_stats_detach(&rbtdb->rrsetstats);
963
964         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
965                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
966         isc_rwlock_destroy(&rbtdb->tree_lock);
967         isc_refcount_destroy(&rbtdb->references);
968         if (rbtdb->task != NULL)
969                 isc_task_detach(&rbtdb->task);
970
971         RBTDB_DESTROYLOCK(&rbtdb->lock);
972         rbtdb->common.magic = 0;
973         rbtdb->common.impmagic = 0;
974         ondest = rbtdb->common.ondest;
975         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
976         isc_ondestroy_notify(&ondest, rbtdb);
977 }
978
979 static inline void
980 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
981         isc_boolean_t want_free = ISC_FALSE;
982         unsigned int i;
983         unsigned int inactive = 0;
984
985         /* XXX check for open versions here */
986
987         if (rbtdb->soanode != NULL)
988                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
989         if (rbtdb->nsnode != NULL)
990                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
991
992         /*
993          * Even though there are no external direct references, there still
994          * may be nodes in use.
995          */
996         for (i = 0; i < rbtdb->node_lock_count; i++) {
997                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
998                 rbtdb->node_locks[i].exiting = ISC_TRUE;
999                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
1000                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1001                     == 0) {
1002                         inactive++;
1003                 }
1004         }
1005
1006         if (inactive != 0) {
1007                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1008                 rbtdb->active -= inactive;
1009                 if (rbtdb->active == 0)
1010                         want_free = ISC_TRUE;
1011                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1012                 if (want_free) {
1013                         char buf[DNS_NAME_FORMATSIZE];
1014                         if (dns_name_dynamic(&rbtdb->common.origin))
1015                                 dns_name_format(&rbtdb->common.origin, buf,
1016                                                 sizeof(buf));
1017                         else
1018                                 strcpy(buf, "<UNKNOWN>");
1019                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1020                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1021                                       "calling free_rbtdb(%s)", buf);
1022                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1023                 }
1024         }
1025 }
1026
1027 static void
1028 detach(dns_db_t **dbp) {
1029         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1030         unsigned int refs;
1031
1032         REQUIRE(VALID_RBTDB(rbtdb));
1033
1034         isc_refcount_decrement(&rbtdb->references, &refs);
1035
1036         if (refs == 0)
1037                 maybe_free_rbtdb(rbtdb);
1038
1039         *dbp = NULL;
1040 }
1041
1042 static void
1043 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1044         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1045         rbtdb_version_t *version;
1046         unsigned int refs;
1047
1048         REQUIRE(VALID_RBTDB(rbtdb));
1049
1050         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1051         version = rbtdb->current_version;
1052         isc_refcount_increment(&version->references, &refs);
1053         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1054
1055         *versionp = (dns_dbversion_t *)version;
1056 }
1057
1058 static inline rbtdb_version_t *
1059 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1060                  unsigned int references, isc_boolean_t writer)
1061 {
1062         isc_result_t result;
1063         rbtdb_version_t *version;
1064
1065         version = isc_mem_get(mctx, sizeof(*version));
1066         if (version == NULL)
1067                 return (NULL);
1068         version->serial = serial;
1069         result = isc_refcount_init(&version->references, references);
1070         if (result != ISC_R_SUCCESS) {
1071                 isc_mem_put(mctx, version, sizeof(*version));
1072                 return (NULL);
1073         }
1074         version->writer = writer;
1075         version->commit_ok = ISC_FALSE;
1076         ISC_LIST_INIT(version->changed_list);
1077         ISC_LIST_INIT(version->resigned_list);
1078         ISC_LINK_INIT(version, link);
1079
1080         return (version);
1081 }
1082
1083 static isc_result_t
1084 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1085         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1086         rbtdb_version_t *version;
1087
1088         REQUIRE(VALID_RBTDB(rbtdb));
1089         REQUIRE(versionp != NULL && *versionp == NULL);
1090         REQUIRE(rbtdb->future_version == NULL);
1091
1092         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1093         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1094         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1095                                    ISC_TRUE);
1096         if (version != NULL) {
1097                 version->commit_ok = ISC_TRUE;
1098                 version->secure = rbtdb->current_version->secure;
1099                 version->havensec3 = rbtdb->current_version->havensec3;
1100                 if (version->havensec3) {
1101                         version->flags = rbtdb->current_version->flags;
1102                         version->iterations =
1103                                 rbtdb->current_version->iterations;
1104                         version->hash = rbtdb->current_version->hash;
1105                         version->salt_length =
1106                                 rbtdb->current_version->salt_length;
1107                         memcpy(version->salt, rbtdb->current_version->salt,
1108                                version->salt_length);
1109                 } else {
1110                         version->flags = 0;
1111                         version->iterations = 0;
1112                         version->hash = 0;
1113                         version->salt_length = 0;
1114                         memset(version->salt, 0, sizeof(version->salt));
1115                 }
1116                 rbtdb->next_serial++;
1117                 rbtdb->future_version = version;
1118         }
1119         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1120
1121         if (version == NULL)
1122                 return (ISC_R_NOMEMORY);
1123
1124         *versionp = version;
1125
1126         return (ISC_R_SUCCESS);
1127 }
1128
1129 static void
1130 attachversion(dns_db_t *db, dns_dbversion_t *source,
1131               dns_dbversion_t **targetp)
1132 {
1133         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1134         rbtdb_version_t *rbtversion = source;
1135         unsigned int refs;
1136
1137         REQUIRE(VALID_RBTDB(rbtdb));
1138
1139         isc_refcount_increment(&rbtversion->references, &refs);
1140         INSIST(refs > 1);
1141
1142         *targetp = rbtversion;
1143 }
1144
1145 static rbtdb_changed_t *
1146 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1147             dns_rbtnode_t *node)
1148 {
1149         rbtdb_changed_t *changed;
1150         unsigned int refs;
1151
1152         /*
1153          * Caller must be holding the node lock if its reference must be
1154          * protected by the lock.
1155          */
1156
1157         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1158
1159         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1160
1161         REQUIRE(version->writer);
1162
1163         if (changed != NULL) {
1164                 dns_rbtnode_refincrement(node, &refs);
1165                 INSIST(refs != 0);
1166                 changed->node = node;
1167                 changed->dirty = ISC_FALSE;
1168                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1169         } else
1170                 version->commit_ok = ISC_FALSE;
1171
1172         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1173
1174         return (changed);
1175 }
1176
1177 static void
1178 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1179                  acachectl_t *array)
1180 {
1181         unsigned int count;
1182         unsigned int i;
1183         unsigned char *raw;     /* RDATASLAB */
1184
1185         /*
1186          * The caller must be holding the corresponding node lock.
1187          */
1188
1189         if (array == NULL)
1190                 return;
1191
1192         raw = (unsigned char *)header + sizeof(*header);
1193         count = raw[0] * 256 + raw[1];
1194
1195         /*
1196          * Sanity check: since an additional cache entry has a reference to
1197          * the original DB node (in the callback arg), there should be no
1198          * acache entries when the node can be freed.
1199          */
1200         for (i = 0; i < count; i++)
1201                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1202
1203         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1204 }
1205
1206 static inline void
1207 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1208
1209         if (dns_name_dynamic(&(*noqname)->name))
1210                 dns_name_free(&(*noqname)->name, mctx);
1211         if ((*noqname)->neg != NULL)
1212                 isc_mem_put(mctx, (*noqname)->neg,
1213                             dns_rdataslab_size((*noqname)->neg, 0));
1214         if ((*noqname)->negsig != NULL)
1215                 isc_mem_put(mctx, (*noqname)->negsig,
1216                             dns_rdataslab_size((*noqname)->negsig, 0));
1217         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1218         *noqname = NULL;
1219 }
1220
1221 static inline void
1222 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1223 {
1224         ISC_LINK_INIT(h, link);
1225         h->heap_index = 0;
1226
1227 #if TRACE_HEADER
1228         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1229                 fprintf(stderr, "initialized header: %p\n", h);
1230 #else
1231         UNUSED(rbtdb);
1232 #endif
1233 }
1234
1235 static inline rdatasetheader_t *
1236 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1237 {
1238         rdatasetheader_t *h;
1239
1240         h = isc_mem_get(mctx, sizeof(*h));
1241         if (h == NULL)
1242                 return (NULL);
1243
1244 #if TRACE_HEADER
1245         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1246                 fprintf(stderr, "allocated header: %p\n", h);
1247 #endif
1248         init_rdataset(rbtdb, h);
1249         return (h);
1250 }
1251
1252 static inline void
1253 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1254 {
1255         unsigned int size;
1256         int idx;
1257
1258         if (EXISTS(rdataset) &&
1259             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1260                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1261         }
1262
1263         idx = rdataset->node->locknum;
1264         if (ISC_LINK_LINKED(rdataset, link)) {
1265                 INSIST(IS_CACHE(rbtdb));
1266                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1267         }
1268         if (rdataset->heap_index != 0)
1269                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1270         rdataset->heap_index = 0;
1271
1272         if (rdataset->noqname != NULL)
1273                 free_noqname(mctx, &rdataset->noqname);
1274         if (rdataset->closest != NULL)
1275                 free_noqname(mctx, &rdataset->closest);
1276
1277         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1278         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1279
1280         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1281                 size = sizeof(*rdataset);
1282         else
1283                 size = dns_rdataslab_size((unsigned char *)rdataset,
1284                                           sizeof(*rdataset));
1285         isc_mem_put(mctx, rdataset, size);
1286 }
1287
1288 static inline void
1289 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1290         rdatasetheader_t *header, *dcurrent;
1291         isc_boolean_t make_dirty = ISC_FALSE;
1292
1293         /*
1294          * Caller must hold the node lock.
1295          */
1296
1297         /*
1298          * We set the IGNORE attribute on rdatasets with serial number
1299          * 'serial'.  When the reference count goes to zero, these rdatasets
1300          * will be cleaned up; until that time, they will be ignored.
1301          */
1302         for (header = node->data; header != NULL; header = header->next) {
1303                 if (header->serial == serial) {
1304                         header->attributes |= RDATASET_ATTR_IGNORE;
1305                         make_dirty = ISC_TRUE;
1306                 }
1307                 for (dcurrent = header->down;
1308                      dcurrent != NULL;
1309                      dcurrent = dcurrent->down) {
1310                         if (dcurrent->serial == serial) {
1311                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1312                                 make_dirty = ISC_TRUE;
1313                         }
1314                 }
1315         }
1316         if (make_dirty)
1317                 node->dirty = 1;
1318 }
1319
1320 static inline void
1321 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1322 {
1323         rdatasetheader_t *d, *down_next;
1324
1325         for (d = top->down; d != NULL; d = down_next) {
1326                 down_next = d->down;
1327                 free_rdataset(rbtdb, mctx, d);
1328         }
1329         top->down = NULL;
1330 }
1331
1332 static inline void
1333 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1334         rdatasetheader_t *current, *top_prev, *top_next;
1335         isc_mem_t *mctx = rbtdb->common.mctx;
1336
1337         /*
1338          * Caller must be holding the node lock.
1339          */
1340
1341         top_prev = NULL;
1342         for (current = node->data; current != NULL; current = top_next) {
1343                 top_next = current->next;
1344                 clean_stale_headers(rbtdb, mctx, current);
1345                 /*
1346                  * If current is nonexistent or stale, we can clean it up.
1347                  */
1348                 if ((current->attributes &
1349                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1350                         if (top_prev != NULL)
1351                                 top_prev->next = current->next;
1352                         else
1353                                 node->data = current->next;
1354                         free_rdataset(rbtdb, mctx, current);
1355                 } else
1356                         top_prev = current;
1357         }
1358         node->dirty = 0;
1359 }
1360
/*%
 * Prune the rdataset headers stored at 'node' (zone variant).
 *
 * For every top-level header reachable from node->data:
 *   - down headers that carry the same serial as their parent, or that
 *     have the IGNORE attribute, are unlinked and freed;
 *   - if the top-level header itself is IGNOREd it is freed, and its
 *     first down header (if any) takes its place in the top-level list;
 *   - starting at the first down header whose serial is below
 *     'least_serial', that header and all older ones are freed;
 *   - a top-level header that is left without a down chain is freed if
 *     it is NONEXISTENT.
 *
 * node->dirty is cleared unless at least one top-level header still has
 * a down chain when the pass is finished.
 *
 * Requires: 'least_serial' != 0.
 */
1361 static inline void
1362 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1363                 rbtdb_serial_t least_serial)
1364 {
1365         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1366         rdatasetheader_t *top_prev, *top_next;
1367         isc_mem_t *mctx = rbtdb->common.mctx;
1368         isc_boolean_t still_dirty = ISC_FALSE;
1369
1370         /*
1371          * Caller must be holding the node lock.
1372          */
1373         REQUIRE(least_serial != 0);
1374
             /* top_prev is the last top-level header kept so far, if any. */
1375         top_prev = NULL;
1376         for (current = node->data; current != NULL; current = top_next) {
                     /* Saved up front: 'current' may be freed below. */
1377                 top_next = current->next;
1378
1379                 /*
1380                  * First, we clean up any instances of multiple rdatasets
1381                  * with the same serial number, or that have the IGNORE
1382                  * attribute.
1383                  */
1384                 dparent = current;
1385                 for (dcurrent = current->down;
1386                      dcurrent != NULL;
1387                      dcurrent = down_next) {
1388                         down_next = dcurrent->down;
1389                         INSIST(dcurrent->serial <= dparent->serial);
1390                         if (dcurrent->serial == dparent->serial ||
1391                             IGNORE(dcurrent)) {
                                     /*
                                      * Splice dcurrent out of the down chain;
                                      * the following down header's 'next' is
                                      * pointed back at dparent.
                                      */
1392                                 if (down_next != NULL)
1393                                         down_next->next = dparent;
1394                                 dparent->down = down_next;
1395                                 free_rdataset(rbtdb, mctx, dcurrent);
1396                         } else
1397                                 dparent = dcurrent;
1398                 }
1399
1400                 /*
1401                  * We've now eliminated all IGNORE datasets with the possible
1402                  * exception of current, which we now check.
1403                  */
1404                 if (IGNORE(current)) {
1405                         down_next = current->down;
1406                         if (down_next == NULL) {
1407                                 if (top_prev != NULL)
1408                                         top_prev->next = current->next;
1409                                 else
1410                                         node->data = current->next;
1411                                 free_rdataset(rbtdb, mctx, current);
1412                                 /*
1413                                  * current no longer exists, so we can
1414                                  * just continue with the loop.
1415                                  */
1416                                 continue;
1417                         } else {
1418                                 /*
1419                                  * Pull up current->down, making it the new
1420                                  * current.
1421                                  */
1422                                 if (top_prev != NULL)
1423                                         top_prev->next = down_next;
1424                                 else
1425                                         node->data = down_next;
1426                                 down_next->next = top_next;
1427                                 free_rdataset(rbtdb, mctx, current);
1428                                 current = down_next;
1429                         }
1430                 }
1431
1432                 /*
1433                  * We now try to find the first down node less than the
1434                  * least serial.
1435                  */
1436                 dparent = current;
1437                 for (dcurrent = current->down;
1438                      dcurrent != NULL;
1439                      dcurrent = down_next) {
1440                         down_next = dcurrent->down;
1441                         if (dcurrent->serial < least_serial)
1442                                 break;
1443                         dparent = dcurrent;
1444                 }
1445
1446                 /*
1447                  * If there is a such an rdataset, delete it and any older
1448                  * versions.
1449                  */
1450                 if (dcurrent != NULL) {
1451                         do {
1452                                 down_next = dcurrent->down;
1453                                 INSIST(dcurrent->serial <= least_serial);
1454                                 free_rdataset(rbtdb, mctx, dcurrent);
1455                                 dcurrent = down_next;
1456                         } while (dcurrent != NULL);
                             /* dparent is now the oldest header that is kept. */
1457                         dparent->down = NULL;
1458                 }
1459
1460                 /*
1461                  * Note.  The serial number of 'current' might be less than
1462                  * least_serial too, but we cannot delete it because it is
1463                  * the most recent version, unless it is a NONEXISTENT
1464                  * rdataset.
1465                  */
1466                 if (current->down != NULL) {
1467                         still_dirty = ISC_TRUE;
1468                         top_prev = current;
1469                 } else {
1470                         /*
1471                          * If this is a NONEXISTENT rdataset, we can delete it.
1472                          */
1473                         if (NONEXISTENT(current)) {
1474                                 if (top_prev != NULL)
1475                                         top_prev->next = current->next;
1476                                 else
1477                                         node->data = current->next;
1478                                 free_rdataset(rbtdb, mctx, current);
1479                         } else
1480                                 top_prev = current;
1481                 }
1482         }
1483         if (!still_dirty)
1484                 node->dirty = 0;
1485 }
1486
1487 /*%
1488  * Clean up dead nodes.  These are nodes which have no references, and
1489  * have no data.  They are dead but we could not or chose not to delete
1490  * them when we deleted all the data at that node because we did not want
1491  * to wait for the tree write lock.
1492  *
1493  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1494  */
1495 static void
1496 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1497         dns_rbtnode_t *node;
1498         isc_result_t result;
1499         int count = 10;         /* XXXJT: should be adjustable */
1500
1501         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1502         while (node != NULL && count > 0) {
1503                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1504
1505                 /*
1506                  * Since we're holding a tree write lock, it should be
1507                  * impossible for this node to be referenced by others.
1508                  */
1509                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1510                        node->data == NULL);
1511
1512                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1513                 if (node->nsec3)
1514                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1515                                                     ISC_FALSE);
1516                 else
1517                         result = dns_rbt_deletenode(rbtdb->tree, node,
1518                                                     ISC_FALSE);
1519                 if (result != ISC_R_SUCCESS)
1520                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1521                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1522                                       "cleanup_dead_nodes: "
1523                                       "dns_rbt_deletenode: %s",
1524                                       isc_result_totext(result));
1525                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1526                 count--;
1527         }
1528 }
1529
1530 /*
1531  * Caller must be holding the node lock if its reference must be protected
1532  * by the lock.
1533  */
1534 static inline void
1535 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1536         unsigned int lockrefs, noderefs;
1537         isc_refcount_t *lockref;
1538
1539         dns_rbtnode_refincrement0(node, &noderefs);
1540         if (noderefs == 1) {    /* this is the first reference to the node */
1541                 lockref = &rbtdb->node_locks[node->locknum].references;
1542                 isc_refcount_increment0(lockref, &lockrefs);
1543                 INSIST(lockrefs != 0);
1544         }
1545         INSIST(noderefs != 0);
1546 }
1547
1548 /*
1549  * This function is assumed to be called when a node is newly referenced
1550  * and can be in the deadnode list.  In that case the node must be retrieved
1551  * from the list because it is going to be used.  In addition, if the caller
1552  * happens to hold a write lock on the tree, it's a good chance to purge dead
1553  * nodes.
1554  * Note: while a new reference is gained in multiple places, there are only very
1555  * few cases where the node can be in the deadnode list (only empty nodes can
1556  * have been added to the list).
      *
      * 'treelocktype' is the type of tree lock the caller holds.  Dead nodes
      * in this node's lock bucket are purged only when it is
      * isc_rwlocktype_write.
1557  */
1558 static inline void
1559 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1560                 isc_rwlocktype_t treelocktype)
1561 {
1562         isc_boolean_t need_relock = ISC_FALSE;
1563
1564         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1565         new_reference(rbtdb, node);
1566
             /*
              * Look at the dead-node list under the read lock first.  The
              * write lock is taken below only if this node has to be
              * unlinked, or if the list is non-empty and the caller's tree
              * write lock allows purging it now.
              */
1567         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1568                       isc_rwlocktype_read);
1569         if (ISC_LINK_LINKED(node, deadlink))
1570                 need_relock = ISC_TRUE;
1571         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1572                  treelocktype == isc_rwlocktype_write)
1573                 need_relock = ISC_TRUE;
1574         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1575                         isc_rwlocktype_read);
1576         if (need_relock) {
1577                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1578                               isc_rwlocktype_write);
                     /*
                      * The link state is tested again here because the read
                      * lock was released before the write lock was taken.
                      */
1579                 if (ISC_LINK_LINKED(node, deadlink))
1580                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1581                                         node, deadlink);
1582                 if (treelocktype == isc_rwlocktype_write)
1583                         cleanup_dead_nodes(rbtdb, node->locknum);
1584                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1585                                 isc_rwlocktype_write);
1586         }
1587
1588         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1589 }
1590
1591 /*
1592  * Caller must be holding the node lock; either the "strong", read or write
1593  * lock.  Note that the lock must be held even when node references are
1594  * atomically modified; in that case the decrement operation itself does not
1595  * have to be protected, but we must avoid a race condition where multiple
1596  * threads are decreasing the reference to zero simultaneously and at least
1597  * one of them is going to free the node.
1598  * This function returns ISC_TRUE if and only if the node reference decreases
1599  * to zero.
      *
      * 'least_serial' is only used when a dirty zone node is cleaned; if it
      * is 0 the value is read from rbtdb->least_serial under the database
      * lock.
      * 'nlock' and 'tlock' are the node and tree lock types the caller holds
      * on entry.  Any upgrade performed here is undone before returning.
      * 'pruning' is ISC_TRUE when called from prune_tree(); it suppresses the
      * dispatch of a further pruning event.
      *
      * NOTE(review): the value actually returned on the slow path is
      * 'no_reference', which is ISC_FALSE when a pruning event was dispatched
      * (the event holds a new reference to the node) or when the node's
      * reference count is no longer zero by the time it is re-read.
1600  */
1601 static isc_boolean_t
1602 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1603                     rbtdb_serial_t least_serial,
1604                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1605                     isc_boolean_t pruning)
1606 {
1607         isc_result_t result;
1608         isc_boolean_t write_locked;
1609         rbtdb_nodelock_t *nodelock;
1610         unsigned int refs, nrefs;
1611         int bucket = node->locknum;
1612         isc_boolean_t no_reference;
1613
1614         nodelock = &rbtdb->node_locks[bucket];
1615
1616         /* Handle easy and typical case first. */
1617         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1618                 dns_rbtnode_refdecrement(node, &nrefs);
1619                 INSIST((int)nrefs >= 0);
1620                 if (nrefs == 0) {
1621                         isc_refcount_decrement(&nodelock->references, &refs);
1622                         INSIST((int)refs >= 0);
1623                 }
1624                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1625         }
1626
1627         /* Upgrade the lock? */
1628         if (nlock == isc_rwlocktype_read) {
1629                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1630                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1631         }
1632         dns_rbtnode_refdecrement(node, &nrefs);
1633         INSIST((int)nrefs >= 0);
1634         if (nrefs > 0) {
1635                 /* Restore the lock? */
1636                 if (nlock == isc_rwlocktype_read)
1637                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1638                 return (ISC_FALSE);
1639         }
1640
             /*
              * The last reference is gone.  A dirty node is cleaned now,
              * using the cache or the zone cleaner depending on the kind of
              * database.
              */
1641         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1642                 if (IS_CACHE(rbtdb))
1643                         clean_cache_node(rbtdb, node);
1644                 else {
1645                         if (least_serial == 0) {
1646                                 /*
1647                                  * Caller doesn't know the least serial.
1648                                  * Get it.
1649                                  */
1650                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1651                                 least_serial = rbtdb->least_serial;
1652                                 RBTDB_UNLOCK(&rbtdb->lock,
1653                                              isc_rwlocktype_read);
1654                         }
1655                         clean_zone_node(rbtdb, node, least_serial);
1656                 }
1657         }
1658
             /* The node no longer counts against its lock bucket. */
1659         isc_refcount_decrement(&nodelock->references, &refs);
1660         INSIST((int)refs >= 0);
1661
1662         /*
1663          * XXXDCL should this only be done for cache zones?
1664          */
             /* A node that still has data or a subtree stays in the tree. */
1665         if (node->data != NULL || node->down != NULL) {
1666                 /* Restore the lock? */
1667                 if (nlock == isc_rwlocktype_read)
1668                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1669                 return (ISC_TRUE);
1670         }
1671
1672         /*
1673          * Attempt to switch to a write lock on the tree.  If this fails,
1674          * we will add this node to a linked list of nodes in this locking
1675          * bucket which we will free later.
1676          */
1677         if (tlock != isc_rwlocktype_write) {
1678                 /*
1679                  * Locking hierarchy notwithstanding, we don't need to free
1680                  * the node lock before acquiring the tree write lock because
1681                  * we only do a trylock.
1682                  */
1683                 if (tlock == isc_rwlocktype_read)
1684                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1685                 else
1686                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1687                                                     isc_rwlocktype_write);
1688                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1689                               result == ISC_R_LOCKBUSY);
1690
1691                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1692         } else
1693                 write_locked = ISC_TRUE;
1694
1695         no_reference = ISC_TRUE;
1696         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1697                 /*
1698                  * We can now delete the node if the reference counter is
1699                  * zero.  This should be typically the case, but a different
1700                  * thread may still gain a (new) reference just before the
1701                  * current thread locks the tree (e.g., in findnode()).
1702                  */
1703
1704                 /*
1705                  * If this node is the only one in the level it's in, deleting
1706                  * this node may recursively make its parent the only node in
1707                  * the parent level; if so, and if no one is currently using
1708                  * the parent node, this is almost the only opportunity to
1709                  * clean it up.  But the recursive cleanup is not that trivial
1710                  * since the child and parent may be in different lock buckets,
1711                  * which would cause a lock order reversal problem.  To avoid
1712                  * the trouble, we'll dispatch a separate event for batch
1713                  * cleaning.  We need to check whether we're deleting the node
1714                  * as a result of pruning to avoid infinite dispatching.
1715                  * Note: pruning happens only when a task has been set for the
1716                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1717                  * it's their responsibility to purge stale leaves (e.g. by
1718                  * periodic walk-through).
1719                  */
1720                 if (!pruning && node->parent != NULL &&
1721                     node->parent->down == node && node->left == NULL &&
1722                     node->right == NULL && rbtdb->task != NULL) {
1723                         isc_event_t *ev;
1724                         dns_db_t *db;
1725
1726                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1727                                                 DNS_EVENT_RBTPRUNE,
1728                                                 prune_tree, node,
1729                                                 sizeof(isc_event_t));
1730                         if (ev != NULL) {
                                     /*
                                      * The event carries its own reference
                                      * to the node and an attached database
                                      * handle.  prune_tree() drops the node
                                      * reference and detaches the database.
                                      */
1731                                 new_reference(rbtdb, node);
1732                                 db = NULL;
1733                                 attach((dns_db_t *)rbtdb, &db);
1734                                 ev->ev_sender = db;
1735                                 isc_task_send(rbtdb->task, &ev);
1736                                 no_reference = ISC_FALSE;
1737                         } else {
1738                                 /*
1739                                  * XXX: this is a weird situation.  We could
1740                                  * ignore this error case, but then the stale
1741                                  * node will unlikely be purged except via a
1742                                  * rare condition such as manual cleanup.  So
1743                                  * we queue it in the deadnodes list, hoping
1744                                  * the memory shortage is temporary and the node
1745                                  * will be deleted later.
1746                                  */
1747                                 isc_log_write(dns_lctx,
1748                                               DNS_LOGCATEGORY_DATABASE,
1749                                               DNS_LOGMODULE_CACHE,
1750                                               ISC_LOG_INFO,
1751                                               "decrement_reference: failed to "
1752                                               "allocate pruning event");
1753                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1754                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1755                                                 deadlink);
1756                         }
1757                 } else {
1758                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1759                                 char printname[DNS_NAME_FORMATSIZE];
1760
1761                                 isc_log_write(dns_lctx,
1762                                               DNS_LOGCATEGORY_DATABASE,
1763                                               DNS_LOGMODULE_CACHE,
1764                                               ISC_LOG_DEBUG(1),
1765                                               "decrement_reference: "
1766                                               "delete from rbt: %p %s",
1767                                               node,
1768                                               dns_rbt_formatnodename(node,
1769                                                         printname,
1770                                                         sizeof(printname)));
1771                         }
1772
                             /*
                              * Remove the node from whichever tree it
                              * belongs to.  A failure is only logged.
                              */
1773                         INSIST(!ISC_LINK_LINKED(node, deadlink));
1774                         if (node->nsec3)
1775                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1776                                                             ISC_FALSE);
1777                         else
1778                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1779                                                             ISC_FALSE);
1780                         if (result != ISC_R_SUCCESS) {
1781                                 isc_log_write(dns_lctx,
1782                                               DNS_LOGCATEGORY_DATABASE,
1783                                               DNS_LOGMODULE_CACHE,
1784                                               ISC_LOG_WARNING,
1785                                               "decrement_reference: "
1786                                               "dns_rbt_deletenode: %s",
1787                                               isc_result_totext(result));
1788                         }
1789                 }
1790         } else if (dns_rbtnode_refcurrent(node) == 0) {
                     /*
                      * The node could not be deleted here, so it is queued
                      * on this bucket's dead-node list for later removal.
                      */
1791                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1792                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1793         } else
1794                 no_reference = ISC_FALSE;
1795
1796         /* Restore the lock? */
1797         if (nlock == isc_rwlocktype_read)
1798                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1799
1800         /*
1801          * Relock a read lock, or unlock the write lock if no lock was held.
1802          */
1803         if (tlock == isc_rwlocktype_none)
1804                 if (write_locked)
1805                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1806
1807         if (tlock == isc_rwlocktype_read)
1808                 if (write_locked)
1809                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1810
1811         return (no_reference);
1812 }
1813
1814 /*
1815  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1816  * case, the number of iteration is the number of tree levels, which is at
1817  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1818  * should be much smaller (only a few times), and even the worst case would be
1819  * acceptable for a single event.
      *
      * 'event' is the DNS_EVENT_RBTPRUNE event sent by decrement_reference():
      * ev_arg is the node to start from, on which a reference was taken for
      * this event, and ev_sender is an attached database handle that is
      * detached before returning.  'task' is unused.
1820  */
1821 static void
1822 prune_tree(isc_task_t *task, isc_event_t *event) {
1823         dns_rbtdb_t *rbtdb = event->ev_sender;
1824         dns_rbtnode_t *node = event->ev_arg;
1825         dns_rbtnode_t *parent;
1826         unsigned int locknum;
1827
1828         UNUSED(task);
1829
             /* Everything needed has been copied out of the event. */
1830         isc_event_free(&event);
1831
1832         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1833         locknum = node->locknum;
1834         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1835         do {
                     /*
                      * Remember the parent before dropping the reference:
                      * decrement_reference() may delete 'node' from the tree.
                      */
1836                 parent = node->parent;
1837                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1838                                     isc_rwlocktype_write, ISC_TRUE);
1839
1840                 if (parent != NULL && parent->down == NULL) {
1841                         /*
1842                          * node was the only down child of the parent and has
1843                          * just been removed.  We'll then need to examine the
1844                          * parent.  Keep the lock if possible; otherwise,
1845                          * release the old lock and acquire one for the parent.
1846                          */
1847                         if (parent->locknum != locknum) {
1848                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1849                                             isc_rwlocktype_write);
1850                                 locknum = parent->locknum;
1851                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1852                                           isc_rwlocktype_write);
1853                         }
1854
1855                         /*
1856                          * We need to gain a reference to the node before
1857                          * decrementing it in the next iteration.  In addition,
1858                          * if the node is in the dead-nodes list, extract it
1859                          * from the list beforehand as we do in
1860                          * reactivate_node().
1861                          */
1862                         new_reference(rbtdb, parent);
1863                         if (ISC_LINK_LINKED(parent, deadlink)) {
1864                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1865                                                 parent, deadlink);
1866                         }
1867                 } else
1868                         parent = NULL;
1869
1870                 node = parent;
1871         } while (node != NULL);
             /* 'locknum' names whichever bucket lock is held at this point. */
1872         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1873         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1874
1875         detach((dns_db_t **)&rbtdb);
1876 }
1877
1878 static inline void
1879 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1880                    rbtdb_changedlist_t *cleanup_list)
1881 {
1882         /*
1883          * Caller must be holding the database lock.
1884          */
1885
1886         rbtdb->least_serial = version->serial;
1887         *cleanup_list = version->changed_list;
1888         ISC_LIST_INIT(version->changed_list);
1889 }
1890
1891 static inline void
1892 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1893         rbtdb_changed_t *changed, *next_changed;
1894
1895         /*
1896          * If the changed record is dirty, then
1897          * an update created multiple versions of
1898          * a given rdataset.  We keep this list
1899          * until we're the least open version, at
1900          * which point it's safe to get rid of any
1901          * older versions.
1902          *
1903          * If the changed record isn't dirty, then
1904          * we don't need it anymore since we're
1905          * committing and not rolling back.
1906          *
1907          * The caller must be holding the database lock.
1908          */
1909         for (changed = HEAD(version->changed_list);
1910              changed != NULL;
1911              changed = next_changed) {
1912                 next_changed = NEXT(changed, link);
1913                 if (!changed->dirty) {
1914                         UNLINK(version->changed_list,
1915                                changed, link);
1916                         APPEND(*cleanup_list,
1917                                changed, link);
1918                 }
1919         }
1920 }
1921
1922 static void
1923 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1924         dns_rdataset_t keyset;
1925         dns_rdataset_t nsecset, signsecset;
1926         dns_rdata_t rdata = DNS_RDATA_INIT;
1927         isc_boolean_t haszonekey = ISC_FALSE;
1928         isc_boolean_t hasnsec = ISC_FALSE;
1929         isc_boolean_t hasoptbit = ISC_FALSE;
1930         isc_boolean_t nsec3createflag = ISC_FALSE;
1931         isc_result_t result;
1932
1933         dns_rdataset_init(&keyset);
1934         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1935                                      0, 0, &keyset, NULL);
1936         if (result == ISC_R_SUCCESS) {
1937                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1938                 result = dns_rdataset_first(&keyset);
1939                 while (result == ISC_R_SUCCESS) {
1940                         dns_rdataset_current(&keyset, &keyrdata);
1941                         if (dns_zonekey_iszonekey(&keyrdata)) {
1942                                 haszonekey = ISC_TRUE;
1943                                 break;
1944                         }
1945                         result = dns_rdataset_next(&keyset);
1946                 }
1947                 dns_rdataset_disassociate(&keyset);
1948         }
1949         if (!haszonekey) {
1950                 version->secure = dns_db_insecure;
1951                 version->havensec3 = ISC_FALSE;
1952                 return;
1953         }
1954
1955         dns_rdataset_init(&nsecset);
1956         dns_rdataset_init(&signsecset);
1957         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1958                                      0, 0, &nsecset, &signsecset);
1959         if (result == ISC_R_SUCCESS) {
1960                 if (dns_rdataset_isassociated(&signsecset)) {
1961                         hasnsec = ISC_TRUE;
1962                         result = dns_rdataset_first(&nsecset);
1963                         if (result == ISC_R_SUCCESS) {
1964                                 dns_rdataset_current(&nsecset, &rdata);
1965                                 hasoptbit = dns_nsec_typepresent(&rdata,
1966                                                              dns_rdatatype_opt);
1967                         }
1968                         dns_rdataset_disassociate(&signsecset);
1969                 }
1970                 dns_rdataset_disassociate(&nsecset);
1971         }
1972
1973         setnsec3parameters(db, version, &nsec3createflag);
1974
1975         /*
1976          * Do we have a valid NSEC/NSEC3 chain?
1977          */
1978         if (version->havensec3 || (hasnsec && !hasoptbit))
1979                 version->secure = dns_db_secure;
1980         /*
1981          * Do we have a NSEC/NSEC3 chain under creation?
1982          */
1983         else if (hasoptbit || nsec3createflag)
1984                 version->secure = dns_db_partial;
1985         else
1986                 version->secure = dns_db_insecure;
1987 }
1988
1989 /*%<
1990  * Walk the origin node looking for NSEC3PARAM records.
1991  * Cache the nsec3 parameters.
      *
      * On return version->havensec3 tells whether an acceptable NSEC3PARAM
      * was found; if so its hash, salt, salt length, iterations and flags
      * have been copied into 'version'.  Unless RFC5155_STRICT is defined,
      * '*nsec3createflag' is set to ISC_TRUE when a record with a supported
      * or test hash has DNS_NSEC3FLAG_CREATE set; it is never cleared here.
      *
      * The tree lock and the origin node's lock are held for reading while
      * the records are examined.
1992  */
1993 static void
1994 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1995                    isc_boolean_t *nsec3createflag)
1996 {
1997         dns_rbtnode_t *node;
1998         dns_rdata_nsec3param_t nsec3param;
1999         dns_rdata_t rdata = DNS_RDATA_INIT;
2000         isc_region_t region;
2001         isc_result_t result;
2002         rdatasetheader_t *header, *header_next;
2003         unsigned char *raw;             /* RDATASLAB */
2004         unsigned int count, length;
2005         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2006
2007         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2008         version->havensec3 = ISC_FALSE;
2009         node = rbtdb->origin_node;
2010         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2011                   isc_rwlocktype_read);
2012         for (header = node->data;
2013              header != NULL;
2014              header = header_next) {
2015                 header_next = header->next;
                     /*
                      * Follow the 'down' chain to the first header whose
                      * serial is not newer than 'version' and which is not
                      * marked IGNORE.  A NONEXISTENT header means there is
                      * nothing to look at for this type.
                      */
2016                 do {
2017                         if (header->serial <= version->serial &&
2018                             !IGNORE(header)) {
2019                                 if (NONEXISTENT(header))
2020                                         header = NULL;
2021                                 break;
2022                         } else
2023                                 header = header->down;
2024                 } while (header != NULL);
2025
2026                 if (header != NULL &&
2027                     header->type == dns_rdatatype_nsec3param) {
2028                         /*
2029                          * Find A NSEC3PARAM with a supported algorithm.
2030                          */
                             /*
                              * NOTE(review): layout as read by this code: the
                              * data following the header starts with a
                              * 2-byte big-endian record count.  Each record
                              * then has a 2-byte big-endian length followed
                              * by the rdata.  With DNS_RDATASET_FIXED,
                              * count * 4 extra bytes are skipped after the
                              * count and each record has a 4-byte prefix --
                              * confirm against the rdataslab format.
                              */
2031                         raw = (unsigned char *)header + sizeof(*header);
2032                         count = raw[0] * 256 + raw[1]; /* count */
2033 #if DNS_RDATASET_FIXED
2034                         raw += count * 4 + 2;
2035 #else
2036                         raw += 2;
2037 #endif
2038                         while (count-- > 0U) {
2039                                 length = raw[0] * 256 + raw[1];
2040 #if DNS_RDATASET_FIXED
2041                                 raw += 4;
2042 #else
2043                                 raw += 2;
2044 #endif
2045                                 region.base = raw;
2046                                 region.length = length;
2047                                 raw += length;
2048                                 dns_rdata_fromregion(&rdata,
2049                                                      rbtdb->common.rdclass,
2050                                                      dns_rdatatype_nsec3param,
2051                                                      &region);
2052                                 result = dns_rdata_tostruct(&rdata,
2053                                                             &nsec3param,
2054                                                             NULL);
2055                                 INSIST(result == ISC_R_SUCCESS);
                                     /* Make 'rdata' reusable for the next record. */
2056                                 dns_rdata_reset(&rdata);
2057
                                     /*
                                      * Skip hashes that are neither supported
                                      * nor the "unknown" test algorithm.
                                      */
2058                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2059                                     !dns_nsec3_supportedhash(nsec3param.hash))
2060                                         continue;
2061
2062 #ifdef RFC5155_STRICT
2063                                 if (nsec3param.flags != 0)
2064                                         continue;
2065 #else
2066                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2067                                     != 0)
2068                                         *nsec3createflag = ISC_TRUE;
                                     /* Only the opt-out flag may be set. */
2069                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2070                                     != 0)
2071                                         continue;
2072 #endif
2073
2074                                 memcpy(version->salt, nsec3param.salt,
2075                                        nsec3param.salt_length);
2076                                 version->hash = nsec3param.hash;
2077                                 version->salt_length = nsec3param.salt_length;
2078                                 version->iterations = nsec3param.iterations;
2079                                 version->flags = nsec3param.flags;
2080                                 version->havensec3 = ISC_TRUE;
2081                                 /*
2082                                  * Look for a better algorithm than the
2083                                  * unknown test algorithm.
2084                                  */
2085                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2086                                         goto unlock;
2087                         }
2088                 }
2089         }
2090  unlock:
2091         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2092                     isc_rwlocktype_read);
2093         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2094 }
2095
2096 static void
2097 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2098         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2099         rbtdb_version_t *version, *cleanup_version, *least_greater;
2100         isc_boolean_t rollback = ISC_FALSE;
2101         rbtdb_changedlist_t cleanup_list;
2102         rdatasetheaderlist_t resigned_list;
2103         rbtdb_changed_t *changed, *next_changed;
2104         rbtdb_serial_t serial, least_serial;
2105         dns_rbtnode_t *rbtnode;
2106         unsigned int refs;
2107         rdatasetheader_t *header;
2108         isc_boolean_t writer;
2109
2110         REQUIRE(VALID_RBTDB(rbtdb));
2111         version = (rbtdb_version_t *)*versionp;
2112
2113         cleanup_version = NULL;
2114         ISC_LIST_INIT(cleanup_list);
2115         ISC_LIST_INIT(resigned_list);
2116
2117         isc_refcount_decrement(&version->references, &refs);
2118         if (refs > 0) {         /* typical and easy case first */
2119                 if (commit) {
2120                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2121                         INSIST(!version->writer);
2122                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2123                 }
2124                 goto end;
2125         }
2126
2127         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2128         serial = version->serial;
2129         writer = version->writer;
2130         if (version->writer) {
2131                 if (commit) {
2132                         unsigned cur_ref;
2133                         rbtdb_version_t *cur_version;
2134
2135                         INSIST(version->commit_ok);
2136                         INSIST(version == rbtdb->future_version);
2137                         /*
2138                          * The current version is going to be replaced.
2139                          * Release the (likely last) reference to it from the
2140                          * DB itself and unlink it from the open list.
2141                          */
2142                         cur_version = rbtdb->current_version;
2143                         isc_refcount_decrement(&cur_version->references,
2144                                                &cur_ref);
2145                         if (cur_ref == 0) {
2146                                 if (cur_version->serial == rbtdb->least_serial)
2147                                         INSIST(EMPTY(cur_version->changed_list));
2148                                 UNLINK(rbtdb->open_versions,
2149                                        cur_version, link);
2150                         }
2151                         if (EMPTY(rbtdb->open_versions)) {
2152                                 /*
2153                                  * We're going to become the least open
2154                                  * version.
2155                                  */
2156                                 make_least_version(rbtdb, version,
2157                                                    &cleanup_list);
2158                         } else {
2159                                 /*
2160                                  * Some other open version is the
2161                                  * least version.  We can't cleanup
2162                                  * records that were changed in this
2163                                  * version because the older versions
2164                                  * may still be in use by an open
2165                                  * version.
2166                                  *
2167                                  * We can, however, discard the
2168                                  * changed records for things that
2169                                  * we've added that didn't exist in
2170                                  * prior versions.
2171                                  */
2172                                 cleanup_nondirty(version, &cleanup_list);
2173                         }
2174                         /*
2175                          * If the (soon to be former) current version
2176                          * isn't being used by anyone, we can clean
2177                          * it up.
2178                          */
2179                         if (cur_ref == 0) {
2180                                 cleanup_version = cur_version;
2181                                 APPENDLIST(version->changed_list,
2182                                            cleanup_version->changed_list,
2183                                            link);
2184                         }
2185                         /*
2186                          * Become the current version.
2187                          */
2188                         version->writer = ISC_FALSE;
2189                         rbtdb->current_version = version;
2190                         rbtdb->current_serial = version->serial;
2191                         rbtdb->future_version = NULL;
2192
2193                         /*
2194                          * Keep the current version in the open list, and
2195                          * gain a reference for the DB itself (see the DB
2196                          * creation function below).  This must be the only
2197                          * case where we need to increment the counter from
2198                          * zero and need to use isc_refcount_increment0().
2199                          */
2200                         isc_refcount_increment0(&version->references,
2201                                                 &cur_ref);
2202                         INSIST(cur_ref == 1);
2203                         PREPEND(rbtdb->open_versions,
2204                                 rbtdb->current_version, link);
2205                         resigned_list = version->resigned_list;
2206                         ISC_LIST_INIT(version->resigned_list);
2207                 } else {
2208                         /*
2209                          * We're rolling back this transaction.
2210                          */
2211                         cleanup_list = version->changed_list;
2212                         ISC_LIST_INIT(version->changed_list);
2213                         resigned_list = version->resigned_list;
2214                         ISC_LIST_INIT(version->resigned_list);
2215                         rollback = ISC_TRUE;
2216                         cleanup_version = version;
2217                         rbtdb->future_version = NULL;
2218                 }
2219         } else {
2220                 if (version != rbtdb->current_version) {
2221                         /*
2222                          * There are no external or internal references
2223                          * to this version and it can be cleaned up.
2224                          */
2225                         cleanup_version = version;
2226
2227                         /*
2228                          * Find the version with the least serial
2229                          * number greater than ours.
2230                          */
2231                         least_greater = PREV(version, link);
2232                         if (least_greater == NULL)
2233                                 least_greater = rbtdb->current_version;
2234
2235                         INSIST(version->serial < least_greater->serial);
2236                         /*
2237                          * Is this the least open version?
2238                          */
2239                         if (version->serial == rbtdb->least_serial) {
2240                                 /*
2241                                  * Yes.  Install the new least open
2242                                  * version.
2243                                  */
2244                                 make_least_version(rbtdb,
2245                                                    least_greater,
2246                                                    &cleanup_list);
2247                         } else {
2248                                 /*
2249                                  * Add any unexecuted cleanups to
2250                                  * those of the least greater version.
2251                                  */
2252                                 APPENDLIST(least_greater->changed_list,
2253                                            version->changed_list,
2254                                            link);
2255                         }
2256                 } else if (version->serial == rbtdb->least_serial)
2257                         INSIST(EMPTY(version->changed_list));
2258                 UNLINK(rbtdb->open_versions, version, link);
2259         }
2260         least_serial = rbtdb->least_serial;
2261         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2262
2263         /*
2264          * Update the zone's secure status.
2265          */
2266         if (writer && commit && !IS_CACHE(rbtdb))
2267                 iszonesecure(db, version, rbtdb->origin_node);
2268
2269         if (cleanup_version != NULL) {
2270                 INSIST(EMPTY(cleanup_version->changed_list));
2271                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2272                             sizeof(*cleanup_version));
2273         }
2274
2275         /*
2276          * Commit/rollback re-signed headers.
2277          */
2278         for (header = HEAD(resigned_list);
2279              header != NULL;
2280              header = HEAD(resigned_list)) {
2281                 nodelock_t *lock;
2282
2283                 ISC_LIST_UNLINK(resigned_list, header, link);
2284
2285                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2286                 NODE_LOCK(lock, isc_rwlocktype_write);
2287                 if (rollback)
2288                         resign_insert(rbtdb, header->node->locknum, header);
2289                 decrement_reference(rbtdb, header->node, least_serial,
2290                                     isc_rwlocktype_write, isc_rwlocktype_none,
2291                                     ISC_FALSE);
2292                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2293         }
2294
2295         if (!EMPTY(cleanup_list)) {
2296                 /*
2297                  * We acquire a tree write lock here in order to make sure
2298                  * that stale nodes will be removed in decrement_reference().
2299                  * If we didn't have the lock, those nodes could miss the
2300                  * chance to be removed until the server stops.  The write lock
2301                  * is expensive, but this event should be rare enough to justify
2302                  * the cost.
2303                  */
2304                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2305                 for (changed = HEAD(cleanup_list);
2306                      changed != NULL;
2307                      changed = next_changed) {
2308                         nodelock_t *lock;
2309
2310                         next_changed = NEXT(changed, link);
2311                         rbtnode = changed->node;
2312                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2313
2314                         NODE_LOCK(lock, isc_rwlocktype_write);
2315                         /*
2316                          * This is a good opportunity to purge any dead nodes,
2317                          * so use it.
2318                          */
2319                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2320
2321                         if (rollback)
2322                                 rollback_node(rbtnode, serial);
2323                         decrement_reference(rbtdb, rbtnode, least_serial,
2324                                             isc_rwlocktype_write,
2325                                             isc_rwlocktype_write, ISC_FALSE);
2326
2327                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2328
2329                         isc_mem_put(rbtdb->common.mctx, changed,
2330                                     sizeof(*changed));
2331                 }
2332                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2333         }
2334
2335  end:
2336         *versionp = NULL;
2337 }
2338
2339 /*
2340  * Add the necessary magic for the wildcard name 'name'
2341  * to be found in 'rbtdb'.
2342  *
2343  * In order for wildcard matching to work correctly in
2344  * zone_find(), we must ensure that a node for the wildcarding
2345  * level exists in the database, and has its 'find_callback'
2346  * and 'wild' bits set.
2347  *
2348  * E.g. if the wildcard name is "*.sub.example." then we
2349  * must ensure that "sub.example." exists and is marked as
2350  * a wildcard level.
2351  */
2352 static isc_result_t
2353 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2354         isc_result_t result;
2355         dns_name_t foundname;
2356         dns_offsets_t offsets;
2357         unsigned int n;
2358         dns_rbtnode_t *node = NULL;
2359
2360         dns_name_init(&foundname, offsets);
2361         n = dns_name_countlabels(name);
2362         INSIST(n >= 2);
2363         n--;
2364         dns_name_getlabelsequence(name, 1, n, &foundname);
2365         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2366         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2367                 return (result);
2368         node->nsec3 = 0;
2369         node->find_callback = 1;
2370         node->wild = 1;
2371         return (ISC_R_SUCCESS);
2372 }
2373
2374 static isc_result_t
2375 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2376         isc_result_t result;
2377         dns_name_t foundname;
2378         dns_offsets_t offsets;
2379         unsigned int n, l, i;
2380
2381         dns_name_init(&foundname, offsets);
2382         n = dns_name_countlabels(name);
2383         l = dns_name_countlabels(&rbtdb->common.origin);
2384         i = l + 1;
2385         while (i < n) {
2386                 dns_rbtnode_t *node = NULL;     /* dummy */
2387                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2388                 if (dns_name_iswildcard(&foundname)) {
2389                         result = add_wildcard_magic(rbtdb, &foundname);
2390                         if (result != ISC_R_SUCCESS)
2391                                 return (result);
2392                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2393                                                  &node);
2394                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2395                                 return (result);
2396                         node->nsec3 = 0;
2397                 }
2398                 i++;
2399         }
2400         return (ISC_R_SUCCESS);
2401 }
2402
2403 static isc_result_t
2404 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2405          dns_dbnode_t **nodep)
2406 {
2407         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2408         dns_rbtnode_t *node = NULL;
2409         dns_name_t nodename;
2410         isc_result_t result;
2411         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2412
2413         REQUIRE(VALID_RBTDB(rbtdb));
2414
2415         dns_name_init(&nodename, NULL);
2416         RWLOCK(&rbtdb->tree_lock, locktype);
2417         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2418                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2419         if (result != ISC_R_SUCCESS) {
2420                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2421                 if (!create) {
2422                         if (result == DNS_R_PARTIALMATCH)
2423                                 result = ISC_R_NOTFOUND;
2424                         return (result);
2425                 }
2426                 /*
2427                  * It would be nice to try to upgrade the lock instead of
2428                  * unlocking then relocking.
2429                  */
2430                 locktype = isc_rwlocktype_write;
2431                 RWLOCK(&rbtdb->tree_lock, locktype);
2432                 node = NULL;
2433                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2434                 if (result == ISC_R_SUCCESS) {
2435                         dns_rbt_namefromnode(node, &nodename);
2436 #ifdef DNS_RBT_USEHASH
2437                         node->locknum = node->hashval % rbtdb->node_lock_count;
2438 #else
2439                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2440                                 rbtdb->node_lock_count;
2441 #endif
2442                         node->nsec3 = 0;
2443                         add_empty_wildcards(rbtdb, name);
2444
2445                         if (dns_name_iswildcard(name)) {
2446                                 result = add_wildcard_magic(rbtdb, name);
2447                                 if (result != ISC_R_SUCCESS) {
2448                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2449                                         return (result);
2450                                 }
2451                         }
2452                 } else if (result != ISC_R_EXISTS) {
2453                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2454                         return (result);
2455                 }
2456         }
2457         reactivate_node(rbtdb, node, locktype);
2458         RWUNLOCK(&rbtdb->tree_lock, locktype);
2459
2460         *nodep = (dns_dbnode_t *)node;
2461
2462         return (ISC_R_SUCCESS);
2463 }
2464
2465 static isc_result_t
2466 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2467               dns_dbnode_t **nodep)
2468 {
2469         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2470         dns_rbtnode_t *node = NULL;
2471         dns_name_t nodename;
2472         isc_result_t result;
2473         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2474
2475         REQUIRE(VALID_RBTDB(rbtdb));
2476
2477         dns_name_init(&nodename, NULL);
2478         RWLOCK(&rbtdb->tree_lock, locktype);
2479         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2480                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2481         if (result != ISC_R_SUCCESS) {
2482                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2483                 if (!create) {
2484                         if (result == DNS_R_PARTIALMATCH)
2485                                 result = ISC_R_NOTFOUND;
2486                         return (result);
2487                 }
2488                 /*
2489                  * It would be nice to try to upgrade the lock instead of
2490                  * unlocking then relocking.
2491                  */
2492                 locktype = isc_rwlocktype_write;
2493                 RWLOCK(&rbtdb->tree_lock, locktype);
2494                 node = NULL;
2495                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2496                 if (result == ISC_R_SUCCESS) {
2497                         dns_rbt_namefromnode(node, &nodename);
2498 #ifdef DNS_RBT_USEHASH
2499                         node->locknum = node->hashval % rbtdb->node_lock_count;
2500 #else
2501                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2502                                 rbtdb->node_lock_count;
2503 #endif
2504                         node->nsec3 = 1U;
2505                 } else if (result != ISC_R_EXISTS) {
2506                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2507                         return (result);
2508                 }
2509         } else
2510                 INSIST(node->nsec3);
2511         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2512         new_reference(rbtdb, node);
2513         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2514         RWUNLOCK(&rbtdb->tree_lock, locktype);
2515
2516         *nodep = (dns_dbnode_t *)node;
2517
2518         return (ISC_R_SUCCESS);
2519 }
2520
2521 static isc_result_t
2522 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2523         rbtdb_search_t *search = arg;
2524         rdatasetheader_t *header, *header_next;
2525         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2526         rdatasetheader_t *found;
2527         isc_result_t result;
2528         dns_rbtnode_t *onode;
2529
2530         /*
2531          * We only want to remember the topmost zone cut, since it's the one
2532          * that counts, so we'll just continue if we've already found a
2533          * zonecut.
2534          */
2535         if (search->zonecut != NULL)
2536                 return (DNS_R_CONTINUE);
2537
2538         found = NULL;
2539         result = DNS_R_CONTINUE;
2540         onode = search->rbtdb->origin_node;
2541
2542         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2543                   isc_rwlocktype_read);
2544
2545         /*
2546          * Look for an NS or DNAME rdataset active in our version.
2547          */
2548         ns_header = NULL;
2549         dname_header = NULL;
2550         sigdname_header = NULL;
2551         for (header = node->data; header != NULL; header = header_next) {
2552                 header_next = header->next;
2553                 if (header->type == dns_rdatatype_ns ||
2554                     header->type == dns_rdatatype_dname ||
2555                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2556                         do {
2557                                 if (header->serial <= search->serial &&
2558                                     !IGNORE(header)) {
2559                                         /*
2560                                          * Is this a "this rdataset doesn't
2561                                          * exist" record?
2562                                          */
2563                                         if (NONEXISTENT(header))
2564                                                 header = NULL;
2565                                         break;
2566                                 } else
2567                                         header = header->down;
2568                         } while (header != NULL);
2569                         if (header != NULL) {
2570                                 if (header->type == dns_rdatatype_dname)
2571                                         dname_header = header;
2572                                 else if (header->type ==
2573                                            RBTDB_RDATATYPE_SIGDNAME)
2574                                         sigdname_header = header;
2575                                 else if (node != onode ||
2576                                          IS_STUB(search->rbtdb)) {
2577                                         /*
2578                                          * We've found an NS rdataset that
2579                                          * isn't at the origin node.  We check
2580                                          * that they're not at the origin node,
2581                                          * because otherwise we'd erroneously
2582                                          * treat the zone top as if it were
2583                                          * a delegation.
2584                                          */
2585                                         ns_header = header;
2586                                 }
2587                         }
2588                 }
2589         }
2590
2591         /*
2592          * Did we find anything?
2593          */
2594         if (dname_header != NULL) {
2595                 /*
2596                  * Note that DNAME has precedence over NS if both exist.
2597                  */
2598                 found = dname_header;
2599                 search->zonecut_sigrdataset = sigdname_header;
2600         } else if (ns_header != NULL) {
2601                 found = ns_header;
2602                 search->zonecut_sigrdataset = NULL;
2603         }
2604
2605         if (found != NULL) {
2606                 /*
2607                  * We increment the reference count on node to ensure that
2608                  * search->zonecut_rdataset will still be valid later.
2609                  */
2610                 new_reference(search->rbtdb, node);
2611                 search->zonecut = node;
2612                 search->zonecut_rdataset = found;
2613                 search->need_cleanup = ISC_TRUE;
2614                 /*
2615                  * Since we've found a zonecut, anything beneath it is
2616                  * glue and is not subject to wildcard matching, so we
2617                  * may clear search->wild.
2618                  */
2619                 search->wild = ISC_FALSE;
2620                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2621                         /*
2622                          * If the caller does not want to find glue, then
2623                          * this is the best answer and the search should
2624                          * stop now.
2625                          */
2626                         result = DNS_R_PARTIALMATCH;
2627                 } else {
2628                         dns_name_t *zcname;
2629
2630                         /*
2631                          * The search will continue beneath the zone cut.
2632                          * This may or may not be the best match.  In case it
2633                          * is, we need to remember the node name.
2634                          */
2635                         zcname = dns_fixedname_name(&search->zonecut_name);
2636                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2637                                       ISC_R_SUCCESS);
2638                         search->copy_name = ISC_TRUE;
2639                 }
2640         } else {
2641                 /*
2642                  * There is no zonecut at this node which is active in this
2643                  * version.
2644                  *
2645                  * If this is a "wild" node and the caller hasn't disabled
2646                  * wildcard matching, remember that we've seen a wild node
2647                  * in case we need to go searching for wildcard matches
2648                  * later on.
2649                  */
2650                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2651                         search->wild = ISC_TRUE;
2652         }
2653
2654         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2655                     isc_rwlocktype_read);
2656
2657         return (result);
2658 }
2659
2660 static inline void
2661 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2662               rdatasetheader_t *header, isc_stdtime_t now,
2663               dns_rdataset_t *rdataset)
2664 {
2665         unsigned char *raw;     /* RDATASLAB */
2666
2667         /*
2668          * Caller must be holding the node reader lock.
2669          * XXXJT: technically, we need a writer lock, since we'll increment
2670          * the header count below.  However, since the actual counter value
2671          * doesn't matter, we prioritize performance here.  (We may want to
2672          * use atomic increment when available).
2673          */
2674
2675         if (rdataset == NULL)
2676                 return;
2677
2678         new_reference(rbtdb, node);
2679
2680         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2681
2682         rdataset->methods = &rdataset_methods;
2683         rdataset->rdclass = rbtdb->common.rdclass;
2684         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2685         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2686         rdataset->ttl = header->rdh_ttl - now;
2687         rdataset->trust = header->trust;
2688         if (NXDOMAIN(header))
2689                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2690         if (OPTOUT(header))
2691                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2692         rdataset->private1 = rbtdb;
2693         rdataset->private2 = node;
2694         raw = (unsigned char *)header + sizeof(*header);
2695         rdataset->private3 = raw;
2696         rdataset->count = header->count++;
2697         if (rdataset->count == ISC_UINT32_MAX)
2698                 rdataset->count = 0;
2699
2700         /*
2701          * Reset iterator state.
2702          */
2703         rdataset->privateuint4 = 0;
2704         rdataset->private5 = NULL;
2705
2706         /*
2707          * Add noqname proof.
2708          */
2709         rdataset->private6 = header->noqname;
2710         if (rdataset->private6 != NULL)
2711                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2712         rdataset->private7 = header->closest;
2713         if (rdataset->private7 != NULL)
2714                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2715
2716         /*
2717          * Copy out re-signing information.
2718          */
2719         if (RESIGN(header)) {
2720                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2721                 rdataset->resign = header->resign;
2722         } else
2723                 rdataset->resign = 0;
2724 }
2725
2726 static inline isc_result_t
2727 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2728                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2729                  dns_rdataset_t *sigrdataset)
2730 {
2731         isc_result_t result;
2732         dns_name_t *zcname;
2733         rbtdb_rdatatype_t type;
2734         dns_rbtnode_t *node;
2735
2736         /*
2737          * The caller MUST NOT be holding any node locks.
2738          */
2739
2740         node = search->zonecut;
2741         type = search->zonecut_rdataset->type;
2742
2743         /*
2744          * If we have to set foundname, we do it before anything else.
2745          * If we were to set foundname after we had set nodep or bound the
2746          * rdataset, then we'd have to undo that work if dns_name_copy()
2747          * failed.  By setting foundname first, there's nothing to undo if
2748          * we have trouble.
2749          */
2750         if (foundname != NULL && search->copy_name) {
2751                 zcname = dns_fixedname_name(&search->zonecut_name);
2752                 result = dns_name_copy(zcname, foundname, NULL);
2753                 if (result != ISC_R_SUCCESS)
2754                         return (result);
2755         }
2756         if (nodep != NULL) {
2757                 /*
2758                  * Note that we don't have to increment the node's reference
2759                  * count here because we're going to use the reference we
2760                  * already have in the search block.
2761                  */
2762                 *nodep = node;
2763                 search->need_cleanup = ISC_FALSE;
2764         }
2765         if (rdataset != NULL) {
2766                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2767                           isc_rwlocktype_read);
2768                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2769                               search->now, rdataset);
2770                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2771                         bind_rdataset(search->rbtdb, node,
2772                                       search->zonecut_sigrdataset,
2773                                       search->now, sigrdataset);
2774                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2775                             isc_rwlocktype_read);
2776         }
2777
2778         if (type == dns_rdatatype_dname)
2779                 return (DNS_R_DNAME);
2780         return (DNS_R_DELEGATION);
2781 }
2782
2783 static inline isc_boolean_t
2784 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2785            dns_rbtnode_t *node)
2786 {
2787         unsigned char *raw;     /* RDATASLAB */
2788         unsigned int count, size;
2789         dns_name_t ns_name;
2790         isc_boolean_t valid = ISC_FALSE;
2791         dns_offsets_t offsets;
2792         isc_region_t region;
2793         rdatasetheader_t *header;
2794
2795         /*
2796          * No additional locking is required.
2797          */
2798
2799         /*
2800          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2801          * if it occurs at a zone cut, but is not valid below it.
2802          */
2803         if (type == dns_rdatatype_ns) {
2804                 if (node != search->zonecut) {
2805                         return (ISC_FALSE);
2806                 }
2807         } else if (type != dns_rdatatype_a &&
2808                    type != dns_rdatatype_aaaa &&
2809                    type != dns_rdatatype_a6) {
2810                 return (ISC_FALSE);
2811         }
2812
2813         header = search->zonecut_rdataset;
2814         raw = (unsigned char *)header + sizeof(*header);
2815         count = raw[0] * 256 + raw[1];
2816 #if DNS_RDATASET_FIXED
2817         raw += 2 + (4 * count);
2818 #else
2819         raw += 2;
2820 #endif
2821
2822         while (count > 0) {
2823                 count--;
2824                 size = raw[0] * 256 + raw[1];
2825 #if DNS_RDATASET_FIXED
2826                 raw += 4;
2827 #else
2828                 raw += 2;
2829 #endif
2830                 region.base = raw;
2831                 region.length = size;
2832                 raw += size;
2833                 /*
2834                  * XXX Until we have rdata structures, we have no choice but
2835                  * to directly access the rdata format.
2836                  */
2837                 dns_name_init(&ns_name, offsets);
2838                 dns_name_fromregion(&ns_name, &region);
2839                 if (dns_name_compare(&ns_name, name) == 0) {
2840                         valid = ISC_TRUE;
2841                         break;
2842                 }
2843         }
2844
2845         return (valid);
2846 }
2847
2848 static inline isc_boolean_t
2849 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2850             dns_name_t *name)
2851 {
2852         dns_fixedname_t fnext;
2853         dns_fixedname_t forigin;
2854         dns_name_t *next;
2855         dns_name_t *origin;
2856         dns_name_t prefix;
2857         dns_rbtdb_t *rbtdb;
2858         dns_rbtnode_t *node;
2859         isc_result_t result;
2860         isc_boolean_t answer = ISC_FALSE;
2861         rdatasetheader_t *header;
2862
2863         rbtdb = search->rbtdb;
2864
2865         dns_name_init(&prefix, NULL);
2866         dns_fixedname_init(&fnext);
2867         next = dns_fixedname_name(&fnext);
2868         dns_fixedname_init(&forigin);
2869         origin = dns_fixedname_name(&forigin);
2870
2871         result = dns_rbtnodechain_next(chain, NULL, NULL);
2872         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2873                 node = NULL;
2874                 result = dns_rbtnodechain_current(chain, &prefix,
2875                                                   origin, &node);
2876                 if (result != ISC_R_SUCCESS)
2877                         break;
2878                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2879                           isc_rwlocktype_read);
2880                 for (header = node->data;
2881                      header != NULL;
2882                      header = header->next) {
2883                         if (header->serial <= search->serial &&
2884                             !IGNORE(header) && EXISTS(header))
2885                                 break;
2886                 }
2887                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2888                             isc_rwlocktype_read);
2889                 if (header != NULL)
2890                         break;
2891                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2892         }
2893         if (result == ISC_R_SUCCESS)
2894                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2895         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2896                 answer = ISC_TRUE;
2897         return (answer);
2898 }
2899
2900 static inline isc_boolean_t
2901 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2902         dns_fixedname_t fnext;
2903         dns_fixedname_t forigin;
2904         dns_fixedname_t fprev;
2905         dns_name_t *next;
2906         dns_name_t *origin;
2907         dns_name_t *prev;
2908         dns_name_t name;
2909         dns_name_t rname;
2910         dns_name_t tname;
2911         dns_rbtdb_t *rbtdb;
2912         dns_rbtnode_t *node;
2913         dns_rbtnodechain_t chain;
2914         isc_boolean_t check_next = ISC_TRUE;
2915         isc_boolean_t check_prev = ISC_TRUE;
2916         isc_boolean_t answer = ISC_FALSE;
2917         isc_result_t result;
2918         rdatasetheader_t *header;
2919         unsigned int n;
2920
2921         rbtdb = search->rbtdb;
2922
2923         dns_name_init(&name, NULL);
2924         dns_name_init(&tname, NULL);
2925         dns_name_init(&rname, NULL);
2926         dns_fixedname_init(&fnext);
2927         next = dns_fixedname_name(&fnext);
2928         dns_fixedname_init(&fprev);
2929         prev = dns_fixedname_name(&fprev);
2930         dns_fixedname_init(&forigin);
2931         origin = dns_fixedname_name(&forigin);
2932
2933         /*
2934          * Find if qname is at or below a empty node.
2935          * Use our own copy of the chain.
2936          */
2937
2938         chain = search->chain;
2939         do {
2940                 node = NULL;
2941                 result = dns_rbtnodechain_current(&chain, &name,
2942                                                   origin, &node);
2943                 if (result != ISC_R_SUCCESS)
2944                         break;
2945                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2946                           isc_rwlocktype_read);
2947                 for (header = node->data;
2948                      header != NULL;
2949                      header = header->next) {
2950                         if (header->serial <= search->serial &&
2951                             !IGNORE(header) && EXISTS(header))
2952                                 break;
2953                 }
2954                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2955                             isc_rwlocktype_read);
2956                 if (header != NULL)
2957                         break;
2958                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2959         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
2960         if (result == ISC_R_SUCCESS)
2961                 result = dns_name_concatenate(&name, origin, prev, NULL);
2962         if (result != ISC_R_SUCCESS)
2963                 check_prev = ISC_FALSE;
2964
2965         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2966         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2967                 node = NULL;
2968                 result = dns_rbtnodechain_current(&chain, &name,
2969                                                   origin, &node);
2970                 if (result != ISC_R_SUCCESS)
2971                         break;
2972                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2973                           isc_rwlocktype_read);
2974                 for (header = node->data;
2975                      header != NULL;
2976                      header = header->next) {
2977                         if (header->serial <= search->serial &&
2978                             !IGNORE(header) && EXISTS(header))
2979                                 break;
2980                 }
2981                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2982                             isc_rwlocktype_read);
2983                 if (header != NULL)
2984                         break;
2985                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2986         }
2987         if (result == ISC_R_SUCCESS)
2988                 result = dns_name_concatenate(&name, origin, next, NULL);
2989         if (result != ISC_R_SUCCESS)
2990                 check_next = ISC_FALSE;
2991
2992         dns_name_clone(qname, &rname);
2993
2994         /*
2995          * Remove the wildcard label to find the terminal name.
2996          */
2997         n = dns_name_countlabels(wname);
2998         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2999
3000         do {
3001                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3002                     (check_next && dns_name_issubdomain(next, &rname))) {
3003                         answer = ISC_TRUE;
3004                         break;
3005                 }
3006                 /*
3007                  * Remove the left hand label.
3008                  */
3009                 n = dns_name_countlabels(&rname);
3010                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3011         } while (!dns_name_equal(&rname, &tname));
3012         return (answer);
3013 }
3014
3015 static inline isc_result_t
3016 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3017               dns_name_t *qname)
3018 {
3019         unsigned int i, j;
3020         dns_rbtnode_t *node, *level_node, *wnode;
3021         rdatasetheader_t *header;
3022         isc_result_t result = ISC_R_NOTFOUND;
3023         dns_name_t name;
3024         dns_name_t *wname;
3025         dns_fixedname_t fwname;
3026         dns_rbtdb_t *rbtdb;
3027         isc_boolean_t done, wild, active;
3028         dns_rbtnodechain_t wchain;
3029
3030         /*
3031          * Caller must be holding the tree lock and MUST NOT be holding
3032          * any node locks.
3033          */
3034
3035         /*
3036          * Examine each ancestor level.  If the level's wild bit
3037          * is set, then construct the corresponding wildcard name and
3038          * search for it.  If the wildcard node exists, and is active in
3039          * this version, we're done.  If not, then we next check to see
3040          * if the ancestor is active in this version.  If so, then there
3041          * can be no possible wildcard match and again we're done.  If not,
3042          * continue the search.
3043          */
3044
3045         rbtdb = search->rbtdb;
3046         i = search->chain.level_matches;
3047         done = ISC_FALSE;
3048         node = *nodep;
3049         do {
3050                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3051                           isc_rwlocktype_read);
3052
3053                 /*
3054                  * First we try to figure out if this node is active in
3055                  * the search's version.  We do this now, even though we
3056                  * may not need the information, because it simplifies the
3057                  * locking and code flow.
3058                  */
3059                 for (header = node->data;
3060                      header != NULL;
3061                      header = header->next) {
3062                         if (header->serial <= search->serial &&
3063                             !IGNORE(header) && EXISTS(header))
3064                                 break;
3065                 }
3066                 if (header != NULL)
3067                         active = ISC_TRUE;
3068                 else
3069                         active = ISC_FALSE;
3070
3071                 if (node->wild)
3072                         wild = ISC_TRUE;
3073                 else
3074                         wild = ISC_FALSE;
3075
3076                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3077                             isc_rwlocktype_read);
3078
3079                 if (wild) {
3080                         /*
3081                          * Construct the wildcard name for this level.
3082                          */
3083                         dns_name_init(&name, NULL);
3084                         dns_rbt_namefromnode(node, &name);
3085                         dns_fixedname_init(&fwname);
3086                         wname = dns_fixedname_name(&fwname);
3087                         result = dns_name_concatenate(dns_wildcardname, &name,
3088                                                       wname, NULL);
3089                         j = i;
3090                         while (result == ISC_R_SUCCESS && j != 0) {
3091                                 j--;
3092                                 level_node = search->chain.levels[j];
3093                                 dns_name_init(&name, NULL);
3094                                 dns_rbt_namefromnode(level_node, &name);
3095                                 result = dns_name_concatenate(wname,
3096                                                               &name,
3097                                                               wname,
3098                                                               NULL);
3099                         }
3100                         if (result != ISC_R_SUCCESS)
3101                                 break;
3102
3103                         wnode = NULL;
3104                         dns_rbtnodechain_init(&wchain, NULL);
3105                         result = dns_rbt_findnode(rbtdb->tree, wname,
3106                                                   NULL, &wnode, &wchain,
3107                                                   DNS_RBTFIND_EMPTYDATA,
3108                                                   NULL, NULL);
3109                         if (result == ISC_R_SUCCESS) {
3110                                 nodelock_t *lock;
3111
3112                                 /*
3113                                  * We have found the wildcard node.  If it
3114                                  * is active in the search's version, we're
3115                                  * done.
3116                                  */
3117                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3118                                 NODE_LOCK(lock, isc_rwlocktype_read);
3119                                 for (header = wnode->data;
3120                                      header != NULL;
3121                                      header = header->next) {
3122                                         if (header->serial <= search->serial &&
3123                                             !IGNORE(header) && EXISTS(header))
3124                                                 break;
3125                                 }
3126                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3127                                 if (header != NULL ||
3128                                     activeempty(search, &wchain, wname)) {
3129                                         if (activeemtpynode(search, qname,
3130                                                             wname)) {
3131                                                 return (ISC_R_NOTFOUND);
3132                                         }
3133                                         /*
3134                                          * The wildcard node is active!
3135                                          *
3136                                          * Note: result is still ISC_R_SUCCESS
3137                                          * so we don't have to set it.
3138                                          */
3139                                         *nodep = wnode;
3140                                         break;
3141                                 }
3142                         } else if (result != ISC_R_NOTFOUND &&
3143                                    result != DNS_R_PARTIALMATCH) {
3144                                 /*
3145                                  * An error has occurred.  Bail out.
3146                                  */
3147                                 break;
3148                         }
3149                 }
3150
3151                 if (active) {
3152                         /*
3153                          * The level node is active.  Any wildcarding
3154                          * present at higher levels has no
3155                          * effect and we're done.
3156                          */
3157                         result = ISC_R_NOTFOUND;
3158                         break;
3159                 }
3160
3161                 if (i > 0) {
3162                         i--;
3163                         node = search->chain.levels[i];
3164                 } else
3165                         done = ISC_TRUE;
3166         } while (!done);
3167
3168         return (result);
3169 }
3170
3171 static isc_boolean_t
3172 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3173 {
3174         dns_rdata_t rdata = DNS_RDATA_INIT;
3175         dns_rdata_nsec3_t nsec3;
3176         unsigned char *raw;                     /* RDATASLAB */
3177         unsigned int rdlen, count;
3178         isc_region_t region;
3179         isc_result_t result;
3180
3181         REQUIRE(header->type == dns_rdatatype_nsec3);
3182
3183         raw = (unsigned char *)header + sizeof(*header);
3184         count = raw[0] * 256 + raw[1]; /* count */
3185 #if DNS_RDATASET_FIXED
3186         raw += count * 4 + 2;
3187 #else
3188         raw += 2;
3189 #endif
3190         while (count-- > 0) {
3191                 rdlen = raw[0] * 256 + raw[1];
3192 #if DNS_RDATASET_FIXED
3193                 raw += 4;
3194 #else
3195                 raw += 2;
3196 #endif
3197                 region.base = raw;
3198                 region.length = rdlen;
3199                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3200                                      dns_rdatatype_nsec3, &region);
3201                 raw += rdlen;
3202                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3203                 INSIST(result == ISC_R_SUCCESS);
3204                 if (nsec3.hash == search->rbtversion->hash &&
3205                     nsec3.iterations == search->rbtversion->iterations &&
3206                     nsec3.salt_length == search->rbtversion->salt_length &&
3207                     memcmp(nsec3.salt, search->rbtversion->salt,
3208                            nsec3.salt_length) == 0)
3209                         return (ISC_TRUE);
3210                 dns_rdata_reset(&rdata);
3211         }
3212         return (ISC_FALSE);
3213 }
3214
3215 static inline isc_result_t
3216 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3217                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3218                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3219                   dns_db_secure_t secure)
3220 {
3221         dns_rbtnode_t *node;
3222         rdatasetheader_t *header, *header_next, *found, *foundsig;
3223         isc_boolean_t empty_node;
3224         isc_result_t result;
3225         dns_fixedname_t fname, forigin;
3226         dns_name_t *name, *origin;
3227         dns_rdatatype_t type;
3228         rbtdb_rdatatype_t sigtype;
3229         isc_boolean_t wraps;
3230         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3231
3232         if (tree == search->rbtdb->nsec3) {
3233                 type = dns_rdatatype_nsec3;
3234                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3235                 wraps = ISC_TRUE;
3236         } else {
3237                 type = dns_rdatatype_nsec;
3238                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3239                 wraps = ISC_FALSE;
3240         }
3241
3242  again:
3243         do {
3244                 node = NULL;
3245                 dns_fixedname_init(&fname);
3246                 name = dns_fixedname_name(&fname);
3247                 dns_fixedname_init(&forigin);
3248                 origin = dns_fixedname_name(&forigin);
3249                 result = dns_rbtnodechain_current(&search->chain, name,
3250                                                   origin, &node);
3251                 if (result != ISC_R_SUCCESS)
3252                         return (result);
3253                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3254                           isc_rwlocktype_read);
3255                 found = NULL;
3256                 foundsig = NULL;
3257                 empty_node = ISC_TRUE;
3258                 for (header = node->data;
3259                      header != NULL;
3260                      header = header_next) {
3261                         header_next = header->next;
3262                         /*
3263                          * Look for an active, extant NSEC or RRSIG NSEC.
3264                          */
3265                         do {
3266                                 if (header->serial <= search->serial &&
3267                                     !IGNORE(header)) {
3268                                         /*
3269                                          * Is this a "this rdataset doesn't
3270                                          * exist" record?
3271                                          */
3272                                         if (NONEXISTENT(header))
3273                                                 header = NULL;
3274                                         break;
3275                                 } else
3276                                         header = header->down;
3277                         } while (header != NULL);
3278                         if (header != NULL) {
3279                                 /*
3280                                  * We now know that there is at least one
3281                                  * active rdataset at this node.
3282                                  */
3283                                 empty_node = ISC_FALSE;
3284                                 if (header->type == type) {
3285                                         found = header;
3286                                         if (foundsig != NULL)
3287                                                 break;
3288                                 } else if (header->type == sigtype) {
3289                                         foundsig = header;
3290                                         if (found != NULL)
3291                                                 break;
3292                                 }
3293                         }
3294                 }
3295                 if (!empty_node) {
3296                         if (found != NULL && search->rbtversion->havensec3 &&
3297                             found->type == dns_rdatatype_nsec3 &&
3298                             !matchparams(found, search)) {
3299                                 empty_node = ISC_TRUE;
3300                                 found = NULL;
3301                                 foundsig = NULL;
3302                                 result = dns_rbtnodechain_prev(&search->chain,
3303                                                                NULL, NULL);
3304                         } else if (found != NULL &&
3305                                    (foundsig != NULL || !need_sig))
3306                         {
3307                                 /*
3308                                  * We've found the right NSEC/NSEC3 record.
3309                                  *
3310                                  * Note: for this to really be the right
3311                                  * NSEC record, it's essential that the NSEC
3312                                  * records of any nodes obscured by a zone
3313                                  * cut have been removed; we assume this is
3314                                  * the case.
3315                                  */
3316                                 result = dns_name_concatenate(name, origin,
3317                                                               foundname, NULL);
3318                                 if (result == ISC_R_SUCCESS) {
3319                                         if (nodep != NULL) {
3320                                                 new_reference(search->rbtdb,
3321                                                               node);
3322                                                 *nodep = node;
3323                                         }
3324                                         bind_rdataset(search->rbtdb, node,
3325                                                       found, search->now,
3326                                                       rdataset);
3327                                         if (foundsig != NULL)
3328                                                 bind_rdataset(search->rbtdb,
3329                                                               node,
3330                                                               foundsig,
3331                                                               search->now,
3332                                                               sigrdataset);
3333                                 }
3334                         } else if (found == NULL && foundsig == NULL) {
3335                                 /*
3336                                  * This node is active, but has no NSEC or
3337                                  * RRSIG NSEC.  That means it's glue or
3338                                  * other obscured zone data that isn't
3339                                  * relevant for our search.  Treat the
3340                                  * node as if it were empty and keep looking.
3341                                  */
3342                                 empty_node = ISC_TRUE;
3343                                 result = dns_rbtnodechain_prev(&search->chain,
3344                                                                NULL, NULL);
3345                         } else {
3346                                 /*
3347                                  * We found an active node, but either the
3348                                  * NSEC or the RRSIG NSEC is missing.  This
3349                                  * shouldn't happen.
3350                                  */
3351                                 result = DNS_R_BADDB;
3352                         }
3353                 } else {
3354                         /*
3355                          * This node isn't active.  We've got to keep
3356                          * looking.
3357                          */
3358                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3359                                                        NULL);
3360                 }
3361                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3362                             isc_rwlocktype_read);
3363         } while (empty_node && result == ISC_R_SUCCESS);
3364
3365         if (result == ISC_R_NOMORE && wraps) {
3366                 result = dns_rbtnodechain_last(&search->chain, tree,
3367                                                NULL, NULL);
3368                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3369                         wraps = ISC_FALSE;
3370                         goto again;
3371                 }
3372         }
3373
3374         /*
3375          * If the result is ISC_R_NOMORE, then we got to the beginning of
3376          * the database and didn't find a NSEC record.  This shouldn't
3377          * happen.
3378          */
3379         if (result == ISC_R_NOMORE)
3380                 result = DNS_R_BADDB;
3381
3382         return (result);
3383 }
3384
3385 static isc_result_t
3386 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3387           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3388           dns_dbnode_t **nodep, dns_name_t *foundname,
3389           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3390 {
3391         dns_rbtnode_t *node = NULL;
3392         isc_result_t result;
3393         rbtdb_search_t search;
3394         isc_boolean_t cname_ok = ISC_TRUE;
3395         isc_boolean_t close_version = ISC_FALSE;
3396         isc_boolean_t maybe_zonecut = ISC_FALSE;
3397         isc_boolean_t at_zonecut = ISC_FALSE;
3398         isc_boolean_t wild;
3399         isc_boolean_t empty_node;
3400         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3401         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3402         rbtdb_rdatatype_t sigtype;
3403         isc_boolean_t active;
3404         dns_rbtnodechain_t chain;
3405         nodelock_t *lock;
3406         dns_rbt_t *tree;
3407
3408         search.rbtdb = (dns_rbtdb_t *)db;
3409
3410         REQUIRE(VALID_RBTDB(search.rbtdb));
3411
3412         /*
3413          * We don't care about 'now'.
3414          */
3415         UNUSED(now);
3416
3417         /*
3418          * If the caller didn't supply a version, attach to the current
3419          * version.
3420          */
3421         if (version == NULL) {
3422                 currentversion(db, &version);
3423                 close_version = ISC_TRUE;
3424         }
3425
3426         search.rbtversion = version;
3427         search.serial = search.rbtversion->serial;
3428         search.options = options;
3429         search.copy_name = ISC_FALSE;
3430         search.need_cleanup = ISC_FALSE;
3431         search.wild = ISC_FALSE;
3432         search.zonecut = NULL;
3433         dns_fixedname_init(&search.zonecut_name);
3434         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3435         search.now = 0;
3436
3437         /*
3438          * 'wild' will be true iff. we've matched a wildcard.
3439          */
3440         wild = ISC_FALSE;
3441
3442         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3443
3444         /*
3445          * Search down from the root of the tree.  If, while going down, we
3446          * encounter a callback node, zone_zonecut_callback() will search the
3447          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3448          */
3449         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3450                                                          search.rbtdb->tree;
3451         result = dns_rbt_findnode(tree, name, foundname, &node,
3452                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3453                                   zone_zonecut_callback, &search);
3454
3455         if (result == DNS_R_PARTIALMATCH) {
3456         partial_match:
3457                 if (search.zonecut != NULL) {
3458                     result = setup_delegation(&search, nodep, foundname,
3459                                               rdataset, sigrdataset);
3460                     goto tree_exit;
3461                 }
3462
3463                 if (search.wild) {
3464                         /*
3465                          * At least one of the levels in the search chain
3466                          * potentially has a wildcard.  For each such level,
3467                          * we must see if there's a matching wildcard active
3468                          * in the current version.
3469                          */
3470                         result = find_wildcard(&search, &node, name);
3471                         if (result == ISC_R_SUCCESS) {
3472                                 result = dns_name_copy(name, foundname, NULL);
3473                                 if (result != ISC_R_SUCCESS)
3474                                         goto tree_exit;
3475                                 wild = ISC_TRUE;
3476                                 goto found;
3477                         }
3478                         else if (result != ISC_R_NOTFOUND)
3479                                 goto tree_exit;
3480                 }
3481
3482                 chain = search.chain;
3483                 active = activeempty(&search, &chain, name);
3484
3485                 /*
3486                  * If we're here, then the name does not exist, is not
3487                  * beneath a zonecut, and there's no matching wildcard.
3488                  */
3489                 if ((search.rbtversion->secure == dns_db_secure &&
3490                      !search.rbtversion->havensec3) ||
3491                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3492                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3493                 {
3494                         result = find_closest_nsec(&search, nodep, foundname,
3495                                                    rdataset, sigrdataset, tree,
3496                                                    search.rbtversion->secure);
3497                         if (result == ISC_R_SUCCESS)
3498                                 result = active ? DNS_R_EMPTYNAME :
3499                                                   DNS_R_NXDOMAIN;
3500                 } else
3501                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3502                 goto tree_exit;
3503         } else if (result != ISC_R_SUCCESS)
3504                 goto tree_exit;
3505
3506  found:
3507         /*
3508          * We have found a node whose name is the desired name, or we
3509          * have matched a wildcard.
3510          */
3511
3512         if (search.zonecut != NULL) {
3513                 /*
3514                  * If we're beneath a zone cut, we don't want to look for
3515                  * CNAMEs because they're not legitimate zone glue.
3516                  */
3517                 cname_ok = ISC_FALSE;
3518         } else {
3519                 /*
3520                  * The node may be a zone cut itself.  If it might be one,
3521                  * make sure we check for it later.
3522                  *
3523                  * DS records live above the zone cut in ordinary zone so
3524                  * we want to ignore any referral.
3525                  *
3526                  * Stub zones don't have anything "above" the delgation so
3527                  * we always return a referral.
3528                  */
3529                 if (node->find_callback &&
3530                     ((node != search.rbtdb->origin_node &&
3531                       !dns_rdatatype_atparent(type)) ||
3532                      IS_STUB(search.rbtdb)))
3533                         maybe_zonecut = ISC_TRUE;
3534         }
3535
3536         /*
3537          * Certain DNSSEC types are not subject to CNAME matching
3538          * (RFC4035, section 2.5 and RFC3007).
3539          *
3540          * We don't check for RRSIG, because we don't store RRSIG records
3541          * directly.
3542          */
3543         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3544                 cname_ok = ISC_FALSE;
3545
3546         /*
3547          * We now go looking for rdata...
3548          */
3549
3550         lock = &search.rbtdb->node_locks[node->locknum].lock;
3551         NODE_LOCK(lock, isc_rwlocktype_read);
3552
3553         found = NULL;
3554         foundsig = NULL;
3555         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3556         nsecheader = NULL;
3557         nsecsig = NULL;
3558         cnamesig = NULL;
3559         empty_node = ISC_TRUE;
3560         for (header = node->data; header != NULL; header = header_next) {
3561                 header_next = header->next;
3562                 /*
3563                  * Look for an active, extant rdataset.
3564                  */
3565                 do {
3566                         if (header->serial <= search.serial &&
3567                             !IGNORE(header)) {
3568                                 /*
3569                                  * Is this a "this rdataset doesn't
3570                                  * exist" record?
3571                                  */
3572                                 if (NONEXISTENT(header))
3573                                         header = NULL;
3574                                 break;
3575                         } else
3576                                 header = header->down;
3577                 } while (header != NULL);
3578                 if (header != NULL) {
3579                         /*
3580                          * We now know that there is at least one active
3581                          * rdataset at this node.
3582                          */
3583                         empty_node = ISC_FALSE;
3584
3585                         /*
3586                          * Do special zone cut handling, if requested.
3587                          */
3588                         if (maybe_zonecut &&
3589                             header->type == dns_rdatatype_ns) {
3590                                 /*
3591                                  * We increment the reference count on node to
3592                                  * ensure that search->zonecut_rdataset will
3593                                  * still be valid later.
3594                                  */
3595                                 new_reference(search.rbtdb, node);
3596                                 search.zonecut = node;
3597                                 search.zonecut_rdataset = header;
3598                                 search.zonecut_sigrdataset = NULL;
3599                                 search.need_cleanup = ISC_TRUE;
3600                                 maybe_zonecut = ISC_FALSE;
3601                                 at_zonecut = ISC_TRUE;
3602                                 /*
3603                                  * It is not clear if KEY should still be
3604                                  * allowed at the parent side of the zone
3605                                  * cut or not.  It is needed for RFC3007
3606                                  * validated updates.
3607                                  */
3608                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3609                                     && type != dns_rdatatype_nsec
3610                                     && type != dns_rdatatype_key) {
3611                                         /*
3612                                          * Glue is not OK, but any answer we
3613                                          * could return would be glue.  Return
3614                                          * the delegation.
3615                                          */
3616                                         found = NULL;
3617                                         break;
3618                                 }
3619                                 if (found != NULL && foundsig != NULL)
3620                                         break;
3621                         }
3622
3623
3624                         /*
3625                          * If the NSEC3 record doesn't match the chain
3626                          * we are using behave as if it isn't here.
3627                          */
3628                         if (header->type == dns_rdatatype_nsec3 &&
3629                            !matchparams(header, &search)) {
3630                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3631                                 goto partial_match;
3632                         }
3633                         /*
3634                          * If we found a type we were looking for,
3635                          * remember it.
3636                          */
3637                         if (header->type == type ||
3638                             type == dns_rdatatype_any ||
3639                             (header->type == dns_rdatatype_cname &&
3640                              cname_ok)) {
3641                                 /*
3642                                  * We've found the answer!
3643                                  */
3644                                 found = header;
3645                                 if (header->type == dns_rdatatype_cname &&
3646                                     cname_ok) {
3647                                         /*
3648                                          * We may be finding a CNAME instead
3649                                          * of the desired type.
3650                                          *
3651                                          * If we've already got the CNAME RRSIG,
3652                                          * use it, otherwise change sigtype
3653                                          * so that we find it.
3654                                          */
3655                                         if (cnamesig != NULL)
3656                                                 foundsig = cnamesig;
3657                                         else
3658                                                 sigtype =
3659                                                     RBTDB_RDATATYPE_SIGCNAME;
3660                                 }
3661                                 /*
3662                                  * If we've got all we need, end the search.
3663                                  */
3664                                 if (!maybe_zonecut && foundsig != NULL)
3665                                         break;
3666                         } else if (header->type == sigtype) {
3667                                 /*
3668                                  * We've found the RRSIG rdataset for our
3669                                  * target type.  Remember it.
3670                                  */
3671                                 foundsig = header;
3672                                 /*
3673                                  * If we've got all we need, end the search.
3674                                  */
3675                                 if (!maybe_zonecut && found != NULL)
3676                                         break;
3677                         } else if (header->type == dns_rdatatype_nsec &&
3678                                    !search.rbtversion->havensec3) {
3679                                 /*
3680                                  * Remember a NSEC rdataset even if we're
3681                                  * not specifically looking for it, because
3682                                  * we might need it later.
3683                                  */
3684                                 nsecheader = header;
3685                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3686                                    !search.rbtversion->havensec3) {
3687                                 /*
3688                                  * If we need the NSEC rdataset, we'll also
3689                                  * need its signature.
3690                                  */
3691                                 nsecsig = header;
3692                         } else if (cname_ok &&
3693                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3694                                 /*
3695                                  * If we get a CNAME match, we'll also need
3696                                  * its signature.
3697                                  */
3698                                 cnamesig = header;
3699                         }
3700                 }
3701         }
3702
3703         if (empty_node) {
3704                 /*
3705                  * We have an exact match for the name, but there are no
3706                  * active rdatasets in the desired version.  That means that
3707                  * this node doesn't exist in the desired version, and that
3708                  * we really have a partial match.
3709                  */
3710                 if (!wild) {
3711                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3712                         goto partial_match;
3713                 }
3714         }
3715
3716         /*
3717          * If we didn't find what we were looking for...
3718          */
3719         if (found == NULL) {
3720                 if (search.zonecut != NULL) {
3721                         /*
3722                          * We were trying to find glue at a node beneath a
3723                          * zone cut, but didn't.
3724                          *
3725                          * Return the delegation.
3726                          */
3727                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3728                         result = setup_delegation(&search, nodep, foundname,
3729                                                   rdataset, sigrdataset);
3730                         goto tree_exit;
3731                 }
3732                 /*
3733                  * The desired type doesn't exist.
3734                  */
3735                 result = DNS_R_NXRRSET;
3736                 if (search.rbtversion->secure == dns_db_secure &&
3737                     !search.rbtversion->havensec3 &&
3738                     (nsecheader == NULL || nsecsig == NULL)) {
3739                         /*
3740                          * The zone is secure but there's no NSEC,
3741                          * or the NSEC has no signature!
3742                          */
3743                         if (!wild) {
3744                                 result = DNS_R_BADDB;
3745                                 goto node_exit;
3746                         }
3747
3748                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3749                         result = find_closest_nsec(&search, nodep, foundname,
3750                                                    rdataset, sigrdataset,
3751                                                    search.rbtdb->tree,
3752                                                    search.rbtversion->secure);
3753                         if (result == ISC_R_SUCCESS)
3754                                 result = DNS_R_EMPTYWILD;
3755                         goto tree_exit;
3756                 }
3757                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3758                     nsecheader == NULL)
3759                 {
3760                         /*
3761                          * There's no NSEC record, and we were told
3762                          * to find one.
3763                          */
3764                         result = DNS_R_BADDB;
3765                         goto node_exit;
3766                 }
3767                 if (nodep != NULL) {
3768                         new_reference(search.rbtdb, node);
3769                         *nodep = node;
3770                 }
3771                 if ((search.rbtversion->secure == dns_db_secure &&
3772                      !search.rbtversion->havensec3) ||
3773                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3774                 {
3775                         bind_rdataset(search.rbtdb, node, nsecheader,
3776                                       0, rdataset);
3777                         if (nsecsig != NULL)
3778                                 bind_rdataset(search.rbtdb, node,
3779                                               nsecsig, 0, sigrdataset);
3780                 }
3781                 if (wild)
3782                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3783                 goto node_exit;
3784         }
3785
3786         /*
3787          * We found what we were looking for, or we found a CNAME.
3788          */
3789
3790         if (type != found->type &&
3791             type != dns_rdatatype_any &&
3792             found->type == dns_rdatatype_cname) {
3793                 /*
3794                  * We weren't doing an ANY query and we found a CNAME instead
3795                  * of the type we were looking for, so we need to indicate
3796                  * that result to the caller.
3797                  */
3798                 result = DNS_R_CNAME;
3799         } else if (search.zonecut != NULL) {
3800                 /*
3801                  * If we're beneath a zone cut, we must indicate that the
3802                  * result is glue, unless we're actually at the zone cut
3803                  * and the type is NSEC or KEY.
3804                  */
3805                 if (search.zonecut == node) {
3806                         /*
3807                          * It is not clear if KEY should still be
3808                          * allowed at the parent side of the zone
3809                          * cut or not.  It is needed for RFC3007
3810                          * validated updates.
3811                          */
3812                         if (type == dns_rdatatype_nsec ||
3813                             type == dns_rdatatype_nsec3 ||
3814                             type == dns_rdatatype_key)
3815                                 result = ISC_R_SUCCESS;
3816                         else if (type == dns_rdatatype_any)
3817                                 result = DNS_R_ZONECUT;
3818                         else
3819                                 result = DNS_R_GLUE;
3820                 } else
3821                         result = DNS_R_GLUE;
3822                 /*
3823                  * We might have found data that isn't glue, but was occluded
3824                  * by a dynamic update.  If the caller cares about this, they
3825                  * will have told us to validate glue.
3826                  *
3827                  * XXX We should cache the glue validity state!
3828                  */
3829                 if (result == DNS_R_GLUE &&
3830                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3831                     !valid_glue(&search, foundname, type, node)) {
3832                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3833                         result = setup_delegation(&search, nodep, foundname,
3834                                                   rdataset, sigrdataset);
3835                     goto tree_exit;
3836                 }
3837         } else {
3838                 /*
3839                  * An ordinary successful query!
3840                  */
3841                 result = ISC_R_SUCCESS;
3842         }
3843
3844         if (nodep != NULL) {
3845                 if (!at_zonecut)
3846                         new_reference(search.rbtdb, node);
3847                 else
3848                         search.need_cleanup = ISC_FALSE;
3849                 *nodep = node;
3850         }
3851
3852         if (type != dns_rdatatype_any) {
3853                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3854                 if (foundsig != NULL)
3855                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3856                                       sigrdataset);
3857         }
3858
3859         if (wild)
3860                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3861
3862  node_exit:
3863         NODE_UNLOCK(lock, isc_rwlocktype_read);
3864
3865  tree_exit:
3866         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3867
3868         /*
3869          * If we found a zonecut but aren't going to use it, we have to
3870          * let go of it.
3871          */
3872         if (search.need_cleanup) {
3873                 node = search.zonecut;
3874                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3875
3876                 NODE_LOCK(lock, isc_rwlocktype_read);
3877                 decrement_reference(search.rbtdb, node, 0,
3878                                     isc_rwlocktype_read, isc_rwlocktype_none,
3879                                     ISC_FALSE);
3880                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3881         }
3882
3883         if (close_version)
3884                 closeversion(db, &version, ISC_FALSE);
3885
3886         dns_rbtnodechain_reset(&search.chain);
3887
3888         return (result);
3889 }
3890
3891 static isc_result_t
3892 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3893                  isc_stdtime_t now, dns_dbnode_t **nodep,
3894                  dns_name_t *foundname,
3895                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3896 {
3897         UNUSED(db);
3898         UNUSED(name);
3899         UNUSED(options);
3900         UNUSED(now);
3901         UNUSED(nodep);
3902         UNUSED(foundname);
3903         UNUSED(rdataset);
3904         UNUSED(sigrdataset);
3905
3906         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3907
3908         return (ISC_R_NOTIMPLEMENTED);
3909 }
3910
3911 static isc_result_t
3912 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3913         rbtdb_search_t *search = arg;
3914         rdatasetheader_t *header, *header_prev, *header_next;
3915         rdatasetheader_t *dname_header, *sigdname_header;
3916         isc_result_t result;
3917         nodelock_t *lock;
3918         isc_rwlocktype_t locktype;
3919
3920         /* XXX comment */
3921
3922         REQUIRE(search->zonecut == NULL);
3923
3924         /*
3925          * Keep compiler silent.
3926          */
3927         UNUSED(name);
3928
3929         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3930         locktype = isc_rwlocktype_read;
3931         NODE_LOCK(lock, locktype);
3932
3933         /*
3934          * Look for a DNAME or RRSIG DNAME rdataset.
3935          */
3936         dname_header = NULL;
3937         sigdname_header = NULL;
3938         header_prev = NULL;
3939         for (header = node->data; header != NULL; header = header_next) {
3940                 header_next = header->next;
3941                 if (header->rdh_ttl <= search->now) {
3942                         /*
3943                          * This rdataset is stale.  If no one else is
3944                          * using the node, we can clean it up right
3945                          * now, otherwise we mark it as stale, and
3946                          * the node as dirty, so it will get cleaned
3947                          * up later.
3948                          */
3949                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3950                             (locktype == isc_rwlocktype_write ||
3951                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3952                                 /*
3953                                  * We update the node's status only when we
3954                                  * can get write access; otherwise, we leave
3955                                  * others to this work.  Periodical cleaning
3956                                  * will eventually take the job as the last
3957                                  * resort.
3958                                  * We won't downgrade the lock, since other
3959                                  * rdatasets are probably stale, too.
3960                                  */
3961                                 locktype = isc_rwlocktype_write;
3962
3963                                 if (dns_rbtnode_refcurrent(node) == 0) {
3964                                         isc_mem_t *mctx;
3965
3966                                         /*
3967                                          * header->down can be non-NULL if the
3968                                          * refcount has just decremented to 0
3969                                          * but decrement_reference() has not
3970                                          * performed clean_cache_node(), in
3971                                          * which case we need to purge the
3972                                          * stale headers first.
3973                                          */
3974                                         mctx = search->rbtdb->common.mctx;
3975                                         clean_stale_headers(search->rbtdb,
3976                                                             mctx,
3977                                                             header);
3978                                         if (header_prev != NULL)
3979                                                 header_prev->next =
3980                                                         header->next;
3981                                         else
3982                                                 node->data = header->next;
3983                                         free_rdataset(search->rbtdb, mctx,
3984                                                       header);
3985                                 } else {
3986                                         header->attributes |=
3987                                                 RDATASET_ATTR_STALE;
3988                                         node->dirty = 1;
3989                                         header_prev = header;
3990                                 }
3991                         } else
3992                                 header_prev = header;
3993                 } else if (header->type == dns_rdatatype_dname &&
3994                            EXISTS(header)) {
3995                         dname_header = header;
3996                         header_prev = header;
3997                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3998                          EXISTS(header)) {
3999                         sigdname_header = header;
4000                         header_prev = header;
4001                 } else
4002                         header_prev = header;
4003         }
4004
4005         if (dname_header != NULL &&
4006             (!DNS_TRUST_PENDING(dname_header->trust) ||
4007              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4008                 /*
4009                  * We increment the reference count on node to ensure that
4010                  * search->zonecut_rdataset will still be valid later.
4011                  */
4012                 new_reference(search->rbtdb, node);
4013                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4014                 search->zonecut = node;
4015                 search->zonecut_rdataset = dname_header;
4016                 search->zonecut_sigrdataset = sigdname_header;
4017                 search->need_cleanup = ISC_TRUE;
4018                 result = DNS_R_PARTIALMATCH;
4019         } else
4020                 result = DNS_R_CONTINUE;
4021
4022         NODE_UNLOCK(lock, locktype);
4023
4024         return (result);
4025 }
4026
4027 static inline isc_result_t
4028 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4029                      dns_dbnode_t **nodep, dns_name_t *foundname,
4030                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4031 {
4032         unsigned int i;
4033         dns_rbtnode_t *level_node;
4034         rdatasetheader_t *header, *header_prev, *header_next;
4035         rdatasetheader_t *found, *foundsig;
4036         isc_result_t result = ISC_R_NOTFOUND;
4037         dns_name_t name;
4038         dns_rbtdb_t *rbtdb;
4039         isc_boolean_t done;
4040         nodelock_t *lock;
4041         isc_rwlocktype_t locktype;
4042
4043         /*
4044          * Caller must be holding the tree lock.
4045          */
4046
4047         rbtdb = search->rbtdb;
4048         i = search->chain.level_matches;
4049         done = ISC_FALSE;
4050         do {
4051                 locktype = isc_rwlocktype_read;
4052                 lock = &rbtdb->node_locks[node->locknum].lock;
4053                 NODE_LOCK(lock, locktype);
4054
4055                 /*
4056                  * Look for NS and RRSIG NS rdatasets.
4057                  */
4058                 found = NULL;
4059                 foundsig = NULL;
4060                 header_prev = NULL;
4061                 for (header = node->data;
4062                      header != NULL;
4063                      header = header_next) {
4064                         header_next = header->next;
4065                         if (header->rdh_ttl <= search->now) {
4066                                 /*
4067                                  * This rdataset is stale.  If no one else is
4068                                  * using the node, we can clean it up right
4069                                  * now, otherwise we mark it as stale, and
4070                                  * the node as dirty, so it will get cleaned
4071                                  * up later.
4072                                  */
4073                                 if ((header->rdh_ttl <= search->now -
4074                                                     RBTDB_VIRTUAL) &&
4075                                     (locktype == isc_rwlocktype_write ||
4076                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4077                                         /*
4078                                          * We update the node's status only
4079                                          * when we can get write access.
4080                                          */
4081                                         locktype = isc_rwlocktype_write;
4082
4083                                         if (dns_rbtnode_refcurrent(node)
4084                                             == 0) {
4085                                                 isc_mem_t *m;
4086
4087                                                 m = search->rbtdb->common.mctx;
4088                                                 clean_stale_headers(
4089                                                         search->rbtdb,
4090                                                         m, header);
4091                                                 if (header_prev != NULL)
4092                                                         header_prev->next =
4093                                                                 header->next;
4094                                                 else
4095                                                         node->data =
4096                                                                 header->next;
4097                                                 free_rdataset(rbtdb, m,
4098                                                               header);
4099                                         } else {
4100                                                 header->attributes |=
4101                                                         RDATASET_ATTR_STALE;
4102                                                 node->dirty = 1;
4103                                                 header_prev = header;
4104                                         }
4105                                 } else
4106                                         header_prev = header;
4107                         } else if (EXISTS(header)) {
4108                                 /*
4109                                  * We've found an extant rdataset.  See if
4110                                  * we're interested in it.
4111                                  */
4112                                 if (header->type == dns_rdatatype_ns) {
4113                                         found = header;
4114                                         if (foundsig != NULL)
4115                                                 break;
4116                                 } else if (header->type ==
4117                                            RBTDB_RDATATYPE_SIGNS) {
4118                                         foundsig = header;
4119                                         if (found != NULL)
4120                                                 break;
4121                                 }
4122                                 header_prev = header;
4123                         } else
4124                                 header_prev = header;
4125                 }
4126
4127                 if (found != NULL) {
4128                         /*
4129                          * If we have to set foundname, we do it before
4130                          * anything else.  If we were to set foundname after
4131                          * we had set nodep or bound the rdataset, then we'd
4132                          * have to undo that work if dns_name_concatenate()
4133                          * failed.  By setting foundname first, there's
4134                          * nothing to undo if we have trouble.
4135                          */
4136                         if (foundname != NULL) {
4137                                 dns_name_init(&name, NULL);
4138                                 dns_rbt_namefromnode(node, &name);
4139                                 result = dns_name_copy(&name, foundname, NULL);
4140                                 while (result == ISC_R_SUCCESS && i > 0) {
4141                                         i--;
4142                                         level_node = search->chain.levels[i];
4143                                         dns_name_init(&name, NULL);
4144                                         dns_rbt_namefromnode(level_node,
4145                                                              &name);
4146                                         result =
4147                                                 dns_name_concatenate(foundname,
4148                                                                      &name,
4149                                                                      foundname,
4150                                                                      NULL);
4151                                 }
4152                                 if (result != ISC_R_SUCCESS) {
4153                                         *nodep = NULL;
4154                                         goto node_exit;
4155                                 }
4156                         }
4157                         result = DNS_R_DELEGATION;
4158                         if (nodep != NULL) {
4159                                 new_reference(search->rbtdb, node);
4160                                 *nodep = node;
4161                         }
4162                         bind_rdataset(search->rbtdb, node, found, search->now,
4163                                       rdataset);
4164                         if (foundsig != NULL)
4165                                 bind_rdataset(search->rbtdb, node, foundsig,
4166                                               search->now, sigrdataset);
4167                         if (need_headerupdate(found, search->now) ||
4168                             (foundsig != NULL &&
4169                              need_headerupdate(foundsig, search->now))) {
4170                                 if (locktype != isc_rwlocktype_write) {
4171                                         NODE_UNLOCK(lock, locktype);
4172                                         NODE_LOCK(lock, isc_rwlocktype_write);
4173                                         locktype = isc_rwlocktype_write;
4174                                 }
4175                                 if (need_headerupdate(found, search->now))
4176                                         update_header(search->rbtdb, found,
4177                                                       search->now);
4178                                 if (foundsig != NULL &&
4179                                     need_headerupdate(foundsig, search->now)) {
4180                                         update_header(search->rbtdb, foundsig,
4181                                                       search->now);
4182                                 }
4183                         }
4184                 }
4185
4186         node_exit:
4187                 NODE_UNLOCK(lock, locktype);
4188
4189                 if (found == NULL && i > 0) {
4190                         i--;
4191                         node = search->chain.levels[i];
4192                 } else
4193                         done = ISC_TRUE;
4194
4195         } while (!done);
4196
4197         return (result);
4198 }
4199
4200 static isc_result_t
4201 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4202                   isc_stdtime_t now, dns_name_t *foundname,
4203                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4204 {
4205         dns_rbtnode_t *node;
4206         rdatasetheader_t *header, *header_next, *header_prev;
4207         rdatasetheader_t *found, *foundsig;
4208         isc_boolean_t empty_node;
4209         isc_result_t result;
4210         dns_fixedname_t fname, forigin;
4211         dns_name_t *name, *origin;
4212         rbtdb_rdatatype_t matchtype, sigmatchtype;
4213         nodelock_t *lock;
4214         isc_rwlocktype_t locktype;
4215
4216         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4217         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4218                                              dns_rdatatype_nsec);
4219
4220         do {
4221                 node = NULL;
4222                 dns_fixedname_init(&fname);
4223                 name = dns_fixedname_name(&fname);
4224                 dns_fixedname_init(&forigin);
4225                 origin = dns_fixedname_name(&forigin);
4226                 result = dns_rbtnodechain_current(&search->chain, name,
4227                                                   origin, &node);
4228                 if (result != ISC_R_SUCCESS)
4229                         return (result);
4230                 locktype = isc_rwlocktype_read;
4231                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4232                 NODE_LOCK(lock, locktype);
4233                 found = NULL;
4234                 foundsig = NULL;
4235                 empty_node = ISC_TRUE;
4236                 header_prev = NULL;
4237                 for (header = node->data;
4238                      header != NULL;
4239                      header = header_next) {
4240                         header_next = header->next;
4241                         if (header->rdh_ttl <= now) {
4242                                 /*
4243                                  * This rdataset is stale.  If no one else is
4244                                  * using the node, we can clean it up right
4245                                  * now, otherwise we mark it as stale, and the
4246                                  * node as dirty, so it will get cleaned up
4247                                  * later.
4248                                  */
4249                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4250                                     (locktype == isc_rwlocktype_write ||
4251                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4252                                         /*
4253                                          * We update the node's status only
4254                                          * when we can get write access.
4255                                          */
4256                                         locktype = isc_rwlocktype_write;
4257
4258                                         if (dns_rbtnode_refcurrent(node)
4259                                             == 0) {
4260                                                 isc_mem_t *m;
4261
4262                                                 m = search->rbtdb->common.mctx;
4263                                                 clean_stale_headers(
4264                                                         search->rbtdb,
4265                                                         m, header);
4266                                                 if (header_prev != NULL)
4267                                                         header_prev->next =
4268                                                                 header->next;
4269                                                 else
4270                                                         node->data = header->next;
4271                                                 free_rdataset(search->rbtdb, m,
4272                                                               header);
4273                                         } else {
4274                                                 header->attributes |=
4275                                                         RDATASET_ATTR_STALE;
4276                                                 node->dirty = 1;
4277                                                 header_prev = header;
4278                                         }
4279                                 } else
4280                                         header_prev = header;
4281                                 continue;
4282                         }
4283                         if (NONEXISTENT(header) ||
4284                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4285                                 header_prev = header;
4286                                 continue;
4287                         }
4288                         empty_node = ISC_FALSE;
4289                         if (header->type == matchtype)
4290                                 found = header;
4291                         else if (header->type == sigmatchtype)
4292                                 foundsig = header;
4293                         header_prev = header;
4294                 }
4295                 if (found != NULL) {
4296                         result = dns_name_concatenate(name, origin,
4297                                                       foundname, NULL);
4298                         if (result != ISC_R_SUCCESS)
4299                                 goto unlock_node;
4300                         bind_rdataset(search->rbtdb, node, found,
4301                                       now, rdataset);
4302                         if (foundsig != NULL)
4303                                 bind_rdataset(search->rbtdb, node, foundsig,
4304                                               now, sigrdataset);
4305                         new_reference(search->rbtdb, node);
4306                         *nodep = node;
4307                         result = DNS_R_COVERINGNSEC;
4308                 } else if (!empty_node) {
4309                         result = ISC_R_NOTFOUND;
4310                 } else
4311                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4312                                                        NULL);
4313  unlock_node:
4314                 NODE_UNLOCK(lock, locktype);
4315         } while (empty_node && result == ISC_R_SUCCESS);
4316         return (result);
4317 }
4318
4319 static isc_result_t
4320 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4321            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4322            dns_dbnode_t **nodep, dns_name_t *foundname,
4323            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4324 {
4325         dns_rbtnode_t *node = NULL;
4326         isc_result_t result;
4327         rbtdb_search_t search;
4328         isc_boolean_t cname_ok = ISC_TRUE;
4329         isc_boolean_t empty_node;
4330         nodelock_t *lock;
4331         isc_rwlocktype_t locktype;
4332         rdatasetheader_t *header, *header_prev, *header_next;
4333         rdatasetheader_t *found, *nsheader;
4334         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4335         rdatasetheader_t *update, *updatesig;
4336         rbtdb_rdatatype_t sigtype, negtype;
4337
4338         UNUSED(version);
4339
4340         search.rbtdb = (dns_rbtdb_t *)db;
4341
4342         REQUIRE(VALID_RBTDB(search.rbtdb));
4343         REQUIRE(version == NULL);
4344
4345         if (now == 0)
4346                 isc_stdtime_get(&now);
4347
4348         search.rbtversion = NULL;
4349         search.serial = 1;
4350         search.options = options;
4351         search.copy_name = ISC_FALSE;
4352         search.need_cleanup = ISC_FALSE;
4353         search.wild = ISC_FALSE;
4354         search.zonecut = NULL;
4355         dns_fixedname_init(&search.zonecut_name);
4356         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4357         search.now = now;
4358         update = NULL;
4359         updatesig = NULL;
4360
4361         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4362
4363         /*
4364          * Search down from the root of the tree.  If, while going down, we
4365          * encounter a callback node, cache_zonecut_callback() will search the
4366          * rdatasets at the zone cut for a DNAME rdataset.
4367          */
4368         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4369                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4370                                   cache_zonecut_callback, &search);
4371
4372         if (result == DNS_R_PARTIALMATCH) {
4373                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4374                         result = find_coveringnsec(&search, nodep, now,
4375                                                    foundname, rdataset,
4376                                                    sigrdataset);
4377                         if (result == DNS_R_COVERINGNSEC)
4378                                 goto tree_exit;
4379                 }
4380                 if (search.zonecut != NULL) {
4381                     result = setup_delegation(&search, nodep, foundname,
4382                                               rdataset, sigrdataset);
4383                     goto tree_exit;
4384                 } else {
4385                 find_ns:
4386                         result = find_deepest_zonecut(&search, node, nodep,
4387                                                       foundname, rdataset,
4388                                                       sigrdataset);
4389                         goto tree_exit;
4390                 }
4391         } else if (result != ISC_R_SUCCESS)
4392                 goto tree_exit;
4393
4394         /*
4395          * Certain DNSSEC types are not subject to CNAME matching
4396          * (RFC4035, section 2.5 and RFC3007).
4397          *
4398          * We don't check for RRSIG, because we don't store RRSIG records
4399          * directly.
4400          */
4401         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4402                 cname_ok = ISC_FALSE;
4403
4404         /*
4405          * We now go looking for rdata...
4406          */
4407
4408         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4409         locktype = isc_rwlocktype_read;
4410         NODE_LOCK(lock, locktype);
4411
4412         found = NULL;
4413         foundsig = NULL;
4414         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4415         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4416         nsheader = NULL;
4417         nssig = NULL;
4418         cnamesig = NULL;
4419         empty_node = ISC_TRUE;
4420         header_prev = NULL;
4421         for (header = node->data; header != NULL; header = header_next) {
4422                 header_next = header->next;
4423                 if (header->rdh_ttl <= now) {
4424                         /*
4425                          * This rdataset is stale.  If no one else is using the
4426                          * node, we can clean it up right now, otherwise we
4427                          * mark it as stale, and the node as dirty, so it will
4428                          * get cleaned up later.
4429                          */
4430                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4431                             (locktype == isc_rwlocktype_write ||
4432                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4433                                 /*
4434                                  * We update the node's status only when we
4435                                  * can get write access.
4436                                  */
4437                                 locktype = isc_rwlocktype_write;
4438
4439                                 if (dns_rbtnode_refcurrent(node) == 0) {
4440                                         isc_mem_t *mctx;
4441
4442                                         mctx = search.rbtdb->common.mctx;
4443                                         clean_stale_headers(search.rbtdb, mctx,
4444                                                             header);
4445                                         if (header_prev != NULL)
4446                                                 header_prev->next =
4447                                                         header->next;
4448                                         else
4449                                                 node->data = header->next;
4450                                         free_rdataset(search.rbtdb, mctx,
4451                                                       header);
4452                                 } else {
4453                                         header->attributes |=
4454                                                 RDATASET_ATTR_STALE;
4455                                         node->dirty = 1;
4456                                         header_prev = header;
4457                                 }
4458                         } else
4459                                 header_prev = header;
4460                 } else if (EXISTS(header)) {
4461                         /*
4462                          * We now know that there is at least one active
4463                          * non-stale rdataset at this node.
4464                          */
4465                         empty_node = ISC_FALSE;
4466
4467                         /*
4468                          * If we found a type we were looking for, remember
4469                          * it.
4470                          */
4471                         if (header->type == type ||
4472                             (type == dns_rdatatype_any &&
4473                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4474                             (cname_ok && header->type ==
4475                              dns_rdatatype_cname)) {
4476                                 /*
4477                                  * We've found the answer.
4478                                  */
4479                                 found = header;
4480                                 if (header->type == dns_rdatatype_cname &&
4481                                     cname_ok &&
4482                                     cnamesig != NULL) {
4483                                         /*
4484                                          * If we've already got the CNAME RRSIG,
4485                                          * use it, otherwise change sigtype
4486                                          * so that we find it.
4487                                          */
4488                                         if (cnamesig != NULL)
4489                                                 foundsig = cnamesig;
4490                                         else
4491                                                 sigtype =
4492                                                     RBTDB_RDATATYPE_SIGCNAME;
4493                                         foundsig = cnamesig;
4494                                 }
4495                         } else if (header->type == sigtype) {
4496                                 /*
4497                                  * We've found the RRSIG rdataset for our
4498                                  * target type.  Remember it.
4499                                  */
4500                                 foundsig = header;
4501                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4502                                    header->type == negtype) {
4503                                 /*
4504                                  * We've found a negative cache entry.
4505                                  */
4506                                 found = header;
4507                         } else if (header->type == dns_rdatatype_ns) {
4508                                 /*
4509                                  * Remember a NS rdataset even if we're
4510                                  * not specifically looking for it, because
4511                                  * we might need it later.
4512                                  */
4513                                 nsheader = header;
4514                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4515                                 /*
4516                                  * If we need the NS rdataset, we'll also
4517                                  * need its signature.
4518                                  */
4519                                 nssig = header;
4520                         } else if (cname_ok &&
4521                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4522                                 /*
4523                                  * If we get a CNAME match, we'll also need
4524                                  * its signature.
4525                                  */
4526                                 cnamesig = header;
4527                         }
4528                         header_prev = header;
4529                 } else
4530                         header_prev = header;
4531         }
4532
4533         if (empty_node) {
4534                 /*
4535                  * We have an exact match for the name, but there are no
4536                  * extant rdatasets.  That means that this node doesn't
4537                  * meaningfully exist, and that we really have a partial match.
4538                  */
4539                 NODE_UNLOCK(lock, locktype);
4540                 goto find_ns;
4541         }
4542
4543         /*
4544          * If we didn't find what we were looking for...
4545          */
4546         if (found == NULL ||
4547             (DNS_TRUST_ADDITIONAL(found->trust) &&
4548              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4549             (found->trust == dns_trust_glue &&
4550              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4551             (DNS_TRUST_PENDING(found->trust) &&
4552              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4553                 /*
4554                  * If there is an NS rdataset at this node, then this is the
4555                  * deepest zone cut.
4556                  */
4557                 if (nsheader != NULL) {
4558                         if (nodep != NULL) {
4559                                 new_reference(search.rbtdb, node);
4560                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4561                                 *nodep = node;
4562                         }
4563                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4564                                       rdataset);
4565                         if (need_headerupdate(nsheader, search.now))
4566                                 update = nsheader;
4567                         if (nssig != NULL) {
4568                                 bind_rdataset(search.rbtdb, node, nssig,
4569                                               search.now, sigrdataset);
4570                                 if (need_headerupdate(nssig, search.now))
4571                                         updatesig = nssig;
4572                         }
4573                         result = DNS_R_DELEGATION;
4574                         goto node_exit;
4575                 }
4576
4577                 /*
4578                  * Go find the deepest zone cut.
4579                  */
4580                 NODE_UNLOCK(lock, locktype);
4581                 goto find_ns;
4582         }
4583
4584         /*
4585          * We found what we were looking for, or we found a CNAME.
4586          */
4587
4588         if (nodep != NULL) {
4589                 new_reference(search.rbtdb, node);
4590                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4591                 *nodep = node;
4592         }
4593
4594         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4595                 /*
4596                  * We found a negative cache entry.
4597                  */
4598                 if (NXDOMAIN(found))
4599                         result = DNS_R_NCACHENXDOMAIN;
4600                 else
4601                         result = DNS_R_NCACHENXRRSET;
4602         } else if (type != found->type &&
4603                    type != dns_rdatatype_any &&
4604                    found->type == dns_rdatatype_cname) {
4605                 /*
4606                  * We weren't doing an ANY query and we found a CNAME instead
4607                  * of the type we were looking for, so we need to indicate
4608                  * that result to the caller.
4609                  */
4610                 result = DNS_R_CNAME;
4611         } else {
4612                 /*
4613                  * An ordinary successful query!
4614                  */
4615                 result = ISC_R_SUCCESS;
4616         }
4617
4618         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4619             result == DNS_R_NCACHENXRRSET) {
4620                 bind_rdataset(search.rbtdb, node, found, search.now,
4621                               rdataset);
4622                 if (need_headerupdate(found, search.now))
4623                         update = found;
4624                 if (!NEGATIVE(found) && foundsig != NULL) {
4625                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4626                                       sigrdataset);
4627                         if (need_headerupdate(foundsig, search.now))
4628                                 updatesig = foundsig;
4629                 }
4630         }
4631
4632  node_exit:
4633         if ((update != NULL || updatesig != NULL) &&
4634             locktype != isc_rwlocktype_write) {
4635                 NODE_UNLOCK(lock, locktype);
4636                 NODE_LOCK(lock, isc_rwlocktype_write);
4637                 locktype = isc_rwlocktype_write;
4638         }
4639         if (update != NULL && need_headerupdate(update, search.now))
4640                 update_header(search.rbtdb, update, search.now);
4641         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4642                 update_header(search.rbtdb, updatesig, search.now);
4643
4644         NODE_UNLOCK(lock, locktype);
4645
4646  tree_exit:
4647         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4648
4649         /*
4650          * If we found a zonecut but aren't going to use it, we have to
4651          * let go of it.
4652          */
4653         if (search.need_cleanup) {
4654                 node = search.zonecut;
4655                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4656
4657                 NODE_LOCK(lock, isc_rwlocktype_read);
4658                 decrement_reference(search.rbtdb, node, 0,
4659                                     isc_rwlocktype_read, isc_rwlocktype_none,
4660                                     ISC_FALSE);
4661                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4662         }
4663
4664         dns_rbtnodechain_reset(&search.chain);
4665
4666         return (result);
4667 }
4668
4669 static isc_result_t
4670 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4671                   isc_stdtime_t now, dns_dbnode_t **nodep,
4672                   dns_name_t *foundname,
4673                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4674 {
4675         dns_rbtnode_t *node = NULL;
4676         nodelock_t *lock;
4677         isc_result_t result;
4678         rbtdb_search_t search;
4679         rdatasetheader_t *header, *header_prev, *header_next;
4680         rdatasetheader_t *found, *foundsig;
4681         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4682         isc_rwlocktype_t locktype;
4683
4684         search.rbtdb = (dns_rbtdb_t *)db;
4685
4686         REQUIRE(VALID_RBTDB(search.rbtdb));
4687
4688         if (now == 0)
4689                 isc_stdtime_get(&now);
4690
4691         search.rbtversion = NULL;
4692         search.serial = 1;
4693         search.options = options;
4694         search.copy_name = ISC_FALSE;
4695         search.need_cleanup = ISC_FALSE;
4696         search.wild = ISC_FALSE;
4697         search.zonecut = NULL;
4698         dns_fixedname_init(&search.zonecut_name);
4699         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4700         search.now = now;
4701
4702         if ((options & DNS_DBFIND_NOEXACT) != 0)
4703                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4704
4705         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4706
4707         /*
4708          * Search down from the root of the tree.
4709          */
4710         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4711                                   &search.chain, rbtoptions, NULL, &search);
4712
4713         if (result == DNS_R_PARTIALMATCH) {
4714         find_ns:
4715                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4716                                               rdataset, sigrdataset);
4717                 goto tree_exit;
4718         } else if (result != ISC_R_SUCCESS)
4719                 goto tree_exit;
4720
4721         /*
4722          * We now go looking for an NS rdataset at the node.
4723          */
4724
4725         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4726         locktype = isc_rwlocktype_read;
4727         NODE_LOCK(lock, locktype);
4728
4729         found = NULL;
4730         foundsig = NULL;
4731         header_prev = NULL;
4732         for (header = node->data; header != NULL; header = header_next) {
4733                 header_next = header->next;
4734                 if (header->rdh_ttl <= now) {
4735                         /*
4736                          * This rdataset is stale.  If no one else is using the
4737                          * node, we can clean it up right now, otherwise we
4738                          * mark it as stale, and the node as dirty, so it will
4739                          * get cleaned up later.
4740                          */
4741                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4742                             (locktype == isc_rwlocktype_write ||
4743                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4744                                 /*
4745                                  * We update the node's status only when we
4746                                  * can get write access.
4747                                  */
4748                                 locktype = isc_rwlocktype_write;
4749
4750                                 if (dns_rbtnode_refcurrent(node) == 0) {
4751                                         isc_mem_t *mctx;
4752
4753                                         mctx = search.rbtdb->common.mctx;
4754                                         clean_stale_headers(search.rbtdb, mctx,
4755                                                             header);
4756                                         if (header_prev != NULL)
4757                                                 header_prev->next =
4758                                                         header->next;
4759                                         else
4760                                                 node->data = header->next;
4761                                         free_rdataset(search.rbtdb, mctx,
4762                                                       header);
4763                                 } else {
4764                                         header->attributes |=
4765                                                 RDATASET_ATTR_STALE;
4766                                         node->dirty = 1;
4767                                         header_prev = header;
4768                                 }
4769                         } else
4770                                 header_prev = header;
4771                 } else if (EXISTS(header)) {
4772                         /*
4773                          * If we found a type we were looking for, remember
4774                          * it.
4775                          */
4776                         if (header->type == dns_rdatatype_ns) {
4777                                 /*
4778                                  * Remember a NS rdataset even if we're
4779                                  * not specifically looking for it, because
4780                                  * we might need it later.
4781                                  */
4782                                 found = header;
4783                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4784                                 /*
4785                                  * If we need the NS rdataset, we'll also
4786                                  * need its signature.
4787                                  */
4788                                 foundsig = header;
4789                         }
4790                         header_prev = header;
4791                 } else
4792                         header_prev = header;
4793         }
4794
4795         if (found == NULL) {
4796                 /*
4797                  * No NS records here.
4798                  */
4799                 NODE_UNLOCK(lock, locktype);
4800                 goto find_ns;
4801         }
4802
4803         if (nodep != NULL) {
4804                 new_reference(search.rbtdb, node);
4805                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4806                 *nodep = node;
4807         }
4808
4809         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4810         if (foundsig != NULL)
4811                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4812                               sigrdataset);
4813
4814         if (need_headerupdate(found, search.now) ||
4815             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4816                 if (locktype != isc_rwlocktype_write) {
4817                         NODE_UNLOCK(lock, locktype);
4818                         NODE_LOCK(lock, isc_rwlocktype_write);
4819                         locktype = isc_rwlocktype_write;
4820                 }
4821                 if (need_headerupdate(found, search.now))
4822                         update_header(search.rbtdb, found, search.now);
4823                 if (foundsig != NULL &&
4824                     need_headerupdate(foundsig, search.now)) {
4825                         update_header(search.rbtdb, foundsig, search.now);
4826                 }
4827         }
4828
4829         NODE_UNLOCK(lock, locktype);
4830
4831  tree_exit:
4832         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4833
4834         INSIST(!search.need_cleanup);
4835
4836         dns_rbtnodechain_reset(&search.chain);
4837
4838         if (result == DNS_R_DELEGATION)
4839                 result = ISC_R_SUCCESS;
4840
4841         return (result);
4842 }
4843
4844 static void
4845 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4846         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4847         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4848         unsigned int refs;
4849
4850         REQUIRE(VALID_RBTDB(rbtdb));
4851         REQUIRE(targetp != NULL && *targetp == NULL);
4852
4853         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4854         dns_rbtnode_refincrement(node, &refs);
4855         INSIST(refs != 0);
4856         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4857
4858         *targetp = source;
4859 }
4860
4861 static void
4862 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4863         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4864         dns_rbtnode_t *node;
4865         isc_boolean_t want_free = ISC_FALSE;
4866         isc_boolean_t inactive = ISC_FALSE;
4867         rbtdb_nodelock_t *nodelock;
4868
4869         REQUIRE(VALID_RBTDB(rbtdb));
4870         REQUIRE(targetp != NULL && *targetp != NULL);
4871
4872         node = (dns_rbtnode_t *)(*targetp);
4873         nodelock = &rbtdb->node_locks[node->locknum];
4874
4875         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4876
4877         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4878                                 isc_rwlocktype_none, ISC_FALSE)) {
4879                 if (isc_refcount_current(&nodelock->references) == 0 &&
4880                     nodelock->exiting) {
4881                         inactive = ISC_TRUE;
4882                 }
4883         }
4884
4885         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4886
4887         *targetp = NULL;
4888
4889         if (inactive) {
4890                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4891                 rbtdb->active--;
4892                 if (rbtdb->active == 0)
4893                         want_free = ISC_TRUE;
4894                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4895                 if (want_free) {
4896                         char buf[DNS_NAME_FORMATSIZE];
4897                         if (dns_name_dynamic(&rbtdb->common.origin))
4898                                 dns_name_format(&rbtdb->common.origin, buf,
4899                                                 sizeof(buf));
4900                         else
4901                                 strcpy(buf, "<UNKNOWN>");
4902                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4903                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4904                                       "calling free_rbtdb(%s)", buf);
4905                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4906                 }
4907         }
4908 }
4909
4910 static isc_result_t
4911 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4912         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4913         dns_rbtnode_t *rbtnode = node;
4914         rdatasetheader_t *header;
4915         isc_boolean_t force_expire = ISC_FALSE;
4916         /*
4917          * These are the category and module used by the cache cleaner.
4918          */
4919         isc_boolean_t log = ISC_FALSE;
4920         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4921         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4922         int level = ISC_LOG_DEBUG(2);
4923         char printname[DNS_NAME_FORMATSIZE];
4924
4925         REQUIRE(VALID_RBTDB(rbtdb));
4926
4927         /*
4928          * Caller must hold a tree lock.
4929          */
4930
4931         if (now == 0)
4932                 isc_stdtime_get(&now);
4933
4934         if (rbtdb->overmem) {
4935                 isc_uint32_t val;
4936
4937                 isc_random_get(&val);
4938                 /*
4939                  * XXXDCL Could stand to have a better policy, like LRU.
4940                  */
4941                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4942
4943                 /*
4944                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
4945                  * rbtdb->overmem can currently only be true for cache
4946                  * databases -- hence all of the "overmem cache" log strings.
4947                  */
4948                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4949                 if (log)
4950                         isc_log_write(dns_lctx, category, module, level,
4951                                       "overmem cache: %s %s",
4952                                       force_expire ? "FORCE" : "check",
4953                                       dns_rbt_formatnodename(rbtnode,
4954                                                            printname,
4955                                                            sizeof(printname)));
4956         }
4957
4958         /*
4959          * We may not need write access, but this code path is not performance
4960          * sensitive, so it should be okay to always lock as a writer.
4961          */
4962         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4963                   isc_rwlocktype_write);
4964
4965         for (header = rbtnode->data; header != NULL; header = header->next)
4966                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4967                         /*
4968                          * We don't check if refcurrent(rbtnode) == 0 and try
4969                          * to free like we do in cache_find(), because
4970                          * refcurrent(rbtnode) must be non-zero.  This is so
4971                          * because 'node' is an argument to the function.
4972                          */
4973                         header->attributes |= RDATASET_ATTR_STALE;
4974                         rbtnode->dirty = 1;
4975                         if (log)
4976                                 isc_log_write(dns_lctx, category, module,
4977                                               level, "overmem cache: stale %s",
4978                                               printname);
4979                 } else if (force_expire) {
4980                         if (! RETAIN(header)) {
4981                                 set_ttl(rbtdb, header, 0);
4982                                 header->attributes |= RDATASET_ATTR_STALE;
4983                                 rbtnode->dirty = 1;
4984                         } else if (log) {
4985                                 isc_log_write(dns_lctx, category, module,
4986                                               level, "overmem cache: "
4987                                               "reprieve by RETAIN() %s",
4988                                               printname);
4989                         }
4990                 } else if (rbtdb->overmem && log)
4991                         isc_log_write(dns_lctx, category, module, level,
4992                                       "overmem cache: saved %s", printname);
4993
4994         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4995                     isc_rwlocktype_write);
4996
4997         return (ISC_R_SUCCESS);
4998 }
4999
5000 static void
5001 overmem(dns_db_t *db, isc_boolean_t overmem) {
5002         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5003
5004         if (IS_CACHE(rbtdb))
5005                 rbtdb->overmem = overmem;
5006 }
5007
5008 static void
5009 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5010         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5011         dns_rbtnode_t *rbtnode = node;
5012         isc_boolean_t first;
5013
5014         REQUIRE(VALID_RBTDB(rbtdb));
5015
5016         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5017                   isc_rwlocktype_read);
5018
5019         fprintf(out, "node %p, %u references, locknum = %u\n",
5020                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5021                 rbtnode->locknum);
5022         if (rbtnode->data != NULL) {
5023                 rdatasetheader_t *current, *top_next;
5024
5025                 for (current = rbtnode->data; current != NULL;
5026                      current = top_next) {
5027                         top_next = current->next;
5028                         first = ISC_TRUE;
5029                         fprintf(out, "\ttype %u", current->type);
5030                         do {
5031                                 if (!first)
5032                                         fprintf(out, "\t");
5033                                 first = ISC_FALSE;
5034                                 fprintf(out,
5035                                         "\tserial = %lu, ttl = %u, "
5036                                         "trust = %u, attributes = %u, "
5037                                         "resign = %u\n",
5038                                         (unsigned long)current->serial,
5039                                         current->rdh_ttl,
5040                                         current->trust,
5041                                         current->attributes,
5042                                         current->resign);
5043                                 current = current->down;
5044                         } while (current != NULL);
5045                 }
5046         } else
5047                 fprintf(out, "(empty)\n");
5048
5049         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5050                     isc_rwlocktype_read);
5051 }
5052
5053 static isc_result_t
5054 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5055 {
5056         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5057         rbtdb_dbiterator_t *rbtdbiter;
5058
5059         REQUIRE(VALID_RBTDB(rbtdb));
5060
5061         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5062         if (rbtdbiter == NULL)
5063                 return (ISC_R_NOMEMORY);
5064
5065         rbtdbiter->common.methods = &dbiterator_methods;
5066         rbtdbiter->common.db = NULL;
5067         dns_db_attach(db, &rbtdbiter->common.db);
5068         rbtdbiter->common.relative_names =
5069                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5070         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5071         rbtdbiter->common.cleaning = ISC_FALSE;
5072         rbtdbiter->paused = ISC_TRUE;
5073         rbtdbiter->tree_locked = isc_rwlocktype_none;
5074         rbtdbiter->result = ISC_R_SUCCESS;
5075         dns_fixedname_init(&rbtdbiter->name);
5076         dns_fixedname_init(&rbtdbiter->origin);
5077         rbtdbiter->node = NULL;
5078         rbtdbiter->delete = 0;
5079         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5080         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5081         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5082         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5083         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5084         if (rbtdbiter->nsec3only)
5085                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5086         else
5087                 rbtdbiter->current = &rbtdbiter->chain;
5088
5089         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5090
5091         return (ISC_R_SUCCESS);
5092 }
5093
5094 static isc_result_t
5095 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5096                   dns_rdatatype_t type, dns_rdatatype_t covers,
5097                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5098                   dns_rdataset_t *sigrdataset)
5099 {
5100         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5101         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5102         rdatasetheader_t *header, *header_next, *found, *foundsig;
5103         rbtdb_serial_t serial;
5104         rbtdb_version_t *rbtversion = version;
5105         isc_boolean_t close_version = ISC_FALSE;
5106         rbtdb_rdatatype_t matchtype, sigmatchtype;
5107
5108         REQUIRE(VALID_RBTDB(rbtdb));
5109         REQUIRE(type != dns_rdatatype_any);
5110
5111         if (rbtversion == NULL) {
5112                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5113                 close_version = ISC_TRUE;
5114         }
5115         serial = rbtversion->serial;
5116         now = 0;
5117
5118         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5119                   isc_rwlocktype_read);
5120
5121         found = NULL;
5122         foundsig = NULL;
5123         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5124         if (covers == 0)
5125                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5126         else
5127                 sigmatchtype = 0;
5128
5129         for (header = rbtnode->data; header != NULL; header = header_next) {
5130                 header_next = header->next;
5131                 do {
5132                         if (header->serial <= serial &&
5133                             !IGNORE(header)) {
5134                                 /*
5135                                  * Is this a "this rdataset doesn't
5136                                  * exist" record?
5137                                  */
5138                                 if (NONEXISTENT(header))
5139                                         header = NULL;
5140                                 break;
5141                         } else
5142                                 header = header->down;
5143                 } while (header != NULL);
5144                 if (header != NULL) {
5145                         /*
5146                          * We have an active, extant rdataset.  If it's a
5147                          * type we're looking for, remember it.
5148                          */
5149                         if (header->type == matchtype) {
5150                                 found = header;
5151                                 if (foundsig != NULL)
5152                                         break;
5153                         } else if (header->type == sigmatchtype) {
5154                                 foundsig = header;
5155                                 if (found != NULL)
5156                                         break;
5157                         }
5158                 }
5159         }
5160         if (found != NULL) {
5161                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5162                 if (foundsig != NULL)
5163                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5164                                       sigrdataset);
5165         }
5166
5167         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5168                     isc_rwlocktype_read);
5169
5170         if (close_version)
5171                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5172                              ISC_FALSE);
5173
5174         if (found == NULL)
5175                 return (ISC_R_NOTFOUND);
5176
5177         return (ISC_R_SUCCESS);
5178 }
5179
5180 static isc_result_t
5181 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5182                    dns_rdatatype_t type, dns_rdatatype_t covers,
5183                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5184                    dns_rdataset_t *sigrdataset)
5185 {
5186         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5187         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5188         rdatasetheader_t *header, *header_next, *found, *foundsig;
5189         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5190         isc_result_t result;
5191         nodelock_t *lock;
5192         isc_rwlocktype_t locktype;
5193
5194         REQUIRE(VALID_RBTDB(rbtdb));
5195         REQUIRE(type != dns_rdatatype_any);
5196
5197         UNUSED(version);
5198
5199         result = ISC_R_SUCCESS;
5200
5201         if (now == 0)
5202                 isc_stdtime_get(&now);
5203
5204         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5205         locktype = isc_rwlocktype_read;
5206         NODE_LOCK(lock, locktype);
5207
5208         found = NULL;
5209         foundsig = NULL;
5210         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5211         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5212         if (covers == 0)
5213                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5214         else
5215                 sigmatchtype = 0;
5216
5217         for (header = rbtnode->data; header != NULL; header = header_next) {
5218                 header_next = header->next;
5219                 if (header->rdh_ttl <= now) {
5220                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5221                             (locktype == isc_rwlocktype_write ||
5222                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5223                                 /*
5224                                  * We update the node's status only when we
5225                                  * can get write access.
5226                                  */
5227                                 locktype = isc_rwlocktype_write;
5228
5229                                 /*
5230                                  * We don't check if refcurrent(rbtnode) == 0
5231                                  * and try to free like we do in cache_find(),
5232                                  * because refcurrent(rbtnode) must be
5233                                  * non-zero.  This is so because 'node' is an
5234                                  * argument to the function.
5235                                  */
5236                                 header->attributes |= RDATASET_ATTR_STALE;
5237                                 rbtnode->dirty = 1;
5238                         }
5239                 } else if (EXISTS(header)) {
5240                         if (header->type == matchtype)
5241                                 found = header;
5242                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5243                                  header->type == negtype)
5244                                 found = header;
5245                         else if (header->type == sigmatchtype)
5246                                 foundsig = header;
5247                 }
5248         }
5249         if (found != NULL) {
5250                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5251                 if (!NEGATIVE(found) && foundsig != NULL)
5252                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5253                                       sigrdataset);
5254         }
5255
5256         NODE_UNLOCK(lock, locktype);
5257
5258         if (found == NULL)
5259                 return (ISC_R_NOTFOUND);
5260
5261         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5262                 /*
5263                  * We found a negative cache entry.
5264                  */
5265                 if (NXDOMAIN(found))
5266                         result = DNS_R_NCACHENXDOMAIN;
5267                 else
5268                         result = DNS_R_NCACHENXRRSET;
5269         }
5270
5271         return (result);
5272 }
5273
5274 static isc_result_t
5275 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5276              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5277 {
5278         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5279         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5280         rbtdb_version_t *rbtversion = version;
5281         rbtdb_rdatasetiter_t *iterator;
5282         unsigned int refs;
5283
5284         REQUIRE(VALID_RBTDB(rbtdb));
5285
5286         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5287         if (iterator == NULL)
5288                 return (ISC_R_NOMEMORY);
5289
5290         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5291                 now = 0;
5292                 if (rbtversion == NULL)
5293                         currentversion(db,
5294                                  (dns_dbversion_t **) (void *)(&rbtversion));
5295                 else {
5296                         unsigned int refs;
5297
5298                         isc_refcount_increment(&rbtversion->references,
5299                                                &refs);
5300                         INSIST(refs > 1);
5301                 }
5302         } else {
5303                 if (now == 0)
5304                         isc_stdtime_get(&now);
5305                 rbtversion = NULL;
5306         }
5307
5308         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5309         iterator->common.methods = &rdatasetiter_methods;
5310         iterator->common.db = db;
5311         iterator->common.node = node;
5312         iterator->common.version = (dns_dbversion_t *)rbtversion;
5313         iterator->common.now = now;
5314
5315         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5316
5317         dns_rbtnode_refincrement(rbtnode, &refs);
5318         INSIST(refs != 0);
5319
5320         iterator->current = NULL;
5321
5322         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5323
5324         *iteratorp = (dns_rdatasetiter_t *)iterator;
5325
5326         return (ISC_R_SUCCESS);
5327 }
5328
5329 static isc_boolean_t
5330 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5331         rdatasetheader_t *header, *header_next;
5332         isc_boolean_t cname, other_data;
5333         dns_rdatatype_t rdtype;
5334
5335         /*
5336          * The caller must hold the node lock.
5337          */
5338
5339         /*
5340          * Look for CNAME and "other data" rdatasets active in our version.
5341          */
5342         cname = ISC_FALSE;
5343         other_data = ISC_FALSE;
5344         for (header = node->data; header != NULL; header = header_next) {
5345                 header_next = header->next;
5346                 if (header->type == dns_rdatatype_cname) {
5347                         /*
5348                          * Look for an active extant CNAME.
5349                          */
5350                         do {
5351                                 if (header->serial <= serial &&
5352                                     !IGNORE(header)) {
5353                                         /*
5354                                          * Is this a "this rdataset doesn't
5355                                          * exist" record?
5356                                          */
5357                                         if (NONEXISTENT(header))
5358                                                 header = NULL;
5359                                         break;
5360                                 } else
5361                                         header = header->down;
5362                         } while (header != NULL);
5363                         if (header != NULL)
5364                                 cname = ISC_TRUE;
5365                 } else {
5366                         /*
5367                          * Look for active extant "other data".
5368                          *
5369                          * "Other data" is any rdataset whose type is not
5370                          * KEY, NSEC, SIG or RRSIG.
5371                          */
5372                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5373                         if (rdtype != dns_rdatatype_key &&
5374                             rdtype != dns_rdatatype_sig &&
5375                             rdtype != dns_rdatatype_nsec &&
5376                             rdtype != dns_rdatatype_rrsig) {
5377                                 /*
5378                                  * Is it active and extant?
5379                                  */
5380                                 do {
5381                                         if (header->serial <= serial &&
5382                                             !IGNORE(header)) {
5383                                                 /*
5384                                                  * Is this a "this rdataset
5385                                                  * doesn't exist" record?
5386                                                  */
5387                                                 if (NONEXISTENT(header))
5388                                                         header = NULL;
5389                                                 break;
5390                                         } else
5391                                                 header = header->down;
5392                                 } while (header != NULL);
5393                                 if (header != NULL)
5394                                         other_data = ISC_TRUE;
5395                         }
5396                 }
5397         }
5398
5399         if (cname && other_data)
5400                 return (ISC_TRUE);
5401
5402         return (ISC_FALSE);
5403 }
5404
5405 static isc_result_t
5406 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5407         isc_result_t result;
5408
5409         INSIST(!IS_CACHE(rbtdb));
5410         INSIST(newheader->heap_index == 0);
5411         INSIST(!ISC_LINK_LINKED(newheader, link));
5412
5413         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5414         return (result);
5415 }
5416
5417 static isc_result_t
5418 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5419     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5420     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5421 {
5422         rbtdb_changed_t *changed = NULL;
5423         rdatasetheader_t *topheader, *topheader_prev, *header;
5424         unsigned char *merged;
5425         isc_result_t result;
5426         isc_boolean_t header_nx;
5427         isc_boolean_t newheader_nx;
5428         isc_boolean_t merge;
5429         dns_rdatatype_t rdtype, covers;
5430         rbtdb_rdatatype_t negtype;
5431         dns_trust_t trust;
5432         int idx;
5433
5434         /*
5435          * Add an rdatasetheader_t to a node.
5436          */
5437
5438         /*
5439          * Caller must be holding the node lock.
5440          */
5441
5442         if ((options & DNS_DBADD_MERGE) != 0) {
5443                 REQUIRE(rbtversion != NULL);
5444                 merge = ISC_TRUE;
5445         } else
5446                 merge = ISC_FALSE;
5447
5448         if ((options & DNS_DBADD_FORCE) != 0)
5449                 trust = dns_trust_ultimate;
5450         else
5451                 trust = newheader->trust;
5452
5453         if (rbtversion != NULL && !loading) {
5454                 /*
5455                  * We always add a changed record, even if no changes end up
5456                  * being made to this node, because it's harmless and
5457                  * simplifies the code.
5458                  */
5459                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5460                 if (changed == NULL) {
5461                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5462                         return (ISC_R_NOMEMORY);
5463                 }
5464         }
5465
5466         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5467         topheader_prev = NULL;
5468
5469         negtype = 0;
5470         if (rbtversion == NULL && !newheader_nx) {
5471                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5472                 if (rdtype == 0) {
5473                         /*
5474                          * We're adding a negative cache entry.
5475                          */
5476                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5477                         if (covers == dns_rdatatype_any) {
5478                                 /*
5479                                  * We're adding an negative cache entry
5480                                  * which covers all types (NXDOMAIN,
5481                                  * NODATA(QTYPE=ANY)).
5482                                  *
5483                                  * We make all other data stale so that the
5484                                  * only rdataset that can be found at this
5485                                  * node is the negative cache entry.
5486                                  */
5487                                 for (topheader = rbtnode->data;
5488                                      topheader != NULL;
5489                                      topheader = topheader->next) {
5490                                         set_ttl(rbtdb, topheader, 0);
5491                                         topheader->attributes |=
5492                                                 RDATASET_ATTR_STALE;
5493                                 }
5494                                 rbtnode->dirty = 1;
5495                                 goto find_header;
5496                         }
5497                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5498                 } else {
5499                         /*
5500                          * We're adding something that isn't a
5501                          * negative cache entry.  Look for an extant
5502                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5503                          * cache entry.
5504                          */
5505                         for (topheader = rbtnode->data;
5506                              topheader != NULL;
5507                              topheader = topheader->next) {
5508                                 if (topheader->type ==
5509                                     RBTDB_RDATATYPE_NCACHEANY)
5510                                         break;
5511                         }
5512                         if (topheader != NULL && EXISTS(topheader) &&
5513                             topheader->rdh_ttl > now) {
5514                                 /*
5515                                  * Found one.
5516                                  */
5517                                 if (trust < topheader->trust) {
5518                                         /*
5519                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5520                                          * is more trusted.
5521                                          */
5522                                         free_rdataset(rbtdb,
5523                                                       rbtdb->common.mctx,
5524                                                       newheader);
5525                                         if (addedrdataset != NULL)
5526                                                 bind_rdataset(rbtdb, rbtnode,
5527                                                               topheader, now,
5528                                                               addedrdataset);
5529                                         return (DNS_R_UNCHANGED);
5530                                 }
5531                                 /*
5532                                  * The new rdataset is better.  Expire the
5533                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5534                                  */
5535                                 set_ttl(rbtdb, topheader, 0);
5536                                 topheader->attributes |= RDATASET_ATTR_STALE;
5537                                 rbtnode->dirty = 1;
5538                                 topheader = NULL;
5539                                 goto find_header;
5540                         }
5541                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5542                 }
5543         }
5544
5545         for (topheader = rbtnode->data;
5546              topheader != NULL;
5547              topheader = topheader->next) {
5548                 if (topheader->type == newheader->type ||
5549                     topheader->type == negtype)
5550                         break;
5551                 topheader_prev = topheader;
5552         }
5553
5554  find_header:
5555         /*
5556          * If header isn't NULL, we've found the right type.  There may be
5557          * IGNORE rdatasets between the top of the chain and the first real
5558          * data.  We skip over them.
5559          */
5560         header = topheader;
5561         while (header != NULL && IGNORE(header))
5562                 header = header->down;
5563         if (header != NULL) {
5564                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5565
5566                 /*
5567                  * Deleting an already non-existent rdataset has no effect.
5568                  */
5569                 if (header_nx && newheader_nx) {
5570                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5571                         return (DNS_R_UNCHANGED);
5572                 }
5573
5574                 /*
5575                  * Trying to add an rdataset with lower trust to a cache DB
5576                  * has no effect, provided that the cache data isn't stale.
5577                  */
5578                 if (rbtversion == NULL && trust < header->trust &&
5579                     (header->rdh_ttl > now || header_nx)) {
5580                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5581                         if (addedrdataset != NULL)
5582                                 bind_rdataset(rbtdb, rbtnode, header, now,
5583                                               addedrdataset);
5584                         return (DNS_R_UNCHANGED);
5585                 }
5586
5587                 /*
5588                  * Don't merge if a nonexistent rdataset is involved.
5589                  */
5590                 if (merge && (header_nx || newheader_nx))
5591                         merge = ISC_FALSE;
5592
5593                 /*
5594                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5595                  * that is the union of 'newheader' and 'header'.
5596                  */
5597                 if (merge) {
5598                         unsigned int flags = 0;
5599                         INSIST(rbtversion->serial >= header->serial);
5600                         merged = NULL;
5601                         result = ISC_R_SUCCESS;
5602
5603                         if ((options & DNS_DBADD_EXACT) != 0)
5604                                 flags |= DNS_RDATASLAB_EXACT;
5605                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5606                              newheader->rdh_ttl != header->rdh_ttl)
5607                                         result = DNS_R_NOTEXACT;
5608                         else if (newheader->rdh_ttl != header->rdh_ttl)
5609                                 flags |= DNS_RDATASLAB_FORCE;
5610                         if (result == ISC_R_SUCCESS)
5611                                 result = dns_rdataslab_merge(
5612                                              (unsigned char *)header,
5613                                              (unsigned char *)newheader,
5614                                              (unsigned int)(sizeof(*newheader)),
5615                                              rbtdb->common.mctx,
5616                                              rbtdb->common.rdclass,
5617                                              (dns_rdatatype_t)header->type,
5618                                              flags, &merged);
5619                         if (result == ISC_R_SUCCESS) {
5620                                 /*
5621                                  * If 'header' has the same serial number as
5622                                  * we do, we could clean it up now if we knew
5623                                  * that our caller had no references to it.
5624                                  * We don't know this, however, so we leave it
5625                                  * alone.  It will get cleaned up when
5626                                  * clean_zone_node() runs.
5627                                  */
5628                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5629                                               newheader);
5630                                 newheader = (rdatasetheader_t *)merged;
5631                                 if (loading && RESIGN(newheader) &&
5632                                     RESIGN(header) &&
5633                                     header->resign < newheader->resign)
5634                                         newheader->resign = header->resign;
5635                         } else {
5636                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5637                                               newheader);
5638                                 return (result);
5639                         }
5640                 }
5641                 /*
5642                  * Don't replace existing NS, A and AAAA RRsets
5643                  * in the cache if they already exist.  This
5644                  * prevents named being locked to old servers.
5645                  * Don't lower trust of existing record if the
5646                  * update is forced.
5647                  */
5648                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5649                     header->type == dns_rdatatype_ns &&
5650                     !header_nx && !newheader_nx &&
5651                     header->trust >= newheader->trust &&
5652                     dns_rdataslab_equalx((unsigned char *)header,
5653                                          (unsigned char *)newheader,
5654                                          (unsigned int)(sizeof(*newheader)),
5655                                          rbtdb->common.rdclass,
5656                                          (dns_rdatatype_t)header->type)) {
5657                         /*
5658                          * Honour the new ttl if it is less than the
5659                          * older one.
5660                          */
5661                         if (header->rdh_ttl > newheader->rdh_ttl)
5662                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5663                         if (header->noqname == NULL &&
5664                             newheader->noqname != NULL) {
5665                                 header->noqname = newheader->noqname;
5666                                 newheader->noqname = NULL;
5667                         }
5668                         if (header->closest == NULL &&
5669                             newheader->closest != NULL) {
5670                                 header->closest = newheader->closest;
5671                                 newheader->closest = NULL;
5672                         }
5673                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5674                         if (addedrdataset != NULL)
5675                                 bind_rdataset(rbtdb, rbtnode, header, now,
5676                                               addedrdataset);
5677                         return (ISC_R_SUCCESS);
5678                 }
5679                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5680                     (header->type == dns_rdatatype_a ||
5681                      header->type == dns_rdatatype_aaaa) &&
5682                     !header_nx && !newheader_nx &&
5683                     header->trust >= newheader->trust &&
5684                     dns_rdataslab_equal((unsigned char *)header,
5685                                         (unsigned char *)newheader,
5686                                         (unsigned int)(sizeof(*newheader)))) {
5687                         /*
5688                          * Honour the new ttl if it is less than the
5689                          * older one.
5690                          */
5691                         if (header->rdh_ttl > newheader->rdh_ttl)
5692                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5693                         if (header->noqname == NULL &&
5694                             newheader->noqname != NULL) {
5695                                 header->noqname = newheader->noqname;
5696                                 newheader->noqname = NULL;
5697                         }
5698                         if (header->closest == NULL &&
5699                             newheader->closest != NULL) {
5700                                 header->closest = newheader->closest;
5701                                 newheader->closest = NULL;
5702                         }
5703                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5704                         if (addedrdataset != NULL)
5705                                 bind_rdataset(rbtdb, rbtnode, header, now,
5706                                               addedrdataset);
5707                         return (ISC_R_SUCCESS);
5708                 }
5709                 INSIST(rbtversion == NULL ||
5710                        rbtversion->serial >= topheader->serial);
5711                 if (topheader_prev != NULL)
5712                         topheader_prev->next = newheader;
5713                 else
5714                         rbtnode->data = newheader;
5715                 newheader->next = topheader->next;
5716                 if (loading) {
5717                         /*
5718                          * There are no other references to 'header' when
5719                          * loading, so we MAY clean up 'header' now.
5720                          * Since we don't generate changed records when
5721                          * loading, we MUST clean up 'header' now.
5722                          */
5723                         newheader->down = NULL;
5724                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5725                 } else {
5726                         newheader->down = topheader;
5727                         topheader->next = newheader;
5728                         rbtnode->dirty = 1;
5729                         if (changed != NULL)
5730                                 changed->dirty = ISC_TRUE;
5731                         if (rbtversion == NULL) {
5732                                 set_ttl(rbtdb, header, 0);
5733                                 header->attributes |= RDATASET_ATTR_STALE;
5734                         }
5735                         idx = newheader->node->locknum;
5736                         if (IS_CACHE(rbtdb)) {
5737                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5738                                                  newheader, link);
5739                                 /*
5740                                  * XXXMLG We don't check the return value
5741                                  * here.  If it fails, we will not do TTL
5742                                  * based expiry on this node.  However, we
5743                                  * will do it on the LRU side, so memory
5744                                  * will not leak... for long.
5745                                  */
5746                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5747                         } else if (RESIGN(newheader))
5748                                 resign_insert(rbtdb, idx, newheader);
5749                 }
5750         } else {
5751                 /*
5752                  * No non-IGNORED rdatasets of the given type exist at
5753                  * this node.
5754                  */
5755
5756                 /*
5757                  * If we're trying to delete the type, don't bother.
5758                  */
5759                 if (newheader_nx) {
5760                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5761                         return (DNS_R_UNCHANGED);
5762                 }
5763
5764                 if (topheader != NULL) {
5765                         /*
5766                          * We have a list of rdatasets of the given type,
5767                          * but they're all marked IGNORE.  We simply insert
5768                          * the new rdataset at the head of the list.
5769                          *
5770                          * Ignored rdatasets cannot occur during loading, so
5771                          * we INSIST on it.
5772                          */
5773                         INSIST(!loading);
5774                         INSIST(rbtversion == NULL ||
5775                                rbtversion->serial >= topheader->serial);
5776                         if (topheader_prev != NULL)
5777                                 topheader_prev->next = newheader;
5778                         else
5779                                 rbtnode->data = newheader;
5780                         newheader->next = topheader->next;
5781                         newheader->down = topheader;
5782                         topheader->next = newheader;
5783                         rbtnode->dirty = 1;
5784                         if (changed != NULL)
5785                                 changed->dirty = ISC_TRUE;
5786                 } else {
5787                         /*
5788                          * No rdatasets of the given type exist at the node.
5789                          */
5790                         newheader->next = rbtnode->data;
5791                         newheader->down = NULL;
5792                         rbtnode->data = newheader;
5793                 }
5794                 idx = newheader->node->locknum;
5795                 if (IS_CACHE(rbtdb)) {
5796                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5797                                          newheader, link);
5798                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5799                 } else if (RESIGN(newheader)) {
5800                         resign_insert(rbtdb, idx, newheader);
5801                 }
5802         }
5803
5804         /*
5805          * Check if the node now contains CNAME and other data.
5806          */
5807         if (rbtversion != NULL &&
5808             cname_and_other_data(rbtnode, rbtversion->serial))
5809                 return (DNS_R_CNAMEANDOTHER);
5810
5811         if (addedrdataset != NULL)
5812                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5813
5814         return (ISC_R_SUCCESS);
5815 }
5816
5817 static inline isc_boolean_t
5818 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5819                 rbtdb_rdatatype_t type)
5820 {
5821         if (IS_CACHE(rbtdb)) {
5822                 if (type == dns_rdatatype_dname)
5823                         return (ISC_TRUE);
5824                 else
5825                         return (ISC_FALSE);
5826         } else if (type == dns_rdatatype_dname ||
5827                    (type == dns_rdatatype_ns &&
5828                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5829                 return (ISC_TRUE);
5830         return (ISC_FALSE);
5831 }
5832
5833 static inline isc_result_t
5834 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5835            dns_rdataset_t *rdataset)
5836 {
5837         struct noqname *noqname;
5838         isc_mem_t *mctx = rbtdb->common.mctx;
5839         dns_name_t name;
5840         dns_rdataset_t neg, negsig;
5841         isc_result_t result;
5842         isc_region_t r;
5843
5844         dns_name_init(&name, NULL);
5845         dns_rdataset_init(&neg);
5846         dns_rdataset_init(&negsig);
5847
5848         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5849         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5850
5851         noqname = isc_mem_get(mctx, sizeof(*noqname));
5852         if (noqname == NULL) {
5853                 result = ISC_R_NOMEMORY;
5854                 goto cleanup;
5855         }
5856         dns_name_init(&noqname->name, NULL);
5857         noqname->neg = NULL;
5858         noqname->negsig = NULL;
5859         noqname->type = neg.type;
5860         result = dns_name_dup(&name, mctx, &noqname->name);
5861         if (result != ISC_R_SUCCESS)
5862                 goto cleanup;
5863         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5864         if (result != ISC_R_SUCCESS)
5865                 goto cleanup;
5866         noqname->neg = r.base;
5867         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5868         if (result != ISC_R_SUCCESS)
5869                 goto cleanup;
5870         noqname->negsig = r.base;
5871         dns_rdataset_disassociate(&neg);
5872         dns_rdataset_disassociate(&negsig);
5873         newheader->noqname = noqname;
5874         return (ISC_R_SUCCESS);
5875
5876 cleanup:
5877         dns_rdataset_disassociate(&neg);
5878         dns_rdataset_disassociate(&negsig);
5879         free_noqname(mctx, &noqname);
5880         return(result);
5881 }
5882
5883 static inline isc_result_t
5884 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5885            dns_rdataset_t *rdataset)
5886 {
5887         struct noqname *closest;
5888         isc_mem_t *mctx = rbtdb->common.mctx;
5889         dns_name_t name;
5890         dns_rdataset_t neg, negsig;
5891         isc_result_t result;
5892         isc_region_t r;
5893
5894         dns_name_init(&name, NULL);
5895         dns_rdataset_init(&neg);
5896         dns_rdataset_init(&negsig);
5897
5898         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5899         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5900
5901         closest = isc_mem_get(mctx, sizeof(*closest));
5902         if (closest == NULL) {
5903                 result = ISC_R_NOMEMORY;
5904                 goto cleanup;
5905         }
5906         dns_name_init(&closest->name, NULL);
5907         closest->neg = NULL;
5908         closest->negsig = NULL;
5909         closest->type = neg.type;
5910         result = dns_name_dup(&name, mctx, &closest->name);
5911         if (result != ISC_R_SUCCESS)
5912                 goto cleanup;
5913         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5914         if (result != ISC_R_SUCCESS)
5915                 goto cleanup;
5916         closest->neg = r.base;
5917         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5918         if (result != ISC_R_SUCCESS)
5919                 goto cleanup;
5920         closest->negsig = r.base;
5921         dns_rdataset_disassociate(&neg);
5922         dns_rdataset_disassociate(&negsig);
5923         newheader->closest = closest;
5924         return (ISC_R_SUCCESS);
5925
5926  cleanup:
5927         dns_rdataset_disassociate(&neg);
5928         dns_rdataset_disassociate(&negsig);
5929         free_noqname(mctx, &closest);
5930         return(result);
5931 }
5932
/*
 * Tentative definition of the zone method table.  addrdataset() and
 * subtractrdataset() below compare rbtdb->common.methods against its
 * address to decide whether their NSEC3 node/type REQUIRE applies.
 * NOTE(review): the initialised definition is presumably further down
 * the file -- confirm.
 */
5933 static dns_dbmethods_t zone_methods;
5934
/*
 * Add 'rdataset' at 'node' in 'db'.
 *
 * A slab copy of 'rdataset' is built, an rdatasetheader_t at the start of
 * that slab is filled in from 'rdataset', and the header is passed to
 * add() while the node's lock is write-held.
 *
 * 'version' == NULL:
 *      'now' is fetched with isc_stdtime_get() if it was passed as 0, the
 *      stored TTL is rdataset->ttl + now, the serial is 1, and the
 *      NXDOMAIN, OPTOUT, NOQNAME and CLOSEST attributes of 'rdataset' are
 *      carried over.
 * 'version' != NULL:
 *      'now' is forced to 0, the serial is taken from the version, and the
 *      RESIGN attribute and resign time of 'rdataset' are carried over.
 *
 * 'options' and 'addedrdataset' are passed through to add() unchanged.
 *
 * Returns the failing result of dns_rdataslab_fromrdataset(),
 * addnoqname() or addclosest() if one of those fails; otherwise the
 * result of add().
 */
5935 static isc_result_t
5936 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5937             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5938             dns_rdataset_t *addedrdataset)
5939 {
5940         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5941         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5942         rbtdb_version_t *rbtversion = version;
5943         isc_region_t region;
5944         rdatasetheader_t *newheader;
5945         rdatasetheader_t *header;
5946         isc_result_t result;
5947         isc_boolean_t delegating;
5948         isc_boolean_t tree_locked = ISC_FALSE;
5949
5950         REQUIRE(VALID_RBTDB(rbtdb));
5951
             /*
              * When the zone method table is in use, NSEC3 data (by type or
              * by covered type) must be added to an nsec3 node, and an
              * nsec3 node must receive nothing else.
              */
5952         if (rbtdb->common.methods == &zone_methods)
5953                 REQUIRE(((rbtnode->nsec3 &&
5954                           (rdataset->type == dns_rdatatype_nsec3 ||
5955                            rdataset->covers == dns_rdatatype_nsec3)) ||
5956                          (!rbtnode->nsec3 &&
5957                            rdataset->type != dns_rdatatype_nsec3 &&
5958                            rdataset->covers != dns_rdatatype_nsec3)));
5959
             /*
              * Unversioned adds need a real timestamp; versioned adds
              * always work with now == 0.
              */
5960         if (rbtversion == NULL) {
5961                 if (now == 0)
5962                         isc_stdtime_get(&now);
5963         } else
5964                 now = 0;
5965
             /*
              * Build the slab.  The start of region.base is used as the
              * rdatasetheader_t below.
              */
5966         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5967                                             &region,
5968                                             sizeof(rdatasetheader_t));
5969         if (result != ISC_R_SUCCESS)
5970                 return (result);
5971
5972         newheader = (rdatasetheader_t *)region.base;
5973         init_rdataset(rbtdb, newheader);
5974         set_ttl(rbtdb, newheader, rdataset->ttl + now);
5975         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5976                                                 rdataset->covers);
5977         newheader->attributes = 0;
5978         newheader->noqname = NULL;
5979         newheader->closest = NULL;
5980         newheader->count = init_count++;
5981         newheader->trust = rdataset->trust;
5982         newheader->additional_auth = NULL;
5983         newheader->additional_glue = NULL;
5984         newheader->last_used = now;
5985         newheader->node = rbtnode;
5986         if (rbtversion != NULL) {
5987                 newheader->serial = rbtversion->serial;
                     /*
                      * NOTE(review): 'now' was already set to 0 above for
                      * the versioned case; this store looks redundant.
                      */
5988                 now = 0;
5989
5990                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5991                         newheader->attributes |= RDATASET_ATTR_RESIGN;
5992                         newheader->resign = rdataset->resign;
5993                 } else
5994                         newheader->resign = 0;
5995         } else {
5996                 newheader->serial = 1;
5997                 newheader->resign = 0;
5998                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
5999                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6000                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6001                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
                     /*
                      * Copy the proofs attached to 'rdataset'.  If either
                      * copy fails, the new header is released and the
                      * failure is returned before any lock is taken.
                      */
6002                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6003                         result = addnoqname(rbtdb, newheader, rdataset);
6004                         if (result != ISC_R_SUCCESS) {
6005                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6006                                               newheader);
6007                                 return (result);
6008                         }
6009                 }
6010                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6011                         result = addclosest(rbtdb, newheader, rdataset);
6012                         if (result != ISC_R_SUCCESS) {
6013                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6014                                               newheader);
6015                                 return (result);
6016                         }
6017                 }
6018         }
6019
6020         /*
6021          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6022          * just DNAME for the cache), then we need to set the callback bit
6023          * on the node.
6024          */
6025         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6026                 delegating = ISC_TRUE;
6027         else
6028                 delegating = ISC_FALSE;
6029
6030         /*
6031          * If we're adding a delegation type or the DB is a cache in an overmem
6032          * state, hold an exclusive lock on the tree.  In the latter case
6033          * the lock does not necessarily have to be acquired but it will help
6034          * purge stale entries more effectively.
6035          */
6036         if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6037                 tree_locked = ISC_TRUE;
6038                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6039         }
6040
6041         if (IS_CACHE(rbtdb) && rbtdb->overmem)
6042                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6043
             /* The tree lock, when taken, is acquired before the node lock. */
6044         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6045                   isc_rwlocktype_write);
6046
             /*
              * When rrset statistics are enabled, flag the header with
              * STATCOUNT and report it to update_rrsetstats().
              */
6047         if (rbtdb->rrsetstats != NULL) {
6048                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6049                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6050         }
6051
6052         if (IS_CACHE(rbtdb)) {
6053                 if (tree_locked)
6054                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6055
                     /*
                      * Look at element 1 of this lock bucket's heap and
                      * expire it if its TTL is at or before
                      * now - RBTDB_VIRTUAL.
                      */
6056                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6057                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6058                         expire_header(rbtdb, header, tree_locked);
6059
6060                 /*
6061                  * If we've been holding a write lock on the tree just for
6062                  * cleaning, we can release it now.  However, we still need the
6063                  * node lock.
6064                  */
6065                 if (tree_locked && !delegating) {
6066                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6067                         tree_locked = ISC_FALSE;
6068                 }
6069         }
6070
             /*
              * Hand the header to add() (with 'loading' false).  The new
              * header is not released here afterwards, whatever add()
              * returns.
              */
6071         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6072                      addedrdataset, now);
6073         if (result == ISC_R_SUCCESS && delegating)
6074                 rbtnode->find_callback = 1;
6075
             /* Unlock in reverse order: node lock first, then the tree lock. */
6076         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6077                     isc_rwlocktype_write);
6078
6079         if (tree_locked)
6080                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6081
6082         /*
6083          * Update the zone's secure status.  If version is non-NULL
6084          * this is deferred until closeversion() is called.
6085          */
6086         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6087                 iszonesecure(db, version, rbtdb->origin_node);
6088
6089         return (result);
6090 }
6091
/*
 * Subtract the rdata in 'rdataset' from the rdataset of the same type at
 * 'node', recording the outcome under 'version'.
 *
 * A slab copy of 'rdataset' is built and, with the node's lock held for
 * writing, subtracted from the first non-IGNOREd header of that type.
 * The outcome is linked in as the new top of that type's chain, with the
 * previous top header placed below it.
 *
 * Returns:
 *      ISC_R_SUCCESS     A new header was linked in: either the remaining
 *                        rdata, or a NONEXISTENT header when
 *                        dns_rdataslab_subtract() reported
 *                        DNS_R_NXRRSET.  If 'newrdataset' is non-NULL it
 *                        is bound to that header.
 *      DNS_R_UNCHANGED   No existing rdataset of this type was found and
 *                        DNS_DBSUB_EXACT was not set.
 *      DNS_R_NOTEXACT    DNS_DBSUB_EXACT was set and either no existing
 *                        rdataset was found or the TTLs differ.
 *      ISC_R_NOMEMORY    add_changed() or new_rdataset() failed.
 *      other             The failing result of
 *                        dns_rdataslab_fromrdataset() or
 *                        dns_rdataslab_subtract().
 *
 * NOTE(review): 'version' is dereferenced unconditionally
 * (rbtversion->serial), so the 'version == NULL' test near the end does
 * not look reachable with a true value -- confirm that callers always
 * supply a version.
 */
6092 static isc_result_t
6093 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6094                  dns_rdataset_t *rdataset, unsigned int options,
6095                  dns_rdataset_t *newrdataset)
6096 {
6097         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6098         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6099         rbtdb_version_t *rbtversion = version;
6100         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6101         unsigned char *subresult;
6102         isc_region_t region;
6103         isc_result_t result;
6104         rbtdb_changed_t *changed;
6105
6106         REQUIRE(VALID_RBTDB(rbtdb));
6107
             /*
              * When the zone method table is in use, NSEC3 data (by type or
              * by covered type) belongs on an nsec3 node, and an nsec3 node
              * holds nothing else.
              */
6108         if (rbtdb->common.methods == &zone_methods)
6109                 REQUIRE(((rbtnode->nsec3 &&
6110                           (rdataset->type == dns_rdatatype_nsec3 ||
6111                            rdataset->covers == dns_rdatatype_nsec3)) ||
6112                          (!rbtnode->nsec3 &&
6113                            rdataset->type != dns_rdatatype_nsec3 &&
6114                            rdataset->covers != dns_rdatatype_nsec3)));
6115
             /*
              * Build a slab for the rdata to remove.  The start of
              * region.base is used as the rdatasetheader_t below.
              */
6116         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6117                                             &region,
6118                                             sizeof(rdatasetheader_t));
6119         if (result != ISC_R_SUCCESS)
6120                 return (result);
6121         newheader = (rdatasetheader_t *)region.base;
6122         init_rdataset(rbtdb, newheader);
6123         set_ttl(rbtdb, newheader, rdataset->ttl);
6124         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6125                                                 rdataset->covers);
6126         newheader->attributes = 0;
6127         newheader->serial = rbtversion->serial;
6128         newheader->trust = 0;
6129         newheader->noqname = NULL;
6130         newheader->closest = NULL;
6131         newheader->count = init_count++;
6132         newheader->additional_auth = NULL;
6133         newheader->additional_glue = NULL;
6134         newheader->last_used = 0;
6135         newheader->node = rbtnode;
6136         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6137                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6138                 newheader->resign = rdataset->resign;
6139         } else
6140                 newheader->resign = 0;
6141
6142         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6143                   isc_rwlocktype_write);
6144
             /*
              * Obtain the change record for this node and version.  On
              * failure, release the new header and the node lock, then
              * give up.
              */
6145         changed = add_changed(rbtdb, rbtversion, rbtnode);
6146         if (changed == NULL) {
6147                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6148                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6149                             isc_rwlocktype_write);
6150                 return (ISC_R_NOMEMORY);
6151         }
6152
             /*
              * Walk the node's list of top headers looking for this type,
              * remembering the predecessor so the list can be relinked.
              */
6153         topheader_prev = NULL;
6154         for (topheader = rbtnode->data;
6155              topheader != NULL;
6156              topheader = topheader->next) {
6157                 if (topheader->type == newheader->type)
6158                         break;
6159                 topheader_prev = topheader;
6160         }
6161         /*
6162          * If header isn't NULL, we've found the right type.  There may be
6163          * IGNORE rdatasets between the top of the chain and the first real
6164          * data.  We skip over them.
6165          */
6166         header = topheader;
6167         while (header != NULL && IGNORE(header))
6168                 header = header->down;
6169         if (header != NULL && EXISTS(header)) {
6170                 unsigned int flags = 0;
6171                 subresult = NULL;
6172                 result = ISC_R_SUCCESS;
                     /*
                      * With DNS_DBSUB_EXACT the TTLs must match; a mismatch
                      * skips the subtraction and fails with DNS_R_NOTEXACT.
                      */
6173                 if ((options & DNS_DBSUB_EXACT) != 0) {
6174                         flags |= DNS_RDATASLAB_EXACT;
6175                         if (newheader->rdh_ttl != header->rdh_ttl)
6176                                 result = DNS_R_NOTEXACT;
6177                 }
6178                 if (result == ISC_R_SUCCESS)
6179                         result = dns_rdataslab_subtract(
6180                                         (unsigned char *)header,
6181                                         (unsigned char *)newheader,
6182                                         (unsigned int)(sizeof(*newheader)),
6183                                         rbtdb->common.mctx,
6184                                         rbtdb->common.rdclass,
6185                                         (dns_rdatatype_t)header->type,
6186                                         flags, &subresult);
6187                 if (result == ISC_R_SUCCESS) {
                             /*
                              * The subtraction result replaces the slab
                              * built above as 'newheader'.
                              */
6188                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6189                         newheader = (rdatasetheader_t *)subresult;
6190                         init_rdataset(rbtdb, newheader);
6191                         /*
6192                          * We have to set the serial since the rdataslab
6193                          * subtraction routine copies the reserved portion of
6194                          * header, not newheader.
6195                          */
6196                         newheader->serial = rbtversion->serial;
6197                         /*
6198                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6199                          * to additional info.  We need to clear these fields
6200                          * to avoid having duplicated references.
6201                          */
6202                         newheader->additional_auth = NULL;
6203                         newheader->additional_glue = NULL;
6204                 } else if (result == DNS_R_NXRRSET) {
6205                         /*
6206                          * This subtraction would remove all of the rdata;
6207                          * add a nonexistent header instead.
6208                          */
6209                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6210                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6211                         if (newheader == NULL) {
6212                                 result = ISC_R_NOMEMORY;
6213                                 goto unlock;
6214                         }
6215                         set_ttl(rbtdb, newheader, 0);
6216                         newheader->type = topheader->type;
6217                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6218                         newheader->trust = 0;
6219                         newheader->serial = rbtversion->serial;
6220                         newheader->noqname = NULL;
6221                         newheader->closest = NULL;
6222                         newheader->count = 0;
6223                         newheader->additional_auth = NULL;
6224                         newheader->additional_glue = NULL;
6225                         newheader->node = rbtnode;
6226                         newheader->resign = 0;
6227                         newheader->last_used = 0;
6228                 } else {
                             /*
                              * Any other failure (including the TTL
                              * mismatch above): discard the slab and
                              * return the error.
                              */
6229                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6230                         goto unlock;
6231                 }
6232
6233                 /*
6234                  * If we're here, we want to link newheader in front of
6235                  * topheader.
6236                  */
6237                 INSIST(rbtversion->serial >= topheader->serial);
6238                 if (topheader_prev != NULL)
6239                         topheader_prev->next = newheader;
6240                 else
6241                         rbtnode->data = newheader;
6242                 newheader->next = topheader->next;
6243                 newheader->down = topheader;
6244                 topheader->next = newheader;
6245                 rbtnode->dirty = 1;
6246                 changed->dirty = ISC_TRUE;
6247         } else {
6248                 /*
6249                  * The rdataset doesn't exist, so we don't need to do anything
6250                  * to satisfy the deletion request.
6251                  */
6252                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6253                 if ((options & DNS_DBSUB_EXACT) != 0)
6254                         result = DNS_R_NOTEXACT;
6255                 else
6256                         result = DNS_R_UNCHANGED;
6257         }
6258
             /* Bind the caller's rdataset only when a header was linked in. */
6259         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6260                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6261
6262  unlock:
6263         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6264                     isc_rwlocktype_write);
6265
6266         /*
6267          * Update the zone's secure status.  If version is non-NULL
6268          * this is deferred until closeversion() is called.
6269          */
6270         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6271                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6272
6273         return (result);
6274 }
6275
6276 static isc_result_t
6277 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6278                dns_rdatatype_t type, dns_rdatatype_t covers)
6279 {
6280         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6281         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6282         rbtdb_version_t *rbtversion = version;
6283         isc_result_t result;
6284         rdatasetheader_t *newheader;
6285
6286         REQUIRE(VALID_RBTDB(rbtdb));
6287
6288         if (type == dns_rdatatype_any)
6289                 return (ISC_R_NOTIMPLEMENTED);
6290         if (type == dns_rdatatype_rrsig && covers == 0)
6291                 return (ISC_R_NOTIMPLEMENTED);
6292
6293         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6294         if (newheader == NULL)
6295                 return (ISC_R_NOMEMORY);
6296         set_ttl(rbtdb, newheader, 0);
6297         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6298         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6299         newheader->trust = 0;
6300         newheader->noqname = NULL;
6301         newheader->closest = NULL;
6302         newheader->additional_auth = NULL;
6303         newheader->additional_glue = NULL;
6304         if (rbtversion != NULL)
6305                 newheader->serial = rbtversion->serial;
6306         else
6307                 newheader->serial = 0;
6308         newheader->count = 0;
6309         newheader->last_used = 0;
6310         newheader->node = rbtnode;
6311
6312         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6313                   isc_rwlocktype_write);
6314
6315         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6316                      ISC_FALSE, NULL, 0);
6317
6318         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6319                     isc_rwlocktype_write);
6320
6321         /*
6322          * Update the zone's secure status.  If version is non-NULL
6323          * this is deferred until closeversion() is called.
6324          */
6325         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6326                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6327
6328         return (result);
6329 }
6330
6331 static isc_result_t
6332 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6333         rbtdb_load_t *loadctx = arg;
6334         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6335         dns_rbtnode_t *node;
6336         isc_result_t result;
6337         isc_region_t region;
6338         rdatasetheader_t *newheader;
6339
6340         /*
6341          * This routine does no node locking.  See comments in
6342          * 'load' below for more information on loading and
6343          * locking.
6344          */
6345
6346
6347         /*
6348          * SOA records are only allowed at top of zone.
6349          */
6350         if (rdataset->type == dns_rdatatype_soa &&
6351             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6352                 return (DNS_R_NOTZONETOP);
6353
6354         if (rdataset->type != dns_rdatatype_nsec3 &&
6355             rdataset->covers != dns_rdatatype_nsec3)
6356                 add_empty_wildcards(rbtdb, name);
6357
6358         if (dns_name_iswildcard(name)) {
6359                 /*
6360                  * NS record owners cannot legally be wild cards.
6361                  */
6362                 if (rdataset->type == dns_rdatatype_ns)
6363                         return (DNS_R_INVALIDNS);
6364                 /*
6365                  * NSEC3 record owners cannot legally be wild cards.
6366                  */
6367                 if (rdataset->type == dns_rdatatype_nsec3)
6368                         return (DNS_R_INVALIDNSEC3);
6369                 result = add_wildcard_magic(rbtdb, name);
6370                 if (result != ISC_R_SUCCESS)
6371                         return (result);
6372         }
6373
6374         node = NULL;
6375         if (rdataset->type == dns_rdatatype_nsec3 ||
6376             rdataset->covers == dns_rdatatype_nsec3) {
6377                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6378                 if (result == ISC_R_SUCCESS)
6379                         node->nsec3 = 1;
6380         } else {
6381                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6382                 if (result == ISC_R_SUCCESS)
6383                         node->nsec3 = 0;
6384         }
6385         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6386                 return (result);
6387         if (result != ISC_R_EXISTS) {
6388                 dns_name_t foundname;
6389                 dns_name_init(&foundname, NULL);
6390                 dns_rbt_namefromnode(node, &foundname);
6391 #ifdef DNS_RBT_USEHASH
6392                 node->locknum = node->hashval % rbtdb->node_lock_count;
6393 #else
6394                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6395                         rbtdb->node_lock_count;
6396 #endif
6397         }
6398
6399         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6400                                             &region,
6401                                             sizeof(rdatasetheader_t));
6402         if (result != ISC_R_SUCCESS)
6403                 return (result);
6404         newheader = (rdatasetheader_t *)region.base;
6405         init_rdataset(rbtdb, newheader);
6406         set_ttl(rbtdb, newheader,
6407                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6408         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6409                                                 rdataset->covers);
6410         newheader->attributes = 0;
6411         newheader->trust = rdataset->trust;
6412         newheader->serial = 1;
6413         newheader->noqname = NULL;
6414         newheader->closest = NULL;
6415         newheader->count = init_count++;
6416         newheader->additional_auth = NULL;
6417         newheader->additional_glue = NULL;
6418         newheader->last_used = 0;
6419         newheader->node = node;
6420         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6421                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6422                 newheader->resign = rdataset->resign;
6423         } else
6424                 newheader->resign = 0;
6425
6426         result = add(rbtdb, node, rbtdb->current_version, newheader,
6427                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6428         if (result == ISC_R_SUCCESS &&
6429             delegating_type(rbtdb, node, rdataset->type))
6430                 node->find_callback = 1;
6431         else if (result == DNS_R_UNCHANGED)
6432                 result = ISC_R_SUCCESS;
6433
6434         return (result);
6435 }
6436
6437 static isc_result_t
6438 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6439         rbtdb_load_t *loadctx;
6440         dns_rbtdb_t *rbtdb;
6441
6442         rbtdb = (dns_rbtdb_t *)db;
6443
6444         REQUIRE(VALID_RBTDB(rbtdb));
6445
6446         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6447         if (loadctx == NULL)
6448                 return (ISC_R_NOMEMORY);
6449
6450         loadctx->rbtdb = rbtdb;
6451         if (IS_CACHE(rbtdb))
6452                 isc_stdtime_get(&loadctx->now);
6453         else
6454                 loadctx->now = 0;
6455
6456         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6457
6458         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6459                 == 0);
6460         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6461
6462         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6463
6464         *addp = loading_addrdataset;
6465         *dbloadp = loadctx;
6466
6467         return (ISC_R_SUCCESS);
6468 }
6469
6470 static isc_result_t
6471 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6472         rbtdb_load_t *loadctx;
6473         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6474
6475         REQUIRE(VALID_RBTDB(rbtdb));
6476         REQUIRE(dbloadp != NULL);
6477         loadctx = *dbloadp;
6478         REQUIRE(loadctx->rbtdb == rbtdb);
6479
6480         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6481
6482         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6483         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6484
6485         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6486         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6487
6488         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6489
6490         /*
6491          * If there's a KEY rdataset at the zone origin containing a
6492          * zone key, we consider the zone secure.
6493          */
6494         if (! IS_CACHE(rbtdb))
6495                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6496
6497         *dbloadp = NULL;
6498
6499         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6500
6501         return (ISC_R_SUCCESS);
6502 }
6503
6504 static isc_result_t
6505 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6506      dns_masterformat_t masterformat) {
6507         dns_rbtdb_t *rbtdb;
6508
6509         rbtdb = (dns_rbtdb_t *)db;
6510
6511         REQUIRE(VALID_RBTDB(rbtdb));
6512
6513         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6514                                  &dns_master_style_default,
6515                                  filename, masterformat));
6516 }
6517
6518 static void
6519 delete_callback(void *data, void *arg) {
6520         dns_rbtdb_t *rbtdb = arg;
6521         rdatasetheader_t *current, *next;
6522         unsigned int locknum;
6523
6524         current = data;
6525         locknum = current->node->locknum;
6526         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6527         while (current != NULL) {
6528                 next = current->next;
6529                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6530                 current = next;
6531         }
6532         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6533 }
6534
6535 static isc_boolean_t
6536 issecure(dns_db_t *db) {
6537         dns_rbtdb_t *rbtdb;
6538         isc_boolean_t secure;
6539
6540         rbtdb = (dns_rbtdb_t *)db;
6541
6542         REQUIRE(VALID_RBTDB(rbtdb));
6543
6544         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6545         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6546         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6547
6548         return (secure);
6549 }
6550
6551 static isc_boolean_t
6552 isdnssec(dns_db_t *db) {
6553         dns_rbtdb_t *rbtdb;
6554         isc_boolean_t dnssec;
6555
6556         rbtdb = (dns_rbtdb_t *)db;
6557
6558         REQUIRE(VALID_RBTDB(rbtdb));
6559
6560         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6561         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6562         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6563
6564         return (dnssec);
6565 }
6566
6567 static unsigned int
6568 nodecount(dns_db_t *db) {
6569         dns_rbtdb_t *rbtdb;
6570         unsigned int count;
6571
6572         rbtdb = (dns_rbtdb_t *)db;
6573
6574         REQUIRE(VALID_RBTDB(rbtdb));
6575
6576         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6577         count = dns_rbt_nodecount(rbtdb->tree);
6578         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6579
6580         return (count);
6581 }
6582
6583 static void
6584 settask(dns_db_t *db, isc_task_t *task) {
6585         dns_rbtdb_t *rbtdb;
6586
6587         rbtdb = (dns_rbtdb_t *)db;
6588
6589         REQUIRE(VALID_RBTDB(rbtdb));
6590
6591         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6592         if (rbtdb->task != NULL)
6593                 isc_task_detach(&rbtdb->task);
6594         if (task != NULL)
6595                 isc_task_attach(task, &rbtdb->task);
6596         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6597 }
6598
6599 static isc_boolean_t
6600 ispersistent(dns_db_t *db) {
6601         UNUSED(db);
6602         return (ISC_FALSE);
6603 }
6604
6605 static isc_result_t
6606 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6607         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6608         dns_rbtnode_t *onode;
6609         isc_result_t result = ISC_R_SUCCESS;
6610
6611         REQUIRE(VALID_RBTDB(rbtdb));
6612         REQUIRE(nodep != NULL && *nodep == NULL);
6613
6614         /* Note that the access to origin_node doesn't require a DB lock */
6615         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6616         if (onode != NULL) {
6617                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6618                 new_reference(rbtdb, onode);
6619                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6620
6621                 *nodep = rbtdb->origin_node;
6622         } else {
6623                 INSIST(IS_CACHE(rbtdb));
6624                 result = ISC_R_NOTFOUND;
6625         }
6626
6627         return (result);
6628 }
6629
6630 static isc_result_t
6631 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6632                    isc_uint8_t *flags, isc_uint16_t *iterations,
6633                    unsigned char *salt, size_t *salt_length)
6634 {
6635         dns_rbtdb_t *rbtdb;
6636         isc_result_t result = ISC_R_NOTFOUND;
6637         rbtdb_version_t *rbtversion = version;
6638
6639         rbtdb = (dns_rbtdb_t *)db;
6640
6641         REQUIRE(VALID_RBTDB(rbtdb));
6642
6643         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6644
6645         if (rbtversion == NULL)
6646                 rbtversion = rbtdb->current_version;
6647
6648         if (rbtversion->havensec3) {
6649                 if (hash != NULL)
6650                         *hash = rbtversion->hash;
6651                 if (salt != NULL && salt_length != NULL) {
6652                         REQUIRE(*salt_length >= rbtversion->salt_length);
6653                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6654                 }
6655                 if (salt_length != NULL)
6656                         *salt_length = rbtversion->salt_length;
6657                 if (iterations != NULL)
6658                         *iterations = rbtversion->iterations;
6659                 if (flags != NULL)
6660                         *flags = rbtversion->flags;
6661                 result = ISC_R_SUCCESS;
6662         }
6663         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6664
6665         return (result);
6666 }
6667
6668 static isc_result_t
6669 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6670         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6671         isc_stdtime_t oldresign;
6672         isc_result_t result = ISC_R_SUCCESS;
6673         rdatasetheader_t *header;
6674
6675         REQUIRE(VALID_RBTDB(rbtdb));
6676         REQUIRE(!IS_CACHE(rbtdb));
6677         REQUIRE(rdataset != NULL);
6678
6679         header = rdataset->private3;
6680         header--;
6681
6682         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6683                   isc_rwlocktype_write);
6684
6685         oldresign = header->resign;
6686         header->resign = resign;
6687         if (header->heap_index != 0) {
6688                 INSIST(RESIGN(header));
6689                 if (resign == 0) {
6690                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6691                                         header->heap_index);
6692                         header->heap_index = 0;
6693                 } else if (resign < oldresign)
6694                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6695                                            header->heap_index);
6696                 else
6697                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6698                                            header->heap_index);
6699         } else if (resign && header->heap_index == 0) {
6700                 header->attributes |= RDATASET_ATTR_RESIGN;
6701                 result = resign_insert(rbtdb, header->node->locknum, header);
6702         }
6703         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6704                     isc_rwlocktype_write);
6705         return (result);
6706 }
6707
6708 static isc_result_t
6709 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6710                dns_name_t *foundname)
6711 {
6712         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6713         rdatasetheader_t *header = NULL, *this;
6714         unsigned int i;
6715         isc_result_t result = ISC_R_NOTFOUND;
6716         unsigned int locknum;
6717
6718         REQUIRE(VALID_RBTDB(rbtdb));
6719
6720         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6721
6722         for (i = 0; i < rbtdb->node_lock_count; i++) {
6723                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6724                 this = isc_heap_element(rbtdb->heaps[i], 1);
6725                 if (this == NULL) {
6726                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6727                                     isc_rwlocktype_read);
6728                         continue;
6729                 }
6730                 if (header == NULL)
6731                         header = this;
6732                 else if (isc_serial_lt(this->resign, header->resign)) {
6733                         locknum = header->node->locknum;
6734                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6735                                     isc_rwlocktype_read);
6736                         header = this;
6737                 } else
6738                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6739                                     isc_rwlocktype_read);
6740         }
6741
6742         if (header == NULL)
6743                 goto unlock;
6744
6745         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6746
6747         if (foundname != NULL)
6748                 dns_rbt_fullnamefromnode(header->node, foundname);
6749
6750         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6751                     isc_rwlocktype_read);
6752
6753         result = ISC_R_SUCCESS;
6754
6755  unlock:
6756         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6757
6758         return (result);
6759 }
6760
6761 static void
6762 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6763 {
6764         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6765         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6766         dns_rbtnode_t *node;
6767         rdatasetheader_t *header;
6768
6769         REQUIRE(VALID_RBTDB(rbtdb));
6770         REQUIRE(rdataset != NULL);
6771         REQUIRE(rbtdb->future_version == rbtversion);
6772         REQUIRE(rbtversion->writer);
6773
6774         node = rdataset->private2;
6775         header = rdataset->private3;
6776         header--;
6777
6778         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6779         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6780                   isc_rwlocktype_write);
6781         /*
6782          * Delete from heap and save to re-signed list so that it can
6783          * be restored if we backout of this change.
6784          */
6785         new_reference(rbtdb, node);
6786         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6787         header->heap_index = 0;
6788         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6789
6790         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6791                     isc_rwlocktype_write);
6792         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6793 }
6794
6795 static dns_stats_t *
6796 getrrsetstats(dns_db_t *db) {
6797         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6798
6799         REQUIRE(VALID_RBTDB(rbtdb));
6800         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6801
6802         return (rbtdb->rrsetstats);
6803 }
6804
6805 static dns_dbmethods_t zone_methods = {
6806         attach,
6807         detach,
6808         beginload,
6809         endload,
6810         dump,
6811         currentversion,
6812         newversion,
6813         attachversion,
6814         closeversion,
6815         findnode,
6816         zone_find,
6817         zone_findzonecut,
6818         attachnode,
6819         detachnode,
6820         expirenode,
6821         printnode,
6822         createiterator,
6823         zone_findrdataset,
6824         allrdatasets,
6825         addrdataset,
6826         subtractrdataset,
6827         deleterdataset,
6828         issecure,
6829         nodecount,
6830         ispersistent,
6831         overmem,
6832         settask,
6833         getoriginnode,
6834         NULL,
6835         getnsec3parameters,
6836         findnsec3node,
6837         setsigningtime,
6838         getsigningtime,
6839         resigned,
6840         isdnssec,
6841         NULL
6842 };
6843
6844 static dns_dbmethods_t cache_methods = {
6845         attach,
6846         detach,
6847         beginload,
6848         endload,
6849         dump,
6850         currentversion,
6851         newversion,
6852         attachversion,
6853         closeversion,
6854         findnode,
6855         cache_find,
6856         cache_findzonecut,
6857         attachnode,
6858         detachnode,
6859         expirenode,
6860         printnode,
6861         createiterator,
6862         cache_findrdataset,
6863         allrdatasets,
6864         addrdataset,
6865         subtractrdataset,
6866         deleterdataset,
6867         issecure,
6868         nodecount,
6869         ispersistent,
6870         overmem,
6871         settask,
6872         getoriginnode,
6873         NULL,
6874         NULL,
6875         NULL,
6876         NULL,
6877         NULL,
6878         NULL,
6879         isdnssec,
6880         getrrsetstats
6881 };
6882
6883 isc_result_t
6884 #ifdef DNS_RBTDB_VERSION64
6885 dns_rbtdb64_create
6886 #else
6887 dns_rbtdb_create
6888 #endif
6889                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6890                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6891                  void *driverarg, dns_db_t **dbp)
6892 {
6893         dns_rbtdb_t *rbtdb;
6894         isc_result_t result;
6895         int i;
6896         dns_name_t name;
6897         isc_boolean_t (*sooner)(void *, void *);
6898
6899         /* Keep the compiler happy. */
6900         UNUSED(argc);
6901         UNUSED(argv);
6902         UNUSED(driverarg);
6903
6904         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6905         if (rbtdb == NULL)
6906                 return (ISC_R_NOMEMORY);
6907
6908         memset(rbtdb, '\0', sizeof(*rbtdb));
6909         dns_name_init(&rbtdb->common.origin, NULL);
6910         rbtdb->common.attributes = 0;
6911         if (type == dns_dbtype_cache) {
6912                 rbtdb->common.methods = &cache_methods;
6913                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6914         } else if (type == dns_dbtype_stub) {
6915                 rbtdb->common.methods = &zone_methods;
6916                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6917         } else
6918                 rbtdb->common.methods = &zone_methods;
6919         rbtdb->common.rdclass = rdclass;
6920         rbtdb->common.mctx = NULL;
6921
6922         result = RBTDB_INITLOCK(&rbtdb->lock);
6923         if (result != ISC_R_SUCCESS)
6924                 goto cleanup_rbtdb;
6925
6926         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6927         if (result != ISC_R_SUCCESS)
6928                 goto cleanup_lock;
6929
6930         /*
6931          * Initialize node_lock_count in a generic way to support future
6932          * extension which allows the user to specify this value on creation.
6933          * Note that when specified for a cache DB it must be larger than 1
6934          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6935          */
6936         if (rbtdb->node_lock_count == 0) {
6937                 if (IS_CACHE(rbtdb))
6938                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6939                 else
6940                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6941         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6942                 result = ISC_R_RANGE;
6943                 goto cleanup_tree_lock;
6944         }
6945         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6946         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6947                                         sizeof(rbtdb_nodelock_t));
6948         if (rbtdb->node_locks == NULL) {
6949                 result = ISC_R_NOMEMORY;
6950                 goto cleanup_tree_lock;
6951         }
6952
6953         rbtdb->rrsetstats = NULL;
6954         if (IS_CACHE(rbtdb)) {
6955                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6956                 if (result != ISC_R_SUCCESS)
6957                         goto cleanup_node_locks;
6958                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6959                                                sizeof(rdatasetheaderlist_t));
6960                 if (rbtdb->rdatasets == NULL) {
6961                         result = ISC_R_NOMEMORY;
6962                         goto cleanup_rrsetstats;
6963                 }
6964                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6965                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
6966         } else
6967                 rbtdb->rdatasets = NULL;
6968
6969         /*
6970          * Create the heaps.
6971          */
6972         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6973                                    sizeof(isc_heap_t *));
6974         if (rbtdb->heaps == NULL) {
6975                 result = ISC_R_NOMEMORY;
6976                 goto cleanup_rdatasets;
6977         }
6978         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6979                 rbtdb->heaps[i] = NULL;
6980         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6981         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6982                 result = isc_heap_create(mctx, sooner, set_index, 0,
6983                                          &rbtdb->heaps[i]);
6984                 if (result != ISC_R_SUCCESS)
6985                         goto cleanup_heaps;
6986         }
6987
6988         /*
6989          * Create deadnode lists.
6990          */
6991         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6992                                        sizeof(rbtnodelist_t));
6993         if (rbtdb->deadnodes == NULL) {
6994                 result = ISC_R_NOMEMORY;
6995                 goto cleanup_heaps;
6996         }
6997         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6998                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
6999
7000         rbtdb->active = rbtdb->node_lock_count;
7001
7002         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7003                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7004                 if (result == ISC_R_SUCCESS) {
7005                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7006                         if (result != ISC_R_SUCCESS)
7007                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7008                 }
7009                 if (result != ISC_R_SUCCESS) {
7010                         while (i-- > 0) {
7011                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7012                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7013                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7014                         }
7015                         goto cleanup_deadnodes;
7016                 }
7017                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7018         }
7019
7020         /*
7021          * Attach to the mctx.  The database will persist so long as there
7022          * are references to it, and attaching to the mctx ensures that our
7023          * mctx won't disappear out from under us.
7024          */
7025         isc_mem_attach(mctx, &rbtdb->common.mctx);
7026
7027         /*
7028          * Must be initialized before free_rbtdb() is called.
7029          */
7030         isc_ondestroy_init(&rbtdb->common.ondest);
7031
7032         /*
7033          * Make a copy of the origin name.
7034          */
7035         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7036         if (result != ISC_R_SUCCESS) {
7037                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7038                 return (result);
7039         }
7040
7041         /*
7042          * Make the Red-Black Trees.
7043          */
7044         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7045         if (result != ISC_R_SUCCESS) {
7046                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7047                 return (result);
7048         }
7049
7050         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7051         if (result != ISC_R_SUCCESS) {
7052                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7053                 return (result);
7054         }
7055
7056         /*
7057          * In order to set the node callback bit correctly in zone databases,
7058          * we need to know if the node has the origin name of the zone.
7059          * In loading_addrdataset() we could simply compare the new name
7060          * to the origin name, but this is expensive.  Also, we don't know the
7061          * node name in addrdataset(), so we need another way of knowing the
7062          * zone's top.
7063          *
7064          * We now explicitly create a node for the zone's origin, and then
7065          * we simply remember the node's address.  This is safe, because
7066          * the top-of-zone node can never be deleted, nor can its address
7067          * change.
7068          */
7069         if (!IS_CACHE(rbtdb)) {
7070                 rbtdb->origin_node = NULL;
7071                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7072                                          &rbtdb->origin_node);
7073                 if (result != ISC_R_SUCCESS) {
7074                         INSIST(result != ISC_R_EXISTS);
7075                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7076                         return (result);
7077                 }
7078                 rbtdb->origin_node->nsec3 = 0;
7079                 /*
7080                  * We need to give the origin node the right locknum.
7081                  */
7082                 dns_name_init(&name, NULL);
7083                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7084 #ifdef DNS_RBT_USEHASH
7085                 rbtdb->origin_node->locknum =
7086                         rbtdb->origin_node->hashval %
7087                         rbtdb->node_lock_count;
7088 #else
7089                 rbtdb->origin_node->locknum =
7090                         dns_name_hash(&name, ISC_TRUE) %
7091                         rbtdb->node_lock_count;
7092 #endif
7093         }
7094
7095         /*
7096          * Misc. Initialization.
7097          */
7098         result = isc_refcount_init(&rbtdb->references, 1);
7099         if (result != ISC_R_SUCCESS) {
7100                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7101                 return (result);
7102         }
7103         rbtdb->attributes = 0;
7104         rbtdb->overmem = ISC_FALSE;
7105         rbtdb->task = NULL;
7106
7107         /*
7108          * Version Initialization.
7109          */
7110         rbtdb->current_serial = 1;
7111         rbtdb->least_serial = 1;
7112         rbtdb->next_serial = 2;
7113         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7114         if (rbtdb->current_version == NULL) {
7115                 isc_refcount_decrement(&rbtdb->references, NULL);
7116                 isc_refcount_destroy(&rbtdb->references);
7117                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7118                 return (ISC_R_NOMEMORY);
7119         }
7120         rbtdb->current_version->secure = dns_db_insecure;
7121         rbtdb->current_version->havensec3 = ISC_FALSE;
7122         rbtdb->current_version->flags = 0;
7123         rbtdb->current_version->iterations = 0;
7124         rbtdb->current_version->hash = 0;
7125         rbtdb->current_version->salt_length = 0;
7126         memset(rbtdb->current_version->salt, 0,
7127                sizeof(rbtdb->current_version->salt));
7128         rbtdb->future_version = NULL;
7129         ISC_LIST_INIT(rbtdb->open_versions);
7130         /*
7131          * Keep the current version in the open list so that list operation
7132          * won't happen in normal lookup operations.
7133          */
7134         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7135
7136         rbtdb->common.magic = DNS_DB_MAGIC;
7137         rbtdb->common.impmagic = RBTDB_MAGIC;
7138
7139         *dbp = (dns_db_t *)rbtdb;
7140
7141         return (ISC_R_SUCCESS);
7142
7143  cleanup_deadnodes:
7144         isc_mem_put(mctx, rbtdb->deadnodes,
7145                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7146
7147  cleanup_heaps:
7148         if (rbtdb->heaps != NULL) {
7149                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7150                         if (rbtdb->heaps[i] != NULL)
7151                                 isc_heap_destroy(&rbtdb->heaps[i]);
7152                 isc_mem_put(mctx, rbtdb->heaps,
7153                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7154         }
7155
7156  cleanup_rdatasets:
7157         if (rbtdb->rdatasets != NULL)
7158                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7159                             sizeof(rdatasetheaderlist_t));
7160  cleanup_rrsetstats:
7161         if (rbtdb->rrsetstats != NULL)
7162                 dns_stats_detach(&rbtdb->rrsetstats);
7163
7164  cleanup_node_locks:
7165         isc_mem_put(mctx, rbtdb->node_locks,
7166                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7167
7168  cleanup_tree_lock:
7169         isc_rwlock_destroy(&rbtdb->tree_lock);
7170
7171  cleanup_lock:
7172         RBTDB_DESTROYLOCK(&rbtdb->lock);
7173
7174  cleanup_rbtdb:
7175         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7176         return (result);
7177 }
7178
7179
7180 /*
7181  * Slabbed Rdataset Methods
7182  */
7183
7184 static void
7185 rdataset_disassociate(dns_rdataset_t *rdataset) {
7186         dns_db_t *db = rdataset->private1;
7187         dns_dbnode_t *node = rdataset->private2;
7188
7189         detachnode(db, &node);
7190 }
7191
7192 static isc_result_t
7193 rdataset_first(dns_rdataset_t *rdataset) {
7194         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7195         unsigned int count;
7196
7197         count = raw[0] * 256 + raw[1];
7198         if (count == 0) {
7199                 rdataset->private5 = NULL;
7200                 return (ISC_R_NOMORE);
7201         }
7202
7203 #if DNS_RDATASET_FIXED
7204         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7205                 raw += 2 + (4 * count);
7206         else
7207 #endif
7208                 raw += 2;
7209
7210         /*
7211          * The privateuint4 field is the number of rdata beyond the
7212          * cursor position, so we decrement the total count by one
7213          * before storing it.
7214          *
7215          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7216          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7217          * to the first entry in the offset table.
7218          */
7219         count--;
7220         rdataset->privateuint4 = count;
7221         rdataset->private5 = raw;
7222
7223         return (ISC_R_SUCCESS);
7224 }
7225
7226 static isc_result_t
7227 rdataset_next(dns_rdataset_t *rdataset) {
7228         unsigned int count;
7229         unsigned int length;
7230         unsigned char *raw;     /* RDATASLAB */
7231
7232         count = rdataset->privateuint4;
7233         if (count == 0)
7234                 return (ISC_R_NOMORE);
7235         count--;
7236         rdataset->privateuint4 = count;
7237
7238         /*
7239          * Skip forward one record (length + 4) or one offset (4).
7240          */
7241         raw = rdataset->private5;
7242 #if DNS_RDATASET_FIXED
7243         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7244 #endif
7245                 length = raw[0] * 256 + raw[1];
7246                 raw += length;
7247 #if DNS_RDATASET_FIXED
7248         }
7249         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7250 #else
7251         rdataset->private5 = raw + 2;           /* length(2) */
7252 #endif
7253
7254         return (ISC_R_SUCCESS);
7255 }
7256
7257 static void
7258 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7259         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7260 #if DNS_RDATASET_FIXED
7261         unsigned int offset;
7262 #endif
7263         unsigned int length;
7264         isc_region_t r;
7265         unsigned int flags = 0;
7266
7267         REQUIRE(raw != NULL);
7268
7269         /*
7270          * Find the start of the record if not already in private5
7271          * then skip the length and order fields.
7272          */
7273 #if DNS_RDATASET_FIXED
7274         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7275                 offset = (raw[0] << 24) + (raw[1] << 16) +
7276                          (raw[2] << 8) + raw[3];
7277                 raw = rdataset->private3;
7278                 raw += offset;
7279         }
7280 #endif
7281         length = raw[0] * 256 + raw[1];
7282 #if DNS_RDATASET_FIXED
7283         raw += 4;
7284 #else
7285         raw += 2;
7286 #endif
7287         if (rdataset->type == dns_rdatatype_rrsig) {
7288                 if (*raw & DNS_RDATASLAB_OFFLINE)
7289                         flags |= DNS_RDATA_OFFLINE;
7290                 length--;
7291                 raw++;
7292         }
7293         r.length = length;
7294         r.base = raw;
7295         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7296         rdata->flags |= flags;
7297 }
7298
7299 static void
7300 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7301         dns_db_t *db = source->private1;
7302         dns_dbnode_t *node = source->private2;
7303         dns_dbnode_t *cloned_node = NULL;
7304
7305         attachnode(db, node, &cloned_node);
7306         *target = *source;
7307
7308         /*
7309          * Reset iterator state.
7310          */
7311         target->privateuint4 = 0;
7312         target->private5 = NULL;
7313 }
7314
7315 static unsigned int
7316 rdataset_count(dns_rdataset_t *rdataset) {
7317         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7318         unsigned int count;
7319
7320         count = raw[0] * 256 + raw[1];
7321
7322         return (count);
7323 }
7324
7325 static isc_result_t
7326 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7327                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7328 {
7329         dns_db_t *db = rdataset->private1;
7330         dns_dbnode_t *node = rdataset->private2;
7331         dns_dbnode_t *cloned_node;
7332         struct noqname *noqname = rdataset->private6;
7333
7334         cloned_node = NULL;
7335         attachnode(db, node, &cloned_node);
7336         nsec->methods = &rdataset_methods;
7337         nsec->rdclass = db->rdclass;
7338         nsec->type = noqname->type;
7339         nsec->covers = 0;
7340         nsec->ttl = rdataset->ttl;
7341         nsec->trust = rdataset->trust;
7342         nsec->private1 = rdataset->private1;
7343         nsec->private2 = rdataset->private2;
7344         nsec->private3 = noqname->neg;
7345         nsec->privateuint4 = 0;
7346         nsec->private5 = NULL;
7347         nsec->private6 = NULL;
7348         nsec->private7 = NULL;
7349
7350         cloned_node = NULL;
7351         attachnode(db, node, &cloned_node);
7352         nsecsig->methods = &rdataset_methods;
7353         nsecsig->rdclass = db->rdclass;
7354         nsecsig->type = dns_rdatatype_rrsig;
7355         nsecsig->covers = noqname->type;
7356         nsecsig->ttl = rdataset->ttl;
7357         nsecsig->trust = rdataset->trust;
7358         nsecsig->private1 = rdataset->private1;
7359         nsecsig->private2 = rdataset->private2;
7360         nsecsig->private3 = noqname->negsig;
7361         nsecsig->privateuint4 = 0;
7362         nsecsig->private5 = NULL;
7363         nsec->private6 = NULL;
7364         nsec->private7 = NULL;
7365
7366         dns_name_clone(&noqname->name, name);
7367
7368         return (ISC_R_SUCCESS);
7369 }
7370
7371 static isc_result_t
7372 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7373                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7374 {
7375         dns_db_t *db = rdataset->private1;
7376         dns_dbnode_t *node = rdataset->private2;
7377         dns_dbnode_t *cloned_node;
7378         struct noqname *closest = rdataset->private7;
7379
7380         cloned_node = NULL;
7381         attachnode(db, node, &cloned_node);
7382         nsec->methods = &rdataset_methods;
7383         nsec->rdclass = db->rdclass;
7384         nsec->type = closest->type;
7385         nsec->covers = 0;
7386         nsec->ttl = rdataset->ttl;
7387         nsec->trust = rdataset->trust;
7388         nsec->private1 = rdataset->private1;
7389         nsec->private2 = rdataset->private2;
7390         nsec->private3 = closest->neg;
7391         nsec->privateuint4 = 0;
7392         nsec->private5 = NULL;
7393         nsec->private6 = NULL;
7394         nsec->private7 = NULL;
7395
7396         cloned_node = NULL;
7397         attachnode(db, node, &cloned_node);
7398         nsecsig->methods = &rdataset_methods;
7399         nsecsig->rdclass = db->rdclass;
7400         nsecsig->type = dns_rdatatype_rrsig;
7401         nsecsig->covers = closest->type;
7402         nsecsig->ttl = rdataset->ttl;
7403         nsecsig->trust = rdataset->trust;
7404         nsecsig->private1 = rdataset->private1;
7405         nsecsig->private2 = rdataset->private2;
7406         nsecsig->private3 = closest->negsig;
7407         nsecsig->privateuint4 = 0;
7408         nsecsig->private5 = NULL;
7409         nsec->private6 = NULL;
7410         nsec->private7 = NULL;
7411
7412         dns_name_clone(&closest->name, name);
7413
7414         return (ISC_R_SUCCESS);
7415 }
7416
7417 static void
7418 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7419         dns_rbtdb_t *rbtdb = rdataset->private1;
7420         dns_rbtnode_t *rbtnode = rdataset->private2;
7421         rdatasetheader_t *header = rdataset->private3;
7422
7423         header--;
7424         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7425                   isc_rwlocktype_write);
7426         header->trust = rdataset->trust = trust;
7427         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7428                   isc_rwlocktype_write);
7429 }
7430
7431 static void
7432 rdataset_expire(dns_rdataset_t *rdataset) {
7433         dns_rbtdb_t *rbtdb = rdataset->private1;
7434         dns_rbtnode_t *rbtnode = rdataset->private2;
7435         rdatasetheader_t *header = rdataset->private3;
7436
7437         header--;
7438         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7439                   isc_rwlocktype_write);
7440         expire_header(rbtdb, header, ISC_FALSE);
7441         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7442                   isc_rwlocktype_write);
7443 }
7444
7445 /*
7446  * Rdataset Iterator Methods
7447  */
7448
7449 static void
7450 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7451         rbtdb_rdatasetiter_t *rbtiterator;
7452
7453         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7454
7455         if (rbtiterator->common.version != NULL)
7456                 closeversion(rbtiterator->common.db,
7457                              &rbtiterator->common.version, ISC_FALSE);
7458         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7459         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7460                     sizeof(*rbtiterator));
7461
7462         *iteratorp = NULL;
7463 }
7464
7465 static isc_result_t
7466 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7467         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7468         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7469         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7470         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7471         rdatasetheader_t *header, *top_next;
7472         rbtdb_serial_t serial;
7473         isc_stdtime_t now;
7474
7475         if (IS_CACHE(rbtdb)) {
7476                 serial = 1;
7477                 now = rbtiterator->common.now;
7478         } else {
7479                 serial = rbtversion->serial;
7480                 now = 0;
7481         }
7482
7483         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7484                   isc_rwlocktype_read);
7485
7486         for (header = rbtnode->data; header != NULL; header = top_next) {
7487                 top_next = header->next;
7488                 do {
7489                         if (header->serial <= serial && !IGNORE(header)) {
7490                                 /*
7491                                  * Is this a "this rdataset doesn't exist"
7492                                  * record?  Or is it too old in the cache?
7493                                  *
7494                                  * Note: unlike everywhere else, we
7495                                  * check for now > header->rdh_ttl instead
7496                                  * of now >= header->rdh_ttl.  This allows
7497                                  * ANY and RRSIG queries for 0 TTL
7498                                  * rdatasets to work.
7499                                  */
7500                                 if (NONEXISTENT(header) ||
7501                                     (now != 0 && now > header->rdh_ttl))
7502                                         header = NULL;
7503                                 break;
7504                         } else
7505                                 header = header->down;
7506                 } while (header != NULL);
7507                 if (header != NULL)
7508                         break;
7509         }
7510
7511         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7512                     isc_rwlocktype_read);
7513
7514         rbtiterator->current = header;
7515
7516         if (header == NULL)
7517                 return (ISC_R_NOMORE);
7518
7519         return (ISC_R_SUCCESS);
7520 }
7521
7522 static isc_result_t
7523 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7524         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7525         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7526         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7527         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7528         rdatasetheader_t *header, *top_next;
7529         rbtdb_serial_t serial;
7530         isc_stdtime_t now;
7531         rbtdb_rdatatype_t type, negtype;
7532         dns_rdatatype_t rdtype, covers;
7533
7534         header = rbtiterator->current;
7535         if (header == NULL)
7536                 return (ISC_R_NOMORE);
7537
7538         if (IS_CACHE(rbtdb)) {
7539                 serial = 1;
7540                 now = rbtiterator->common.now;
7541         } else {
7542                 serial = rbtversion->serial;
7543                 now = 0;
7544         }
7545
7546         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7547                   isc_rwlocktype_read);
7548
7549         type = header->type;
7550         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7551         if (rdtype == 0) {
7552                 covers = RBTDB_RDATATYPE_EXT(header->type);
7553                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7554         } else
7555                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7556         for (header = header->next; header != NULL; header = top_next) {
7557                 top_next = header->next;
7558                 /*
7559                  * If not walking back up the down list.
7560                  */
7561                 if (header->type != type && header->type != negtype) {
7562                         do {
7563                                 if (header->serial <= serial &&
7564                                     !IGNORE(header)) {
7565                                         /*
7566                                          * Is this a "this rdataset doesn't
7567                                          * exist" record?
7568                                          *
7569                                          * Note: unlike everywhere else, we
7570                                          * check for now > header->ttl instead
7571                                          * of now >= header->ttl.  This allows
7572                                          * ANY and RRSIG queries for 0 TTL
7573                                          * rdatasets to work.
7574                                          */
7575                                         if ((header->attributes &
7576                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7577                                             (now != 0 && now > header->rdh_ttl))
7578                                                 header = NULL;
7579                                         break;
7580                                 } else
7581                                         header = header->down;
7582                         } while (header != NULL);
7583                         if (header != NULL)
7584                                 break;
7585                 }
7586         }
7587
7588         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7589                     isc_rwlocktype_read);
7590
7591         rbtiterator->current = header;
7592
7593         if (header == NULL)
7594                 return (ISC_R_NOMORE);
7595
7596         return (ISC_R_SUCCESS);
7597 }
7598
7599 static void
7600 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7601         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7602         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7603         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7604         rdatasetheader_t *header;
7605
7606         header = rbtiterator->current;
7607         REQUIRE(header != NULL);
7608
7609         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7610                   isc_rwlocktype_read);
7611
7612         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7613                       rdataset);
7614
7615         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7616                     isc_rwlocktype_read);
7617 }
7618
7619
7620 /*
7621  * Database Iterator Methods
7622  */
7623
7624 static inline void
7625 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7626         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7627         dns_rbtnode_t *node = rbtdbiter->node;
7628
7629         if (node == NULL)
7630                 return;
7631
7632         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7633         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7634 }
7635
7636 static inline void
7637 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7638         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7639         dns_rbtnode_t *node = rbtdbiter->node;
7640         nodelock_t *lock;
7641
7642         if (node == NULL)
7643                 return;
7644
7645         lock = &rbtdb->node_locks[node->locknum].lock;
7646         NODE_LOCK(lock, isc_rwlocktype_read);
7647         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7648                             rbtdbiter->tree_locked, ISC_FALSE);
7649         NODE_UNLOCK(lock, isc_rwlocktype_read);
7650
7651         rbtdbiter->node = NULL;
7652 }
7653
7654 static void
7655 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7656         dns_rbtnode_t *node;
7657         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7658         isc_boolean_t was_read_locked = ISC_FALSE;
7659         nodelock_t *lock;
7660         int i;
7661
7662         if (rbtdbiter->delete != 0) {
7663                 /*
7664                  * Note that "%d node of %d in tree" can report things like
7665                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7666                  * That some nodes appear on the deletions list more than
7667                  * once.  Only the last occurence will actually be deleted.
7668                  */
7669                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7670                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7671                               "flush_deletions: %d nodes of %d in tree",
7672                               rbtdbiter->delete,
7673                               dns_rbt_nodecount(rbtdb->tree));
7674
7675                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7676                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7677                         was_read_locked = ISC_TRUE;
7678                 }
7679                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7680                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7681
7682                 for (i = 0; i < rbtdbiter->delete; i++) {
7683                         node = rbtdbiter->deletions[i];
7684                         lock = &rbtdb->node_locks[node->locknum].lock;
7685
7686                         NODE_LOCK(lock, isc_rwlocktype_read);
7687                         decrement_reference(rbtdb, node, 0,
7688                                             isc_rwlocktype_read,
7689                                             rbtdbiter->tree_locked, ISC_FALSE);
7690                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7691                 }
7692
7693                 rbtdbiter->delete = 0;
7694
7695                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7696                 if (was_read_locked) {
7697                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7698                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7699
7700                 } else {
7701                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7702                 }
7703         }
7704 }
7705
7706 static inline void
7707 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7708         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7709
7710         REQUIRE(rbtdbiter->paused);
7711         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7712
7713         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7714         rbtdbiter->tree_locked = isc_rwlocktype_read;
7715
7716         rbtdbiter->paused = ISC_FALSE;
7717 }
7718
7719 static void
7720 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7721         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7722         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7723         dns_db_t *db = NULL;
7724
7725         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7726                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7727                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7728         } else
7729                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7730
7731         dereference_iter_node(rbtdbiter);
7732
7733         flush_deletions(rbtdbiter);
7734
7735         dns_db_attach(rbtdbiter->common.db, &db);
7736         dns_db_detach(&rbtdbiter->common.db);
7737
7738         dns_rbtnodechain_reset(&rbtdbiter->chain);
7739         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7740         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7741         dns_db_detach(&db);
7742
7743         *iteratorp = NULL;
7744 }
7745
7746 static isc_result_t
7747 dbiterator_first(dns_dbiterator_t *iterator) {
7748         isc_result_t result;
7749         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7750         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7751         dns_name_t *name, *origin;
7752
7753         if (rbtdbiter->result != ISC_R_SUCCESS &&
7754             rbtdbiter->result != ISC_R_NOMORE)
7755                 return (rbtdbiter->result);
7756
7757         if (rbtdbiter->paused)
7758                 resume_iteration(rbtdbiter);
7759
7760         dereference_iter_node(rbtdbiter);
7761
7762         name = dns_fixedname_name(&rbtdbiter->name);
7763         origin = dns_fixedname_name(&rbtdbiter->origin);
7764         dns_rbtnodechain_reset(&rbtdbiter->chain);
7765         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7766
7767         if (rbtdbiter->nsec3only) {
7768                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7769                 result = dns_rbtnodechain_first(rbtdbiter->current,
7770                                                 rbtdb->nsec3, name, origin);
7771         } else {
7772                 rbtdbiter->current = &rbtdbiter->chain;
7773                 result = dns_rbtnodechain_first(rbtdbiter->current,
7774                                                 rbtdb->tree, name, origin);
7775                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7776                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7777                         result = dns_rbtnodechain_first(rbtdbiter->current,
7778                                                         rbtdb->nsec3, name,
7779                                                         origin);
7780                 }
7781         }
7782         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7783                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7784                                                   NULL, &rbtdbiter->node);
7785                 if (result == ISC_R_SUCCESS) {
7786                         rbtdbiter->new_origin = ISC_TRUE;
7787                         reference_iter_node(rbtdbiter);
7788                 }
7789         } else {
7790                 INSIST(result == ISC_R_NOTFOUND);
7791                 result = ISC_R_NOMORE; /* The tree is empty. */
7792         }
7793
7794         rbtdbiter->result = result;
7795
7796         return (result);
7797 }
7798
7799 static isc_result_t
7800 dbiterator_last(dns_dbiterator_t *iterator) {
7801         isc_result_t result;
7802         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7803         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7804         dns_name_t *name, *origin;
7805
7806         if (rbtdbiter->result != ISC_R_SUCCESS &&
7807             rbtdbiter->result != ISC_R_NOMORE)
7808                 return (rbtdbiter->result);
7809
7810         if (rbtdbiter->paused)
7811                 resume_iteration(rbtdbiter);
7812
7813         dereference_iter_node(rbtdbiter);
7814
7815         name = dns_fixedname_name(&rbtdbiter->name);
7816         origin = dns_fixedname_name(&rbtdbiter->origin);
7817         dns_rbtnodechain_reset(&rbtdbiter->chain);
7818         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7819
7820         result = ISC_R_NOTFOUND;
7821         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7822                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7823                 result = dns_rbtnodechain_last(rbtdbiter->current,
7824                                                rbtdb->nsec3, name, origin);
7825         }
7826         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7827                 rbtdbiter->current = &rbtdbiter->chain;
7828                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7829                                                name, origin);
7830         }
7831         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7832                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7833                                                   NULL, &rbtdbiter->node);
7834                 if (result == ISC_R_SUCCESS) {
7835                         rbtdbiter->new_origin = ISC_TRUE;
7836                         reference_iter_node(rbtdbiter);
7837                 }
7838         } else {
7839                 INSIST(result == ISC_R_NOTFOUND);
7840                 result = ISC_R_NOMORE; /* The tree is empty. */
7841         }
7842
7843         rbtdbiter->result = result;
7844
7845         return (result);
7846 }
7847
7848 static isc_result_t
7849 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7850         isc_result_t result;
7851         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7852         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7853         dns_name_t *iname, *origin;
7854
7855         if (rbtdbiter->result != ISC_R_SUCCESS &&
7856             rbtdbiter->result != ISC_R_NOTFOUND &&
7857             rbtdbiter->result != ISC_R_NOMORE)
7858                 return (rbtdbiter->result);
7859
7860         if (rbtdbiter->paused)
7861                 resume_iteration(rbtdbiter);
7862
7863         dereference_iter_node(rbtdbiter);
7864
7865         iname = dns_fixedname_name(&rbtdbiter->name);
7866         origin = dns_fixedname_name(&rbtdbiter->origin);
7867         dns_rbtnodechain_reset(&rbtdbiter->chain);
7868         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7869
7870         if (rbtdbiter->nsec3only) {
7871                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7872                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7873                                           &rbtdbiter->node,
7874                                           rbtdbiter->current,
7875                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7876         } else if (rbtdbiter->nonsec3) {
7877                 rbtdbiter->current = &rbtdbiter->chain;
7878                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7879                                           &rbtdbiter->node,
7880                                           rbtdbiter->current,
7881                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7882         } else {
7883                 /*
7884                  * Stay on main chain if not found on either chain.
7885                  */
7886                 rbtdbiter->current = &rbtdbiter->chain;
7887                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7888                                           &rbtdbiter->node,
7889                                           rbtdbiter->current,
7890                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7891                 if (result == DNS_R_PARTIALMATCH) {
7892                         dns_rbtnode_t *node = NULL;
7893                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7894                                                   &node, &rbtdbiter->nsec3chain,
7895                                                   DNS_RBTFIND_EMPTYDATA,
7896                                                   NULL, NULL);
7897                         if (result == ISC_R_SUCCESS) {
7898                                 rbtdbiter->node = node;
7899                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7900                         }
7901                 }
7902         }
7903
7904 #if 1
7905         if (result == ISC_R_SUCCESS) {
7906                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7907                                                   origin, NULL);
7908                 if (result == ISC_R_SUCCESS) {
7909                         rbtdbiter->new_origin = ISC_TRUE;
7910                         reference_iter_node(rbtdbiter);
7911                 }
7912         } else if (result == DNS_R_PARTIALMATCH) {
7913                 result = ISC_R_NOTFOUND;
7914                 rbtdbiter->node = NULL;
7915         }
7916
7917         rbtdbiter->result = result;
7918 #else
7919         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7920                 isc_result_t tresult;
7921                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7922                                                    origin, NULL);
7923                 if (tresult == ISC_R_SUCCESS) {
7924                         rbtdbiter->new_origin = ISC_TRUE;
7925                         reference_iter_node(rbtdbiter);
7926                 } else {
7927                         result = tresult;
7928                         rbtdbiter->node = NULL;
7929                 }
7930         } else
7931                 rbtdbiter->node = NULL;
7932
7933         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7934                             ISC_R_SUCCESS : result;
7935 #endif
7936
7937         return (result);
7938 }
7939
7940 static isc_result_t
7941 dbiterator_prev(dns_dbiterator_t *iterator) {
7942         isc_result_t result;
7943         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7944         dns_name_t *name, *origin;
7945         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7946
7947         REQUIRE(rbtdbiter->node != NULL);
7948
7949         if (rbtdbiter->result != ISC_R_SUCCESS)
7950                 return (rbtdbiter->result);
7951
7952         if (rbtdbiter->paused)
7953                 resume_iteration(rbtdbiter);
7954
7955         name = dns_fixedname_name(&rbtdbiter->name);
7956         origin = dns_fixedname_name(&rbtdbiter->origin);
7957         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
7958         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7959             !rbtdbiter->nonsec3 &&
7960             &rbtdbiter->nsec3chain == rbtdbiter->current) {
7961                 rbtdbiter->current = &rbtdbiter->chain;
7962                 dns_rbtnodechain_reset(rbtdbiter->current);
7963                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7964                                                name, origin);
7965                 if (result == ISC_R_NOTFOUND)
7966                         result = ISC_R_NOMORE;
7967         }
7968
7969         dereference_iter_node(rbtdbiter);
7970
7971         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7972                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7973                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7974                                                   NULL, &rbtdbiter->node);
7975         }
7976
7977         if (result == ISC_R_SUCCESS)
7978                 reference_iter_node(rbtdbiter);
7979
7980         rbtdbiter->result = result;
7981
7982         return (result);
7983 }
7984
7985 static isc_result_t
7986 dbiterator_next(dns_dbiterator_t *iterator) {
7987         isc_result_t result;
7988         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7989         dns_name_t *name, *origin;
7990         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7991
7992         REQUIRE(rbtdbiter->node != NULL);
7993
7994         if (rbtdbiter->result != ISC_R_SUCCESS)
7995                 return (rbtdbiter->result);
7996
7997         if (rbtdbiter->paused)
7998                 resume_iteration(rbtdbiter);
7999
8000         name = dns_fixedname_name(&rbtdbiter->name);
8001         origin = dns_fixedname_name(&rbtdbiter->origin);
8002         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8003         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8004             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8005                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8006                 dns_rbtnodechain_reset(rbtdbiter->current);
8007                 result = dns_rbtnodechain_first(rbtdbiter->current,
8008                                                 rbtdb->nsec3, name, origin);
8009                 if (result == ISC_R_NOTFOUND)
8010                         result = ISC_R_NOMORE;
8011         }
8012
8013         dereference_iter_node(rbtdbiter);
8014
8015         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8016                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8017                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8018                                                   NULL, &rbtdbiter->node);
8019         }
8020         if (result == ISC_R_SUCCESS)
8021                 reference_iter_node(rbtdbiter);
8022
8023         rbtdbiter->result = result;
8024
8025         return (result);
8026 }
8027
8028 static isc_result_t
8029 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8030                    dns_name_t *name)
8031 {
8032         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8033         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8034         dns_rbtnode_t *node = rbtdbiter->node;
8035         isc_result_t result;
8036         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8037         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8038
8039         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8040         REQUIRE(rbtdbiter->node != NULL);
8041
8042         if (rbtdbiter->paused)
8043                 resume_iteration(rbtdbiter);
8044
8045         if (name != NULL) {
8046                 if (rbtdbiter->common.relative_names)
8047                         origin = NULL;
8048                 result = dns_name_concatenate(nodename, origin, name, NULL);
8049                 if (result != ISC_R_SUCCESS)
8050                         return (result);
8051                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8052                         result = DNS_R_NEWORIGIN;
8053         } else
8054                 result = ISC_R_SUCCESS;
8055
8056         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8057         new_reference(rbtdb, node);
8058         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8059
8060         *nodep = rbtdbiter->node;
8061
8062         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8063                 isc_result_t expire_result;
8064
8065                 /*
8066                  * If the deletion array is full, flush it before trying
8067                  * to expire the current node.  The current node can't
8068                  * fully deleted while the iteration cursor is still on it.
8069                  */
8070                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8071                         flush_deletions(rbtdbiter);
8072
8073                 expire_result = expirenode(iterator->db, *nodep, 0);
8074
8075                 /*
8076                  * expirenode() currently always returns success.
8077                  */
8078                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8079                         unsigned int refs;
8080
8081                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8082                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8083                         dns_rbtnode_refincrement(node, &refs);
8084                         INSIST(refs != 0);
8085                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8086                 }
8087         }
8088
8089         return (result);
8090 }
8091
8092 static isc_result_t
8093 dbiterator_pause(dns_dbiterator_t *iterator) {
8094         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8095         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8096
8097         if (rbtdbiter->result != ISC_R_SUCCESS &&
8098             rbtdbiter->result != ISC_R_NOMORE)
8099                 return (rbtdbiter->result);
8100
8101         if (rbtdbiter->paused)
8102                 return (ISC_R_SUCCESS);
8103
8104         rbtdbiter->paused = ISC_TRUE;
8105
8106         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8107                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8108                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8109                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8110         }
8111
8112         flush_deletions(rbtdbiter);
8113
8114         return (ISC_R_SUCCESS);
8115 }
8116
8117 static isc_result_t
8118 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8119         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8120         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8121
8122         if (rbtdbiter->result != ISC_R_SUCCESS)
8123                 return (rbtdbiter->result);
8124
8125         return (dns_name_copy(origin, name, NULL));
8126 }
8127
8128 /*%
8129  * Additional cache routines.
8130  */
8131 static isc_result_t
8132 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8133                        dns_rdatatype_t qtype, dns_acache_t *acache,
8134                        dns_zone_t **zonep, dns_db_t **dbp,
8135                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8136                        dns_name_t *fname, dns_message_t *msg,
8137                        isc_stdtime_t now)
8138 {
8139         dns_rbtdb_t *rbtdb = rdataset->private1;
8140         dns_rbtnode_t *rbtnode = rdataset->private2;
8141         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8142         unsigned int current_count = rdataset->privateuint4;
8143         unsigned int count;
8144         rdatasetheader_t *header;
8145         nodelock_t *nodelock;
8146         unsigned int total_count;
8147         acachectl_t *acarray;
8148         dns_acacheentry_t *entry;
8149         isc_result_t result;
8150
8151         UNUSED(qtype); /* we do not use this value at least for now */
8152         UNUSED(acache);
8153
8154         header = (struct rdatasetheader *)(raw - sizeof(*header));
8155
8156         total_count = raw[0] * 256 + raw[1];
8157         INSIST(total_count > current_count);
8158         count = total_count - current_count - 1;
8159
8160         acarray = NULL;
8161
8162         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8163         NODE_LOCK(nodelock, isc_rwlocktype_read);
8164
8165         switch (type) {
8166         case dns_rdatasetadditional_fromauth:
8167                 acarray = header->additional_auth;
8168                 break;
8169         case dns_rdatasetadditional_fromcache:
8170                 acarray = NULL;
8171                 break;
8172         case dns_rdatasetadditional_fromglue:
8173                 acarray = header->additional_glue;
8174                 break;
8175         default:
8176                 INSIST(0);
8177         }
8178
8179         if (acarray == NULL) {
8180                 if (type != dns_rdatasetadditional_fromcache)
8181                         dns_acache_countquerymiss(acache);
8182                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8183                 return (ISC_R_NOTFOUND);
8184         }
8185
8186         if (acarray[count].entry == NULL) {
8187                 dns_acache_countquerymiss(acache);
8188                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8189                 return (ISC_R_NOTFOUND);
8190         }
8191
8192         entry = NULL;
8193         dns_acache_attachentry(acarray[count].entry, &entry);
8194
8195         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8196
8197         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8198                                      nodep, fname, msg, now);
8199
8200         dns_acache_detachentry(&entry);
8201
8202         return (result);
8203 }
8204
8205 static void
8206 acache_callback(dns_acacheentry_t *entry, void **arg) {
8207         dns_rbtdb_t *rbtdb;
8208         dns_rbtnode_t *rbtnode;
8209         nodelock_t *nodelock;
8210         acachectl_t *acarray = NULL;
8211         acache_cbarg_t *cbarg;
8212         unsigned int count;
8213
8214         REQUIRE(arg != NULL);
8215         cbarg = *arg;
8216
8217         /*
8218          * The caller must hold the entry lock.
8219          */
8220
8221         rbtdb = (dns_rbtdb_t *)cbarg->db;
8222         rbtnode = (dns_rbtnode_t *)cbarg->node;
8223
8224         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8225         NODE_LOCK(nodelock, isc_rwlocktype_write);
8226
8227         switch (cbarg->type) {
8228         case dns_rdatasetadditional_fromauth:
8229                 acarray = cbarg->header->additional_auth;
8230                 break;
8231         case dns_rdatasetadditional_fromglue:
8232                 acarray = cbarg->header->additional_glue;
8233                 break;
8234         default:
8235                 INSIST(0);
8236         }
8237
8238         count = cbarg->count;
8239         if (acarray != NULL && acarray[count].entry == entry) {
8240                 acarray[count].entry = NULL;
8241                 INSIST(acarray[count].cbarg == cbarg);
8242                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8243                 acarray[count].cbarg = NULL;
8244         } else
8245                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8246
8247         dns_acache_detachentry(&entry);
8248
8249         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8250
8251         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8252         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8253
8254         *arg = NULL;
8255 }
8256
8257 static void
8258 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8259                       acache_cbarg_t **cbargp)
8260 {
8261         acache_cbarg_t *cbarg;
8262
8263         REQUIRE(mctx != NULL);
8264         REQUIRE(entry != NULL);
8265         REQUIRE(cbargp != NULL && *cbargp != NULL);
8266
8267         cbarg = *cbargp;
8268
8269         dns_acache_cancelentry(entry);
8270         dns_db_detachnode(cbarg->db, &cbarg->node);
8271         dns_db_detach(&cbarg->db);
8272
8273         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8274
8275         *cbargp = NULL;
8276 }
8277
8278 static isc_result_t
8279 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8280                        dns_rdatatype_t qtype, dns_acache_t *acache,
8281                        dns_zone_t *zone, dns_db_t *db,
8282                        dns_dbversion_t *version, dns_dbnode_t *node,
8283                        dns_name_t *fname)
8284 {
8285         dns_rbtdb_t *rbtdb = rdataset->private1;
8286         dns_rbtnode_t *rbtnode = rdataset->private2;
8287         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8288         unsigned int current_count = rdataset->privateuint4;
8289         rdatasetheader_t *header;
8290         unsigned int total_count, count;
8291         nodelock_t *nodelock;
8292         isc_result_t result;
8293         acachectl_t *acarray;
8294         dns_acacheentry_t *newentry, *oldentry = NULL;
8295         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8296
8297         UNUSED(qtype);
8298
8299         if (type == dns_rdatasetadditional_fromcache)
8300                 return (ISC_R_SUCCESS);
8301
8302         header = (struct rdatasetheader *)(raw - sizeof(*header));
8303
8304         total_count = raw[0] * 256 + raw[1];
8305         INSIST(total_count > current_count);
8306         count = total_count - current_count - 1; /* should be private data */
8307
8308         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8309         if (newcbarg == NULL)
8310                 return (ISC_R_NOMEMORY);
8311         newcbarg->type = type;
8312         newcbarg->count = count;
8313         newcbarg->header = header;
8314         newcbarg->db = NULL;
8315         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8316         newcbarg->node = NULL;
8317         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8318                           &newcbarg->node);
8319         newentry = NULL;
8320         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8321                                         acache_callback, newcbarg, &newentry);
8322         if (result != ISC_R_SUCCESS)
8323                 goto fail;
8324         /* Set cache data in the new entry. */
8325         result = dns_acache_setentry(acache, newentry, zone, db,
8326                                      version, node, fname);
8327         if (result != ISC_R_SUCCESS)
8328                 goto fail;
8329
8330         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8331         NODE_LOCK(nodelock, isc_rwlocktype_write);
8332
8333         acarray = NULL;
8334         switch (type) {
8335         case dns_rdatasetadditional_fromauth:
8336                 acarray = header->additional_auth;
8337                 break;
8338         case dns_rdatasetadditional_fromglue:
8339                 acarray = header->additional_glue;
8340                 break;
8341         default:
8342                 INSIST(0);
8343         }
8344
8345         if (acarray == NULL) {
8346                 unsigned int i;
8347
8348                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8349                                       sizeof(acachectl_t));
8350
8351                 if (acarray == NULL) {
8352                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8353                         goto fail;
8354                 }
8355
8356                 for (i = 0; i < total_count; i++) {
8357                         acarray[i].entry = NULL;
8358                         acarray[i].cbarg = NULL;
8359                 }
8360         }
8361         switch (type) {
8362         case dns_rdatasetadditional_fromauth:
8363                 header->additional_auth = acarray;
8364                 break;
8365         case dns_rdatasetadditional_fromglue:
8366                 header->additional_glue = acarray;
8367                 break;
8368         default:
8369                 INSIST(0);
8370         }
8371
8372         if (acarray[count].entry != NULL) {
8373                 /*
8374                  * Swap the entry.  Delay cleaning-up the old entry since
8375                  * it would require a node lock.
8376                  */
8377                 oldentry = acarray[count].entry;
8378                 INSIST(acarray[count].cbarg != NULL);
8379                 oldcbarg = acarray[count].cbarg;
8380         }
8381         acarray[count].entry = newentry;
8382         acarray[count].cbarg = newcbarg;
8383
8384         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8385
8386         if (oldentry != NULL) {
8387                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8388                 dns_acache_detachentry(&oldentry);
8389         }
8390
8391         return (ISC_R_SUCCESS);
8392
8393  fail:
8394         if (newcbarg != NULL) {
8395                 if (newentry != NULL) {
8396                         acache_cancelentry(rbtdb->common.mctx, newentry,
8397                                            &newcbarg);
8398                         dns_acache_detachentry(&newentry);
8399                 } else {
8400                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8401                         dns_db_detach(&newcbarg->db);
8402                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8403                             sizeof(*newcbarg));
8404                 }
8405         }
8406
8407         return (result);
8408 }
8409
8410 static isc_result_t
8411 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8412                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8413 {
8414         dns_rbtdb_t *rbtdb = rdataset->private1;
8415         dns_rbtnode_t *rbtnode = rdataset->private2;
8416         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8417         unsigned int current_count = rdataset->privateuint4;
8418         rdatasetheader_t *header;
8419         nodelock_t *nodelock;
8420         unsigned int total_count, count;
8421         acachectl_t *acarray;
8422         dns_acacheentry_t *entry;
8423         acache_cbarg_t *cbarg;
8424
8425         UNUSED(qtype);          /* we do not use this value at least for now */
8426         UNUSED(acache);
8427
8428         if (type == dns_rdatasetadditional_fromcache)
8429                 return (ISC_R_SUCCESS);
8430
8431         header = (struct rdatasetheader *)(raw - sizeof(*header));
8432
8433         total_count = raw[0] * 256 + raw[1];
8434         INSIST(total_count > current_count);
8435         count = total_count - current_count - 1;
8436
8437         acarray = NULL;
8438         entry = NULL;
8439
8440         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8441         NODE_LOCK(nodelock, isc_rwlocktype_write);
8442
8443         switch (type) {
8444         case dns_rdatasetadditional_fromauth:
8445                 acarray = header->additional_auth;
8446                 break;
8447         case dns_rdatasetadditional_fromglue:
8448                 acarray = header->additional_glue;
8449                 break;
8450         default:
8451                 INSIST(0);
8452         }
8453
8454         if (acarray == NULL) {
8455                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8456                 return (ISC_R_NOTFOUND);
8457         }
8458
8459         entry = acarray[count].entry;
8460         if (entry == NULL) {
8461                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8462                 return (ISC_R_NOTFOUND);
8463         }
8464
8465         acarray[count].entry = NULL;
8466         cbarg = acarray[count].cbarg;
8467         acarray[count].cbarg = NULL;
8468
8469         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8470
8471         if (entry != NULL) {
8472                 if (cbarg != NULL)
8473                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8474                 dns_acache_detachentry(&entry);
8475         }
8476
8477         return (ISC_R_SUCCESS);
8478 }
8479
8480 /*%
8481  * Routines for LRU-based cache management.
8482  */
8483
8484 /*%
8485  * See if a given cache entry that is being reused needs to be updated
8486  * in the LRU-list.  From the LRU management point of view, this function is
8487  * expected to return true for almost all cases.  When used with threads,
8488  * however, this may cause a non-negligible performance penalty because a
8489  * writer lock will have to be acquired before updating the list.
8490  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8491  * function returns true if the entry has not been updated for some period of
8492  * time.  We differentiate the NS or glue address case and the others since
8493  * experiments have shown that the former tends to be accessed relatively
8494  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8495  * may cause external queries at a higher level zone, involving more
8496  * transactions).
8497  *
8498  * Caller must hold the node (read or write) lock.
8499  */
8500 static inline isc_boolean_t
8501 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8502         if ((header->attributes &
8503              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8504                 return (ISC_FALSE);
8505
8506 #if DNS_RBTDB_LIMITLRUUPDATE
8507         if (header->type == dns_rdatatype_ns ||
8508             (header->trust == dns_trust_glue &&
8509              (header->type == dns_rdatatype_a ||
8510               header->type == dns_rdatatype_aaaa))) {
8511                 /*
8512                  * Glue records are updated if at least 60 seconds have passed
8513                  * since the previous update time.
8514                  */
8515                 return (header->last_used + 60 <= now);
8516         }
8517
8518         /* Other records are updated if 5 minutes have passed. */
8519         return (header->last_used + 300 <= now);
8520 #else
8521         UNUSED(now);
8522
8523         return (ISC_TRUE);
8524 #endif
8525 }
8526
8527 /*%
8528  * Update the timestamp of a given cache entry and move it to the head
8529  * of the corresponding LRU list.
8530  *
8531  * Caller must hold the node (write) lock.
8532  *
8533  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8534  */
8535 static void
8536 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8537               isc_stdtime_t now)
8538 {
8539         INSIST(IS_CACHE(rbtdb));
8540
8541         /* To be checked: can we really assume this? XXXMLG */
8542         INSIST(ISC_LINK_LINKED(header, link));
8543
8544         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8545         header->last_used = now;
8546         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8547 }
8548
8549 /*%
8550  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8551  * under an overmem condition.  To recover from this condition quickly, up to
8552  * 2 entries will be purged.  This process is triggered while adding a new
8553  * entry, and we specifically avoid purging entries in the same LRU bucket as
8554  * the one to which the new entry will belong.  Otherwise, we might purge
8555  * entries of the same name of different RR types while adding RRsets from a
8556  * single response (consider the case where we're adding A and AAAA glue records
8557  * of the same NS name).
8558  */
8559 static void
8560 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8561               isc_stdtime_t now, isc_boolean_t tree_locked)
8562 {
8563         rdatasetheader_t *header, *header_prev;
8564         unsigned int locknum;
8565         int purgecount = 2;
8566
8567         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8568              locknum != locknum_start && purgecount > 0;
8569              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8570                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8571                           isc_rwlocktype_write);
8572
8573                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8574                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8575                         expire_header(rbtdb, header, tree_locked);
8576                         purgecount--;
8577                 }
8578
8579                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8580                      header != NULL && purgecount > 0;
8581                      header = header_prev) {
8582                         header_prev = ISC_LIST_PREV(header, link);
8583                         /*
8584                          * Unlink the entry at this point to avoid checking it
8585                          * again even if it's currently used someone else and
8586                          * cannot be purged at this moment.  This entry won't be
8587                          * referenced any more (so unlinking is safe) since the
8588                          * TTL was reset to 0.
8589                          */
8590                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8591                                         link);
8592                         expire_header(rbtdb, header, tree_locked);
8593                         purgecount--;
8594                 }
8595
8596                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8597                                     isc_rwlocktype_write);
8598         }
8599 }
8600
8601 static void
8602 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8603               isc_boolean_t tree_locked)
8604 {
8605         set_ttl(rbtdb, header, 0);
8606         header->attributes |= RDATASET_ATTR_STALE;
8607         header->node->dirty = 1;
8608
8609         /*
8610          * Caller must hold the node (write) lock.
8611          */
8612
8613         if (dns_rbtnode_refcurrent(header->node) == 0) {
8614                 /*
8615                  * If no one else is using the node, we can clean it up now.
8616                  * We first need to gain a new reference to the node to meet a
8617                  * requirement of decrement_reference().
8618                  */
8619                 new_reference(rbtdb, header->node);
8620                 decrement_reference(rbtdb, header->node, 0,
8621                                     isc_rwlocktype_write,
8622                                     tree_locked ? isc_rwlocktype_write :
8623                                     isc_rwlocktype_none, ISC_FALSE);
8624         }
8625 }