]> CyberLeo.Net >> Repos - FreeBSD/stable/8.git/blob - contrib/bind9/lib/dns/rbtdb.c
Update to 9.6-ESV-R2, the latest from ISC.
[FreeBSD/stable/8.git] / contrib / bind9 / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2010  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.270.12.16.10.3 2010/08/13 07:25:21 marka Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
/*
 * Implementation magic number checked by VALID_RBTDB().  The build with
 * DNS_RBTDB_VERSION64 defined uses a different value from the default
 * build.
 */
#ifdef DNS_RBTDB_VERSION64
#define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
#else
#define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
#endif
78
/*%
 * True when 'rbtdb' is non-NULL and its common.impmagic equals
 * RBTDB_MAGIC.
 *
 * Note that "impmagic" is not the first four bytes of the struct, so
 * ISC_MAGIC_VALID cannot be used.
 */
#define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
                                 (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
/*
 * Serial number type used by versions and rdataset headers: 64 bits wide
 * when DNS_RBTDB_VERSION64 is defined, 32 bits wide otherwise.
 */
#ifdef DNS_RBTDB_VERSION64
typedef isc_uint64_t                    rbtdb_serial_t;
/*%
 * Make casting easier in symbolic debuggers by using different names
 * for the 64 bit version.
 */
#define dns_rbtdb_t dns_rbtdb64_t
#define rdatasetheader_t rdatasetheader64_t
#define rbtdb_version_t rbtdb_version64_t
#else
typedef isc_uint32_t                    rbtdb_serial_t;
#endif
98
/*
 * Combined rdata type value.  The RBTDB_RDATATYPE_* macros keep a "base"
 * type in the low 16 bits and an "ext" type in the high 16 bits.
 */
typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * Pack/unpack helpers for rbtdb_rdatatype_t.
 *
 * RBTDB_RDATATYPE_BASE(type)   yields the low 16 bits as a dns_rdatatype_t.
 * RBTDB_RDATATYPE_EXT(type)    yields the high 16 bits as a dns_rdatatype_t.
 * RBTDB_RDATATYPE_VALUE(b, e)  combines base 'b' (low half) and ext 'e'
 *                              (high half) into an rbtdb_rdatatype_t.
 *
 * Both operands of RBTDB_RDATATYPE_VALUE are converted to the unsigned
 * 32-bit type *before* the shift: shifting a promoted (signed) int left by
 * 16 is undefined for ext values of 0x8000 and above.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        (((rbtdb_rdatatype_t)(e) << 16) | (rbtdb_rdatatype_t)(b))
104
/*
 * Pre-built combined type values.  The RBTDB_RDATATYPE_SIG* constants use
 * dns_rdatatype_rrsig as the base and the named type as the ext
 * (presumably: the RRSIG covering that type).  RBTDB_RDATATYPE_NCACHEANY
 * has base 0 and ext dns_rdatatype_any.
 */
#define RBTDB_RDATATYPE_SIGNSEC \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNSEC3 \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
#define RBTDB_RDATATYPE_SIGNS \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME \
                RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
#define RBTDB_RDATATYPE_NCACHEANY \
                RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
/*
 * We use rwlock for the DB lock only when ISC_RWLOCK_USEATOMIC is defined.
 * Using rwlock is effective with regard to lookup performance only when
 * it is implemented in an efficient way.
 * Otherwise, it is generally wise to stick to simple locking, since rwlock
 * would require more memory or can even make lookups slower due to its own
 * overhead (when it internally calls mutex locks).
 */
/* DNS_RBTDB_USERWLOCK is 1 when ISC_RWLOCK_USEATOMIC is defined, else 0. */
#ifdef ISC_RWLOCK_USEATOMIC
#define DNS_RBTDB_USERWLOCK 1
#else
#define DNS_RBTDB_USERWLOCK 0
#endif

/*
 * Database lock wrappers.  With DNS_RBTDB_USERWLOCK they map onto the
 * rwlock primitives and 't' selects the lock type; otherwise they map onto
 * plain mutex operations and the 't' argument is ignored.
 */
#if DNS_RBTDB_USERWLOCK
#define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
#else
#define RBTDB_INITLOCK(l)       isc_mutex_init(l)
#define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
#define RBTDB_LOCK(l, t)        LOCK(l)
#define RBTDB_UNLOCK(l, t)      UNLOCK(l)
#endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need some trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which is protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
#if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
/*
 * rwlock-based node locks: the STRONG variants expand to nothing and the
 * WEAK variants are the real rwlock operations.
 */
typedef isc_rwlock_t nodelock_t;

#define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
#define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
#define NODE_LOCK(l, t)         RWLOCK((l), (t))
#define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
#define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)

#define NODE_STRONGLOCK(l)      ((void)0)
#define NODE_STRONGUNLOCK(l)    ((void)0)
#define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
#define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
#define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
#else
/*
 * Mutex-based node locks: the lock-type argument 't' is ignored, an
 * "upgrade" always reports ISC_R_SUCCESS, the STRONG variants take the
 * mutex and the WEAK variants expand to nothing.
 */
typedef isc_mutex_t nodelock_t;

#define NODE_INITLOCK(l)        isc_mutex_init(l)
#define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
#define NODE_LOCK(l, t)         LOCK(l)
#define NODE_UNLOCK(l, t)       UNLOCK(l)
#define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS

#define NODE_STRONGLOCK(l)      LOCK(l)
#define NODE_STRONGUNLOCK(l)    UNLOCK(l)
#define NODE_WEAKLOCK(l, t)     ((void)0)
#define NODE_WEAKUNLOCK(l, t)   ((void)0)
#define NODE_WEAKDOWNGRADE(l)   ((void)0)
#endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
/*
 * Allow clients with a virtual time of up to 5 minutes (300 seconds) in
 * the past to see records that would otherwise have expired.
 */
#define RBTDB_VIRTUAL 300
207
/*
 * Record pointed to by the 'noqname' and 'closest' members of
 * rdatasetheader_t.
 * NOTE(review): 'neg' and 'negsig' are untyped here; presumably they hold
 * the negative-proof data and its signature, matching the 'neg'/'negsig'
 * parameters of rdataset_getnoqname()/rdataset_getclosest() -- confirm
 * against the code that fills them in.
 */
struct noqname {
        dns_name_t      name;
        void *          neg;
        void *          negsig;
        dns_rdatatype_t type;
};
214
215 typedef struct acachectl acachectl_t;
216
/*
 * Per-rdataset header.  Headers for different types at a node are chained
 * through 'next'; older versions of the same rdataset hang off 'down'.
 */
typedef struct rdatasetheader {
        /*%
         * Locked by the owning node's lock.
         */
        rbtdb_serial_t                  serial;
        dns_ttl_t                       rdh_ttl;
        rbtdb_rdatatype_t               type;
        /* Bitwise OR of RDATASET_ATTR_* flags. */
        isc_uint16_t                    attributes;
        dns_trust_t                     trust;
        struct noqname                  *noqname;
        struct noqname                  *closest;
        /*%<
         * We don't use the LIST macros, because the LIST structure has
         * both head and tail pointers, and is doubly linked.
         */

        struct rdatasetheader           *next;
        /*%<
         * If this is the top header for an rdataset, 'next' points
         * to the top header for the next rdataset (i.e., the next type).
         * Otherwise, it points up to the header whose down pointer points
         * at this header.
         */

        struct rdatasetheader           *down;
        /*%<
         * Points to the header for the next older version of
         * this rdataset.
         */

        isc_uint32_t                    count;
        /*%<
         * Monotonically increased every time this rdataset is bound so that
         * it is used as the base of the starting point in DNS responses
         * when the "cyclic" rrset-order is required.  Since the ordering
         * should not be so crucial, no lock is set for the counter for
         * performance reasons.
         */

        acachectl_t                     *additional_auth;
        acachectl_t                     *additional_glue;

        dns_rbtnode_t                   *node;
        isc_stdtime_t                   last_used;
        ISC_LINK(struct rdatasetheader) link;

        unsigned int                    heap_index;
        /*%<
         * Used for TTL-based cache cleaning.
         */
        isc_stdtime_t                   resign;
} rdatasetheader_t;
269
270 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
272
/*
 * Bit flags for the 'attributes' member of rdatasetheader_t; several of
 * them are tested by the EXISTS()/NONEXISTENT()/IGNORE()/RETAIN()/
 * NXDOMAIN()/RESIGN()/OPTOUT() macros.
 */
#define RDATASET_ATTR_NONEXISTENT       0x0001
#define RDATASET_ATTR_STALE             0x0002
#define RDATASET_ATTR_IGNORE            0x0004
#define RDATASET_ATTR_RETAIN            0x0008
#define RDATASET_ATTR_NXDOMAIN          0x0010
#define RDATASET_ATTR_RESIGN            0x0020
#define RDATASET_ATTR_STATCOUNT         0x0040
#define RDATASET_ATTR_OPTOUT            0x0080
281
/*
 * Argument bundle referenced from struct acachectl.
 * NOTE(review): presumably handed to an acache callback so it can locate
 * the database, node and header an entry belongs to -- confirm at the
 * point of use.
 */
typedef struct acache_cbarg {
        dns_rdatasetadditional_t        type;
        unsigned int                    count;
        dns_db_t                        *db;
        dns_dbnode_t                    *node;
        rdatasetheader_t                *header;
} acache_cbarg_t;

/*
 * Pairs an acache entry with its callback argument.  rdatasetheader_t
 * refers to these through its 'additional_auth' and 'additional_glue'
 * members.
 */
struct acachectl {
        dns_acacheentry_t               *entry;
        acache_cbarg_t                  *cbarg;
};
294
295 /*
296  * XXX
297  * When the cache will pre-expire data (due to memory low or other
298  * situations) before the rdataset's TTL has expired, it MUST
299  * respect the RETAIN bit and not expire the data until its TTL is
300  * expired.
301  */
302
#undef IGNORE                   /* WIN32 winbase.h defines this. */

/*
 * Predicates over a header's 'attributes' bits.  Each one tests a single
 * RDATASET_ATTR_* flag; EXISTS() is the negation of NONEXISTENT().
 */
#define EXISTS(header) \
        (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
#define NONEXISTENT(header) \
        (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
#define IGNORE(header) \
        (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
#define RETAIN(header) \
        (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
#define NXDOMAIN(header) \
        (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
#define RESIGN(header) \
        (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
#define OPTOUT(header) \
        (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
319
320 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
321
322 /*%
323  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
324  * There is a tradeoff issue about configuring this value: if this is too
325  * small, it may cause heavier contention between threads; if this is too large,
326  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
327  * The default value should work well for most environments, but this can
328  * also be configurable at compilation time via the
329  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
330  * 1 due to the assumption of overmem_purge().
331  */
332 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
333 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
334 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
335 #else
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #endif
338 #else
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
340 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
341
/*
 * One node-lock bucket.  dns_rbtdb_t holds an array of these in
 * 'node_locks' with 'node_lock_count' entries.
 */
typedef struct {
        nodelock_t                      lock;
        /* Protected in the refcount routines. */
        isc_refcount_t                  references;
        /* Locked by lock. */
        isc_boolean_t                   exiting;
} rbtdb_nodelock_t;
349
/*
 * Element of a version's 'changed_list' (see rbtdb_version_t): a node
 * pointer plus a 'dirty' flag.
 */
typedef struct rbtdb_changed {
        dns_rbtnode_t *                 node;
        isc_boolean_t                   dirty;
        ISC_LINK(struct rbtdb_changed)  link;
} rbtdb_changed_t;
355
356 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
357
/*
 * Value type of the 'secure' member of rbtdb_version_t.
 * NOTE(review): the names suggest unsigned / partially signed / fully
 * signed -- confirm against the code that sets it.
 */
typedef enum {
        dns_db_insecure,
        dns_db_partial,
        dns_db_secure
} dns_db_secure_t;
363
/*
 * One database version.  Versions are linked on the database's
 * 'open_versions' list through 'link'.
 */
typedef struct rbtdb_version {
        /* Not locked */
        rbtdb_serial_t                  serial;
        /*
         * Protected in the refcount routines.
         * XXXJT: should we change the lock policy based on the refcount
         * performance?
         */
        isc_refcount_t                  references;
        /* Locked by database lock. */
        isc_boolean_t                   writer;
        isc_boolean_t                   commit_ok;
        rbtdb_changedlist_t             changed_list;
        rdatasetheaderlist_t            resigned_list;
        ISC_LINK(struct rbtdb_version)  link;
        dns_db_secure_t                 secure;
        isc_boolean_t                   havensec3;
        /* NSEC3 parameters */
        dns_hash_t                      hash;
        isc_uint8_t                     flags;
        isc_uint16_t                    iterations;
        isc_uint8_t                     salt_length;
        unsigned char                   salt[DNS_NSEC3_SALTSIZE];
} rbtdb_version_t;
388
389 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
390
/*
 * The database object.  'common' is the generic dns_db_t part; functions
 * in this file cast a dns_db_t pointer to dns_rbtdb_t and check it with
 * VALID_RBTDB().  The type of 'lock' follows DNS_RBTDB_USERWLOCK.
 */
typedef struct {
        /* Unlocked. */
        dns_db_t                        common;
#if DNS_RBTDB_USERWLOCK
        isc_rwlock_t                    lock;
#else
        isc_mutex_t                     lock;
#endif
        isc_rwlock_t                    tree_lock;
        unsigned int                    node_lock_count;
        rbtdb_nodelock_t *              node_locks;
        dns_rbtnode_t *                 origin_node;
        dns_stats_t *                   rrsetstats; /* cache DB only */
        /* Locked by lock. */
        unsigned int                    active;
        isc_refcount_t                  references;
        unsigned int                    attributes;
        rbtdb_serial_t                  current_serial;
        rbtdb_serial_t                  least_serial;
        rbtdb_serial_t                  next_serial;
        rbtdb_version_t *               current_version;
        rbtdb_version_t *               future_version;
        rbtdb_versionlist_t             open_versions;
        isc_task_t *                    task;
        dns_dbnode_t                    *soanode;
        dns_dbnode_t                    *nsnode;

        /*
         * This is a linked list used to implement the LRU cache.  There will
         * be node_lock_count linked lists here.  Nodes in bucket 1 will be
         * placed on the linked list rdatasets[1].
         */
        rdatasetheaderlist_t            *rdatasets;

        /*%
         * Temporary storage for stale cache nodes and dynamically deleted
         * nodes that await being cleaned up.
         */
        rbtnodelist_t                   *deadnodes;

        /*
         * Heaps.  Each of these is used for TTL based expiry.
         */
        isc_heap_t                      **heaps;

        /* Locked by tree_lock. */
        dns_rbt_t *                     tree;
        dns_rbt_t *                     nsec3;

        /* Unlocked */
        unsigned int                    quantum;
} dns_rbtdb_t;
443
444 #define RBTDB_ATTR_LOADED               0x01
445 #define RBTDB_ATTR_LOADING              0x02
446
447 /*%
448  * Search Context
449  */
typedef struct {
        /* Database and version the search runs against. */
        dns_rbtdb_t *           rbtdb;
        rbtdb_version_t *       rbtversion;
        rbtdb_serial_t          serial;
        unsigned int            options;
        dns_rbtnodechain_t      chain;
        isc_boolean_t           copy_name;
        isc_boolean_t           need_cleanup;
        isc_boolean_t           wild;
        /*
         * NOTE(review): the zonecut* members presumably record a zone cut
         * found during the search together with its rdataset, signature
         * rdataset and name -- confirm in the search routines.
         */
        dns_rbtnode_t *         zonecut;
        rdatasetheader_t *      zonecut_rdataset;
        rdatasetheader_t *      zonecut_sigrdataset;
        dns_fixedname_t         zonecut_name;
        isc_stdtime_t           now;
} rbtdb_search_t;
465
466 /*%
467  * Load Context
468  */
typedef struct {
        /* Database being loaded. */
        dns_rbtdb_t *           rbtdb;
        /* NOTE(review): presumably the time the load was started -- confirm. */
        isc_stdtime_t           now;
} rbtdb_load_t;
473
474 static void rdataset_disassociate(dns_rdataset_t *rdataset);
475 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
476 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
477 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
478 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
479 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
480 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
481                                         dns_name_t *name,
482                                         dns_rdataset_t *neg,
483                                         dns_rdataset_t *negsig);
484 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
485                                         dns_name_t *name,
486                                         dns_rdataset_t *neg,
487                                         dns_rdataset_t *negsig);
488 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
489                                            dns_rdatasetadditional_t type,
490                                            dns_rdatatype_t qtype,
491                                            dns_acache_t *acache,
492                                            dns_zone_t **zonep,
493                                            dns_db_t **dbp,
494                                            dns_dbversion_t **versionp,
495                                            dns_dbnode_t **nodep,
496                                            dns_name_t *fname,
497                                            dns_message_t *msg,
498                                            isc_stdtime_t now);
499 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
500                                            dns_rdatasetadditional_t type,
501                                            dns_rdatatype_t qtype,
502                                            dns_acache_t *acache,
503                                            dns_zone_t *zone,
504                                            dns_db_t *db,
505                                            dns_dbversion_t *version,
506                                            dns_dbnode_t *node,
507                                            dns_name_t *fname);
508 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
509                                            dns_rdataset_t *rdataset,
510                                            dns_rdatasetadditional_t type,
511                                            dns_rdatatype_t qtype);
512 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
513                                               isc_stdtime_t now);
514 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
515                           isc_stdtime_t now);
516 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
517                           isc_boolean_t tree_locked);
518 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
519                           isc_stdtime_t now, isc_boolean_t tree_locked);
520 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
521                                   rdatasetheader_t *newheader);
522 static void prune_tree(isc_task_t *task, isc_event_t *event);
523 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
524 static void rdataset_expire(dns_rdataset_t *rdataset);
525
/*
 * Method table wiring the static rdataset_* functions declared above into
 * a dns_rdatasetmethods_t.  The entries are positional; the two NULL slots
 * are operations for which this file supplies no function (their slot
 * names are defined by dns_rdatasetmethods_t, which is not visible here).
 */
static dns_rdatasetmethods_t rdataset_methods = {
        rdataset_disassociate,
        rdataset_first,
        rdataset_next,
        rdataset_current,
        rdataset_clone,
        rdataset_count,
        NULL,
        rdataset_getnoqname,
        NULL,
        rdataset_getclosest,
        rdataset_getadditional,
        rdataset_setadditional,
        rdataset_putadditional,
        rdataset_settrust,
        rdataset_expire
};
543
544 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
545 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
546 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
547 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
548                                  dns_rdataset_t *rdataset);
549
/* Positional method table for this file's rdataset iterator functions. */
static dns_rdatasetitermethods_t rdatasetiter_methods = {
        rdatasetiter_destroy,
        rdatasetiter_first,
        rdatasetiter_next,
        rdatasetiter_current
};
556
/*
 * Rdataset iterator state: the generic iterator part followed by a
 * pointer to the header the iterator is currently positioned on.
 */
typedef struct rbtdb_rdatasetiter {
        dns_rdatasetiter_t              common;
        rdatasetheader_t *              current;
} rbtdb_rdatasetiter_t;
561
562 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
563 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
564 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
565 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
566                                         dns_name_t *name);
567 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
568 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
569 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
570                                            dns_dbnode_t **nodep,
571                                            dns_name_t *name);
572 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
573 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
574                                           dns_name_t *name);
575
/* Positional method table for this file's database iterator functions. */
static dns_dbiteratormethods_t dbiterator_methods = {
        dbiterator_destroy,
        dbiterator_first,
        dbiterator_last,
        dbiterator_seek,
        dbiterator_prev,
        dbiterator_next,
        dbiterator_current,
        dbiterator_pause,
        dbiterator_origin
};
587
/* Capacity of the 'deletions' array in rbtdb_dbiterator_t. */
#define DELETION_BATCH_MAX 64

/*
 * Database iterator state.
 *
 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
 *
 * 'chain' and 'nsec3chain' are two separate node chains and 'current'
 * points at a node chain.
 * NOTE(review): presumably 'current' selects whichever of the two chains
 * is active, and 'delete' counts the entries in use in 'deletions' --
 * confirm in the iterator routines.
 */
typedef struct rbtdb_dbiterator {
        dns_dbiterator_t                common;
        isc_boolean_t                   paused;
        isc_boolean_t                   new_origin;
        isc_rwlocktype_t                tree_locked;
        isc_result_t                    result;
        dns_fixedname_t                 name;
        dns_fixedname_t                 origin;
        dns_rbtnodechain_t              chain;
        dns_rbtnodechain_t              nsec3chain;
        dns_rbtnodechain_t              *current;
        dns_rbtnode_t                   *node;
        dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
        int                             delete;
        isc_boolean_t                   nsec3only;
        isc_boolean_t                   nonsec3;
} rbtdb_dbiterator_t;
610
611
/*
 * Database kind predicates: test the DNS_DBATTR_STUB / DNS_DBATTR_CACHE
 * bit in the generic dns_db_t attributes.
 */
#define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
#define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
614
615 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
616                        isc_event_t *event);
617 static void overmem(dns_db_t *db, isc_boolean_t overmem);
618 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
619                                isc_boolean_t *nsec3createflag);
620
/*%
 * 'init_count' is used to initialize 'newheader->count', which in turn
 * is used to determine where in the cycle "rrset-order cyclic" starts.
 * We don't lock this as we don't care about simultaneous updates.
 *
 * Note:
 *      Both init_count and header->count can be ISC_UINT32_MAX.
 *      The count on the returned rdataset however can't be as
 *      that indicates that the database does not implement cyclic
 *      processing.
 */
static unsigned int init_count;
633
634 /*
635  * Locking
636  *
637  * If a routine is going to lock more than one lock in this module, then
638  * the locking must be done in the following order:
639  *
640  *      Tree Lock
641  *
642  *      Node Lock       (Only one from the set may be locked at one time by
643  *                       any caller)
644  *
645  *      Database Lock
646  *
647  * Failure to follow this hierarchy can result in deadlock.
648  */
649
650 /*
651  * Deleting Nodes
652  *
653  * For zone databases the node for the origin of the zone MUST NOT be deleted.
654  */
655
656
657 /*
658  * DB Routines
659  */
660
661 static void
662 attach(dns_db_t *source, dns_db_t **targetp) {
663         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
664
665         REQUIRE(VALID_RBTDB(rbtdb));
666
667         isc_refcount_increment(&rbtdb->references, NULL);
668
669         *targetp = source;
670 }
671
672 static void
673 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
674         dns_rbtdb_t *rbtdb = event->ev_arg;
675
676         UNUSED(task);
677
678         free_rbtdb(rbtdb, ISC_TRUE, event);
679 }
680
681 static void
682 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
683                   isc_boolean_t increment)
684 {
685         dns_rdatastatstype_t statattributes = 0;
686         dns_rdatastatstype_t base = 0;
687         dns_rdatastatstype_t type;
688
689         /* At the moment we count statistics only for cache DB */
690         INSIST(IS_CACHE(rbtdb));
691
692         if (NXDOMAIN(header))
693                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
694         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
695                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
696                 base = RBTDB_RDATATYPE_EXT(header->type);
697         } else
698                 base = RBTDB_RDATATYPE_BASE(header->type);
699
700         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
701         if (increment)
702                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
703         else
704                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
705 }
706
707 static void
708 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
709         int idx;
710         isc_heap_t *heap;
711         dns_ttl_t oldttl;
712
713         oldttl = header->rdh_ttl;
714         header->rdh_ttl = newttl;
715
716         if (!IS_CACHE(rbtdb))
717                 return;
718
719         /*
720          * It's possible the rbtdb is not a cache.  If this is the case,
721          * we will not have a heap, and we move on.  If we do, though,
722          * we might need to adjust things.
723          */
724         if (header->heap_index == 0 || newttl == oldttl)
725                 return;
726         idx = header->node->locknum;
727         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
728             return;
729         heap = rbtdb->heaps[idx];
730
731         if (newttl < oldttl)
732                 isc_heap_increased(heap, header->heap_index);
733         else
734                 isc_heap_decreased(heap, header->heap_index);
735 }
736
737 /*%
738  * These functions allow the heap code to rank the priority of each
739  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
740  */
741 static isc_boolean_t
742 ttl_sooner(void *v1, void *v2) {
743         rdatasetheader_t *h1 = v1;
744         rdatasetheader_t *h2 = v2;
745
746         if (h1->rdh_ttl < h2->rdh_ttl)
747                 return (ISC_TRUE);
748         return (ISC_FALSE);
749 }
750
751 static isc_boolean_t
752 resign_sooner(void *v1, void *v2) {
753         rdatasetheader_t *h1 = v1;
754         rdatasetheader_t *h2 = v2;
755
756         if (h1->resign < h2->resign)
757                 return (ISC_TRUE);
758         return (ISC_FALSE);
759 }
760
761 /*%
762  * This function sets the heap index into the header.
763  */
764 static void
765 set_index(void *what, unsigned int index) {
766         rdatasetheader_t *h = what;
767
768         h->heap_index = index;
769 }
770
771 /*%
772  * Work out how many nodes can be deleted in the time between two
773  * requests to the nameserver.  Smooth the resulting number and use it
774  * as a estimate for the number of nodes to be deleted in the next
775  * iteration.
776  */
777 static unsigned int
778 adjust_quantum(unsigned int old, isc_time_t *start) {
779         unsigned int pps = dns_pps;     /* packets per second */
780         unsigned int interval;
781         isc_uint64_t usecs;
782         isc_time_t end;
783         unsigned int new;
784
785         if (pps < 100)
786                 pps = 100;
787         isc_time_now(&end);
788
789         interval = 1000000 / pps;       /* interval in usec */
790         if (interval == 0)
791                 interval = 1;
792         usecs = isc_time_microdiff(&end, start);
793         if (usecs == 0) {
794                 /*
795                  * We were unable to measure the amount of time taken.
796                  * Double the nodes deleted next time.
797                  */
798                 old *= 2;
799                 if (old > 1000)
800                         old = 1000;
801                 return (old);
802         }
803         new = old * interval;
804         new /= (unsigned int)usecs;
805         if (new == 0)
806                 new = 1;
807         else if (new > 1000)
808                 new = 1000;
809
810         /* Smooth */
811         new = (new + old * 3) / 4;
812
813         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
814                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
815
816         return (new);
817 }
818
819 static void
820 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
821         unsigned int i;
822         isc_ondestroy_t ondest;
823         isc_result_t result;
824         char buf[DNS_NAME_FORMATSIZE];
825         isc_time_t start;
826
827         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
828                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
829
830         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
831         REQUIRE(rbtdb->future_version == NULL);
832
833         if (rbtdb->current_version != NULL) {
834                 unsigned int refs;
835
836                 isc_refcount_decrement(&rbtdb->current_version->references,
837                                        &refs);
838                 INSIST(refs == 0);
839                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
840                 isc_refcount_destroy(&rbtdb->current_version->references);
841                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
842                             sizeof(rbtdb_version_t));
843         }
844
845         /*
846          * We assume the number of remaining dead nodes is reasonably small;
847          * the overhead of unlinking all nodes here should be negligible.
848          */
849         for (i = 0; i < rbtdb->node_lock_count; i++) {
850                 dns_rbtnode_t *node;
851
852                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
853                 while (node != NULL) {
854                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
855                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
856                 }
857         }
858
859         if (event == NULL)
860                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
861  again:
862         if (rbtdb->tree != NULL) {
863                 isc_time_now(&start);
864                 result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
865                 if (result == ISC_R_QUOTA) {
866                         INSIST(rbtdb->task != NULL);
867                         if (rbtdb->quantum != 0)
868                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
869                                                                 &start);
870                         if (event == NULL)
871                                 event = isc_event_allocate(rbtdb->common.mctx,
872                                                            NULL,
873                                                          DNS_EVENT_FREESTORAGE,
874                                                            free_rbtdb_callback,
875                                                            rbtdb,
876                                                            sizeof(isc_event_t));
877                         if (event == NULL)
878                                 goto again;
879                         isc_task_send(rbtdb->task, &event);
880                         return;
881                 }
882                 INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
883         }
884
885         if (rbtdb->nsec3 != NULL) {
886                 isc_time_now(&start);
887                 result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
888                 if (result == ISC_R_QUOTA) {
889                         INSIST(rbtdb->task != NULL);
890                         if (rbtdb->quantum != 0)
891                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
892                                                                 &start);
893                         if (event == NULL)
894                                 event = isc_event_allocate(rbtdb->common.mctx,
895                                                            NULL,
896                                                          DNS_EVENT_FREESTORAGE,
897                                                            free_rbtdb_callback,
898                                                            rbtdb,
899                                                            sizeof(isc_event_t));
900                         if (event == NULL)
901                                 goto again;
902                         isc_task_send(rbtdb->task, &event);
903                         return;
904                 }
905                 INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
906         }
907
908         if (event != NULL)
909                 isc_event_free(&event);
910         if (log) {
911                 if (dns_name_dynamic(&rbtdb->common.origin))
912                         dns_name_format(&rbtdb->common.origin, buf,
913                                         sizeof(buf));
914                 else
915                         strcpy(buf, "<UNKNOWN>");
916                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
917                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
918                               "done free_rbtdb(%s)", buf);
919         }
920         if (dns_name_dynamic(&rbtdb->common.origin))
921                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
922         for (i = 0; i < rbtdb->node_lock_count; i++) {
923                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
924                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
925         }
926
927         /*
928          * Clean up LRU / re-signing order lists.
929          */
930         if (rbtdb->rdatasets != NULL) {
931                 for (i = 0; i < rbtdb->node_lock_count; i++)
932                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
933                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
934                             rbtdb->node_lock_count *
935                             sizeof(rdatasetheaderlist_t));
936         }
937         /*
938          * Clean up dead node buckets.
939          */
940         if (rbtdb->deadnodes != NULL) {
941                 for (i = 0; i < rbtdb->node_lock_count; i++)
942                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
943                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
944                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
945         }
946         /*
947          * Clean up heap objects.
948          */
949         if (rbtdb->heaps != NULL) {
950                 for (i = 0; i < rbtdb->node_lock_count; i++)
951                         isc_heap_destroy(&rbtdb->heaps[i]);
952                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
953                             rbtdb->node_lock_count *
954                             sizeof(isc_heap_t *));
955         }
956
957         if (rbtdb->rrsetstats != NULL)
958                 dns_stats_detach(&rbtdb->rrsetstats);
959
960         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
961                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
962         isc_rwlock_destroy(&rbtdb->tree_lock);
963         isc_refcount_destroy(&rbtdb->references);
964         if (rbtdb->task != NULL)
965                 isc_task_detach(&rbtdb->task);
966
967         RBTDB_DESTROYLOCK(&rbtdb->lock);
968         rbtdb->common.magic = 0;
969         rbtdb->common.impmagic = 0;
970         ondest = rbtdb->common.ondest;
971         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
972         isc_ondestroy_notify(&ondest, rbtdb);
973 }
974
975 static inline void
976 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
977         isc_boolean_t want_free = ISC_FALSE;
978         unsigned int i;
979         unsigned int inactive = 0;
980
981         /* XXX check for open versions here */
982
983         if (rbtdb->soanode != NULL)
984                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
985         if (rbtdb->nsnode != NULL)
986                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
987
988         /*
989          * Even though there are no external direct references, there still
990          * may be nodes in use.
991          */
992         for (i = 0; i < rbtdb->node_lock_count; i++) {
993                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
994                 rbtdb->node_locks[i].exiting = ISC_TRUE;
995                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
996                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
997                     == 0) {
998                         inactive++;
999                 }
1000         }
1001
1002         if (inactive != 0) {
1003                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1004                 rbtdb->active -= inactive;
1005                 if (rbtdb->active == 0)
1006                         want_free = ISC_TRUE;
1007                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1008                 if (want_free) {
1009                         char buf[DNS_NAME_FORMATSIZE];
1010                         if (dns_name_dynamic(&rbtdb->common.origin))
1011                                 dns_name_format(&rbtdb->common.origin, buf,
1012                                                 sizeof(buf));
1013                         else
1014                                 strcpy(buf, "<UNKNOWN>");
1015                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1016                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1017                                       "calling free_rbtdb(%s)", buf);
1018                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1019                 }
1020         }
1021 }
1022
1023 static void
1024 detach(dns_db_t **dbp) {
1025         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1026         unsigned int refs;
1027
1028         REQUIRE(VALID_RBTDB(rbtdb));
1029
1030         isc_refcount_decrement(&rbtdb->references, &refs);
1031
1032         if (refs == 0)
1033                 maybe_free_rbtdb(rbtdb);
1034
1035         *dbp = NULL;
1036 }
1037
1038 static void
1039 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1040         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1041         rbtdb_version_t *version;
1042         unsigned int refs;
1043
1044         REQUIRE(VALID_RBTDB(rbtdb));
1045
1046         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1047         version = rbtdb->current_version;
1048         isc_refcount_increment(&version->references, &refs);
1049         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1050
1051         *versionp = (dns_dbversion_t *)version;
1052 }
1053
1054 static inline rbtdb_version_t *
1055 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1056                  unsigned int references, isc_boolean_t writer)
1057 {
1058         isc_result_t result;
1059         rbtdb_version_t *version;
1060
1061         version = isc_mem_get(mctx, sizeof(*version));
1062         if (version == NULL)
1063                 return (NULL);
1064         version->serial = serial;
1065         result = isc_refcount_init(&version->references, references);
1066         if (result != ISC_R_SUCCESS) {
1067                 isc_mem_put(mctx, version, sizeof(*version));
1068                 return (NULL);
1069         }
1070         version->writer = writer;
1071         version->commit_ok = ISC_FALSE;
1072         ISC_LIST_INIT(version->changed_list);
1073         ISC_LIST_INIT(version->resigned_list);
1074         ISC_LINK_INIT(version, link);
1075
1076         return (version);
1077 }
1078
1079 static isc_result_t
1080 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1081         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1082         rbtdb_version_t *version;
1083
1084         REQUIRE(VALID_RBTDB(rbtdb));
1085         REQUIRE(versionp != NULL && *versionp == NULL);
1086         REQUIRE(rbtdb->future_version == NULL);
1087
1088         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1089         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1090         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1091                                    ISC_TRUE);
1092         if (version != NULL) {
1093                 version->commit_ok = ISC_TRUE;
1094                 version->secure = rbtdb->current_version->secure;
1095                 version->havensec3 = rbtdb->current_version->havensec3;
1096                 if (version->havensec3) {
1097                         version->flags = rbtdb->current_version->flags;
1098                         version->iterations =
1099                                 rbtdb->current_version->iterations;
1100                         version->hash = rbtdb->current_version->hash;
1101                         version->salt_length =
1102                                 rbtdb->current_version->salt_length;
1103                         memcpy(version->salt, rbtdb->current_version->salt,
1104                                version->salt_length);
1105                 } else {
1106                         version->flags = 0;
1107                         version->iterations = 0;
1108                         version->hash = 0;
1109                         version->salt_length = 0;
1110                         memset(version->salt, 0, sizeof(version->salt));
1111                 }
1112                 rbtdb->next_serial++;
1113                 rbtdb->future_version = version;
1114         }
1115         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1116
1117         if (version == NULL)
1118                 return (ISC_R_NOMEMORY);
1119
1120         *versionp = version;
1121
1122         return (ISC_R_SUCCESS);
1123 }
1124
1125 static void
1126 attachversion(dns_db_t *db, dns_dbversion_t *source,
1127               dns_dbversion_t **targetp)
1128 {
1129         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1130         rbtdb_version_t *rbtversion = source;
1131         unsigned int refs;
1132
1133         REQUIRE(VALID_RBTDB(rbtdb));
1134
1135         isc_refcount_increment(&rbtversion->references, &refs);
1136         INSIST(refs > 1);
1137
1138         *targetp = rbtversion;
1139 }
1140
1141 static rbtdb_changed_t *
1142 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1143             dns_rbtnode_t *node)
1144 {
1145         rbtdb_changed_t *changed;
1146         unsigned int refs;
1147
1148         /*
1149          * Caller must be holding the node lock if its reference must be
1150          * protected by the lock.
1151          */
1152
1153         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1154
1155         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1156
1157         REQUIRE(version->writer);
1158
1159         if (changed != NULL) {
1160                 dns_rbtnode_refincrement(node, &refs);
1161                 INSIST(refs != 0);
1162                 changed->node = node;
1163                 changed->dirty = ISC_FALSE;
1164                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1165         } else
1166                 version->commit_ok = ISC_FALSE;
1167
1168         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1169
1170         return (changed);
1171 }
1172
1173 static void
1174 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1175                  acachectl_t *array)
1176 {
1177         unsigned int count;
1178         unsigned int i;
1179         unsigned char *raw;     /* RDATASLAB */
1180
1181         /*
1182          * The caller must be holding the corresponding node lock.
1183          */
1184
1185         if (array == NULL)
1186                 return;
1187
1188         raw = (unsigned char *)header + sizeof(*header);
1189         count = raw[0] * 256 + raw[1];
1190
1191         /*
1192          * Sanity check: since an additional cache entry has a reference to
1193          * the original DB node (in the callback arg), there should be no
1194          * acache entries when the node can be freed.
1195          */
1196         for (i = 0; i < count; i++)
1197                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1198
1199         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1200 }
1201
1202 static inline void
1203 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1204
1205         if (dns_name_dynamic(&(*noqname)->name))
1206                 dns_name_free(&(*noqname)->name, mctx);
1207         if ((*noqname)->neg != NULL)
1208                 isc_mem_put(mctx, (*noqname)->neg,
1209                             dns_rdataslab_size((*noqname)->neg, 0));
1210         if ((*noqname)->negsig != NULL)
1211                 isc_mem_put(mctx, (*noqname)->negsig,
1212                             dns_rdataslab_size((*noqname)->negsig, 0));
1213         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1214         *noqname = NULL;
1215 }
1216
1217 static inline void
1218 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1219 {
1220         ISC_LINK_INIT(h, link);
1221         h->heap_index = 0;
1222
1223 #if TRACE_HEADER
1224         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1225                 fprintf(stderr, "initialized header: %p\n", h);
1226 #else
1227         UNUSED(rbtdb);
1228 #endif
1229 }
1230
1231 static inline rdatasetheader_t *
1232 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1233 {
1234         rdatasetheader_t *h;
1235
1236         h = isc_mem_get(mctx, sizeof(*h));
1237         if (h == NULL)
1238                 return (NULL);
1239
1240 #if TRACE_HEADER
1241         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1242                 fprintf(stderr, "allocated header: %p\n", h);
1243 #endif
1244         init_rdataset(rbtdb, h);
1245         return (h);
1246 }
1247
1248 static inline void
1249 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1250 {
1251         unsigned int size;
1252         int idx;
1253
1254         if (EXISTS(rdataset) &&
1255             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1256                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1257         }
1258
1259         idx = rdataset->node->locknum;
1260         if (ISC_LINK_LINKED(rdataset, link)) {
1261                 INSIST(IS_CACHE(rbtdb));
1262                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1263         }
1264         if (rdataset->heap_index != 0)
1265                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1266         rdataset->heap_index = 0;
1267
1268         if (rdataset->noqname != NULL)
1269                 free_noqname(mctx, &rdataset->noqname);
1270         if (rdataset->closest != NULL)
1271                 free_noqname(mctx, &rdataset->closest);
1272
1273         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1274         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1275
1276         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1277                 size = sizeof(*rdataset);
1278         else
1279                 size = dns_rdataslab_size((unsigned char *)rdataset,
1280                                           sizeof(*rdataset));
1281         isc_mem_put(mctx, rdataset, size);
1282 }
1283
1284 static inline void
1285 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1286         rdatasetheader_t *header, *dcurrent;
1287         isc_boolean_t make_dirty = ISC_FALSE;
1288
1289         /*
1290          * Caller must hold the node lock.
1291          */
1292
1293         /*
1294          * We set the IGNORE attribute on rdatasets with serial number
1295          * 'serial'.  When the reference count goes to zero, these rdatasets
1296          * will be cleaned up; until that time, they will be ignored.
1297          */
1298         for (header = node->data; header != NULL; header = header->next) {
1299                 if (header->serial == serial) {
1300                         header->attributes |= RDATASET_ATTR_IGNORE;
1301                         make_dirty = ISC_TRUE;
1302                 }
1303                 for (dcurrent = header->down;
1304                      dcurrent != NULL;
1305                      dcurrent = dcurrent->down) {
1306                         if (dcurrent->serial == serial) {
1307                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1308                                 make_dirty = ISC_TRUE;
1309                         }
1310                 }
1311         }
1312         if (make_dirty)
1313                 node->dirty = 1;
1314 }
1315
1316 static inline void
1317 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1318 {
1319         rdatasetheader_t *d, *down_next;
1320
1321         for (d = top->down; d != NULL; d = down_next) {
1322                 down_next = d->down;
1323                 free_rdataset(rbtdb, mctx, d);
1324         }
1325         top->down = NULL;
1326 }
1327
1328 static inline void
1329 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1330         rdatasetheader_t *current, *top_prev, *top_next;
1331         isc_mem_t *mctx = rbtdb->common.mctx;
1332
1333         /*
1334          * Caller must be holding the node lock.
1335          */
1336
1337         top_prev = NULL;
1338         for (current = node->data; current != NULL; current = top_next) {
1339                 top_next = current->next;
1340                 clean_stale_headers(rbtdb, mctx, current);
1341                 /*
1342                  * If current is nonexistent or stale, we can clean it up.
1343                  */
1344                 if ((current->attributes &
1345                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1346                         if (top_prev != NULL)
1347                                 top_prev->next = current->next;
1348                         else
1349                                 node->data = current->next;
1350                         free_rdataset(rbtdb, mctx, current);
1351                 } else
1352                         top_prev = current;
1353         }
1354         node->dirty = 0;
1355 }
1356
1357 static inline void
1358 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1359                 rbtdb_serial_t least_serial)
1360 {
1361         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1362         rdatasetheader_t *top_prev, *top_next;
1363         isc_mem_t *mctx = rbtdb->common.mctx;
1364         isc_boolean_t still_dirty = ISC_FALSE;
1365
1366         /*
1367          * Caller must be holding the node lock.
1368          */
1369         REQUIRE(least_serial != 0);
1370
1371         top_prev = NULL;
1372         for (current = node->data; current != NULL; current = top_next) {
1373                 top_next = current->next;
1374
1375                 /*
1376                  * First, we clean up any instances of multiple rdatasets
1377                  * with the same serial number, or that have the IGNORE
1378                  * attribute.
1379                  */
1380                 dparent = current;
1381                 for (dcurrent = current->down;
1382                      dcurrent != NULL;
1383                      dcurrent = down_next) {
1384                         down_next = dcurrent->down;
1385                         INSIST(dcurrent->serial <= dparent->serial);
1386                         if (dcurrent->serial == dparent->serial ||
1387                             IGNORE(dcurrent)) {
1388                                 if (down_next != NULL)
1389                                         down_next->next = dparent;
1390                                 dparent->down = down_next;
1391                                 free_rdataset(rbtdb, mctx, dcurrent);
1392                         } else
1393                                 dparent = dcurrent;
1394                 }
1395
1396                 /*
1397                  * We've now eliminated all IGNORE datasets with the possible
1398                  * exception of current, which we now check.
1399                  */
1400                 if (IGNORE(current)) {
1401                         down_next = current->down;
1402                         if (down_next == NULL) {
1403                                 if (top_prev != NULL)
1404                                         top_prev->next = current->next;
1405                                 else
1406                                         node->data = current->next;
1407                                 free_rdataset(rbtdb, mctx, current);
1408                                 /*
1409                                  * current no longer exists, so we can
1410                                  * just continue with the loop.
1411                                  */
1412                                 continue;
1413                         } else {
1414                                 /*
1415                                  * Pull up current->down, making it the new
1416                                  * current.
1417                                  */
1418                                 if (top_prev != NULL)
1419                                         top_prev->next = down_next;
1420                                 else
1421                                         node->data = down_next;
1422                                 down_next->next = top_next;
1423                                 free_rdataset(rbtdb, mctx, current);
1424                                 current = down_next;
1425                         }
1426                 }
1427
1428                 /*
1429                  * We now try to find the first down node less than the
1430                  * least serial.
1431                  */
1432                 dparent = current;
1433                 for (dcurrent = current->down;
1434                      dcurrent != NULL;
1435                      dcurrent = down_next) {
1436                         down_next = dcurrent->down;
1437                         if (dcurrent->serial < least_serial)
1438                                 break;
1439                         dparent = dcurrent;
1440                 }
1441
1442                 /*
1443                  * If there is a such an rdataset, delete it and any older
1444                  * versions.
1445                  */
1446                 if (dcurrent != NULL) {
1447                         do {
1448                                 down_next = dcurrent->down;
1449                                 INSIST(dcurrent->serial <= least_serial);
1450                                 free_rdataset(rbtdb, mctx, dcurrent);
1451                                 dcurrent = down_next;
1452                         } while (dcurrent != NULL);
1453                         dparent->down = NULL;
1454                 }
1455
1456                 /*
1457                  * Note.  The serial number of 'current' might be less than
1458                  * least_serial too, but we cannot delete it because it is
1459                  * the most recent version, unless it is a NONEXISTENT
1460                  * rdataset.
1461                  */
1462                 if (current->down != NULL) {
1463                         still_dirty = ISC_TRUE;
1464                         top_prev = current;
1465                 } else {
1466                         /*
1467                          * If this is a NONEXISTENT rdataset, we can delete it.
1468                          */
1469                         if (NONEXISTENT(current)) {
1470                                 if (top_prev != NULL)
1471                                         top_prev->next = current->next;
1472                                 else
1473                                         node->data = current->next;
1474                                 free_rdataset(rbtdb, mctx, current);
1475                         } else
1476                                 top_prev = current;
1477                 }
1478         }
1479         if (!still_dirty)
1480                 node->dirty = 0;
1481 }
1482
1483 /*%
1484  * Clean up dead nodes.  These are nodes which have no references, and
1485  * have no data.  They are dead but we could not or chose not to delete
1486  * them when we deleted all the data at that node because we did not want
1487  * to wait for the tree write lock.
1488  *
1489  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1490  */
1491 static void
1492 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1493         dns_rbtnode_t *node;
1494         isc_result_t result;
1495         int count = 10;         /* XXXJT: should be adjustable */
1496
1497         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1498         while (node != NULL && count > 0) {
1499                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1500
1501                 /*
1502                  * Since we're holding a tree write lock, it should be
1503                  * impossible for this node to be referenced by others.
1504                  */
1505                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1506                        node->data == NULL);
1507
1508                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1509                 if (node->nsec3)
1510                         result = dns_rbt_deletenode(rbtdb->nsec3, node,
1511                                                     ISC_FALSE);
1512                 else
1513                         result = dns_rbt_deletenode(rbtdb->tree, node,
1514                                                     ISC_FALSE);
1515                 if (result != ISC_R_SUCCESS)
1516                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1517                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1518                                       "cleanup_dead_nodes: "
1519                                       "dns_rbt_deletenode: %s",
1520                                       isc_result_totext(result));
1521                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1522                 count--;
1523         }
1524 }
1525
1526 /*
1527  * Caller must be holding the node lock if its reference must be protected
1528  * by the lock.
1529  */
1530 static inline void
1531 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1532         unsigned int lockrefs, noderefs;
1533         isc_refcount_t *lockref;
1534
1535         dns_rbtnode_refincrement0(node, &noderefs);
1536         if (noderefs == 1) {    /* this is the first reference to the node */
1537                 lockref = &rbtdb->node_locks[node->locknum].references;
1538                 isc_refcount_increment0(lockref, &lockrefs);
1539                 INSIST(lockrefs != 0);
1540         }
1541         INSIST(noderefs != 0);
1542 }
1543
1544 /*
1545  * This function is assumed to be called when a node is newly referenced
1546  * and can be in the deadnode list.  In that case the node must be retrieved
1547  * from the list because it is going to be used.  In addition, if the caller
1548  * happens to hold a write lock on the tree, it's a good chance to purge dead
1549  * nodes.
1550  * Note: while a new reference is gained in multiple places, there are only very
1551  * few cases where the node can be in the deadnode list (only empty nodes can
1552  * have been added to the list).
1553  */
1554 static inline void
1555 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1556                 isc_rwlocktype_t treelocktype)
1557 {
1558         isc_boolean_t need_relock = ISC_FALSE;
1559
1560         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1561         new_reference(rbtdb, node);
1562
1563         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1564                       isc_rwlocktype_read);
1565         if (ISC_LINK_LINKED(node, deadlink))
1566                 need_relock = ISC_TRUE;
1567         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1568                  treelocktype == isc_rwlocktype_write)
1569                 need_relock = ISC_TRUE;
1570         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1571                         isc_rwlocktype_read);
1572         if (need_relock) {
1573                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1574                               isc_rwlocktype_write);
1575                 if (ISC_LINK_LINKED(node, deadlink))
1576                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1577                                         node, deadlink);
1578                 if (treelocktype == isc_rwlocktype_write)
1579                         cleanup_dead_nodes(rbtdb, node->locknum);
1580                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1581                                 isc_rwlocktype_write);
1582         }
1583
1584         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1585 }
1586
1587 /*
1588  * Caller must be holding the node lock; either the "strong", read or write
1589  * lock.  Note that the lock must be held even when node references are
1590  * atomically modified; in that case the decrement operation itself does not
1591  * have to be protected, but we must avoid a race condition where multiple
1592  * threads are decreasing the reference to zero simultaneously and at least
1593  * one of them is going to free the node.
1594  * This function returns ISC_TRUE if and only if the node reference decreases
1595  * to zero.
      *
      * Parameters, as used by the code below:
      *   least_serial  handed to clean_zone_node() when a dirty node of a
      *                 non-cache database is cleaned; if it is 0, the value is
      *                 read from rbtdb->least_serial under the database read
      *                 lock.
      *   nlock         node lock type held by the caller.  Outside the easy
      *                 case, a read lock is released and re-acquired as a
      *                 write lock, and downgraded again before returning.
      *   tlock         tree lock type held by the caller (none, read or
      *                 write).  A tree write lock is only obtained through a
      *                 try-lock / try-upgrade and the caller's lock state is
      *                 restored before returning.
      *   pruning       prune_tree() passes ISC_TRUE; in that case no further
      *                 pruning event is dispatched for the node.
      *
      * ISC_FALSE is returned when the decrement leaves a non-zero count, when
      * a pruning event was dispatched (it takes a new reference on the node),
      * or when the node's count is found to be non-zero at the final check.
1596  */
1597 static isc_boolean_t
1598 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1599                     rbtdb_serial_t least_serial,
1600                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1601                     isc_boolean_t pruning)
1602 {
1603         isc_result_t result;
1604         isc_boolean_t write_locked;
1605         rbtdb_nodelock_t *nodelock;
1606         unsigned int refs, nrefs;
1607         int bucket = node->locknum;
1608         isc_boolean_t no_reference;
1609
1610         nodelock = &rbtdb->node_locks[bucket];
1611
1612         /* Handle easy and typical case first. */
1613         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1614                 dns_rbtnode_refdecrement(node, &nrefs);
1615                 INSIST((int)nrefs >= 0);
1616                 if (nrefs == 0) {
1617                         isc_refcount_decrement(&nodelock->references, &refs);
1618                         INSIST((int)refs >= 0);
1619                 }
1620                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1621         }
1622
1623         /* Upgrade the lock? */
1624         if (nlock == isc_rwlocktype_read) {
1625                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1626                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1627         }
1628         dns_rbtnode_refdecrement(node, &nrefs);
1629         INSIST((int)nrefs >= 0);
1630         if (nrefs > 0) {
1631                 /* Restore the lock? */
1632                 if (nlock == isc_rwlocktype_read)
1633                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1634                 return (ISC_FALSE);
1635         }
1636
             /*
              * The decrement reached zero.  A dirty node is cleaned now:
              * with clean_cache_node() for a cache database, otherwise with
              * clean_zone_node() and the least serial.
              */
1637         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1638                 if (IS_CACHE(rbtdb))
1639                         clean_cache_node(rbtdb, node);
1640                 else {
1641                         if (least_serial == 0) {
1642                                 /*
1643                                  * Caller doesn't know the least serial.
1644                                  * Get it.
1645                                  */
1646                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1647                                 least_serial = rbtdb->least_serial;
1648                                 RBTDB_UNLOCK(&rbtdb->lock,
1649                                              isc_rwlocktype_read);
1650                         }
1651                         clean_zone_node(rbtdb, node, least_serial);
1652                 }
1653         }
1654
             /*
              * The node no longer counts as referenced in its lock bucket
              * (the counterpart of the increment done in new_reference()).
              */
1655         isc_refcount_decrement(&nodelock->references, &refs);
1656         INSIST((int)refs >= 0);
1657
1658         /*
1659          * XXXDCL should this only be done for cache zones?
1660          */
1661         if (node->data != NULL || node->down != NULL) {
                     /* The node is not empty, so it is left in the tree. */
1662                 /* Restore the lock? */
1663                 if (nlock == isc_rwlocktype_read)
1664                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1665                 return (ISC_TRUE);
1666         }
1667
1668         /*
1669          * Attempt to switch to a write lock on the tree.  If this fails,
1670          * we will add this node to a linked list of nodes in this locking
1671          * bucket which we will free later.
1672          */
1673         if (tlock != isc_rwlocktype_write) {
1674                 /*
1675                  * Locking hierarchy notwithstanding, we don't need to free
1676                  * the node lock before acquiring the tree write lock because
1677                  * we only do a trylock.
1678                  */
1679                 if (tlock == isc_rwlocktype_read)
1680                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1681                 else
1682                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1683                                                     isc_rwlocktype_write);
1684                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1685                               result == ISC_R_LOCKBUSY);
1686
1687                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1688         } else
1689                 write_locked = ISC_TRUE;
1690
             /*
              * Start from "no reference left"; this is cleared below when a
              * pruning event is dispatched or when the node turns out to be
              * referenced again.
              */
1691         no_reference = ISC_TRUE;
1692         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1693                 /*
1694                  * We can now delete the node if the reference counter is
1695                  * zero.  This should be typically the case, but a different
1696                  * thread may still gain a (new) reference just before the
1697                  * current thread locks the tree (e.g., in findnode()).
1698                  */
1699
1700                 /*
1701                  * If this node is the only one in the level it's in, deleting
1702                  * this node may recursively make its parent the only node in
1703                  * the parent level; if so, and if no one is currently using
1704                  * the parent node, this is almost the only opportunity to
1705                  * clean it up.  But the recursive cleanup is not that trivial
1706                  * since the child and parent may be in different lock buckets,
1707                  * which would cause a lock order reversal problem.  To avoid
1708                  * the trouble, we'll dispatch a separate event for batch
1709                  * cleaning.  We need to check whether we're deleting the node
1710                  * as a result of pruning to avoid infinite dispatching.
1711                  * Note: pruning happens only when a task has been set for the
1712                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1713                  * it's their responsibility to purge stale leaves (e.g. by
1714                  * periodic walk-through).
1715                  */
1716                 if (!pruning && node->parent != NULL &&
1717                     node->parent->down == node && node->left == NULL &&
1718                     node->right == NULL && rbtdb->task != NULL) {
1719                         isc_event_t *ev;
1720                         dns_db_t *db;
1721
1722                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1723                                                 DNS_EVENT_RBTPRUNE,
1724                                                 prune_tree, node,
1725                                                 sizeof(isc_event_t));
1726                         if (ev != NULL) {
                                     /*
                                      * The event carries a node reference and
                                      * a database reference; prune_tree()
                                      * gives both back.
                                      */
1727                                 new_reference(rbtdb, node);
1728                                 db = NULL;
1729                                 attach((dns_db_t *)rbtdb, &db);
1730                                 ev->ev_sender = db;
1731                                 isc_task_send(rbtdb->task, &ev);
1732                                 no_reference = ISC_FALSE;
1733                         } else {
1734                                 /*
1735                                  * XXX: this is a weird situation.  We could
1736                                  * ignore this error case, but then the stale
1737                                  * node will unlikely be purged except via a
1738                                  * rare condition such as manual cleanup.  So
1739                                  * we queue it in the deadnodes list, hoping
1740                                  * the memory shortage is temporary and the node
1741                                  * will be deleted later.
1742                                  */
1743                                 isc_log_write(dns_lctx,
1744                                               DNS_LOGCATEGORY_DATABASE,
1745                                               DNS_LOGMODULE_CACHE,
1746                                               ISC_LOG_INFO,
1747                                               "decrement_reference: failed to "
1748                                               "allocate pruning event");
1749                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1750                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1751                                                 deadlink);
1752                         }
1753                 } else {
1754                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1755                                 char printname[DNS_NAME_FORMATSIZE];
1756
1757                                 isc_log_write(dns_lctx,
1758                                               DNS_LOGCATEGORY_DATABASE,
1759                                               DNS_LOGMODULE_CACHE,
1760                                               ISC_LOG_DEBUG(1),
1761                                               "decrement_reference: "
1762                                               "delete from rbt: %p %s",
1763                                               node,
1764                                               dns_rbt_formatnodename(node,
1765                                                         printname,
1766                                                         sizeof(printname)));
1767                         }
1768
1769                         INSIST(!ISC_LINK_LINKED(node, deadlink));
                             /* The node is deleted from whichever tree holds it. */
1770                         if (node->nsec3)
1771                                 result = dns_rbt_deletenode(rbtdb->nsec3, node,
1772                                                             ISC_FALSE);
1773                         else
1774                                 result = dns_rbt_deletenode(rbtdb->tree, node,
1775                                                             ISC_FALSE);
1776                         if (result != ISC_R_SUCCESS) {
1777                                 isc_log_write(dns_lctx,
1778                                               DNS_LOGCATEGORY_DATABASE,
1779                                               DNS_LOGMODULE_CACHE,
1780                                               ISC_LOG_WARNING,
1781                                               "decrement_reference: "
1782                                               "dns_rbt_deletenode: %s",
1783                                               isc_result_totext(result));
1784                         }
1785                 }
1786         } else if (dns_rbtnode_refcurrent(node) == 0) {
                     /*
                      * Still unreferenced, but the tree write lock was not
                      * obtained: queue the node on this bucket's dead-node
                      * list.
                      */
1787                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1788                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1789         } else
1790                 no_reference = ISC_FALSE;
1791
1792         /* Restore the lock? */
1793         if (nlock == isc_rwlocktype_read)
1794                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1795
1796         /*
1797          * Relock a read lock, or unlock the write lock if no lock was held.
1798          */
1799         if (tlock == isc_rwlocktype_none)
1800                 if (write_locked)
1801                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1802
1803         if (tlock == isc_rwlocktype_read)
1804                 if (write_locked)
1805                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1806
1807         return (no_reference);
1808 }
1809
1810 /*
1811  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1812  * case, the number of iteration is the number of tree levels, which is at
1813  * most the maximum number of domain name labels, i.e, 127.  In practice, this
1814  * should be much smaller (only a few times), and even the worst case would be
1815  * acceptable for a single event.
1816  */
1817 static void
1818 prune_tree(isc_task_t *task, isc_event_t *event) {
1819         dns_rbtdb_t *rbtdb = event->ev_sender;
1820         dns_rbtnode_t *node = event->ev_arg;
1821         dns_rbtnode_t *parent;
1822         unsigned int locknum;
1823
1824         UNUSED(task);
1825
1826         isc_event_free(&event);
1827
1828         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1829         locknum = node->locknum;
1830         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1831         do {
1832                 parent = node->parent;
1833                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1834                                     isc_rwlocktype_write, ISC_TRUE);
1835
1836                 if (parent != NULL && parent->down == NULL) {
1837                         /*
1838                          * node was the only down child of the parent and has
1839                          * just been removed.  We'll then need to examine the
1840                          * parent.  Keep the lock if possible; otherwise,
1841                          * release the old lock and acquire one for the parent.
1842                          */
1843                         if (parent->locknum != locknum) {
1844                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1845                                             isc_rwlocktype_write);
1846                                 locknum = parent->locknum;
1847                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1848                                           isc_rwlocktype_write);
1849                         }
1850
1851                         /*
1852                          * We need to gain a reference to the node before
1853                          * decrementing it in the next iteration.  In addition,
1854                          * if the node is in the dead-nodes list, extract it
1855                          * from the list beforehand as we do in
1856                          * reactivate_node().
1857                          */
1858                         new_reference(rbtdb, parent);
1859                         if (ISC_LINK_LINKED(parent, deadlink)) {
1860                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1861                                                 parent, deadlink);
1862                         }
1863                 } else
1864                         parent = NULL;
1865
1866                 node = parent;
1867         } while (node != NULL);
1868         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1869         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1870
1871         detach((dns_db_t **)&rbtdb);
1872 }
1873
1874 static inline void
1875 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1876                    rbtdb_changedlist_t *cleanup_list)
1877 {
1878         /*
1879          * Caller must be holding the database lock.
1880          */
1881
1882         rbtdb->least_serial = version->serial;
1883         *cleanup_list = version->changed_list;
1884         ISC_LIST_INIT(version->changed_list);
1885 }
1886
1887 static inline void
1888 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1889         rbtdb_changed_t *changed, *next_changed;
1890
1891         /*
1892          * If the changed record is dirty, then
1893          * an update created multiple versions of
1894          * a given rdataset.  We keep this list
1895          * until we're the least open version, at
1896          * which point it's safe to get rid of any
1897          * older versions.
1898          *
1899          * If the changed record isn't dirty, then
1900          * we don't need it anymore since we're
1901          * committing and not rolling back.
1902          *
1903          * The caller must be holding the database lock.
1904          */
1905         for (changed = HEAD(version->changed_list);
1906              changed != NULL;
1907              changed = next_changed) {
1908                 next_changed = NEXT(changed, link);
1909                 if (!changed->dirty) {
1910                         UNLINK(version->changed_list,
1911                                changed, link);
1912                         APPEND(*cleanup_list,
1913                                changed, link);
1914                 }
1915         }
1916 }
1917
1918 static void
1919 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1920         dns_rdataset_t keyset;
1921         dns_rdataset_t nsecset, signsecset;
1922         dns_rdata_t rdata = DNS_RDATA_INIT;
1923         isc_boolean_t haszonekey = ISC_FALSE;
1924         isc_boolean_t hasnsec = ISC_FALSE;
1925         isc_boolean_t hasoptbit = ISC_FALSE;
1926         isc_boolean_t nsec3createflag = ISC_FALSE;
1927         isc_result_t result;
1928
1929         dns_rdataset_init(&keyset);
1930         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1931                                      0, 0, &keyset, NULL);
1932         if (result == ISC_R_SUCCESS) {
1933                 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1934                 result = dns_rdataset_first(&keyset);
1935                 while (result == ISC_R_SUCCESS) {
1936                         dns_rdataset_current(&keyset, &keyrdata);
1937                         if (dns_zonekey_iszonekey(&keyrdata)) {
1938                                 haszonekey = ISC_TRUE;
1939                                 break;
1940                         }
1941                         result = dns_rdataset_next(&keyset);
1942                 }
1943                 dns_rdataset_disassociate(&keyset);
1944         }
1945         if (!haszonekey) {
1946                 version->secure = dns_db_insecure;
1947                 version->havensec3 = ISC_FALSE;
1948                 return;
1949         }
1950
1951         dns_rdataset_init(&nsecset);
1952         dns_rdataset_init(&signsecset);
1953         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1954                                      0, 0, &nsecset, &signsecset);
1955         if (result == ISC_R_SUCCESS) {
1956                 if (dns_rdataset_isassociated(&signsecset)) {
1957                         hasnsec = ISC_TRUE;
1958                         result = dns_rdataset_first(&nsecset);
1959                         if (result == ISC_R_SUCCESS) {
1960                                 dns_rdataset_current(&nsecset, &rdata);
1961                                 hasoptbit = dns_nsec_typepresent(&rdata,
1962                                                              dns_rdatatype_opt);
1963                         }
1964                         dns_rdataset_disassociate(&signsecset);
1965                 }
1966                 dns_rdataset_disassociate(&nsecset);
1967         }
1968
1969         setnsec3parameters(db, version, &nsec3createflag);
1970
1971         /*
1972          * Do we have a valid NSEC/NSEC3 chain?
1973          */
1974         if (version->havensec3 || (hasnsec && !hasoptbit))
1975                 version->secure = dns_db_secure;
1976         /*
1977          * Do we have a NSEC/NSEC3 chain under creation?
1978          */
1979         else if (hasoptbit || nsec3createflag)
1980                 version->secure = dns_db_partial;
1981         else
1982                 version->secure = dns_db_insecure;
1983 }
1984
1985 /*%<
1986  * Walk the origin node looking for NSEC3PARAM records.
1987  * Cache the nsec3 parameters.
1988  */
1989 static void
1990 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
1991                    isc_boolean_t *nsec3createflag)
1992 {
1993         dns_rbtnode_t *node;
1994         dns_rdata_nsec3param_t nsec3param;
1995         dns_rdata_t rdata = DNS_RDATA_INIT;
1996         isc_region_t region;
1997         isc_result_t result;
1998         rdatasetheader_t *header, *header_next;
1999         unsigned char *raw;             /* RDATASLAB */
2000         unsigned int count, length;
2001         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2002
2003         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2004         version->havensec3 = ISC_FALSE;
2005         node = rbtdb->origin_node;
2006         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2007                   isc_rwlocktype_read);
2008         for (header = node->data;
2009              header != NULL;
2010              header = header_next) {
2011                 header_next = header->next;
2012                 do {
2013                         if (header->serial <= version->serial &&
2014                             !IGNORE(header)) {
2015                                 if (NONEXISTENT(header))
2016                                         header = NULL;
2017                                 break;
2018                         } else
2019                                 header = header->down;
2020                 } while (header != NULL);
2021
2022                 if (header != NULL &&
2023                     header->type == dns_rdatatype_nsec3param) {
2024                         /*
2025                          * Find A NSEC3PARAM with a supported algorithm.
2026                          */
2027                         raw = (unsigned char *)header + sizeof(*header);
2028                         count = raw[0] * 256 + raw[1]; /* count */
2029 #if DNS_RDATASET_FIXED
2030                         raw += count * 4 + 2;
2031 #else
2032                         raw += 2;
2033 #endif
2034                         while (count-- > 0U) {
2035                                 length = raw[0] * 256 + raw[1];
2036 #if DNS_RDATASET_FIXED
2037                                 raw += 4;
2038 #else
2039                                 raw += 2;
2040 #endif
2041                                 region.base = raw;
2042                                 region.length = length;
2043                                 raw += length;
2044                                 dns_rdata_fromregion(&rdata,
2045                                                      rbtdb->common.rdclass,
2046                                                      dns_rdatatype_nsec3param,
2047                                                      &region);
2048                                 result = dns_rdata_tostruct(&rdata,
2049                                                             &nsec3param,
2050                                                             NULL);
2051                                 INSIST(result == ISC_R_SUCCESS);
2052                                 dns_rdata_reset(&rdata);
2053
2054                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2055                                     !dns_nsec3_supportedhash(nsec3param.hash))
2056                                         continue;
2057
2058 #ifdef RFC5155_STRICT
2059                                 if (nsec3param.flags != 0)
2060                                         continue;
2061 #else
2062                                 if ((nsec3param.flags & DNS_NSEC3FLAG_CREATE)
2063                                     != 0)
2064                                         *nsec3createflag = ISC_TRUE;
2065                                 if ((nsec3param.flags & ~DNS_NSEC3FLAG_OPTOUT)
2066                                     != 0)
2067                                         continue;
2068 #endif
2069
2070                                 memcpy(version->salt, nsec3param.salt,
2071                                        nsec3param.salt_length);
2072                                 version->hash = nsec3param.hash;
2073                                 version->salt_length = nsec3param.salt_length;
2074                                 version->iterations = nsec3param.iterations;
2075                                 version->flags = nsec3param.flags;
2076                                 version->havensec3 = ISC_TRUE;
2077                                 /*
2078                                  * Look for a better algorithm than the
2079                                  * unknown test algorithm.
2080                                  */
2081                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2082                                         goto unlock;
2083                         }
2084                 }
2085         }
2086  unlock:
2087         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2088                     isc_rwlocktype_read);
2089         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2090 }
2091
2092 static void
2093 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2094         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2095         rbtdb_version_t *version, *cleanup_version, *least_greater;
2096         isc_boolean_t rollback = ISC_FALSE;
2097         rbtdb_changedlist_t cleanup_list;
2098         rdatasetheaderlist_t resigned_list;
2099         rbtdb_changed_t *changed, *next_changed;
2100         rbtdb_serial_t serial, least_serial;
2101         dns_rbtnode_t *rbtnode;
2102         unsigned int refs;
2103         rdatasetheader_t *header;
2104         isc_boolean_t writer;
2105
2106         REQUIRE(VALID_RBTDB(rbtdb));
2107         version = (rbtdb_version_t *)*versionp;
2108
2109         cleanup_version = NULL;
2110         ISC_LIST_INIT(cleanup_list);
2111         ISC_LIST_INIT(resigned_list);
2112
2113         isc_refcount_decrement(&version->references, &refs);
2114         if (refs > 0) {         /* typical and easy case first */
2115                 if (commit) {
2116                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2117                         INSIST(!version->writer);
2118                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2119                 }
2120                 goto end;
2121         }
2122
2123         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2124         serial = version->serial;
2125         writer = version->writer;
2126         if (version->writer) {
2127                 if (commit) {
2128                         unsigned cur_ref;
2129                         rbtdb_version_t *cur_version;
2130
2131                         INSIST(version->commit_ok);
2132                         INSIST(version == rbtdb->future_version);
2133                         /*
2134                          * The current version is going to be replaced.
2135                          * Release the (likely last) reference to it from the
2136                          * DB itself and unlink it from the open list.
2137                          */
2138                         cur_version = rbtdb->current_version;
2139                         isc_refcount_decrement(&cur_version->references,
2140                                                &cur_ref);
2141                         if (cur_ref == 0) {
2142                                 if (cur_version->serial == rbtdb->least_serial)
2143                                         INSIST(EMPTY(cur_version->changed_list));
2144                                 UNLINK(rbtdb->open_versions,
2145                                        cur_version, link);
2146                         }
2147                         if (EMPTY(rbtdb->open_versions)) {
2148                                 /*
2149                                  * We're going to become the least open
2150                                  * version.
2151                                  */
2152                                 make_least_version(rbtdb, version,
2153                                                    &cleanup_list);
2154                         } else {
2155                                 /*
2156                                  * Some other open version is the
2157                                  * least version.  We can't cleanup
2158                                  * records that were changed in this
2159                                  * version because the older versions
2160                                  * may still be in use by an open
2161                                  * version.
2162                                  *
2163                                  * We can, however, discard the
2164                                  * changed records for things that
2165                                  * we've added that didn't exist in
2166                                  * prior versions.
2167                                  */
2168                                 cleanup_nondirty(version, &cleanup_list);
2169                         }
2170                         /*
2171                          * If the (soon to be former) current version
2172                          * isn't being used by anyone, we can clean
2173                          * it up.
2174                          */
2175                         if (cur_ref == 0) {
2176                                 cleanup_version = cur_version;
2177                                 APPENDLIST(version->changed_list,
2178                                            cleanup_version->changed_list,
2179                                            link);
2180                         }
2181                         /*
2182                          * Become the current version.
2183                          */
2184                         version->writer = ISC_FALSE;
2185                         rbtdb->current_version = version;
2186                         rbtdb->current_serial = version->serial;
2187                         rbtdb->future_version = NULL;
2188
2189                         /*
2190                          * Keep the current version in the open list, and
2191                          * gain a reference for the DB itself (see the DB
2192                          * creation function below).  This must be the only
2193                          * case where we need to increment the counter from
2194                          * zero and need to use isc_refcount_increment0().
2195                          */
2196                         isc_refcount_increment0(&version->references,
2197                                                 &cur_ref);
2198                         INSIST(cur_ref == 1);
2199                         PREPEND(rbtdb->open_versions,
2200                                 rbtdb->current_version, link);
2201                         resigned_list = version->resigned_list;
2202                         ISC_LIST_INIT(version->resigned_list);
2203                 } else {
2204                         /*
2205                          * We're rolling back this transaction.
2206                          */
2207                         cleanup_list = version->changed_list;
2208                         ISC_LIST_INIT(version->changed_list);
2209                         resigned_list = version->resigned_list;
2210                         ISC_LIST_INIT(version->resigned_list);
2211                         rollback = ISC_TRUE;
2212                         cleanup_version = version;
2213                         rbtdb->future_version = NULL;
2214                 }
2215         } else {
2216                 if (version != rbtdb->current_version) {
2217                         /*
2218                          * There are no external or internal references
2219                          * to this version and it can be cleaned up.
2220                          */
2221                         cleanup_version = version;
2222
2223                         /*
2224                          * Find the version with the least serial
2225                          * number greater than ours.
2226                          */
2227                         least_greater = PREV(version, link);
2228                         if (least_greater == NULL)
2229                                 least_greater = rbtdb->current_version;
2230
2231                         INSIST(version->serial < least_greater->serial);
2232                         /*
2233                          * Is this the least open version?
2234                          */
2235                         if (version->serial == rbtdb->least_serial) {
2236                                 /*
2237                                  * Yes.  Install the new least open
2238                                  * version.
2239                                  */
2240                                 make_least_version(rbtdb,
2241                                                    least_greater,
2242                                                    &cleanup_list);
2243                         } else {
2244                                 /*
2245                                  * Add any unexecuted cleanups to
2246                                  * those of the least greater version.
2247                                  */
2248                                 APPENDLIST(least_greater->changed_list,
2249                                            version->changed_list,
2250                                            link);
2251                         }
2252                 } else if (version->serial == rbtdb->least_serial)
2253                         INSIST(EMPTY(version->changed_list));
2254                 UNLINK(rbtdb->open_versions, version, link);
2255         }
2256         least_serial = rbtdb->least_serial;
2257         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2258
2259         /*
2260          * Update the zone's secure status.
2261          */
2262         if (writer && commit && !IS_CACHE(rbtdb))
2263                 iszonesecure(db, version, rbtdb->origin_node);
2264
2265         if (cleanup_version != NULL) {
2266                 INSIST(EMPTY(cleanup_version->changed_list));
2267                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2268                             sizeof(*cleanup_version));
2269         }
2270
2271         /*
2272          * Commit/rollback re-signed headers.
2273          */
2274         for (header = HEAD(resigned_list);
2275              header != NULL;
2276              header = HEAD(resigned_list)) {
2277                 nodelock_t *lock;
2278
2279                 ISC_LIST_UNLINK(resigned_list, header, link);
2280
2281                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2282                 NODE_LOCK(lock, isc_rwlocktype_write);
2283                 if (rollback)
2284                         resign_insert(rbtdb, header->node->locknum, header);
2285                 decrement_reference(rbtdb, header->node, least_serial,
2286                                     isc_rwlocktype_write, isc_rwlocktype_none,
2287                                     ISC_FALSE);
2288                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2289         }
2290
2291         if (!EMPTY(cleanup_list)) {
2292                 /*
2293                  * We acquire a tree write lock here in order to make sure
2294                  * that stale nodes will be removed in decrement_reference().
2295                  * If we didn't have the lock, those nodes could miss the
2296                  * chance to be removed until the server stops.  The write lock
2297                  * is expensive, but this event should be rare enough to justify
2298                  * the cost.
2299                  */
2300                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2301                 for (changed = HEAD(cleanup_list);
2302                      changed != NULL;
2303                      changed = next_changed) {
2304                         nodelock_t *lock;
2305
2306                         next_changed = NEXT(changed, link);
2307                         rbtnode = changed->node;
2308                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2309
2310                         NODE_LOCK(lock, isc_rwlocktype_write);
2311                         /*
2312                          * This is a good opportunity to purge any dead nodes,
2313                          * so use it.
2314                          */
2315                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2316
2317                         if (rollback)
2318                                 rollback_node(rbtnode, serial);
2319                         decrement_reference(rbtdb, rbtnode, least_serial,
2320                                             isc_rwlocktype_write,
2321                                             isc_rwlocktype_write, ISC_FALSE);
2322
2323                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2324
2325                         isc_mem_put(rbtdb->common.mctx, changed,
2326                                     sizeof(*changed));
2327                 }
2328                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2329         }
2330
2331  end:
2332         *versionp = NULL;
2333 }
2334
2335 /*
2336  * Add the necessary magic for the wildcard name 'name'
2337  * to be found in 'rbtdb'.
2338  *
2339  * In order for wildcard matching to work correctly in
2340  * zone_find(), we must ensure that a node for the wildcarding
2341  * level exists in the database, and has its 'find_callback'
2342  * and 'wild' bits set.
2343  *
2344  * E.g. if the wildcard name is "*.sub.example." then we
2345  * must ensure that "sub.example." exists and is marked as
2346  * a wildcard level.
2347  */
2348 static isc_result_t
2349 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2350         isc_result_t result;
2351         dns_name_t foundname;
2352         dns_offsets_t offsets;
2353         unsigned int n;
2354         dns_rbtnode_t *node = NULL;
2355
2356         dns_name_init(&foundname, offsets);
2357         n = dns_name_countlabels(name);
2358         INSIST(n >= 2);
2359         n--;
2360         dns_name_getlabelsequence(name, 1, n, &foundname);
2361         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2362         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2363                 return (result);
2364         node->nsec3 = 0;
2365         node->find_callback = 1;
2366         node->wild = 1;
2367         return (ISC_R_SUCCESS);
2368 }
2369
2370 static isc_result_t
2371 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2372         isc_result_t result;
2373         dns_name_t foundname;
2374         dns_offsets_t offsets;
2375         unsigned int n, l, i;
2376
2377         dns_name_init(&foundname, offsets);
2378         n = dns_name_countlabels(name);
2379         l = dns_name_countlabels(&rbtdb->common.origin);
2380         i = l + 1;
2381         while (i < n) {
2382                 dns_rbtnode_t *node = NULL;     /* dummy */
2383                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2384                 if (dns_name_iswildcard(&foundname)) {
2385                         result = add_wildcard_magic(rbtdb, &foundname);
2386                         if (result != ISC_R_SUCCESS)
2387                                 return (result);
2388                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2389                                                  &node);
2390                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2391                                 return (result);
2392                         node->nsec3 = 0;
2393                 }
2394                 i++;
2395         }
2396         return (ISC_R_SUCCESS);
2397 }
2398
2399 static isc_result_t
2400 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2401          dns_dbnode_t **nodep)
2402 {
2403         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2404         dns_rbtnode_t *node = NULL;
2405         dns_name_t nodename;
2406         isc_result_t result;
2407         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2408
2409         REQUIRE(VALID_RBTDB(rbtdb));
2410
2411         dns_name_init(&nodename, NULL);
2412         RWLOCK(&rbtdb->tree_lock, locktype);
2413         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2414                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2415         if (result != ISC_R_SUCCESS) {
2416                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2417                 if (!create) {
2418                         if (result == DNS_R_PARTIALMATCH)
2419                                 result = ISC_R_NOTFOUND;
2420                         return (result);
2421                 }
2422                 /*
2423                  * It would be nice to try to upgrade the lock instead of
2424                  * unlocking then relocking.
2425                  */
2426                 locktype = isc_rwlocktype_write;
2427                 RWLOCK(&rbtdb->tree_lock, locktype);
2428                 node = NULL;
2429                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2430                 if (result == ISC_R_SUCCESS) {
2431                         dns_rbt_namefromnode(node, &nodename);
2432 #ifdef DNS_RBT_USEHASH
2433                         node->locknum = node->hashval % rbtdb->node_lock_count;
2434 #else
2435                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2436                                 rbtdb->node_lock_count;
2437 #endif
2438                         node->nsec3 = 0;
2439                         add_empty_wildcards(rbtdb, name);
2440
2441                         if (dns_name_iswildcard(name)) {
2442                                 result = add_wildcard_magic(rbtdb, name);
2443                                 if (result != ISC_R_SUCCESS) {
2444                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2445                                         return (result);
2446                                 }
2447                         }
2448                 } else if (result != ISC_R_EXISTS) {
2449                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2450                         return (result);
2451                 }
2452         }
2453         reactivate_node(rbtdb, node, locktype);
2454         RWUNLOCK(&rbtdb->tree_lock, locktype);
2455
2456         *nodep = (dns_dbnode_t *)node;
2457
2458         return (ISC_R_SUCCESS);
2459 }
2460
2461 static isc_result_t
2462 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2463               dns_dbnode_t **nodep)
2464 {
2465         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2466         dns_rbtnode_t *node = NULL;
2467         dns_name_t nodename;
2468         isc_result_t result;
2469         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2470
2471         REQUIRE(VALID_RBTDB(rbtdb));
2472
2473         dns_name_init(&nodename, NULL);
2474         RWLOCK(&rbtdb->tree_lock, locktype);
2475         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2476                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2477         if (result != ISC_R_SUCCESS) {
2478                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2479                 if (!create) {
2480                         if (result == DNS_R_PARTIALMATCH)
2481                                 result = ISC_R_NOTFOUND;
2482                         return (result);
2483                 }
2484                 /*
2485                  * It would be nice to try to upgrade the lock instead of
2486                  * unlocking then relocking.
2487                  */
2488                 locktype = isc_rwlocktype_write;
2489                 RWLOCK(&rbtdb->tree_lock, locktype);
2490                 node = NULL;
2491                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2492                 if (result == ISC_R_SUCCESS) {
2493                         dns_rbt_namefromnode(node, &nodename);
2494 #ifdef DNS_RBT_USEHASH
2495                         node->locknum = node->hashval % rbtdb->node_lock_count;
2496 #else
2497                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2498                                 rbtdb->node_lock_count;
2499 #endif
2500                         node->nsec3 = 1U;
2501                 } else if (result != ISC_R_EXISTS) {
2502                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2503                         return (result);
2504                 }
2505         } else
2506                 INSIST(node->nsec3);
2507         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2508         new_reference(rbtdb, node);
2509         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2510         RWUNLOCK(&rbtdb->tree_lock, locktype);
2511
2512         *nodep = (dns_dbnode_t *)node;
2513
2514         return (ISC_R_SUCCESS);
2515 }
2516
2517 static isc_result_t
2518 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2519         rbtdb_search_t *search = arg;
2520         rdatasetheader_t *header, *header_next;
2521         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2522         rdatasetheader_t *found;
2523         isc_result_t result;
2524         dns_rbtnode_t *onode;
2525
2526         /*
2527          * We only want to remember the topmost zone cut, since it's the one
2528          * that counts, so we'll just continue if we've already found a
2529          * zonecut.
2530          */
2531         if (search->zonecut != NULL)
2532                 return (DNS_R_CONTINUE);
2533
2534         found = NULL;
2535         result = DNS_R_CONTINUE;
2536         onode = search->rbtdb->origin_node;
2537
2538         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2539                   isc_rwlocktype_read);
2540
2541         /*
2542          * Look for an NS or DNAME rdataset active in our version.
2543          */
2544         ns_header = NULL;
2545         dname_header = NULL;
2546         sigdname_header = NULL;
2547         for (header = node->data; header != NULL; header = header_next) {
2548                 header_next = header->next;
2549                 if (header->type == dns_rdatatype_ns ||
2550                     header->type == dns_rdatatype_dname ||
2551                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
2552                         do {
2553                                 if (header->serial <= search->serial &&
2554                                     !IGNORE(header)) {
2555                                         /*
2556                                          * Is this a "this rdataset doesn't
2557                                          * exist" record?
2558                                          */
2559                                         if (NONEXISTENT(header))
2560                                                 header = NULL;
2561                                         break;
2562                                 } else
2563                                         header = header->down;
2564                         } while (header != NULL);
2565                         if (header != NULL) {
2566                                 if (header->type == dns_rdatatype_dname)
2567                                         dname_header = header;
2568                                 else if (header->type ==
2569                                            RBTDB_RDATATYPE_SIGDNAME)
2570                                         sigdname_header = header;
2571                                 else if (node != onode ||
2572                                          IS_STUB(search->rbtdb)) {
2573                                         /*
2574                                          * We've found an NS rdataset that
2575                                          * isn't at the origin node.  We check
2576                                          * that they're not at the origin node,
2577                                          * because otherwise we'd erroneously
2578                                          * treat the zone top as if it were
2579                                          * a delegation.
2580                                          */
2581                                         ns_header = header;
2582                                 }
2583                         }
2584                 }
2585         }
2586
2587         /*
2588          * Did we find anything?
2589          */
2590         if (dname_header != NULL) {
2591                 /*
2592                  * Note that DNAME has precedence over NS if both exist.
2593                  */
2594                 found = dname_header;
2595                 search->zonecut_sigrdataset = sigdname_header;
2596         } else if (ns_header != NULL) {
2597                 found = ns_header;
2598                 search->zonecut_sigrdataset = NULL;
2599         }
2600
2601         if (found != NULL) {
2602                 /*
2603                  * We increment the reference count on node to ensure that
2604                  * search->zonecut_rdataset will still be valid later.
2605                  */
2606                 new_reference(search->rbtdb, node);
2607                 search->zonecut = node;
2608                 search->zonecut_rdataset = found;
2609                 search->need_cleanup = ISC_TRUE;
2610                 /*
2611                  * Since we've found a zonecut, anything beneath it is
2612                  * glue and is not subject to wildcard matching, so we
2613                  * may clear search->wild.
2614                  */
2615                 search->wild = ISC_FALSE;
2616                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2617                         /*
2618                          * If the caller does not want to find glue, then
2619                          * this is the best answer and the search should
2620                          * stop now.
2621                          */
2622                         result = DNS_R_PARTIALMATCH;
2623                 } else {
2624                         dns_name_t *zcname;
2625
2626                         /*
2627                          * The search will continue beneath the zone cut.
2628                          * This may or may not be the best match.  In case it
2629                          * is, we need to remember the node name.
2630                          */
2631                         zcname = dns_fixedname_name(&search->zonecut_name);
2632                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2633                                       ISC_R_SUCCESS);
2634                         search->copy_name = ISC_TRUE;
2635                 }
2636         } else {
2637                 /*
2638                  * There is no zonecut at this node which is active in this
2639                  * version.
2640                  *
2641                  * If this is a "wild" node and the caller hasn't disabled
2642                  * wildcard matching, remember that we've seen a wild node
2643                  * in case we need to go searching for wildcard matches
2644                  * later on.
2645                  */
2646                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2647                         search->wild = ISC_TRUE;
2648         }
2649
2650         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2651                     isc_rwlocktype_read);
2652
2653         return (result);
2654 }
2655
2656 static inline void
2657 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2658               rdatasetheader_t *header, isc_stdtime_t now,
2659               dns_rdataset_t *rdataset)
2660 {
2661         unsigned char *raw;     /* RDATASLAB */
2662
2663         /*
2664          * Caller must be holding the node reader lock.
2665          * XXXJT: technically, we need a writer lock, since we'll increment
2666          * the header count below.  However, since the actual counter value
2667          * doesn't matter, we prioritize performance here.  (We may want to
2668          * use atomic increment when available).
2669          */
2670
2671         if (rdataset == NULL)
2672                 return;
2673
2674         new_reference(rbtdb, node);
2675
2676         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2677
2678         rdataset->methods = &rdataset_methods;
2679         rdataset->rdclass = rbtdb->common.rdclass;
2680         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2681         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2682         rdataset->ttl = header->rdh_ttl - now;
2683         rdataset->trust = header->trust;
2684         if (NXDOMAIN(header))
2685                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2686         if (OPTOUT(header))
2687                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2688         rdataset->private1 = rbtdb;
2689         rdataset->private2 = node;
2690         raw = (unsigned char *)header + sizeof(*header);
2691         rdataset->private3 = raw;
2692         rdataset->count = header->count++;
2693         if (rdataset->count == ISC_UINT32_MAX)
2694                 rdataset->count = 0;
2695
2696         /*
2697          * Reset iterator state.
2698          */
2699         rdataset->privateuint4 = 0;
2700         rdataset->private5 = NULL;
2701
2702         /*
2703          * Add noqname proof.
2704          */
2705         rdataset->private6 = header->noqname;
2706         if (rdataset->private6 != NULL)
2707                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2708         rdataset->private7 = header->closest;
2709         if (rdataset->private7 != NULL)
2710                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2711
2712         /*
2713          * Copy out re-signing information.
2714          */
2715         if (RESIGN(header)) {
2716                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2717                 rdataset->resign = header->resign;
2718         } else
2719                 rdataset->resign = 0;
2720 }
2721
2722 static inline isc_result_t
2723 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2724                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2725                  dns_rdataset_t *sigrdataset)
2726 {
2727         isc_result_t result;
2728         dns_name_t *zcname;
2729         rbtdb_rdatatype_t type;
2730         dns_rbtnode_t *node;
2731
2732         /*
2733          * The caller MUST NOT be holding any node locks.
2734          */
2735
2736         node = search->zonecut;
2737         type = search->zonecut_rdataset->type;
2738
2739         /*
2740          * If we have to set foundname, we do it before anything else.
2741          * If we were to set foundname after we had set nodep or bound the
2742          * rdataset, then we'd have to undo that work if dns_name_copy()
2743          * failed.  By setting foundname first, there's nothing to undo if
2744          * we have trouble.
2745          */
2746         if (foundname != NULL && search->copy_name) {
2747                 zcname = dns_fixedname_name(&search->zonecut_name);
2748                 result = dns_name_copy(zcname, foundname, NULL);
2749                 if (result != ISC_R_SUCCESS)
2750                         return (result);
2751         }
2752         if (nodep != NULL) {
2753                 /*
2754                  * Note that we don't have to increment the node's reference
2755                  * count here because we're going to use the reference we
2756                  * already have in the search block.
2757                  */
2758                 *nodep = node;
2759                 search->need_cleanup = ISC_FALSE;
2760         }
2761         if (rdataset != NULL) {
2762                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2763                           isc_rwlocktype_read);
2764                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2765                               search->now, rdataset);
2766                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2767                         bind_rdataset(search->rbtdb, node,
2768                                       search->zonecut_sigrdataset,
2769                                       search->now, sigrdataset);
2770                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2771                             isc_rwlocktype_read);
2772         }
2773
2774         if (type == dns_rdatatype_dname)
2775                 return (DNS_R_DNAME);
2776         return (DNS_R_DELEGATION);
2777 }
2778
2779 static inline isc_boolean_t
2780 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2781            dns_rbtnode_t *node)
2782 {
2783         unsigned char *raw;     /* RDATASLAB */
2784         unsigned int count, size;
2785         dns_name_t ns_name;
2786         isc_boolean_t valid = ISC_FALSE;
2787         dns_offsets_t offsets;
2788         isc_region_t region;
2789         rdatasetheader_t *header;
2790
2791         /*
2792          * No additional locking is required.
2793          */
2794
2795         /*
2796          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2797          * if it occurs at a zone cut, but is not valid below it.
2798          */
2799         if (type == dns_rdatatype_ns) {
2800                 if (node != search->zonecut) {
2801                         return (ISC_FALSE);
2802                 }
2803         } else if (type != dns_rdatatype_a &&
2804                    type != dns_rdatatype_aaaa &&
2805                    type != dns_rdatatype_a6) {
2806                 return (ISC_FALSE);
2807         }
2808
2809         header = search->zonecut_rdataset;
2810         raw = (unsigned char *)header + sizeof(*header);
2811         count = raw[0] * 256 + raw[1];
2812 #if DNS_RDATASET_FIXED
2813         raw += 2 + (4 * count);
2814 #else
2815         raw += 2;
2816 #endif
2817
2818         while (count > 0) {
2819                 count--;
2820                 size = raw[0] * 256 + raw[1];
2821 #if DNS_RDATASET_FIXED
2822                 raw += 4;
2823 #else
2824                 raw += 2;
2825 #endif
2826                 region.base = raw;
2827                 region.length = size;
2828                 raw += size;
2829                 /*
2830                  * XXX Until we have rdata structures, we have no choice but
2831                  * to directly access the rdata format.
2832                  */
2833                 dns_name_init(&ns_name, offsets);
2834                 dns_name_fromregion(&ns_name, &region);
2835                 if (dns_name_compare(&ns_name, name) == 0) {
2836                         valid = ISC_TRUE;
2837                         break;
2838                 }
2839         }
2840
2841         return (valid);
2842 }
2843
2844 static inline isc_boolean_t
2845 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2846             dns_name_t *name)
2847 {
2848         dns_fixedname_t fnext;
2849         dns_fixedname_t forigin;
2850         dns_name_t *next;
2851         dns_name_t *origin;
2852         dns_name_t prefix;
2853         dns_rbtdb_t *rbtdb;
2854         dns_rbtnode_t *node;
2855         isc_result_t result;
2856         isc_boolean_t answer = ISC_FALSE;
2857         rdatasetheader_t *header;
2858
2859         rbtdb = search->rbtdb;
2860
2861         dns_name_init(&prefix, NULL);
2862         dns_fixedname_init(&fnext);
2863         next = dns_fixedname_name(&fnext);
2864         dns_fixedname_init(&forigin);
2865         origin = dns_fixedname_name(&forigin);
2866
2867         result = dns_rbtnodechain_next(chain, NULL, NULL);
2868         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2869                 node = NULL;
2870                 result = dns_rbtnodechain_current(chain, &prefix,
2871                                                   origin, &node);
2872                 if (result != ISC_R_SUCCESS)
2873                         break;
2874                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2875                           isc_rwlocktype_read);
2876                 for (header = node->data;
2877                      header != NULL;
2878                      header = header->next) {
2879                         if (header->serial <= search->serial &&
2880                             !IGNORE(header) && EXISTS(header))
2881                                 break;
2882                 }
2883                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2884                             isc_rwlocktype_read);
2885                 if (header != NULL)
2886                         break;
2887                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2888         }
2889         if (result == ISC_R_SUCCESS)
2890                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2891         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2892                 answer = ISC_TRUE;
2893         return (answer);
2894 }
2895
/*
 * Report whether the nearest active node found by walking backwards, or the
 * nearest active node found by walking forwards from there, is a subdomain
 * (as judged by dns_name_issubdomain()) of 'qname' or of one of the names
 * obtained by stripping leading labels from 'qname'.
 *
 * 'search' supplies the database, the serial used to decide which headers
 *          count, and the node chain; this function works on its own copy
 *          of search->chain.
 * 'qname'  is the name being looked up.
 * 'wname'  is the wildcard name; its leftmost label is dropped to obtain
 *          the name at which the label-stripping loop stops.
 *
 * A node counts as active when it carries a header whose serial is not
 * greater than search->serial and for which !IGNORE() and EXISTS() hold.
 * Each node's header list is scanned under that node's read lock.
 *
 * Returns ISC_TRUE if such a neighbour is found, ISC_FALSE otherwise.
 *
 * Note: the identifier is spelled "emtpy"; find_wildcard() calls it under
 * exactly this spelling.
 */
2896 static inline isc_boolean_t
2897 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2898         dns_fixedname_t fnext;
2899         dns_fixedname_t forigin;
2900         dns_fixedname_t fprev;
2901         dns_name_t *next;
2902         dns_name_t *origin;
2903         dns_name_t *prev;
2904         dns_name_t name;
2905         dns_name_t rname;
2906         dns_name_t tname;
2907         dns_rbtdb_t *rbtdb;
2908         dns_rbtnode_t *node;
2909         dns_rbtnodechain_t chain;
2910         isc_boolean_t check_next = ISC_TRUE;
2911         isc_boolean_t check_prev = ISC_TRUE;
2912         isc_boolean_t answer = ISC_FALSE;
2913         isc_result_t result;
2914         rdatasetheader_t *header;
2915         unsigned int n;
2916
2917         rbtdb = search->rbtdb;
2918
2919         dns_name_init(&name, NULL);
2920         dns_name_init(&tname, NULL);
2921         dns_name_init(&rname, NULL);
2922         dns_fixedname_init(&fnext);
2923         next = dns_fixedname_name(&fnext);
2924         dns_fixedname_init(&fprev);
2925         prev = dns_fixedname_name(&fprev);
2926         dns_fixedname_init(&forigin);
2927         origin = dns_fixedname_name(&forigin);
2928
2929         /*
2930          * Find if qname is at or below an empty node.
2931          * Use our own copy of the chain.
              *
              * Backward walk: starting at the chain's current node, step to
              * predecessors until a node with an active header is found or
              * the chain functions stop returning ISC_R_SUCCESS /
              * DNS_R_NEWORIGIN.
2932          */
2933
2934         chain = search->chain;
2935         do {
2936                 node = NULL;
2937                 result = dns_rbtnodechain_current(&chain, &name,
2938                                                   origin, &node);
2939                 if (result != ISC_R_SUCCESS)
2940                         break;
2941                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2942                           isc_rwlocktype_read);
2943                 for (header = node->data;
2944                      header != NULL;
2945                      header = header->next) {
2946                         if (header->serial <= search->serial &&
2947                             !IGNORE(header) && EXISTS(header))
2948                                 break;
2949                 }
2950                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2951                             isc_rwlocktype_read);
                     /* A non-NULL header means this node is active: stop here. */
2952                 if (header != NULL)
2953                         break;
2954                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
2955         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
             /*
              * 'prev' is only consulted later if the walk ended with
              * ISC_R_SUCCESS and 'name' + 'origin' could be joined into it.
              */
2956         if (result == ISC_R_SUCCESS)
2957                 result = dns_name_concatenate(&name, origin, prev, NULL);
2958         if (result != ISC_R_SUCCESS)
2959                 check_prev = ISC_FALSE;
2960
             /*
              * Forward walk on the same chain copy, i.e. beginning one step
              * after wherever the backward walk left it, looking for the
              * next node with an active header.
              */
2961         result = dns_rbtnodechain_next(&chain, NULL, NULL);
2962         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2963                 node = NULL;
2964                 result = dns_rbtnodechain_current(&chain, &name,
2965                                                   origin, &node);
2966                 if (result != ISC_R_SUCCESS)
2967                         break;
2968                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2969                           isc_rwlocktype_read);
2970                 for (header = node->data;
2971                      header != NULL;
2972                      header = header->next) {
2973                         if (header->serial <= search->serial &&
2974                             !IGNORE(header) && EXISTS(header))
2975                                 break;
2976                 }
2977                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2978                             isc_rwlocktype_read);
2979                 if (header != NULL)
2980                         break;
2981                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
2982         }
             /* Same rule as for 'prev': 'next' is used only if it was built. */
2983         if (result == ISC_R_SUCCESS)
2984                 result = dns_name_concatenate(&name, origin, next, NULL);
2985         if (result != ISC_R_SUCCESS)
2986                 check_next = ISC_FALSE;
2987
             /* 'rname' starts as qname and loses one leading label per pass. */
2988         dns_name_clone(qname, &rname);
2989
2990         /*
2991          * Remove the wildcard label to find the terminal name.
2992          */
2993         n = dns_name_countlabels(wname);
2994         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
2995
             /*
              * Test qname first, then each shorter name in turn; the loop
              * ends without testing 'tname' itself.
              */
2996         do {
2997                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
2998                     (check_next && dns_name_issubdomain(next, &rname))) {
2999                         answer = ISC_TRUE;
3000                         break;
3001                 }
3002                 /*
3003                  * Remove the left hand label.
3004                  */
3005                 n = dns_name_countlabels(&rname);
3006                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3007         } while (!dns_name_equal(&rname, &tname));
3008         return (answer);
3009 }
3010
/*
 * Walk from the node in '*nodep' up through the levels recorded in
 * search->chain, looking for a wildcard node that can answer 'qname'.
 *
 * 'search' supplies the database, the serial that decides which headers
 *          count, and the chain (level_matches / levels[]) to climb.
 * 'nodep'  on entry points at the node to start from; it is overwritten
 *          with the wildcard node only when one is accepted.
 * 'qname'  is the name being looked up; it is passed to activeemtpynode().
 *
 * Returns:
 *   ISC_R_SUCCESS   an acceptable wildcard node was stored in '*nodep'.
 *   ISC_R_NOTFOUND  no level had its wild bit set, an active level node
 *                   was reached first, or activeemtpynode() vetoed the
 *                   wildcard.
 *   other           a failure from dns_name_concatenate() or
 *                   dns_rbt_findnode() (see also the NOTE(review) near the
 *                   end of the loop).
 */
3011 static inline isc_result_t
3012 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3013               dns_name_t *qname)
3014 {
3015         unsigned int i, j;
3016         dns_rbtnode_t *node, *level_node, *wnode;
3017         rdatasetheader_t *header;
3018         isc_result_t result = ISC_R_NOTFOUND;
3019         dns_name_t name;
3020         dns_name_t *wname;
3021         dns_fixedname_t fwname;
3022         dns_rbtdb_t *rbtdb;
3023         isc_boolean_t done, wild, active;
3024         dns_rbtnodechain_t wchain;
3025
3026         /*
3027          * Caller must be holding the tree lock and MUST NOT be holding
3028          * any node locks.
3029          */
3030
3031         /*
3032          * Examine each ancestor level.  If the level's wild bit
3033          * is set, then construct the corresponding wildcard name and
3034          * search for it.  If the wildcard node exists, and is active in
3035          * this version, we're done.  If not, then we next check to see
3036          * if the ancestor is active in this version.  If so, then there
3037          * can be no possible wildcard match and again we're done.  If not,
3038          * continue the search.
3039          */
3040
3041         rbtdb = search->rbtdb;
             /*
              * 'i' is kept in step with 'node': levels[0 .. i-1] are the
              * levels used below to complete the name of 'node'.
              */
3042         i = search->chain.level_matches;
3043         done = ISC_FALSE;
3044         node = *nodep;
3045         do {
3046                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3047                           isc_rwlocktype_read);
3048
3049                 /*
3050                  * First we try to figure out if this node is active in
3051                  * the search's version.  We do this now, even though we
3052                  * may not need the information, because it simplifies the
3053                  * locking and code flow.
3054                  */
3055                 for (header = node->data;
3056                      header != NULL;
3057                      header = header->next) {
3058                         if (header->serial <= search->serial &&
3059                             !IGNORE(header) && EXISTS(header))
3060                                 break;
3061                 }
3062                 if (header != NULL)
3063                         active = ISC_TRUE;
3064                 else
3065                         active = ISC_FALSE;
3066
                     /* Sample the wild bit while the node lock is still held. */
3067                 if (node->wild)
3068                         wild = ISC_TRUE;
3069                 else
3070                         wild = ISC_FALSE;
3071
3072                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3073                             isc_rwlocktype_read);
3074
3075                 if (wild) {
3076                         /*
3077                          * Construct the wildcard name for this level.
                              *
                              * Start with dns_wildcardname followed by this
                              * node's own name, then append the names of
                              * levels[i-1] down to levels[0].
3078                          */
3079                         dns_name_init(&name, NULL);
3080                         dns_rbt_namefromnode(node, &name);
3081                         dns_fixedname_init(&fwname);
3082                         wname = dns_fixedname_name(&fwname);
3083                         result = dns_name_concatenate(dns_wildcardname, &name,
3084                                                       wname, NULL);
3085                         j = i;
3086                         while (result == ISC_R_SUCCESS && j != 0) {
3087                                 j--;
3088                                 level_node = search->chain.levels[j];
3089                                 dns_name_init(&name, NULL);
3090                                 dns_rbt_namefromnode(level_node, &name);
3091                                 result = dns_name_concatenate(wname,
3092                                                               &name,
3093                                                               wname,
3094                                                               NULL);
3095                         }
                             /* Name assembly failed: give up with that result. */
3096                         if (result != ISC_R_SUCCESS)
3097                                 break;
3098
                             /*
                              * Look the wildcard name up in the main tree using
                              * a separate chain so search->chain is untouched.
                              */
3099                         wnode = NULL;
3100                         dns_rbtnodechain_init(&wchain, NULL);
3101                         result = dns_rbt_findnode(rbtdb->tree, wname,
3102                                                   NULL, &wnode, &wchain,
3103                                                   DNS_RBTFIND_EMPTYDATA,
3104                                                   NULL, NULL);
3105                         if (result == ISC_R_SUCCESS) {
3106                                 nodelock_t *lock;
3107
3108                                 /*
3109                                  * We have found the wildcard node.  If it
3110                                  * is active in the search's version, we're
3111                                  * done.
3112                                  */
3113                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3114                                 NODE_LOCK(lock, isc_rwlocktype_read);
3115                                 for (header = wnode->data;
3116                                      header != NULL;
3117                                      header = header->next) {
3118                                         if (header->serial <= search->serial &&
3119                                             !IGNORE(header) && EXISTS(header))
3120                                                 break;
3121                                 }
3122                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
                                     /*
                                      * Accept the wildcard if it has an active
                                      * header itself or activeempty() says so,
                                      * unless activeemtpynode() reports true for
                                      * qname, in which case the whole search
                                      * ends with ISC_R_NOTFOUND.  No node lock
                                      * is held at the early return.
                                      */
3123                                 if (header != NULL ||
3124                                     activeempty(search, &wchain, wname)) {
3125                                         if (activeemtpynode(search, qname,
3126                                                             wname)) {
3127                                                 return (ISC_R_NOTFOUND);
3128                                         }
3129                                         /*
3130                                          * The wildcard node is active!
3131                                          *
3132                                          * Note: result is still ISC_R_SUCCESS
3133                                          * so we don't have to set it.
3134                                          */
3135                                         *nodep = wnode;
3136                                         break;
3137                                 }
3138                         } else if (result != ISC_R_NOTFOUND &&
3139                                    result != DNS_R_PARTIALMATCH) {
3140                                 /*
3141                                  * An error has occurred.  Bail out.
3142                                  */
3143                                 break;
3144                         }
3145                 }
3146
3147                 if (active) {
3148                         /*
3149                          * The level node is active.  Any wildcarding
3150                          * present at higher levels has no
3151                          * effect and we're done.
3152                          */
3153                         result = ISC_R_NOTFOUND;
3154                         break;
3155                 }
3156
                     /*
                      * Climb one level, or finish once level 0 has been handled.
                      *
                      * NOTE(review): if the loop ends through 'done', 'result'
                      * keeps whatever the last assignment left in it, possibly
                      * ISC_R_SUCCESS or DNS_R_PARTIALMATCH from a wildcard
                      * lookup that was not accepted.  Presumably the topmost
                      * level is always active so the break above fires first;
                      * confirm.
                      */
3157                 if (i > 0) {
3158                         i--;
3159                         node = search->chain.levels[i];
3160                 } else
3161                         done = ISC_TRUE;
3162         } while (!done);
3163
3164         return (result);
3165 }
3166
/*
 * Check whether any NSEC3 record stored behind 'header' has the same hash,
 * iteration count and salt as the version being searched
 * (search->rbtversion).
 *
 * 'header' must describe an NSEC3 rdataset (enforced by REQUIRE); the
 *          record data is read from the bytes that directly follow the
 *          header structure.
 * 'search' supplies the rdata class (search->rbtdb->common.rdclass) and the
 *          parameters to compare against.
 *
 * Returns ISC_TRUE on the first record whose parameters all match,
 * ISC_FALSE if none do.
 */
3167 static isc_boolean_t
3168 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3169 {
3170         dns_rdata_t rdata = DNS_RDATA_INIT;
3171         dns_rdata_nsec3_t nsec3;
3172         unsigned char *raw;                     /* RDATASLAB */
3173         unsigned int rdlen, count;
3174         isc_region_t region;
3175         isc_result_t result;
3176
3177         REQUIRE(header->type == dns_rdatatype_nsec3);
3178
             /*
              * The slab begins with a two-byte, most-significant-byte-first
              * record count.  With DNS_RDATASET_FIXED a further 4 bytes per
              * record are skipped here (presumably a per-record table;
              * confirm against the slab layout).
              */
3179         raw = (unsigned char *)header + sizeof(*header);
3180         count = raw[0] * 256 + raw[1]; /* count */
3181 #if DNS_RDATASET_FIXED
3182         raw += count * 4 + 2;
3183 #else
3184         raw += 2;
3185 #endif
3186         while (count-- > 0) {
                     /*
                      * Each record starts with its two-byte length; with
                      * DNS_RDATASET_FIXED two more bytes precede the data
                      * (presumably an ordering field; confirm).
                      */
3187                 rdlen = raw[0] * 256 + raw[1];
3188 #if DNS_RDATASET_FIXED
3189                 raw += 4;
3190 #else
3191                 raw += 2;
3192 #endif
3193                 region.base = raw;
3194                 region.length = rdlen;
3195                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3196                                      dns_rdatatype_nsec3, &region);
                     /* Advance to the next record before inspecting this one. */
3197                 raw += rdlen;
3198                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3199                 INSIST(result == ISC_R_SUCCESS);
                     /*
                      * The salt bytes are compared only after the salt lengths
                      * have been found equal.
                      */
3200                 if (nsec3.hash == search->rbtversion->hash &&
3201                     nsec3.iterations == search->rbtversion->iterations &&
3202                     nsec3.salt_length == search->rbtversion->salt_length &&
3203                     memcmp(nsec3.salt, search->rbtversion->salt,
3204                            nsec3.salt_length) == 0)
3205                         return (ISC_TRUE);
                     /* Make 'rdata' reusable for the next record. */
3206                 dns_rdata_reset(&rdata);
3207         }
3208         return (ISC_FALSE);
3209 }
3210
3211 /*
3212  * Find the closest preceding node that carries a usable NSEC/NSEC3 record.
      *
      * Starting at the current position of search->chain and stepping to
      * predecessors, locate the first node holding an active NSEC rdataset
      * (NSEC3 when 'tree' is the database's nsec3 tree) and, when 'secure'
      * is dns_db_secure, its RRSIG as well.
      *
      * On ISC_R_SUCCESS:
      *   'foundname'   receives the node's name joined with its origin;
      *   '*nodep'      (if 'nodep' is non-NULL) receives the node, with a
      *                 new reference taken;
      *   'rdataset'    is bound to the NSEC/NSEC3 header;
      *   'sigrdataset' is bound to the signature header if one was found.
      *
      * Other results:
      *   DNS_R_BADDB   a node had only one of the record / signature pair
      *                 when both were needed, or the walk ran out of nodes
      *                 (after one wrap-around to the last node for the
      *                 nsec3 tree).
      *   anything else is passed through from the chain or name functions.
      *
      * The node's read lock is held while its headers are scanned and is
      * released only at the end of each loop pass, i.e. after any chain
      * step or rdataset binding done for that node.
3213  */
3214 static inline isc_result_t
3215 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3216                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3217                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3218                   dns_db_secure_t secure)
3219 {
3220         dns_rbtnode_t *node;
3221         rdatasetheader_t *header, *header_next, *found, *foundsig;
3222         isc_boolean_t empty_node;
3223         isc_result_t result;
3224         dns_fixedname_t fname, forigin;
3225         dns_name_t *name, *origin;
3226         dns_rdatatype_t type;
3227         rbtdb_rdatatype_t sigtype;
3228         isc_boolean_t wraps;
3229         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3230
             /*
              * Pick the record and signature types to look for; only the
              * nsec3 tree is allowed to wrap around to its last node.
              */
3231         if (tree == search->rbtdb->nsec3) {
3232                 type = dns_rdatatype_nsec3;
3233                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3234                 wraps = ISC_TRUE;
3235         } else {
3236                 type = dns_rdatatype_nsec;
3237                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3238                 wraps = ISC_FALSE;
3239         }
3240
3241  again:
3242         do {
3243                 node = NULL;
3244                 dns_fixedname_init(&fname);
3245                 name = dns_fixedname_name(&fname);
3246                 dns_fixedname_init(&forigin);
3247                 origin = dns_fixedname_name(&forigin);
3248                 result = dns_rbtnodechain_current(&search->chain, name,
3249                                                   origin, &node);
3250                 if (result != ISC_R_SUCCESS)
3251                         return (result);
3252                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3253                           isc_rwlocktype_read);
3254                 found = NULL;
3255                 foundsig = NULL;
3256                 empty_node = ISC_TRUE;
                     /*
                      * 'header_next' is saved first because 'header' is moved
                      * along the ->down list (or set to NULL) below.
                      */
3257                 for (header = node->data;
3258                      header != NULL;
3259                      header = header_next) {
3260                         header_next = header->next;
3261                         /*
3262                          * Look for an active, extant NSEC or RRSIG NSEC.
3263                          */
3264                         do {
3265                                 if (header->serial <= search->serial &&
3266                                     !IGNORE(header)) {
3267                                         /*
3268                                          * Is this a "this rdataset doesn't
3269                                          * exist" record?
3270                                          */
3271                                         if (NONEXISTENT(header))
3272                                                 header = NULL;
3273                                         break;
3274                                 } else
3275                                         header = header->down;
3276                         } while (header != NULL);
3277                         if (header != NULL) {
3278                                 /*
3279                                  * We now know that there is at least one
3280                                  * active rdataset at this node.
3281                                  */
3282                                 empty_node = ISC_FALSE;
                                     /* Stop scanning once both halves are in hand. */
3283                                 if (header->type == type) {
3284                                         found = header;
3285                                         if (foundsig != NULL)
3286                                                 break;
3287                                 } else if (header->type == sigtype) {
3288                                         foundsig = header;
3289                                         if (found != NULL)
3290                                                 break;
3291                                 }
3292                         }
3293                 }
3294                 if (!empty_node) {
                             /*
                              * An NSEC3 record whose parameters differ from the
                              * version's (per matchparams()) is treated as if
                              * the node were empty: step back and keep looking.
                              */
3295                         if (found != NULL && search->rbtversion->havensec3 &&
3296                             found->type == dns_rdatatype_nsec3 &&
3297                             !matchparams(found, search)) {
3298                                 empty_node = ISC_TRUE;
3299                                 found = NULL;
3300                                 foundsig = NULL;
3301                                 result = dns_rbtnodechain_prev(&search->chain,
3302                                                                NULL, NULL);
3303                         } else if (found != NULL &&
3304                                    (foundsig != NULL || !need_sig))
3305                         {
3306                                 /*
3307                                  * We've found the right NSEC/NSEC3 record.
3308                                  *
3309                                  * Note: for this to really be the right
3310                                  * NSEC record, it's essential that the NSEC
3311                                  * records of any nodes obscured by a zone
3312                                  * cut have been removed; we assume this is
3313                                  * the case.
3314                                  */
3315                                 result = dns_name_concatenate(name, origin,
3316                                                               foundname, NULL);
3317                                 if (result == ISC_R_SUCCESS) {
3318                                         if (nodep != NULL) {
3319                                                 new_reference(search->rbtdb,
3320                                                               node);
3321                                                 *nodep = node;
3322                                         }
3323                                         bind_rdataset(search->rbtdb, node,
3324                                                       found, search->now,
3325                                                       rdataset);
3326                                         if (foundsig != NULL)
3327                                                 bind_rdataset(search->rbtdb,
3328                                                               node,
3329                                                               foundsig,
3330                                                               search->now,
3331                                                               sigrdataset);
3332                                 }
3333                         } else if (found == NULL && foundsig == NULL) {
3334                                 /*
3335                                  * This node is active, but has no NSEC or
3336                                  * RRSIG NSEC.  That means it's glue or
3337                                  * other obscured zone data that isn't
3338                                  * relevant for our search.  Treat the
3339                                  * node as if it were empty and keep looking.
3340                                  */
3341                                 empty_node = ISC_TRUE;
3342                                 result = dns_rbtnodechain_prev(&search->chain,
3343                                                                NULL, NULL);
3344                         } else {
3345                                 /*
3346                                  * We found an active node, but either the
3347                                  * NSEC or the RRSIG NSEC is missing.  This
3348                                  * shouldn't happen.
3349                                  */
3350                                 result = DNS_R_BADDB;
3351                         }
3352                 } else {
3353                         /*
3354                          * This node isn't active.  We've got to keep
3355                          * looking.
3356                          */
3357                         result = dns_rbtnodechain_prev(&search->chain, NULL,
3358                                                        NULL);
3359                 }
3360                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3361                             isc_rwlocktype_read);
3362         } while (empty_node && result == ISC_R_SUCCESS);
3363
             /*
              * Ran off the front of the chain: for the nsec3 tree, restart
              * once from the tree's last node.  'wraps' is cleared first so
              * a second exhaustion falls through to DNS_R_BADDB below.
              */
3364         if (result == ISC_R_NOMORE && wraps) {
3365                 result = dns_rbtnodechain_last(&search->chain, tree,
3366                                                NULL, NULL);
3367                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3368                         wraps = ISC_FALSE;
3369                         goto again;
3370                 }
3371         }
3372
3373         /*
3374          * If the result is ISC_R_NOMORE, then we got to the beginning of
3375          * the database and didn't find a NSEC record.  This shouldn't
3376          * happen.
3377          */
3378         if (result == ISC_R_NOMORE)
3379                 result = DNS_R_BADDB;
3380
3381         return (result);
3382 }
3383
3384 static isc_result_t
3385 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3386           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3387           dns_dbnode_t **nodep, dns_name_t *foundname,
3388           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3389 {
3390         dns_rbtnode_t *node = NULL;
3391         isc_result_t result;
3392         rbtdb_search_t search;
3393         isc_boolean_t cname_ok = ISC_TRUE;
3394         isc_boolean_t close_version = ISC_FALSE;
3395         isc_boolean_t maybe_zonecut = ISC_FALSE;
3396         isc_boolean_t at_zonecut = ISC_FALSE;
3397         isc_boolean_t wild;
3398         isc_boolean_t empty_node;
3399         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3400         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3401         rbtdb_rdatatype_t sigtype;
3402         isc_boolean_t active;
3403         dns_rbtnodechain_t chain;
3404         nodelock_t *lock;
3405         dns_rbt_t *tree;
3406
3407         search.rbtdb = (dns_rbtdb_t *)db;
3408
3409         REQUIRE(VALID_RBTDB(search.rbtdb));
3410
3411         /*
3412          * We don't care about 'now'.
3413          */
3414         UNUSED(now);
3415
3416         /*
3417          * If the caller didn't supply a version, attach to the current
3418          * version.
3419          */
3420         if (version == NULL) {
3421                 currentversion(db, &version);
3422                 close_version = ISC_TRUE;
3423         }
3424
3425         search.rbtversion = version;
3426         search.serial = search.rbtversion->serial;
3427         search.options = options;
3428         search.copy_name = ISC_FALSE;
3429         search.need_cleanup = ISC_FALSE;
3430         search.wild = ISC_FALSE;
3431         search.zonecut = NULL;
3432         dns_fixedname_init(&search.zonecut_name);
3433         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3434         search.now = 0;
3435
3436         /*
3437          * 'wild' will be true iff. we've matched a wildcard.
3438          */
3439         wild = ISC_FALSE;
3440
3441         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3442
3443         /*
3444          * Search down from the root of the tree.  If, while going down, we
3445          * encounter a callback node, zone_zonecut_callback() will search the
3446          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3447          */
3448         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3449                                                          search.rbtdb->tree;
3450         result = dns_rbt_findnode(tree, name, foundname, &node,
3451                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3452                                   zone_zonecut_callback, &search);
3453
3454         if (result == DNS_R_PARTIALMATCH) {
3455         partial_match:
3456                 if (search.zonecut != NULL) {
3457                     result = setup_delegation(&search, nodep, foundname,
3458                                               rdataset, sigrdataset);
3459                     goto tree_exit;
3460                 }
3461
3462                 if (search.wild) {
3463                         /*
3464                          * At least one of the levels in the search chain
3465                          * potentially has a wildcard.  For each such level,
3466                          * we must see if there's a matching wildcard active
3467                          * in the current version.
3468                          */
3469                         result = find_wildcard(&search, &node, name);
3470                         if (result == ISC_R_SUCCESS) {
3471                                 result = dns_name_copy(name, foundname, NULL);
3472                                 if (result != ISC_R_SUCCESS)
3473                                         goto tree_exit;
3474                                 wild = ISC_TRUE;
3475                                 goto found;
3476                         }
3477                         else if (result != ISC_R_NOTFOUND)
3478                                 goto tree_exit;
3479                 }
3480
3481                 chain = search.chain;
3482                 active = activeempty(&search, &chain, name);
3483
3484                 /*
3485                  * If we're here, then the name does not exist, is not
3486                  * beneath a zonecut, and there's no matching wildcard.
3487                  */
3488                 if ((search.rbtversion->secure == dns_db_secure &&
3489                      !search.rbtversion->havensec3) ||
3490                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3491                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3492                 {
3493                         result = find_closest_nsec(&search, nodep, foundname,
3494                                                    rdataset, sigrdataset, tree,
3495                                                    search.rbtversion->secure);
3496                         if (result == ISC_R_SUCCESS)
3497                                 result = active ? DNS_R_EMPTYNAME :
3498                                                   DNS_R_NXDOMAIN;
3499                 } else
3500                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3501                 goto tree_exit;
3502         } else if (result != ISC_R_SUCCESS)
3503                 goto tree_exit;
3504
3505  found:
3506         /*
3507          * We have found a node whose name is the desired name, or we
3508          * have matched a wildcard.
3509          */
3510
3511         if (search.zonecut != NULL) {
3512                 /*
3513                  * If we're beneath a zone cut, we don't want to look for
3514                  * CNAMEs because they're not legitimate zone glue.
3515                  */
3516                 cname_ok = ISC_FALSE;
3517         } else {
3518                 /*
3519                  * The node may be a zone cut itself.  If it might be one,
3520                  * make sure we check for it later.
3521                  *
3522                  * DS records live above the zone cut in ordinary zone so
3523                  * we want to ignore any referral.
3524                  *
3525                  * Stub zones don't have anything "above" the delgation so
3526                  * we always return a referral.
3527                  */
3528                 if (node->find_callback &&
3529                     ((node != search.rbtdb->origin_node &&
3530                       !dns_rdatatype_atparent(type)) ||
3531                      IS_STUB(search.rbtdb)))
3532                         maybe_zonecut = ISC_TRUE;
3533         }
3534
3535         /*
3536          * Certain DNSSEC types are not subject to CNAME matching
3537          * (RFC4035, section 2.5 and RFC3007).
3538          *
3539          * We don't check for RRSIG, because we don't store RRSIG records
3540          * directly.
3541          */
3542         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3543                 cname_ok = ISC_FALSE;
3544
3545         /*
3546          * We now go looking for rdata...
3547          */
3548
3549         lock = &search.rbtdb->node_locks[node->locknum].lock;
3550         NODE_LOCK(lock, isc_rwlocktype_read);
3551
3552         found = NULL;
3553         foundsig = NULL;
3554         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3555         nsecheader = NULL;
3556         nsecsig = NULL;
3557         cnamesig = NULL;
3558         empty_node = ISC_TRUE;
3559         for (header = node->data; header != NULL; header = header_next) {
3560                 header_next = header->next;
3561                 /*
3562                  * Look for an active, extant rdataset.
3563                  */
3564                 do {
3565                         if (header->serial <= search.serial &&
3566                             !IGNORE(header)) {
3567                                 /*
3568                                  * Is this a "this rdataset doesn't
3569                                  * exist" record?
3570                                  */
3571                                 if (NONEXISTENT(header))
3572                                         header = NULL;
3573                                 break;
3574                         } else
3575                                 header = header->down;
3576                 } while (header != NULL);
3577                 if (header != NULL) {
3578                         /*
3579                          * We now know that there is at least one active
3580                          * rdataset at this node.
3581                          */
3582                         empty_node = ISC_FALSE;
3583
3584                         /*
3585                          * Do special zone cut handling, if requested.
3586                          */
3587                         if (maybe_zonecut &&
3588                             header->type == dns_rdatatype_ns) {
3589                                 /*
3590                                  * We increment the reference count on node to
3591                                  * ensure that search->zonecut_rdataset will
3592                                  * still be valid later.
3593                                  */
3594                                 new_reference(search.rbtdb, node);
3595                                 search.zonecut = node;
3596                                 search.zonecut_rdataset = header;
3597                                 search.zonecut_sigrdataset = NULL;
3598                                 search.need_cleanup = ISC_TRUE;
3599                                 maybe_zonecut = ISC_FALSE;
3600                                 at_zonecut = ISC_TRUE;
3601                                 /*
3602                                  * It is not clear if KEY should still be
3603                                  * allowed at the parent side of the zone
3604                                  * cut or not.  It is needed for RFC3007
3605                                  * validated updates.
3606                                  */
3607                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3608                                     && type != dns_rdatatype_nsec
3609                                     && type != dns_rdatatype_key) {
3610                                         /*
3611                                          * Glue is not OK, but any answer we
3612                                          * could return would be glue.  Return
3613                                          * the delegation.
3614                                          */
3615                                         found = NULL;
3616                                         break;
3617                                 }
3618                                 if (found != NULL && foundsig != NULL)
3619                                         break;
3620                         }
3621
3622
3623                         /*
3624                          * If the NSEC3 record doesn't match the chain
3625                          * we are using behave as if it isn't here.
3626                          */
3627                         if (header->type == dns_rdatatype_nsec3 &&
3628                            !matchparams(header, &search)) {
3629                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3630                                 goto partial_match;
3631                         }
3632                         /*
3633                          * If we found a type we were looking for,
3634                          * remember it.
3635                          */
3636                         if (header->type == type ||
3637                             type == dns_rdatatype_any ||
3638                             (header->type == dns_rdatatype_cname &&
3639                              cname_ok)) {
3640                                 /*
3641                                  * We've found the answer!
3642                                  */
3643                                 found = header;
3644                                 if (header->type == dns_rdatatype_cname &&
3645                                     cname_ok) {
3646                                         /*
3647                                          * We may be finding a CNAME instead
3648                                          * of the desired type.
3649                                          *
3650                                          * If we've already got the CNAME RRSIG,
3651                                          * use it, otherwise change sigtype
3652                                          * so that we find it.
3653                                          */
3654                                         if (cnamesig != NULL)
3655                                                 foundsig = cnamesig;
3656                                         else
3657                                                 sigtype =
3658                                                     RBTDB_RDATATYPE_SIGCNAME;
3659                                 }
3660                                 /*
3661                                  * If we've got all we need, end the search.
3662                                  */
3663                                 if (!maybe_zonecut && foundsig != NULL)
3664                                         break;
3665                         } else if (header->type == sigtype) {
3666                                 /*
3667                                  * We've found the RRSIG rdataset for our
3668                                  * target type.  Remember it.
3669                                  */
3670                                 foundsig = header;
3671                                 /*
3672                                  * If we've got all we need, end the search.
3673                                  */
3674                                 if (!maybe_zonecut && found != NULL)
3675                                         break;
3676                         } else if (header->type == dns_rdatatype_nsec &&
3677                                    !search.rbtversion->havensec3) {
3678                                 /*
3679                                  * Remember a NSEC rdataset even if we're
3680                                  * not specifically looking for it, because
3681                                  * we might need it later.
3682                                  */
3683                                 nsecheader = header;
3684                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3685                                    !search.rbtversion->havensec3) {
3686                                 /*
3687                                  * If we need the NSEC rdataset, we'll also
3688                                  * need its signature.
3689                                  */
3690                                 nsecsig = header;
3691                         } else if (cname_ok &&
3692                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3693                                 /*
3694                                  * If we get a CNAME match, we'll also need
3695                                  * its signature.
3696                                  */
3697                                 cnamesig = header;
3698                         }
3699                 }
3700         }
3701
3702         if (empty_node) {
3703                 /*
3704                  * We have an exact match for the name, but there are no
3705                  * active rdatasets in the desired version.  That means that
3706                  * this node doesn't exist in the desired version, and that
3707                  * we really have a partial match.
3708                  */
3709                 if (!wild) {
3710                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3711                         goto partial_match;
3712                 }
3713         }
3714
3715         /*
3716          * If we didn't find what we were looking for...
3717          */
3718         if (found == NULL) {
3719                 if (search.zonecut != NULL) {
3720                         /*
3721                          * We were trying to find glue at a node beneath a
3722                          * zone cut, but didn't.
3723                          *
3724                          * Return the delegation.
3725                          */
3726                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3727                         result = setup_delegation(&search, nodep, foundname,
3728                                                   rdataset, sigrdataset);
3729                         goto tree_exit;
3730                 }
3731                 /*
3732                  * The desired type doesn't exist.
3733                  */
3734                 result = DNS_R_NXRRSET;
3735                 if (search.rbtversion->secure == dns_db_secure &&
3736                     !search.rbtversion->havensec3 &&
3737                     (nsecheader == NULL || nsecsig == NULL)) {
3738                         /*
3739                          * The zone is secure but there's no NSEC,
3740                          * or the NSEC has no signature!
3741                          */
3742                         if (!wild) {
3743                                 result = DNS_R_BADDB;
3744                                 goto node_exit;
3745                         }
3746
3747                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3748                         result = find_closest_nsec(&search, nodep, foundname,
3749                                                    rdataset, sigrdataset,
3750                                                    search.rbtdb->tree,
3751                                                    search.rbtversion->secure);
3752                         if (result == ISC_R_SUCCESS)
3753                                 result = DNS_R_EMPTYWILD;
3754                         goto tree_exit;
3755                 }
3756                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3757                     nsecheader == NULL)
3758                 {
3759                         /*
3760                          * There's no NSEC record, and we were told
3761                          * to find one.
3762                          */
3763                         result = DNS_R_BADDB;
3764                         goto node_exit;
3765                 }
3766                 if (nodep != NULL) {
3767                         new_reference(search.rbtdb, node);
3768                         *nodep = node;
3769                 }
3770                 if ((search.rbtversion->secure == dns_db_secure &&
3771                      !search.rbtversion->havensec3) ||
3772                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3773                 {
3774                         bind_rdataset(search.rbtdb, node, nsecheader,
3775                                       0, rdataset);
3776                         if (nsecsig != NULL)
3777                                 bind_rdataset(search.rbtdb, node,
3778                                               nsecsig, 0, sigrdataset);
3779                 }
3780                 if (wild)
3781                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3782                 goto node_exit;
3783         }
3784
3785         /*
3786          * We found what we were looking for, or we found a CNAME.
3787          */
3788
3789         if (type != found->type &&
3790             type != dns_rdatatype_any &&
3791             found->type == dns_rdatatype_cname) {
3792                 /*
3793                  * We weren't doing an ANY query and we found a CNAME instead
3794                  * of the type we were looking for, so we need to indicate
3795                  * that result to the caller.
3796                  */
3797                 result = DNS_R_CNAME;
3798         } else if (search.zonecut != NULL) {
3799                 /*
3800                  * If we're beneath a zone cut, we must indicate that the
3801                  * result is glue, unless we're actually at the zone cut
3802                  * and the type is NSEC or KEY.
3803                  */
3804                 if (search.zonecut == node) {
3805                         /*
3806                          * It is not clear if KEY should still be
3807                          * allowed at the parent side of the zone
3808                          * cut or not.  It is needed for RFC3007
3809                          * validated updates.
3810                          */
3811                         if (type == dns_rdatatype_nsec ||
3812                             type == dns_rdatatype_nsec3 ||
3813                             type == dns_rdatatype_key)
3814                                 result = ISC_R_SUCCESS;
3815                         else if (type == dns_rdatatype_any)
3816                                 result = DNS_R_ZONECUT;
3817                         else
3818                                 result = DNS_R_GLUE;
3819                 } else
3820                         result = DNS_R_GLUE;
3821                 /*
3822                  * We might have found data that isn't glue, but was occluded
3823                  * by a dynamic update.  If the caller cares about this, they
3824                  * will have told us to validate glue.
3825                  *
3826                  * XXX We should cache the glue validity state!
3827                  */
3828                 if (result == DNS_R_GLUE &&
3829                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
3830                     !valid_glue(&search, foundname, type, node)) {
3831                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3832                         result = setup_delegation(&search, nodep, foundname,
3833                                                   rdataset, sigrdataset);
3834                     goto tree_exit;
3835                 }
3836         } else {
3837                 /*
3838                  * An ordinary successful query!
3839                  */
3840                 result = ISC_R_SUCCESS;
3841         }
3842
3843         if (nodep != NULL) {
3844                 if (!at_zonecut)
3845                         new_reference(search.rbtdb, node);
3846                 else
3847                         search.need_cleanup = ISC_FALSE;
3848                 *nodep = node;
3849         }
3850
3851         if (type != dns_rdatatype_any) {
3852                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
3853                 if (foundsig != NULL)
3854                         bind_rdataset(search.rbtdb, node, foundsig, 0,
3855                                       sigrdataset);
3856         }
3857
3858         if (wild)
3859                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3860
3861  node_exit:
3862         NODE_UNLOCK(lock, isc_rwlocktype_read);
3863
3864  tree_exit:
3865         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3866
3867         /*
3868          * If we found a zonecut but aren't going to use it, we have to
3869          * let go of it.
3870          */
3871         if (search.need_cleanup) {
3872                 node = search.zonecut;
3873                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
3874
3875                 NODE_LOCK(lock, isc_rwlocktype_read);
3876                 decrement_reference(search.rbtdb, node, 0,
3877                                     isc_rwlocktype_read, isc_rwlocktype_none,
3878                                     ISC_FALSE);
3879                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3880         }
3881
3882         if (close_version)
3883                 closeversion(db, &version, ISC_FALSE);
3884
3885         dns_rbtnodechain_reset(&search.chain);
3886
3887         return (result);
3888 }
3889
3890 static isc_result_t
3891 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
3892                  isc_stdtime_t now, dns_dbnode_t **nodep,
3893                  dns_name_t *foundname,
3894                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3895 {
3896         UNUSED(db);
3897         UNUSED(name);
3898         UNUSED(options);
3899         UNUSED(now);
3900         UNUSED(nodep);
3901         UNUSED(foundname);
3902         UNUSED(rdataset);
3903         UNUSED(sigrdataset);
3904
3905         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
3906
3907         return (ISC_R_NOTIMPLEMENTED);
3908 }
3909
3910 static isc_result_t
3911 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
3912         rbtdb_search_t *search = arg;
3913         rdatasetheader_t *header, *header_prev, *header_next;
3914         rdatasetheader_t *dname_header, *sigdname_header;
3915         isc_result_t result;
3916         nodelock_t *lock;
3917         isc_rwlocktype_t locktype;
3918
3919         /* XXX comment */
3920
3921         REQUIRE(search->zonecut == NULL);
3922
3923         /*
3924          * Keep compiler silent.
3925          */
3926         UNUSED(name);
3927
3928         lock = &(search->rbtdb->node_locks[node->locknum].lock);
3929         locktype = isc_rwlocktype_read;
3930         NODE_LOCK(lock, locktype);
3931
3932         /*
3933          * Look for a DNAME or RRSIG DNAME rdataset.
3934          */
3935         dname_header = NULL;
3936         sigdname_header = NULL;
3937         header_prev = NULL;
3938         for (header = node->data; header != NULL; header = header_next) {
3939                 header_next = header->next;
3940                 if (header->rdh_ttl <= search->now) {
3941                         /*
3942                          * This rdataset is stale.  If no one else is
3943                          * using the node, we can clean it up right
3944                          * now, otherwise we mark it as stale, and
3945                          * the node as dirty, so it will get cleaned
3946                          * up later.
3947                          */
3948                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
3949                             (locktype == isc_rwlocktype_write ||
3950                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
3951                                 /*
3952                                  * We update the node's status only when we
3953                                  * can get write access; otherwise, we leave
3954                                  * others to this work.  Periodical cleaning
3955                                  * will eventually take the job as the last
3956                                  * resort.
3957                                  * We won't downgrade the lock, since other
3958                                  * rdatasets are probably stale, too.
3959                                  */
3960                                 locktype = isc_rwlocktype_write;
3961
3962                                 if (dns_rbtnode_refcurrent(node) == 0) {
3963                                         isc_mem_t *mctx;
3964
3965                                         /*
3966                                          * header->down can be non-NULL if the
3967                                          * refcount has just decremented to 0
3968                                          * but decrement_reference() has not
3969                                          * performed clean_cache_node(), in
3970                                          * which case we need to purge the
3971                                          * stale headers first.
3972                                          */
3973                                         mctx = search->rbtdb->common.mctx;
3974                                         clean_stale_headers(search->rbtdb,
3975                                                             mctx,
3976                                                             header);
3977                                         if (header_prev != NULL)
3978                                                 header_prev->next =
3979                                                         header->next;
3980                                         else
3981                                                 node->data = header->next;
3982                                         free_rdataset(search->rbtdb, mctx,
3983                                                       header);
3984                                 } else {
3985                                         header->attributes |=
3986                                                 RDATASET_ATTR_STALE;
3987                                         node->dirty = 1;
3988                                         header_prev = header;
3989                                 }
3990                         } else
3991                                 header_prev = header;
3992                 } else if (header->type == dns_rdatatype_dname &&
3993                            EXISTS(header)) {
3994                         dname_header = header;
3995                         header_prev = header;
3996                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
3997                          EXISTS(header)) {
3998                         sigdname_header = header;
3999                         header_prev = header;
4000                 } else
4001                         header_prev = header;
4002         }
4003
4004         if (dname_header != NULL &&
4005             (!DNS_TRUST_PENDING(dname_header->trust) ||
4006              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4007                 /*
4008                  * We increment the reference count on node to ensure that
4009                  * search->zonecut_rdataset will still be valid later.
4010                  */
4011                 new_reference(search->rbtdb, node);
4012                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4013                 search->zonecut = node;
4014                 search->zonecut_rdataset = dname_header;
4015                 search->zonecut_sigrdataset = sigdname_header;
4016                 search->need_cleanup = ISC_TRUE;
4017                 result = DNS_R_PARTIALMATCH;
4018         } else
4019                 result = DNS_R_CONTINUE;
4020
4021         NODE_UNLOCK(lock, locktype);
4022
4023         return (result);
4024 }
4025
4026 static inline isc_result_t
4027 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4028                      dns_dbnode_t **nodep, dns_name_t *foundname,
4029                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4030 {
4031         unsigned int i;
4032         dns_rbtnode_t *level_node;
4033         rdatasetheader_t *header, *header_prev, *header_next;
4034         rdatasetheader_t *found, *foundsig;
4035         isc_result_t result = ISC_R_NOTFOUND;
4036         dns_name_t name;
4037         dns_rbtdb_t *rbtdb;
4038         isc_boolean_t done;
4039         nodelock_t *lock;
4040         isc_rwlocktype_t locktype;
4041
4042         /*
4043          * Caller must be holding the tree lock.
4044          */
4045
4046         rbtdb = search->rbtdb;
4047         i = search->chain.level_matches;
4048         done = ISC_FALSE;
4049         do {
4050                 locktype = isc_rwlocktype_read;
4051                 lock = &rbtdb->node_locks[node->locknum].lock;
4052                 NODE_LOCK(lock, locktype);
4053
4054                 /*
4055                  * Look for NS and RRSIG NS rdatasets.
4056                  */
4057                 found = NULL;
4058                 foundsig = NULL;
4059                 header_prev = NULL;
4060                 for (header = node->data;
4061                      header != NULL;
4062                      header = header_next) {
4063                         header_next = header->next;
4064                         if (header->rdh_ttl <= search->now) {
4065                                 /*
4066                                  * This rdataset is stale.  If no one else is
4067                                  * using the node, we can clean it up right
4068                                  * now, otherwise we mark it as stale, and
4069                                  * the node as dirty, so it will get cleaned
4070                                  * up later.
4071                                  */
4072                                 if ((header->rdh_ttl <= search->now -
4073                                                     RBTDB_VIRTUAL) &&
4074                                     (locktype == isc_rwlocktype_write ||
4075                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4076                                         /*
4077                                          * We update the node's status only
4078                                          * when we can get write access.
4079                                          */
4080                                         locktype = isc_rwlocktype_write;
4081
4082                                         if (dns_rbtnode_refcurrent(node)
4083                                             == 0) {
4084                                                 isc_mem_t *m;
4085
4086                                                 m = search->rbtdb->common.mctx;
4087                                                 clean_stale_headers(
4088                                                         search->rbtdb,
4089                                                         m, header);
4090                                                 if (header_prev != NULL)
4091                                                         header_prev->next =
4092                                                                 header->next;
4093                                                 else
4094                                                         node->data =
4095                                                                 header->next;
4096                                                 free_rdataset(rbtdb, m,
4097                                                               header);
4098                                         } else {
4099                                                 header->attributes |=
4100                                                         RDATASET_ATTR_STALE;
4101                                                 node->dirty = 1;
4102                                                 header_prev = header;
4103                                         }
4104                                 } else
4105                                         header_prev = header;
4106                         } else if (EXISTS(header)) {
4107                                 /*
4108                                  * We've found an extant rdataset.  See if
4109                                  * we're interested in it.
4110                                  */
4111                                 if (header->type == dns_rdatatype_ns) {
4112                                         found = header;
4113                                         if (foundsig != NULL)
4114                                                 break;
4115                                 } else if (header->type ==
4116                                            RBTDB_RDATATYPE_SIGNS) {
4117                                         foundsig = header;
4118                                         if (found != NULL)
4119                                                 break;
4120                                 }
4121                                 header_prev = header;
4122                         } else
4123                                 header_prev = header;
4124                 }
4125
4126                 if (found != NULL) {
4127                         /*
4128                          * If we have to set foundname, we do it before
4129                          * anything else.  If we were to set foundname after
4130                          * we had set nodep or bound the rdataset, then we'd
4131                          * have to undo that work if dns_name_concatenate()
4132                          * failed.  By setting foundname first, there's
4133                          * nothing to undo if we have trouble.
4134                          */
4135                         if (foundname != NULL) {
4136                                 dns_name_init(&name, NULL);
4137                                 dns_rbt_namefromnode(node, &name);
4138                                 result = dns_name_copy(&name, foundname, NULL);
4139                                 while (result == ISC_R_SUCCESS && i > 0) {
4140                                         i--;
4141                                         level_node = search->chain.levels[i];
4142                                         dns_name_init(&name, NULL);
4143                                         dns_rbt_namefromnode(level_node,
4144                                                              &name);
4145                                         result =
4146                                                 dns_name_concatenate(foundname,
4147                                                                      &name,
4148                                                                      foundname,
4149                                                                      NULL);
4150                                 }
4151                                 if (result != ISC_R_SUCCESS) {
4152                                         *nodep = NULL;
4153                                         goto node_exit;
4154                                 }
4155                         }
4156                         result = DNS_R_DELEGATION;
4157                         if (nodep != NULL) {
4158                                 new_reference(search->rbtdb, node);
4159                                 *nodep = node;
4160                         }
4161                         bind_rdataset(search->rbtdb, node, found, search->now,
4162                                       rdataset);
4163                         if (foundsig != NULL)
4164                                 bind_rdataset(search->rbtdb, node, foundsig,
4165                                               search->now, sigrdataset);
4166                         if (need_headerupdate(found, search->now) ||
4167                             (foundsig != NULL &&
4168                              need_headerupdate(foundsig, search->now))) {
4169                                 if (locktype != isc_rwlocktype_write) {
4170                                         NODE_UNLOCK(lock, locktype);
4171                                         NODE_LOCK(lock, isc_rwlocktype_write);
4172                                         locktype = isc_rwlocktype_write;
4173                                 }
4174                                 if (need_headerupdate(found, search->now))
4175                                         update_header(search->rbtdb, found,
4176                                                       search->now);
4177                                 if (foundsig != NULL &&
4178                                     need_headerupdate(foundsig, search->now)) {
4179                                         update_header(search->rbtdb, foundsig,
4180                                                       search->now);
4181                                 }
4182                         }
4183                 }
4184
4185         node_exit:
4186                 NODE_UNLOCK(lock, locktype);
4187
4188                 if (found == NULL && i > 0) {
4189                         i--;
4190                         node = search->chain.levels[i];
4191                 } else
4192                         done = ISC_TRUE;
4193
4194         } while (!done);
4195
4196         return (result);
4197 }
4198
4199 static isc_result_t
4200 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4201                   isc_stdtime_t now, dns_name_t *foundname,
4202                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4203 {
4204         dns_rbtnode_t *node;
4205         rdatasetheader_t *header, *header_next, *header_prev;
4206         rdatasetheader_t *found, *foundsig;
4207         isc_boolean_t empty_node;
4208         isc_result_t result;
4209         dns_fixedname_t fname, forigin;
4210         dns_name_t *name, *origin;
4211         rbtdb_rdatatype_t matchtype, sigmatchtype;
4212         nodelock_t *lock;
4213         isc_rwlocktype_t locktype;
4214
4215         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4216         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4217                                              dns_rdatatype_nsec);
4218
4219         do {
4220                 node = NULL;
4221                 dns_fixedname_init(&fname);
4222                 name = dns_fixedname_name(&fname);
4223                 dns_fixedname_init(&forigin);
4224                 origin = dns_fixedname_name(&forigin);
4225                 result = dns_rbtnodechain_current(&search->chain, name,
4226                                                   origin, &node);
4227                 if (result != ISC_R_SUCCESS)
4228                         return (result);
4229                 locktype = isc_rwlocktype_read;
4230                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4231                 NODE_LOCK(lock, locktype);
4232                 found = NULL;
4233                 foundsig = NULL;
4234                 empty_node = ISC_TRUE;
4235                 header_prev = NULL;
4236                 for (header = node->data;
4237                      header != NULL;
4238                      header = header_next) {
4239                         header_next = header->next;
4240                         if (header->rdh_ttl <= now) {
4241                                 /*
4242                                  * This rdataset is stale.  If no one else is
4243                                  * using the node, we can clean it up right
4244                                  * now, otherwise we mark it as stale, and the
4245                                  * node as dirty, so it will get cleaned up
4246                                  * later.
4247                                  */
4248                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4249                                     (locktype == isc_rwlocktype_write ||
4250                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4251                                         /*
4252                                          * We update the node's status only
4253                                          * when we can get write access.
4254                                          */
4255                                         locktype = isc_rwlocktype_write;
4256
4257                                         if (dns_rbtnode_refcurrent(node)
4258                                             == 0) {
4259                                                 isc_mem_t *m;
4260
4261                                                 m = search->rbtdb->common.mctx;
4262                                                 clean_stale_headers(
4263                                                         search->rbtdb,
4264                                                         m, header);
4265                                                 if (header_prev != NULL)
4266                                                         header_prev->next =
4267                                                                 header->next;
4268                                                 else
4269                                                         node->data = header->next;
4270                                                 free_rdataset(search->rbtdb, m,
4271                                                               header);
4272                                         } else {
4273                                                 header->attributes |=
4274                                                         RDATASET_ATTR_STALE;
4275                                                 node->dirty = 1;
4276                                                 header_prev = header;
4277                                         }
4278                                 } else
4279                                         header_prev = header;
4280                                 continue;
4281                         }
4282                         if (NONEXISTENT(header) ||
4283                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4284                                 header_prev = header;
4285                                 continue;
4286                         }
4287                         empty_node = ISC_FALSE;
4288                         if (header->type == matchtype)
4289                                 found = header;
4290                         else if (header->type == sigmatchtype)
4291                                 foundsig = header;
4292                         header_prev = header;
4293                 }
4294                 if (found != NULL) {
4295                         result = dns_name_concatenate(name, origin,
4296                                                       foundname, NULL);
4297                         if (result != ISC_R_SUCCESS)
4298                                 goto unlock_node;
4299                         bind_rdataset(search->rbtdb, node, found,
4300                                       now, rdataset);
4301                         if (foundsig != NULL)
4302                                 bind_rdataset(search->rbtdb, node, foundsig,
4303                                               now, sigrdataset);
4304                         new_reference(search->rbtdb, node);
4305                         *nodep = node;
4306                         result = DNS_R_COVERINGNSEC;
4307                 } else if (!empty_node) {
4308                         result = ISC_R_NOTFOUND;
4309                 } else
4310                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4311                                                        NULL);
4312  unlock_node:
4313                 NODE_UNLOCK(lock, locktype);
4314         } while (empty_node && result == ISC_R_SUCCESS);
4315         return (result);
4316 }
4317
4318 static isc_result_t
4319 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4320            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4321            dns_dbnode_t **nodep, dns_name_t *foundname,
4322            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4323 {
4324         dns_rbtnode_t *node = NULL;
4325         isc_result_t result;
4326         rbtdb_search_t search;
4327         isc_boolean_t cname_ok = ISC_TRUE;
4328         isc_boolean_t empty_node;
4329         nodelock_t *lock;
4330         isc_rwlocktype_t locktype;
4331         rdatasetheader_t *header, *header_prev, *header_next;
4332         rdatasetheader_t *found, *nsheader;
4333         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4334         rdatasetheader_t *update, *updatesig;
4335         rbtdb_rdatatype_t sigtype, negtype;
4336
4337         UNUSED(version);
4338
4339         search.rbtdb = (dns_rbtdb_t *)db;
4340
4341         REQUIRE(VALID_RBTDB(search.rbtdb));
4342         REQUIRE(version == NULL);
4343
4344         if (now == 0)
4345                 isc_stdtime_get(&now);
4346
4347         search.rbtversion = NULL;
4348         search.serial = 1;
4349         search.options = options;
4350         search.copy_name = ISC_FALSE;
4351         search.need_cleanup = ISC_FALSE;
4352         search.wild = ISC_FALSE;
4353         search.zonecut = NULL;
4354         dns_fixedname_init(&search.zonecut_name);
4355         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4356         search.now = now;
4357         update = NULL;
4358         updatesig = NULL;
4359
4360         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4361
4362         /*
4363          * Search down from the root of the tree.  If, while going down, we
4364          * encounter a callback node, cache_zonecut_callback() will search the
4365          * rdatasets at the zone cut for a DNAME rdataset.
4366          */
4367         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4368                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4369                                   cache_zonecut_callback, &search);
4370
4371         if (result == DNS_R_PARTIALMATCH) {
4372                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4373                         result = find_coveringnsec(&search, nodep, now,
4374                                                    foundname, rdataset,
4375                                                    sigrdataset);
4376                         if (result == DNS_R_COVERINGNSEC)
4377                                 goto tree_exit;
4378                 }
4379                 if (search.zonecut != NULL) {
4380                     result = setup_delegation(&search, nodep, foundname,
4381                                               rdataset, sigrdataset);
4382                     goto tree_exit;
4383                 } else {
4384                 find_ns:
4385                         result = find_deepest_zonecut(&search, node, nodep,
4386                                                       foundname, rdataset,
4387                                                       sigrdataset);
4388                         goto tree_exit;
4389                 }
4390         } else if (result != ISC_R_SUCCESS)
4391                 goto tree_exit;
4392
4393         /*
4394          * Certain DNSSEC types are not subject to CNAME matching
4395          * (RFC4035, section 2.5 and RFC3007).
4396          *
4397          * We don't check for RRSIG, because we don't store RRSIG records
4398          * directly.
4399          */
4400         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4401                 cname_ok = ISC_FALSE;
4402
4403         /*
4404          * We now go looking for rdata...
4405          */
4406
4407         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4408         locktype = isc_rwlocktype_read;
4409         NODE_LOCK(lock, locktype);
4410
4411         found = NULL;
4412         foundsig = NULL;
4413         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4414         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4415         nsheader = NULL;
4416         nssig = NULL;
4417         cnamesig = NULL;
4418         empty_node = ISC_TRUE;
4419         header_prev = NULL;
4420         for (header = node->data; header != NULL; header = header_next) {
4421                 header_next = header->next;
4422                 if (header->rdh_ttl <= now) {
4423                         /*
4424                          * This rdataset is stale.  If no one else is using the
4425                          * node, we can clean it up right now, otherwise we
4426                          * mark it as stale, and the node as dirty, so it will
4427                          * get cleaned up later.
4428                          */
4429                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4430                             (locktype == isc_rwlocktype_write ||
4431                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4432                                 /*
4433                                  * We update the node's status only when we
4434                                  * can get write access.
4435                                  */
4436                                 locktype = isc_rwlocktype_write;
4437
4438                                 if (dns_rbtnode_refcurrent(node) == 0) {
4439                                         isc_mem_t *mctx;
4440
4441                                         mctx = search.rbtdb->common.mctx;
4442                                         clean_stale_headers(search.rbtdb, mctx,
4443                                                             header);
4444                                         if (header_prev != NULL)
4445                                                 header_prev->next =
4446                                                         header->next;
4447                                         else
4448                                                 node->data = header->next;
4449                                         free_rdataset(search.rbtdb, mctx,
4450                                                       header);
4451                                 } else {
4452                                         header->attributes |=
4453                                                 RDATASET_ATTR_STALE;
4454                                         node->dirty = 1;
4455                                         header_prev = header;
4456                                 }
4457                         } else
4458                                 header_prev = header;
4459                 } else if (EXISTS(header)) {
4460                         /*
4461                          * We now know that there is at least one active
4462                          * non-stale rdataset at this node.
4463                          */
4464                         empty_node = ISC_FALSE;
4465
4466                         /*
4467                          * If we found a type we were looking for, remember
4468                          * it.
4469                          */
4470                         if (header->type == type ||
4471                             (type == dns_rdatatype_any &&
4472                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4473                             (cname_ok && header->type ==
4474                              dns_rdatatype_cname)) {
4475                                 /*
4476                                  * We've found the answer.
4477                                  */
4478                                 found = header;
4479                                 if (header->type == dns_rdatatype_cname &&
4480                                     cname_ok &&
4481                                     cnamesig != NULL) {
4482                                         /*
4483                                          * If we've already got the CNAME RRSIG,
4484                                          * use it, otherwise change sigtype
4485                                          * so that we find it.
4486                                          */
4487                                         if (cnamesig != NULL)
4488                                                 foundsig = cnamesig;
4489                                         else
4490                                                 sigtype =
4491                                                     RBTDB_RDATATYPE_SIGCNAME;
4492                                         foundsig = cnamesig;
4493                                 }
4494                         } else if (header->type == sigtype) {
4495                                 /*
4496                                  * We've found the RRSIG rdataset for our
4497                                  * target type.  Remember it.
4498                                  */
4499                                 foundsig = header;
4500                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4501                                    header->type == negtype) {
4502                                 /*
4503                                  * We've found a negative cache entry.
4504                                  */
4505                                 found = header;
4506                         } else if (header->type == dns_rdatatype_ns) {
4507                                 /*
4508                                  * Remember a NS rdataset even if we're
4509                                  * not specifically looking for it, because
4510                                  * we might need it later.
4511                                  */
4512                                 nsheader = header;
4513                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4514                                 /*
4515                                  * If we need the NS rdataset, we'll also
4516                                  * need its signature.
4517                                  */
4518                                 nssig = header;
4519                         } else if (cname_ok &&
4520                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4521                                 /*
4522                                  * If we get a CNAME match, we'll also need
4523                                  * its signature.
4524                                  */
4525                                 cnamesig = header;
4526                         }
4527                         header_prev = header;
4528                 } else
4529                         header_prev = header;
4530         }
4531
4532         if (empty_node) {
4533                 /*
4534                  * We have an exact match for the name, but there are no
4535                  * extant rdatasets.  That means that this node doesn't
4536                  * meaningfully exist, and that we really have a partial match.
4537                  */
4538                 NODE_UNLOCK(lock, locktype);
4539                 goto find_ns;
4540         }
4541
4542         /*
4543          * If we didn't find what we were looking for...
4544          */
4545         if (found == NULL ||
4546             (DNS_TRUST_ADDITIONAL(found->trust) &&
4547              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4548             (found->trust == dns_trust_glue &&
4549              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4550             (DNS_TRUST_PENDING(found->trust) &&
4551              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4552                 /*
4553                  * If there is an NS rdataset at this node, then this is the
4554                  * deepest zone cut.
4555                  */
4556                 if (nsheader != NULL) {
4557                         if (nodep != NULL) {
4558                                 new_reference(search.rbtdb, node);
4559                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4560                                 *nodep = node;
4561                         }
4562                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4563                                       rdataset);
4564                         if (need_headerupdate(nsheader, search.now))
4565                                 update = nsheader;
4566                         if (nssig != NULL) {
4567                                 bind_rdataset(search.rbtdb, node, nssig,
4568                                               search.now, sigrdataset);
4569                                 if (need_headerupdate(nssig, search.now))
4570                                         updatesig = nssig;
4571                         }
4572                         result = DNS_R_DELEGATION;
4573                         goto node_exit;
4574                 }
4575
4576                 /*
4577                  * Go find the deepest zone cut.
4578                  */
4579                 NODE_UNLOCK(lock, locktype);
4580                 goto find_ns;
4581         }
4582
4583         /*
4584          * We found what we were looking for, or we found a CNAME.
4585          */
4586
4587         if (nodep != NULL) {
4588                 new_reference(search.rbtdb, node);
4589                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4590                 *nodep = node;
4591         }
4592
4593         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4594                 /*
4595                  * We found a negative cache entry.
4596                  */
4597                 if (NXDOMAIN(found))
4598                         result = DNS_R_NCACHENXDOMAIN;
4599                 else
4600                         result = DNS_R_NCACHENXRRSET;
4601         } else if (type != found->type &&
4602                    type != dns_rdatatype_any &&
4603                    found->type == dns_rdatatype_cname) {
4604                 /*
4605                  * We weren't doing an ANY query and we found a CNAME instead
4606                  * of the type we were looking for, so we need to indicate
4607                  * that result to the caller.
4608                  */
4609                 result = DNS_R_CNAME;
4610         } else {
4611                 /*
4612                  * An ordinary successful query!
4613                  */
4614                 result = ISC_R_SUCCESS;
4615         }
4616
4617         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4618             result == DNS_R_NCACHENXRRSET) {
4619                 bind_rdataset(search.rbtdb, node, found, search.now,
4620                               rdataset);
4621                 if (need_headerupdate(found, search.now))
4622                         update = found;
4623                 if (foundsig != NULL) {
4624                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4625                                       sigrdataset);
4626                         if (need_headerupdate(foundsig, search.now))
4627                                 updatesig = foundsig;
4628                 }
4629         }
4630
4631  node_exit:
4632         if ((update != NULL || updatesig != NULL) &&
4633             locktype != isc_rwlocktype_write) {
4634                 NODE_UNLOCK(lock, locktype);
4635                 NODE_LOCK(lock, isc_rwlocktype_write);
4636                 locktype = isc_rwlocktype_write;
4637         }
4638         if (update != NULL && need_headerupdate(update, search.now))
4639                 update_header(search.rbtdb, update, search.now);
4640         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4641                 update_header(search.rbtdb, updatesig, search.now);
4642
4643         NODE_UNLOCK(lock, locktype);
4644
4645  tree_exit:
4646         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4647
4648         /*
4649          * If we found a zonecut but aren't going to use it, we have to
4650          * let go of it.
4651          */
4652         if (search.need_cleanup) {
4653                 node = search.zonecut;
4654                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4655
4656                 NODE_LOCK(lock, isc_rwlocktype_read);
4657                 decrement_reference(search.rbtdb, node, 0,
4658                                     isc_rwlocktype_read, isc_rwlocktype_none,
4659                                     ISC_FALSE);
4660                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4661         }
4662
4663         dns_rbtnodechain_reset(&search.chain);
4664
4665         return (result);
4666 }
4667
4668 static isc_result_t
4669 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4670                   isc_stdtime_t now, dns_dbnode_t **nodep,
4671                   dns_name_t *foundname,
4672                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4673 {
4674         dns_rbtnode_t *node = NULL;
4675         nodelock_t *lock;
4676         isc_result_t result;
4677         rbtdb_search_t search;
4678         rdatasetheader_t *header, *header_prev, *header_next;
4679         rdatasetheader_t *found, *foundsig;
4680         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4681         isc_rwlocktype_t locktype;
4682
4683         search.rbtdb = (dns_rbtdb_t *)db;
4684
4685         REQUIRE(VALID_RBTDB(search.rbtdb));
4686
4687         if (now == 0)
4688                 isc_stdtime_get(&now);
4689
4690         search.rbtversion = NULL;
4691         search.serial = 1;
4692         search.options = options;
4693         search.copy_name = ISC_FALSE;
4694         search.need_cleanup = ISC_FALSE;
4695         search.wild = ISC_FALSE;
4696         search.zonecut = NULL;
4697         dns_fixedname_init(&search.zonecut_name);
4698         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4699         search.now = now;
4700
4701         if ((options & DNS_DBFIND_NOEXACT) != 0)
4702                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4703
4704         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4705
4706         /*
4707          * Search down from the root of the tree.
4708          */
4709         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4710                                   &search.chain, rbtoptions, NULL, &search);
4711
4712         if (result == DNS_R_PARTIALMATCH) {
4713         find_ns:
4714                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4715                                               rdataset, sigrdataset);
4716                 goto tree_exit;
4717         } else if (result != ISC_R_SUCCESS)
4718                 goto tree_exit;
4719
4720         /*
4721          * We now go looking for an NS rdataset at the node.
4722          */
4723
4724         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4725         locktype = isc_rwlocktype_read;
4726         NODE_LOCK(lock, locktype);
4727
4728         found = NULL;
4729         foundsig = NULL;
4730         header_prev = NULL;
4731         for (header = node->data; header != NULL; header = header_next) {
4732                 header_next = header->next;
4733                 if (header->rdh_ttl <= now) {
4734                         /*
4735                          * This rdataset is stale.  If no one else is using the
4736                          * node, we can clean it up right now, otherwise we
4737                          * mark it as stale, and the node as dirty, so it will
4738                          * get cleaned up later.
4739                          */
4740                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4741                             (locktype == isc_rwlocktype_write ||
4742                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4743                                 /*
4744                                  * We update the node's status only when we
4745                                  * can get write access.
4746                                  */
4747                                 locktype = isc_rwlocktype_write;
4748
4749                                 if (dns_rbtnode_refcurrent(node) == 0) {
4750                                         isc_mem_t *mctx;
4751
4752                                         mctx = search.rbtdb->common.mctx;
4753                                         clean_stale_headers(search.rbtdb, mctx,
4754                                                             header);
4755                                         if (header_prev != NULL)
4756                                                 header_prev->next =
4757                                                         header->next;
4758                                         else
4759                                                 node->data = header->next;
4760                                         free_rdataset(search.rbtdb, mctx,
4761                                                       header);
4762                                 } else {
4763                                         header->attributes |=
4764                                                 RDATASET_ATTR_STALE;
4765                                         node->dirty = 1;
4766                                         header_prev = header;
4767                                 }
4768                         } else
4769                                 header_prev = header;
4770                 } else if (EXISTS(header)) {
4771                         /*
4772                          * If we found a type we were looking for, remember
4773                          * it.
4774                          */
4775                         if (header->type == dns_rdatatype_ns) {
4776                                 /*
4777                                  * Remember a NS rdataset even if we're
4778                                  * not specifically looking for it, because
4779                                  * we might need it later.
4780                                  */
4781                                 found = header;
4782                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4783                                 /*
4784                                  * If we need the NS rdataset, we'll also
4785                                  * need its signature.
4786                                  */
4787                                 foundsig = header;
4788                         }
4789                         header_prev = header;
4790                 } else
4791                         header_prev = header;
4792         }
4793
4794         if (found == NULL) {
4795                 /*
4796                  * No NS records here.
4797                  */
4798                 NODE_UNLOCK(lock, locktype);
4799                 goto find_ns;
4800         }
4801
4802         if (nodep != NULL) {
4803                 new_reference(search.rbtdb, node);
4804                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4805                 *nodep = node;
4806         }
4807
4808         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4809         if (foundsig != NULL)
4810                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4811                               sigrdataset);
4812
4813         if (need_headerupdate(found, search.now) ||
4814             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
4815                 if (locktype != isc_rwlocktype_write) {
4816                         NODE_UNLOCK(lock, locktype);
4817                         NODE_LOCK(lock, isc_rwlocktype_write);
4818                         locktype = isc_rwlocktype_write;
4819                 }
4820                 if (need_headerupdate(found, search.now))
4821                         update_header(search.rbtdb, found, search.now);
4822                 if (foundsig != NULL &&
4823                     need_headerupdate(foundsig, search.now)) {
4824                         update_header(search.rbtdb, foundsig, search.now);
4825                 }
4826         }
4827
4828         NODE_UNLOCK(lock, locktype);
4829
4830  tree_exit:
4831         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4832
4833         INSIST(!search.need_cleanup);
4834
4835         dns_rbtnodechain_reset(&search.chain);
4836
4837         if (result == DNS_R_DELEGATION)
4838                 result = ISC_R_SUCCESS;
4839
4840         return (result);
4841 }
4842
4843 static void
4844 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
4845         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4846         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
4847         unsigned int refs;
4848
4849         REQUIRE(VALID_RBTDB(rbtdb));
4850         REQUIRE(targetp != NULL && *targetp == NULL);
4851
4852         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
4853         dns_rbtnode_refincrement(node, &refs);
4854         INSIST(refs != 0);
4855         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
4856
4857         *targetp = source;
4858 }
4859
4860 static void
4861 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
4862         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4863         dns_rbtnode_t *node;
4864         isc_boolean_t want_free = ISC_FALSE;
4865         isc_boolean_t inactive = ISC_FALSE;
4866         rbtdb_nodelock_t *nodelock;
4867
4868         REQUIRE(VALID_RBTDB(rbtdb));
4869         REQUIRE(targetp != NULL && *targetp != NULL);
4870
4871         node = (dns_rbtnode_t *)(*targetp);
4872         nodelock = &rbtdb->node_locks[node->locknum];
4873
4874         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
4875
4876         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
4877                                 isc_rwlocktype_none, ISC_FALSE)) {
4878                 if (isc_refcount_current(&nodelock->references) == 0 &&
4879                     nodelock->exiting) {
4880                         inactive = ISC_TRUE;
4881                 }
4882         }
4883
4884         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
4885
4886         *targetp = NULL;
4887
4888         if (inactive) {
4889                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
4890                 rbtdb->active--;
4891                 if (rbtdb->active == 0)
4892                         want_free = ISC_TRUE;
4893                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
4894                 if (want_free) {
4895                         char buf[DNS_NAME_FORMATSIZE];
4896                         if (dns_name_dynamic(&rbtdb->common.origin))
4897                                 dns_name_format(&rbtdb->common.origin, buf,
4898                                                 sizeof(buf));
4899                         else
4900                                 strcpy(buf, "<UNKNOWN>");
4901                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
4902                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
4903                                       "calling free_rbtdb(%s)", buf);
4904                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
4905                 }
4906         }
4907 }
4908
4909 static isc_result_t
4910 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
4911         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
4912         dns_rbtnode_t *rbtnode = node;
4913         rdatasetheader_t *header;
4914         isc_boolean_t force_expire = ISC_FALSE;
4915         /*
4916          * These are the category and module used by the cache cleaner.
4917          */
4918         isc_boolean_t log = ISC_FALSE;
4919         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
4920         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
4921         int level = ISC_LOG_DEBUG(2);
4922         char printname[DNS_NAME_FORMATSIZE];
4923
4924         REQUIRE(VALID_RBTDB(rbtdb));
4925
4926         /*
4927          * Caller must hold a tree lock.
4928          */
4929
4930         if (now == 0)
4931                 isc_stdtime_get(&now);
4932
4933         if (isc_mem_isovermem(rbtdb->common.mctx)) {
4934                 isc_uint32_t val;
4935
4936                 isc_random_get(&val);
4937                 /*
4938                  * XXXDCL Could stand to have a better policy, like LRU.
4939                  */
4940                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
4941
4942                 /*
4943                  * Note that 'log' can be true IFF overmem is also true.
4944                  * overmem can currently only be true for cache
4945                  * databases -- hence all of the "overmem cache" log strings.
4946                  */
4947                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
4948                 if (log)
4949                         isc_log_write(dns_lctx, category, module, level,
4950                                       "overmem cache: %s %s",
4951                                       force_expire ? "FORCE" : "check",
4952                                       dns_rbt_formatnodename(rbtnode,
4953                                                            printname,
4954                                                            sizeof(printname)));
4955         }
4956
4957         /*
4958          * We may not need write access, but this code path is not performance
4959          * sensitive, so it should be okay to always lock as a writer.
4960          */
4961         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4962                   isc_rwlocktype_write);
4963
4964         for (header = rbtnode->data; header != NULL; header = header->next)
4965                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
4966                         /*
4967                          * We don't check if refcurrent(rbtnode) == 0 and try
4968                          * to free like we do in cache_find(), because
4969                          * refcurrent(rbtnode) must be non-zero.  This is so
4970                          * because 'node' is an argument to the function.
4971                          */
4972                         header->attributes |= RDATASET_ATTR_STALE;
4973                         rbtnode->dirty = 1;
4974                         if (log)
4975                                 isc_log_write(dns_lctx, category, module,
4976                                               level, "overmem cache: stale %s",
4977                                               printname);
4978                 } else if (force_expire) {
4979                         if (! RETAIN(header)) {
4980                                 set_ttl(rbtdb, header, 0);
4981                                 header->attributes |= RDATASET_ATTR_STALE;
4982                                 rbtnode->dirty = 1;
4983                         } else if (log) {
4984                                 isc_log_write(dns_lctx, category, module,
4985                                               level, "overmem cache: "
4986                                               "reprieve by RETAIN() %s",
4987                                               printname);
4988                         }
4989                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
4990                         isc_log_write(dns_lctx, category, module, level,
4991                                       "overmem cache: saved %s", printname);
4992
4993         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
4994                     isc_rwlocktype_write);
4995
4996         return (ISC_R_SUCCESS);
4997 }
4998
4999 static void
5000 overmem(dns_db_t *db, isc_boolean_t overmem) {
5001         /* This is an empty callback.  See adb.c:water() */
5002
5003         UNUSED(db);
5004         UNUSED(overmem);
5005
5006         return;
5007 }
5008
5009 static void
5010 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5011         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5012         dns_rbtnode_t *rbtnode = node;
5013         isc_boolean_t first;
5014
5015         REQUIRE(VALID_RBTDB(rbtdb));
5016
5017         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5018                   isc_rwlocktype_read);
5019
5020         fprintf(out, "node %p, %u references, locknum = %u\n",
5021                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5022                 rbtnode->locknum);
5023         if (rbtnode->data != NULL) {
5024                 rdatasetheader_t *current, *top_next;
5025
5026                 for (current = rbtnode->data; current != NULL;
5027                      current = top_next) {
5028                         top_next = current->next;
5029                         first = ISC_TRUE;
5030                         fprintf(out, "\ttype %u", current->type);
5031                         do {
5032                                 if (!first)
5033                                         fprintf(out, "\t");
5034                                 first = ISC_FALSE;
5035                                 fprintf(out,
5036                                         "\tserial = %lu, ttl = %u, "
5037                                         "trust = %u, attributes = %u, "
5038                                         "resign = %u\n",
5039                                         (unsigned long)current->serial,
5040                                         current->rdh_ttl,
5041                                         current->trust,
5042                                         current->attributes,
5043                                         current->resign);
5044                                 current = current->down;
5045                         } while (current != NULL);
5046                 }
5047         } else
5048                 fprintf(out, "(empty)\n");
5049
5050         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5051                     isc_rwlocktype_read);
5052 }
5053
5054 static isc_result_t
5055 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5056 {
5057         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5058         rbtdb_dbiterator_t *rbtdbiter;
5059
5060         REQUIRE(VALID_RBTDB(rbtdb));
5061
5062         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5063         if (rbtdbiter == NULL)
5064                 return (ISC_R_NOMEMORY);
5065
5066         rbtdbiter->common.methods = &dbiterator_methods;
5067         rbtdbiter->common.db = NULL;
5068         dns_db_attach(db, &rbtdbiter->common.db);
5069         rbtdbiter->common.relative_names =
5070                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5071         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5072         rbtdbiter->common.cleaning = ISC_FALSE;
5073         rbtdbiter->paused = ISC_TRUE;
5074         rbtdbiter->tree_locked = isc_rwlocktype_none;
5075         rbtdbiter->result = ISC_R_SUCCESS;
5076         dns_fixedname_init(&rbtdbiter->name);
5077         dns_fixedname_init(&rbtdbiter->origin);
5078         rbtdbiter->node = NULL;
5079         rbtdbiter->delete = 0;
5080         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5081         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5082         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5083         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5084         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5085         if (rbtdbiter->nsec3only)
5086                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5087         else
5088                 rbtdbiter->current = &rbtdbiter->chain;
5089
5090         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5091
5092         return (ISC_R_SUCCESS);
5093 }
5094
5095 static isc_result_t
5096 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5097                   dns_rdatatype_t type, dns_rdatatype_t covers,
5098                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5099                   dns_rdataset_t *sigrdataset)
5100 {
5101         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5102         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5103         rdatasetheader_t *header, *header_next, *found, *foundsig;
5104         rbtdb_serial_t serial;
5105         rbtdb_version_t *rbtversion = version;
5106         isc_boolean_t close_version = ISC_FALSE;
5107         rbtdb_rdatatype_t matchtype, sigmatchtype;
5108
5109         REQUIRE(VALID_RBTDB(rbtdb));
5110         REQUIRE(type != dns_rdatatype_any);
5111
5112         if (rbtversion == NULL) {
5113                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5114                 close_version = ISC_TRUE;
5115         }
5116         serial = rbtversion->serial;
5117         now = 0;
5118
5119         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5120                   isc_rwlocktype_read);
5121
5122         found = NULL;
5123         foundsig = NULL;
5124         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5125         if (covers == 0)
5126                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5127         else
5128                 sigmatchtype = 0;
5129
5130         for (header = rbtnode->data; header != NULL; header = header_next) {
5131                 header_next = header->next;
5132                 do {
5133                         if (header->serial <= serial &&
5134                             !IGNORE(header)) {
5135                                 /*
5136                                  * Is this a "this rdataset doesn't
5137                                  * exist" record?
5138                                  */
5139                                 if (NONEXISTENT(header))
5140                                         header = NULL;
5141                                 break;
5142                         } else
5143                                 header = header->down;
5144                 } while (header != NULL);
5145                 if (header != NULL) {
5146                         /*
5147                          * We have an active, extant rdataset.  If it's a
5148                          * type we're looking for, remember it.
5149                          */
5150                         if (header->type == matchtype) {
5151                                 found = header;
5152                                 if (foundsig != NULL)
5153                                         break;
5154                         } else if (header->type == sigmatchtype) {
5155                                 foundsig = header;
5156                                 if (found != NULL)
5157                                         break;
5158                         }
5159                 }
5160         }
5161         if (found != NULL) {
5162                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5163                 if (foundsig != NULL)
5164                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5165                                       sigrdataset);
5166         }
5167
5168         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5169                     isc_rwlocktype_read);
5170
5171         if (close_version)
5172                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5173                              ISC_FALSE);
5174
5175         if (found == NULL)
5176                 return (ISC_R_NOTFOUND);
5177
5178         return (ISC_R_SUCCESS);
5179 }
5180
5181 static isc_result_t
5182 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5183                    dns_rdatatype_t type, dns_rdatatype_t covers,
5184                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5185                    dns_rdataset_t *sigrdataset)
5186 {
5187         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5188         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5189         rdatasetheader_t *header, *header_next, *found, *foundsig;
5190         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5191         isc_result_t result;
5192         nodelock_t *lock;
5193         isc_rwlocktype_t locktype;
5194
5195         REQUIRE(VALID_RBTDB(rbtdb));
5196         REQUIRE(type != dns_rdatatype_any);
5197
5198         UNUSED(version);
5199
5200         result = ISC_R_SUCCESS;
5201
5202         if (now == 0)
5203                 isc_stdtime_get(&now);
5204
5205         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5206         locktype = isc_rwlocktype_read;
5207         NODE_LOCK(lock, locktype);
5208
5209         found = NULL;
5210         foundsig = NULL;
5211         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5212         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5213         if (covers == 0)
5214                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5215         else
5216                 sigmatchtype = 0;
5217
5218         for (header = rbtnode->data; header != NULL; header = header_next) {
5219                 header_next = header->next;
5220                 if (header->rdh_ttl <= now) {
5221                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5222                             (locktype == isc_rwlocktype_write ||
5223                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5224                                 /*
5225                                  * We update the node's status only when we
5226                                  * can get write access.
5227                                  */
5228                                 locktype = isc_rwlocktype_write;
5229
5230                                 /*
5231                                  * We don't check if refcurrent(rbtnode) == 0
5232                                  * and try to free like we do in cache_find(),
5233                                  * because refcurrent(rbtnode) must be
5234                                  * non-zero.  This is so because 'node' is an
5235                                  * argument to the function.
5236                                  */
5237                                 header->attributes |= RDATASET_ATTR_STALE;
5238                                 rbtnode->dirty = 1;
5239                         }
5240                 } else if (EXISTS(header)) {
5241                         if (header->type == matchtype)
5242                                 found = header;
5243                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5244                                  header->type == negtype)
5245                                 found = header;
5246                         else if (header->type == sigmatchtype)
5247                                 foundsig = header;
5248                 }
5249         }
5250         if (found != NULL) {
5251                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5252                 if (foundsig != NULL)
5253                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5254                                       sigrdataset);
5255         }
5256
5257         NODE_UNLOCK(lock, locktype);
5258
5259         if (found == NULL)
5260                 return (ISC_R_NOTFOUND);
5261
5262         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5263                 /*
5264                  * We found a negative cache entry.
5265                  */
5266                 if (NXDOMAIN(found))
5267                         result = DNS_R_NCACHENXDOMAIN;
5268                 else
5269                         result = DNS_R_NCACHENXRRSET;
5270         }
5271
5272         return (result);
5273 }
5274
5275 static isc_result_t
5276 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5277              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5278 {
5279         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5280         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5281         rbtdb_version_t *rbtversion = version;
5282         rbtdb_rdatasetiter_t *iterator;
5283         unsigned int refs;
5284
5285         REQUIRE(VALID_RBTDB(rbtdb));
5286
5287         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5288         if (iterator == NULL)
5289                 return (ISC_R_NOMEMORY);
5290
5291         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5292                 now = 0;
5293                 if (rbtversion == NULL)
5294                         currentversion(db,
5295                                  (dns_dbversion_t **) (void *)(&rbtversion));
5296                 else {
5297                         unsigned int refs;
5298
5299                         isc_refcount_increment(&rbtversion->references,
5300                                                &refs);
5301                         INSIST(refs > 1);
5302                 }
5303         } else {
5304                 if (now == 0)
5305                         isc_stdtime_get(&now);
5306                 rbtversion = NULL;
5307         }
5308
5309         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5310         iterator->common.methods = &rdatasetiter_methods;
5311         iterator->common.db = db;
5312         iterator->common.node = node;
5313         iterator->common.version = (dns_dbversion_t *)rbtversion;
5314         iterator->common.now = now;
5315
5316         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5317
5318         dns_rbtnode_refincrement(rbtnode, &refs);
5319         INSIST(refs != 0);
5320
5321         iterator->current = NULL;
5322
5323         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5324
5325         *iteratorp = (dns_rdatasetiter_t *)iterator;
5326
5327         return (ISC_R_SUCCESS);
5328 }
5329
5330 static isc_boolean_t
5331 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5332         rdatasetheader_t *header, *header_next;
5333         isc_boolean_t cname, other_data;
5334         dns_rdatatype_t rdtype;
5335
5336         /*
5337          * The caller must hold the node lock.
5338          */
5339
5340         /*
5341          * Look for CNAME and "other data" rdatasets active in our version.
5342          */
5343         cname = ISC_FALSE;
5344         other_data = ISC_FALSE;
5345         for (header = node->data; header != NULL; header = header_next) {
5346                 header_next = header->next;
5347                 if (header->type == dns_rdatatype_cname) {
5348                         /*
5349                          * Look for an active extant CNAME.
5350                          */
5351                         do {
5352                                 if (header->serial <= serial &&
5353                                     !IGNORE(header)) {
5354                                         /*
5355                                          * Is this a "this rdataset doesn't
5356                                          * exist" record?
5357                                          */
5358                                         if (NONEXISTENT(header))
5359                                                 header = NULL;
5360                                         break;
5361                                 } else
5362                                         header = header->down;
5363                         } while (header != NULL);
5364                         if (header != NULL)
5365                                 cname = ISC_TRUE;
5366                 } else {
5367                         /*
5368                          * Look for active extant "other data".
5369                          *
5370                          * "Other data" is any rdataset whose type is not
5371                          * KEY, NSEC, SIG or RRSIG.
5372                          */
5373                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5374                         if (rdtype != dns_rdatatype_key &&
5375                             rdtype != dns_rdatatype_sig &&
5376                             rdtype != dns_rdatatype_nsec &&
5377                             rdtype != dns_rdatatype_rrsig) {
5378                                 /*
5379                                  * Is it active and extant?
5380                                  */
5381                                 do {
5382                                         if (header->serial <= serial &&
5383                                             !IGNORE(header)) {
5384                                                 /*
5385                                                  * Is this a "this rdataset
5386                                                  * doesn't exist" record?
5387                                                  */
5388                                                 if (NONEXISTENT(header))
5389                                                         header = NULL;
5390                                                 break;
5391                                         } else
5392                                                 header = header->down;
5393                                 } while (header != NULL);
5394                                 if (header != NULL)
5395                                         other_data = ISC_TRUE;
5396                         }
5397                 }
5398         }
5399
5400         if (cname && other_data)
5401                 return (ISC_TRUE);
5402
5403         return (ISC_FALSE);
5404 }
5405
5406 static isc_result_t
5407 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5408         isc_result_t result;
5409
5410         INSIST(!IS_CACHE(rbtdb));
5411         INSIST(newheader->heap_index == 0);
5412         INSIST(!ISC_LINK_LINKED(newheader, link));
5413
5414         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5415         return (result);
5416 }
5417
5418 static isc_result_t
5419 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5420     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5421     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5422 {
5423         rbtdb_changed_t *changed = NULL;
5424         rdatasetheader_t *topheader, *topheader_prev, *header;
5425         unsigned char *merged;
5426         isc_result_t result;
5427         isc_boolean_t header_nx;
5428         isc_boolean_t newheader_nx;
5429         isc_boolean_t merge;
5430         dns_rdatatype_t rdtype, covers;
5431         rbtdb_rdatatype_t negtype;
5432         dns_trust_t trust;
5433         int idx;
5434
5435         /*
5436          * Add an rdatasetheader_t to a node.
5437          */
5438
5439         /*
5440          * Caller must be holding the node lock.
5441          */
5442
5443         if ((options & DNS_DBADD_MERGE) != 0) {
5444                 REQUIRE(rbtversion != NULL);
5445                 merge = ISC_TRUE;
5446         } else
5447                 merge = ISC_FALSE;
5448
5449         if ((options & DNS_DBADD_FORCE) != 0)
5450                 trust = dns_trust_ultimate;
5451         else
5452                 trust = newheader->trust;
5453
5454         if (rbtversion != NULL && !loading) {
5455                 /*
5456                  * We always add a changed record, even if no changes end up
5457                  * being made to this node, because it's harmless and
5458                  * simplifies the code.
5459                  */
5460                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5461                 if (changed == NULL) {
5462                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5463                         return (ISC_R_NOMEMORY);
5464                 }
5465         }
5466
5467         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5468         topheader_prev = NULL;
5469
5470         negtype = 0;
5471         if (rbtversion == NULL && !newheader_nx) {
5472                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5473                 if (rdtype == 0) {
5474                         /*
5475                          * We're adding a negative cache entry.
5476                          */
5477                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5478                         if (covers == dns_rdatatype_any) {
5479                                 /*
5480                                  * We're adding an negative cache entry
5481                                  * which covers all types (NXDOMAIN,
5482                                  * NODATA(QTYPE=ANY)).
5483                                  *
5484                                  * We make all other data stale so that the
5485                                  * only rdataset that can be found at this
5486                                  * node is the negative cache entry.
5487                                  */
5488                                 for (topheader = rbtnode->data;
5489                                      topheader != NULL;
5490                                      topheader = topheader->next) {
5491                                         set_ttl(rbtdb, topheader, 0);
5492                                         topheader->attributes |=
5493                                                 RDATASET_ATTR_STALE;
5494                                 }
5495                                 rbtnode->dirty = 1;
5496                                 goto find_header;
5497                         }
5498                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5499                 } else {
5500                         /*
5501                          * We're adding something that isn't a
5502                          * negative cache entry.  Look for an extant
5503                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5504                          * cache entry.
5505                          */
5506                         for (topheader = rbtnode->data;
5507                              topheader != NULL;
5508                              topheader = topheader->next) {
5509                                 if (topheader->type ==
5510                                     RBTDB_RDATATYPE_NCACHEANY)
5511                                         break;
5512                         }
5513                         if (topheader != NULL && EXISTS(topheader) &&
5514                             topheader->rdh_ttl > now) {
5515                                 /*
5516                                  * Found one.
5517                                  */
5518                                 if (trust < topheader->trust) {
5519                                         /*
5520                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5521                                          * is more trusted.
5522                                          */
5523                                         free_rdataset(rbtdb,
5524                                                       rbtdb->common.mctx,
5525                                                       newheader);
5526                                         if (addedrdataset != NULL)
5527                                                 bind_rdataset(rbtdb, rbtnode,
5528                                                               topheader, now,
5529                                                               addedrdataset);
5530                                         return (DNS_R_UNCHANGED);
5531                                 }
5532                                 /*
5533                                  * The new rdataset is better.  Expire the
5534                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5535                                  */
5536                                 set_ttl(rbtdb, topheader, 0);
5537                                 topheader->attributes |= RDATASET_ATTR_STALE;
5538                                 rbtnode->dirty = 1;
5539                                 topheader = NULL;
5540                                 goto find_header;
5541                         }
5542                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5543                 }
5544         }
5545
5546         for (topheader = rbtnode->data;
5547              topheader != NULL;
5548              topheader = topheader->next) {
5549                 if (topheader->type == newheader->type ||
5550                     topheader->type == negtype)
5551                         break;
5552                 topheader_prev = topheader;
5553         }
5554
5555  find_header:
5556         /*
5557          * If header isn't NULL, we've found the right type.  There may be
5558          * IGNORE rdatasets between the top of the chain and the first real
5559          * data.  We skip over them.
5560          */
5561         header = topheader;
5562         while (header != NULL && IGNORE(header))
5563                 header = header->down;
5564         if (header != NULL) {
5565                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5566
5567                 /*
5568                  * Deleting an already non-existent rdataset has no effect.
5569                  */
5570                 if (header_nx && newheader_nx) {
5571                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5572                         return (DNS_R_UNCHANGED);
5573                 }
5574
5575                 /*
5576                  * Trying to add an rdataset with lower trust to a cache DB
5577                  * has no effect, provided that the cache data isn't stale.
5578                  */
5579                 if (rbtversion == NULL && trust < header->trust &&
5580                     (header->rdh_ttl > now || header_nx)) {
5581                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5582                         if (addedrdataset != NULL)
5583                                 bind_rdataset(rbtdb, rbtnode, header, now,
5584                                               addedrdataset);
5585                         return (DNS_R_UNCHANGED);
5586                 }
5587
5588                 /*
5589                  * Don't merge if a nonexistent rdataset is involved.
5590                  */
5591                 if (merge && (header_nx || newheader_nx))
5592                         merge = ISC_FALSE;
5593
5594                 /*
5595                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5596                  * that is the union of 'newheader' and 'header'.
5597                  */
5598                 if (merge) {
5599                         unsigned int flags = 0;
5600                         INSIST(rbtversion->serial >= header->serial);
5601                         merged = NULL;
5602                         result = ISC_R_SUCCESS;
5603
5604                         if ((options & DNS_DBADD_EXACT) != 0)
5605                                 flags |= DNS_RDATASLAB_EXACT;
5606                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5607                              newheader->rdh_ttl != header->rdh_ttl)
5608                                         result = DNS_R_NOTEXACT;
5609                         else if (newheader->rdh_ttl != header->rdh_ttl)
5610                                 flags |= DNS_RDATASLAB_FORCE;
5611                         if (result == ISC_R_SUCCESS)
5612                                 result = dns_rdataslab_merge(
5613                                              (unsigned char *)header,
5614                                              (unsigned char *)newheader,
5615                                              (unsigned int)(sizeof(*newheader)),
5616                                              rbtdb->common.mctx,
5617                                              rbtdb->common.rdclass,
5618                                              (dns_rdatatype_t)header->type,
5619                                              flags, &merged);
5620                         if (result == ISC_R_SUCCESS) {
5621                                 /*
5622                                  * If 'header' has the same serial number as
5623                                  * we do, we could clean it up now if we knew
5624                                  * that our caller had no references to it.
5625                                  * We don't know this, however, so we leave it
5626                                  * alone.  It will get cleaned up when
5627                                  * clean_zone_node() runs.
5628                                  */
5629                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5630                                               newheader);
5631                                 newheader = (rdatasetheader_t *)merged;
5632                                 if (loading && RESIGN(newheader) &&
5633                                     RESIGN(header) &&
5634                                     header->resign < newheader->resign)
5635                                         newheader->resign = header->resign;
5636                         } else {
5637                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5638                                               newheader);
5639                                 return (result);
5640                         }
5641                 }
5642                 /*
5643                  * Don't replace existing NS, A and AAAA RRsets
5644                  * in the cache if they are already exist.  This
5645                  * prevents named being locked to old servers.
5646                  * Don't lower trust of existing record if the
5647                  * update is forced.
5648                  */
5649                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5650                     header->type == dns_rdatatype_ns &&
5651                     !header_nx && !newheader_nx &&
5652                     header->trust >= newheader->trust &&
5653                     dns_rdataslab_equalx((unsigned char *)header,
5654                                          (unsigned char *)newheader,
5655                                          (unsigned int)(sizeof(*newheader)),
5656                                          rbtdb->common.rdclass,
5657                                          (dns_rdatatype_t)header->type)) {
5658                         /*
5659                          * Honour the new ttl if it is less than the
5660                          * older one.
5661                          */
5662                         if (header->rdh_ttl > newheader->rdh_ttl)
5663                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5664                         if (header->noqname == NULL &&
5665                             newheader->noqname != NULL) {
5666                                 header->noqname = newheader->noqname;
5667                                 newheader->noqname = NULL;
5668                         }
5669                         if (header->closest == NULL &&
5670                             newheader->closest != NULL) {
5671                                 header->closest = newheader->closest;
5672                                 newheader->closest = NULL;
5673                         }
5674                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5675                         if (addedrdataset != NULL)
5676                                 bind_rdataset(rbtdb, rbtnode, header, now,
5677                                               addedrdataset);
5678                         return (ISC_R_SUCCESS);
5679                 }
5680                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5681                     (header->type == dns_rdatatype_a ||
5682                      header->type == dns_rdatatype_aaaa) &&
5683                     !header_nx && !newheader_nx &&
5684                     header->trust >= newheader->trust &&
5685                     dns_rdataslab_equal((unsigned char *)header,
5686                                         (unsigned char *)newheader,
5687                                         (unsigned int)(sizeof(*newheader)))) {
5688                         /*
5689                          * Honour the new ttl if it is less than the
5690                          * older one.
5691                          */
5692                         if (header->rdh_ttl > newheader->rdh_ttl)
5693                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5694                         if (header->noqname == NULL &&
5695                             newheader->noqname != NULL) {
5696                                 header->noqname = newheader->noqname;
5697                                 newheader->noqname = NULL;
5698                         }
5699                         if (header->closest == NULL &&
5700                             newheader->closest != NULL) {
5701                                 header->closest = newheader->closest;
5702                                 newheader->closest = NULL;
5703                         }
5704                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5705                         if (addedrdataset != NULL)
5706                                 bind_rdataset(rbtdb, rbtnode, header, now,
5707                                               addedrdataset);
5708                         return (ISC_R_SUCCESS);
5709                 }
5710                 INSIST(rbtversion == NULL ||
5711                        rbtversion->serial >= topheader->serial);
5712                 if (topheader_prev != NULL)
5713                         topheader_prev->next = newheader;
5714                 else
5715                         rbtnode->data = newheader;
5716                 newheader->next = topheader->next;
5717                 if (loading) {
5718                         /*
5719                          * There are no other references to 'header' when
5720                          * loading, so we MAY clean up 'header' now.
5721                          * Since we don't generate changed records when
5722                          * loading, we MUST clean up 'header' now.
5723                          */
5724                         newheader->down = NULL;
5725                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5726                 } else {
5727                         newheader->down = topheader;
5728                         topheader->next = newheader;
5729                         rbtnode->dirty = 1;
5730                         if (changed != NULL)
5731                                 changed->dirty = ISC_TRUE;
5732                         if (rbtversion == NULL) {
5733                                 set_ttl(rbtdb, header, 0);
5734                                 header->attributes |= RDATASET_ATTR_STALE;
5735                         }
5736                         idx = newheader->node->locknum;
5737                         if (IS_CACHE(rbtdb)) {
5738                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5739                                                  newheader, link);
5740                                 /*
5741                                  * XXXMLG We don't check the return value
5742                                  * here.  If it fails, we will not do TTL
5743                                  * based expiry on this node.  However, we
5744                                  * will do it on the LRU side, so memory
5745                                  * will not leak... for long.
5746                                  */
5747                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5748                         } else if (RESIGN(newheader))
5749                                 resign_insert(rbtdb, idx, newheader);
5750                 }
5751         } else {
5752                 /*
5753                  * No non-IGNORED rdatasets of the given type exist at
5754                  * this node.
5755                  */
5756
5757                 /*
5758                  * If we're trying to delete the type, don't bother.
5759                  */
5760                 if (newheader_nx) {
5761                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5762                         return (DNS_R_UNCHANGED);
5763                 }
5764
5765                 if (topheader != NULL) {
5766                         /*
5767                          * We have an list of rdatasets of the given type,
5768                          * but they're all marked IGNORE.  We simply insert
5769                          * the new rdataset at the head of the list.
5770                          *
5771                          * Ignored rdatasets cannot occur during loading, so
5772                          * we INSIST on it.
5773                          */
5774                         INSIST(!loading);
5775                         INSIST(rbtversion == NULL ||
5776                                rbtversion->serial >= topheader->serial);
5777                         if (topheader_prev != NULL)
5778                                 topheader_prev->next = newheader;
5779                         else
5780                                 rbtnode->data = newheader;
5781                         newheader->next = topheader->next;
5782                         newheader->down = topheader;
5783                         topheader->next = newheader;
5784                         rbtnode->dirty = 1;
5785                         if (changed != NULL)
5786                                 changed->dirty = ISC_TRUE;
5787                 } else {
5788                         /*
5789                          * No rdatasets of the given type exist at the node.
5790                          */
5791                         newheader->next = rbtnode->data;
5792                         newheader->down = NULL;
5793                         rbtnode->data = newheader;
5794                 }
5795                 idx = newheader->node->locknum;
5796                 if (IS_CACHE(rbtdb)) {
5797                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5798                                          newheader, link);
5799                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5800                 } else if (RESIGN(newheader)) {
5801                         resign_insert(rbtdb, idx, newheader);
5802                 }
5803         }
5804
5805         /*
5806          * Check if the node now contains CNAME and other data.
5807          */
5808         if (rbtversion != NULL &&
5809             cname_and_other_data(rbtnode, rbtversion->serial))
5810                 return (DNS_R_CNAMEANDOTHER);
5811
5812         if (addedrdataset != NULL)
5813                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
5814
5815         return (ISC_R_SUCCESS);
5816 }
5817
5818 static inline isc_boolean_t
5819 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
5820                 rbtdb_rdatatype_t type)
5821 {
5822         if (IS_CACHE(rbtdb)) {
5823                 if (type == dns_rdatatype_dname)
5824                         return (ISC_TRUE);
5825                 else
5826                         return (ISC_FALSE);
5827         } else if (type == dns_rdatatype_dname ||
5828                    (type == dns_rdatatype_ns &&
5829                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
5830                 return (ISC_TRUE);
5831         return (ISC_FALSE);
5832 }
5833
5834 static inline isc_result_t
5835 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5836            dns_rdataset_t *rdataset)
5837 {
5838         struct noqname *noqname;
5839         isc_mem_t *mctx = rbtdb->common.mctx;
5840         dns_name_t name;
5841         dns_rdataset_t neg, negsig;
5842         isc_result_t result;
5843         isc_region_t r;
5844
5845         dns_name_init(&name, NULL);
5846         dns_rdataset_init(&neg);
5847         dns_rdataset_init(&negsig);
5848
5849         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
5850         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5851
5852         noqname = isc_mem_get(mctx, sizeof(*noqname));
5853         if (noqname == NULL) {
5854                 result = ISC_R_NOMEMORY;
5855                 goto cleanup;
5856         }
5857         dns_name_init(&noqname->name, NULL);
5858         noqname->neg = NULL;
5859         noqname->negsig = NULL;
5860         noqname->type = neg.type;
5861         result = dns_name_dup(&name, mctx, &noqname->name);
5862         if (result != ISC_R_SUCCESS)
5863                 goto cleanup;
5864         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5865         if (result != ISC_R_SUCCESS)
5866                 goto cleanup;
5867         noqname->neg = r.base;
5868         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5869         if (result != ISC_R_SUCCESS)
5870                 goto cleanup;
5871         noqname->negsig = r.base;
5872         dns_rdataset_disassociate(&neg);
5873         dns_rdataset_disassociate(&negsig);
5874         newheader->noqname = noqname;
5875         return (ISC_R_SUCCESS);
5876
5877 cleanup:
5878         dns_rdataset_disassociate(&neg);
5879         dns_rdataset_disassociate(&negsig);
5880         free_noqname(mctx, &noqname);
5881         return(result);
5882 }
5883
5884 static inline isc_result_t
5885 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
5886            dns_rdataset_t *rdataset)
5887 {
5888         struct noqname *closest;
5889         isc_mem_t *mctx = rbtdb->common.mctx;
5890         dns_name_t name;
5891         dns_rdataset_t neg, negsig;
5892         isc_result_t result;
5893         isc_region_t r;
5894
5895         dns_name_init(&name, NULL);
5896         dns_rdataset_init(&neg);
5897         dns_rdataset_init(&negsig);
5898
5899         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
5900         RUNTIME_CHECK(result == ISC_R_SUCCESS);
5901
5902         closest = isc_mem_get(mctx, sizeof(*closest));
5903         if (closest == NULL) {
5904                 result = ISC_R_NOMEMORY;
5905                 goto cleanup;
5906         }
5907         dns_name_init(&closest->name, NULL);
5908         closest->neg = NULL;
5909         closest->negsig = NULL;
5910         closest->type = neg.type;
5911         result = dns_name_dup(&name, mctx, &closest->name);
5912         if (result != ISC_R_SUCCESS)
5913                 goto cleanup;
5914         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
5915         if (result != ISC_R_SUCCESS)
5916                 goto cleanup;
5917         closest->neg = r.base;
5918         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
5919         if (result != ISC_R_SUCCESS)
5920                 goto cleanup;
5921         closest->negsig = r.base;
5922         dns_rdataset_disassociate(&neg);
5923         dns_rdataset_disassociate(&negsig);
5924         newheader->closest = closest;
5925         return (ISC_R_SUCCESS);
5926
5927  cleanup:
5928         dns_rdataset_disassociate(&neg);
5929         dns_rdataset_disassociate(&negsig);
5930         free_noqname(mctx, &closest);
5931         return(result);
5932 }
5933
/*
 * Method table for zone databases, declared here without an initializer
 * (presumably it is filled in further down the file -- confirm).
 * addrdataset() and subtractrdataset() compare rbtdb->common.methods
 * against its address to decide whether their NSEC3 node/type
 * consistency REQUIRE applies.
 */
static dns_dbmethods_t zone_methods;
5935
5936 static isc_result_t
5937 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5938             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
5939             dns_rdataset_t *addedrdataset)
5940 {
5941         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5942         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5943         rbtdb_version_t *rbtversion = version;
5944         isc_region_t region;
5945         rdatasetheader_t *newheader;
5946         rdatasetheader_t *header;
5947         isc_result_t result;
5948         isc_boolean_t delegating;
5949         isc_boolean_t tree_locked = ISC_FALSE;
5950         isc_boolean_t cache_is_overmem = ISC_FALSE;
5951
5952         REQUIRE(VALID_RBTDB(rbtdb));
5953
5954         if (rbtdb->common.methods == &zone_methods)
5955                 REQUIRE(((rbtnode->nsec3 &&
5956                           (rdataset->type == dns_rdatatype_nsec3 ||
5957                            rdataset->covers == dns_rdatatype_nsec3)) ||
5958                          (!rbtnode->nsec3 &&
5959                            rdataset->type != dns_rdatatype_nsec3 &&
5960                            rdataset->covers != dns_rdatatype_nsec3)));
5961
5962         if (rbtversion == NULL) {
5963                 if (now == 0)
5964                         isc_stdtime_get(&now);
5965         } else
5966                 now = 0;
5967
5968         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
5969                                             &region,
5970                                             sizeof(rdatasetheader_t));
5971         if (result != ISC_R_SUCCESS)
5972                 return (result);
5973
5974         newheader = (rdatasetheader_t *)region.base;
5975         init_rdataset(rbtdb, newheader);
5976         set_ttl(rbtdb, newheader, rdataset->ttl + now);
5977         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
5978                                                 rdataset->covers);
5979         newheader->attributes = 0;
5980         newheader->noqname = NULL;
5981         newheader->closest = NULL;
5982         newheader->count = init_count++;
5983         newheader->trust = rdataset->trust;
5984         newheader->additional_auth = NULL;
5985         newheader->additional_glue = NULL;
5986         newheader->last_used = now;
5987         newheader->node = rbtnode;
5988         if (rbtversion != NULL) {
5989                 newheader->serial = rbtversion->serial;
5990                 now = 0;
5991
5992                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
5993                         newheader->attributes |= RDATASET_ATTR_RESIGN;
5994                         newheader->resign = rdataset->resign;
5995                 } else
5996                         newheader->resign = 0;
5997         } else {
5998                 newheader->serial = 1;
5999                 newheader->resign = 0;
6000                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6001                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6002                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6003                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6004                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6005                         result = addnoqname(rbtdb, newheader, rdataset);
6006                         if (result != ISC_R_SUCCESS) {
6007                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6008                                               newheader);
6009                                 return (result);
6010                         }
6011                 }
6012                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6013                         result = addclosest(rbtdb, newheader, rdataset);
6014                         if (result != ISC_R_SUCCESS) {
6015                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6016                                               newheader);
6017                                 return (result);
6018                         }
6019                 }
6020         }
6021
6022         /*
6023          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6024          * just DNAME for the cache), then we need to set the callback bit
6025          * on the node.
6026          */
6027         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6028                 delegating = ISC_TRUE;
6029         else
6030                 delegating = ISC_FALSE;
6031
6032         /*
6033          * If we're adding a delegation type or the DB is a cache in an overmem
6034          * state, hold an exclusive lock on the tree.  In the latter case
6035          * the lock does not necessarily have to be acquired but it will help
6036          * purge stale entries more effectively.
6037          */
6038         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
6039                 cache_is_overmem = ISC_TRUE;
6040         if (delegating || cache_is_overmem) {
6041                 tree_locked = ISC_TRUE;
6042                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6043         }
6044
6045         if (cache_is_overmem)
6046                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6047
6048         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6049                   isc_rwlocktype_write);
6050
6051         if (rbtdb->rrsetstats != NULL) {
6052                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6053                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6054         }
6055
6056         if (IS_CACHE(rbtdb)) {
6057                 if (tree_locked)
6058                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6059
6060                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6061                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6062                         expire_header(rbtdb, header, tree_locked);
6063
6064                 /*
6065                  * If we've been holding a write lock on the tree just for
6066                  * cleaning, we can release it now.  However, we still need the
6067                  * node lock.
6068                  */
6069                 if (tree_locked && !delegating) {
6070                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6071                         tree_locked = ISC_FALSE;
6072                 }
6073         }
6074
6075         result = add(rbtdb, rbtnode, rbtversion, newheader, options, ISC_FALSE,
6076                      addedrdataset, now);
6077         if (result == ISC_R_SUCCESS && delegating)
6078                 rbtnode->find_callback = 1;
6079
6080         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6081                     isc_rwlocktype_write);
6082
6083         if (tree_locked)
6084                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6085
6086         /*
6087          * Update the zone's secure status.  If version is non-NULL
6088          * this is deferred until closeversion() is called.
6089          */
6090         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6091                 iszonesecure(db, version, rbtdb->origin_node);
6092
6093         return (result);
6094 }
6095
6096 static isc_result_t
6097 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6098                  dns_rdataset_t *rdataset, unsigned int options,
6099                  dns_rdataset_t *newrdataset)
6100 {
6101         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6102         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6103         rbtdb_version_t *rbtversion = version;
6104         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6105         unsigned char *subresult;
6106         isc_region_t region;
6107         isc_result_t result;
6108         rbtdb_changed_t *changed;
6109
6110         REQUIRE(VALID_RBTDB(rbtdb));
6111
6112         if (rbtdb->common.methods == &zone_methods)
6113                 REQUIRE(((rbtnode->nsec3 &&
6114                           (rdataset->type == dns_rdatatype_nsec3 ||
6115                            rdataset->covers == dns_rdatatype_nsec3)) ||
6116                          (!rbtnode->nsec3 &&
6117                            rdataset->type != dns_rdatatype_nsec3 &&
6118                            rdataset->covers != dns_rdatatype_nsec3)));
6119
6120         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6121                                             &region,
6122                                             sizeof(rdatasetheader_t));
6123         if (result != ISC_R_SUCCESS)
6124                 return (result);
6125         newheader = (rdatasetheader_t *)region.base;
6126         init_rdataset(rbtdb, newheader);
6127         set_ttl(rbtdb, newheader, rdataset->ttl);
6128         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6129                                                 rdataset->covers);
6130         newheader->attributes = 0;
6131         newheader->serial = rbtversion->serial;
6132         newheader->trust = 0;
6133         newheader->noqname = NULL;
6134         newheader->closest = NULL;
6135         newheader->count = init_count++;
6136         newheader->additional_auth = NULL;
6137         newheader->additional_glue = NULL;
6138         newheader->last_used = 0;
6139         newheader->node = rbtnode;
6140         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6141                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6142                 newheader->resign = rdataset->resign;
6143         } else
6144                 newheader->resign = 0;
6145
6146         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6147                   isc_rwlocktype_write);
6148
6149         changed = add_changed(rbtdb, rbtversion, rbtnode);
6150         if (changed == NULL) {
6151                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6152                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6153                             isc_rwlocktype_write);
6154                 return (ISC_R_NOMEMORY);
6155         }
6156
6157         topheader_prev = NULL;
6158         for (topheader = rbtnode->data;
6159              topheader != NULL;
6160              topheader = topheader->next) {
6161                 if (topheader->type == newheader->type)
6162                         break;
6163                 topheader_prev = topheader;
6164         }
6165         /*
6166          * If header isn't NULL, we've found the right type.  There may be
6167          * IGNORE rdatasets between the top of the chain and the first real
6168          * data.  We skip over them.
6169          */
6170         header = topheader;
6171         while (header != NULL && IGNORE(header))
6172                 header = header->down;
6173         if (header != NULL && EXISTS(header)) {
6174                 unsigned int flags = 0;
6175                 subresult = NULL;
6176                 result = ISC_R_SUCCESS;
6177                 if ((options & DNS_DBSUB_EXACT) != 0) {
6178                         flags |= DNS_RDATASLAB_EXACT;
6179                         if (newheader->rdh_ttl != header->rdh_ttl)
6180                                 result = DNS_R_NOTEXACT;
6181                 }
6182                 if (result == ISC_R_SUCCESS)
6183                         result = dns_rdataslab_subtract(
6184                                         (unsigned char *)header,
6185                                         (unsigned char *)newheader,
6186                                         (unsigned int)(sizeof(*newheader)),
6187                                         rbtdb->common.mctx,
6188                                         rbtdb->common.rdclass,
6189                                         (dns_rdatatype_t)header->type,
6190                                         flags, &subresult);
6191                 if (result == ISC_R_SUCCESS) {
6192                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6193                         newheader = (rdatasetheader_t *)subresult;
6194                         init_rdataset(rbtdb, newheader);
6195                         /*
6196                          * We have to set the serial since the rdataslab
6197                          * subtraction routine copies the reserved portion of
6198                          * header, not newheader.
6199                          */
6200                         newheader->serial = rbtversion->serial;
6201                         /*
6202                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6203                          * to additional info.  We need to clear these fields
6204                          * to avoid having duplicated references.
6205                          */
6206                         newheader->additional_auth = NULL;
6207                         newheader->additional_glue = NULL;
6208                 } else if (result == DNS_R_NXRRSET) {
6209                         /*
6210                          * This subtraction would remove all of the rdata;
6211                          * add a nonexistent header instead.
6212                          */
6213                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6214                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6215                         if (newheader == NULL) {
6216                                 result = ISC_R_NOMEMORY;
6217                                 goto unlock;
6218                         }
6219                         set_ttl(rbtdb, newheader, 0);
6220                         newheader->type = topheader->type;
6221                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6222                         newheader->trust = 0;
6223                         newheader->serial = rbtversion->serial;
6224                         newheader->noqname = NULL;
6225                         newheader->closest = NULL;
6226                         newheader->count = 0;
6227                         newheader->additional_auth = NULL;
6228                         newheader->additional_glue = NULL;
6229                         newheader->node = rbtnode;
6230                         newheader->resign = 0;
6231                         newheader->last_used = 0;
6232                 } else {
6233                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6234                         goto unlock;
6235                 }
6236
6237                 /*
6238                  * If we're here, we want to link newheader in front of
6239                  * topheader.
6240                  */
6241                 INSIST(rbtversion->serial >= topheader->serial);
6242                 if (topheader_prev != NULL)
6243                         topheader_prev->next = newheader;
6244                 else
6245                         rbtnode->data = newheader;
6246                 newheader->next = topheader->next;
6247                 newheader->down = topheader;
6248                 topheader->next = newheader;
6249                 rbtnode->dirty = 1;
6250                 changed->dirty = ISC_TRUE;
6251         } else {
6252                 /*
6253                  * The rdataset doesn't exist, so we don't need to do anything
6254                  * to satisfy the deletion request.
6255                  */
6256                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6257                 if ((options & DNS_DBSUB_EXACT) != 0)
6258                         result = DNS_R_NOTEXACT;
6259                 else
6260                         result = DNS_R_UNCHANGED;
6261         }
6262
6263         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6264                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6265
6266  unlock:
6267         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6268                     isc_rwlocktype_write);
6269
6270         /*
6271          * Update the zone's secure status.  If version is non-NULL
6272          * this is deferred until closeversion() is called.
6273          */
6274         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6275                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6276
6277         return (result);
6278 }
6279
6280 static isc_result_t
6281 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6282                dns_rdatatype_t type, dns_rdatatype_t covers)
6283 {
6284         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6285         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6286         rbtdb_version_t *rbtversion = version;
6287         isc_result_t result;
6288         rdatasetheader_t *newheader;
6289
6290         REQUIRE(VALID_RBTDB(rbtdb));
6291
6292         if (type == dns_rdatatype_any)
6293                 return (ISC_R_NOTIMPLEMENTED);
6294         if (type == dns_rdatatype_rrsig && covers == 0)
6295                 return (ISC_R_NOTIMPLEMENTED);
6296
6297         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6298         if (newheader == NULL)
6299                 return (ISC_R_NOMEMORY);
6300         set_ttl(rbtdb, newheader, 0);
6301         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6302         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6303         newheader->trust = 0;
6304         newheader->noqname = NULL;
6305         newheader->closest = NULL;
6306         newheader->additional_auth = NULL;
6307         newheader->additional_glue = NULL;
6308         if (rbtversion != NULL)
6309                 newheader->serial = rbtversion->serial;
6310         else
6311                 newheader->serial = 0;
6312         newheader->count = 0;
6313         newheader->last_used = 0;
6314         newheader->node = rbtnode;
6315
6316         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6317                   isc_rwlocktype_write);
6318
6319         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6320                      ISC_FALSE, NULL, 0);
6321
6322         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6323                     isc_rwlocktype_write);
6324
6325         /*
6326          * Update the zone's secure status.  If version is non-NULL
6327          * this is deferred until closeversion() is called.
6328          */
6329         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6330                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6331
6332         return (result);
6333 }
6334
6335 static isc_result_t
6336 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6337         rbtdb_load_t *loadctx = arg;
6338         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6339         dns_rbtnode_t *node;
6340         isc_result_t result;
6341         isc_region_t region;
6342         rdatasetheader_t *newheader;
6343
6344         /*
6345          * This routine does no node locking.  See comments in
6346          * 'load' below for more information on loading and
6347          * locking.
6348          */
6349
6350
6351         /*
6352          * SOA records are only allowed at top of zone.
6353          */
6354         if (rdataset->type == dns_rdatatype_soa &&
6355             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6356                 return (DNS_R_NOTZONETOP);
6357
6358         if (rdataset->type != dns_rdatatype_nsec3 &&
6359             rdataset->covers != dns_rdatatype_nsec3)
6360                 add_empty_wildcards(rbtdb, name);
6361
6362         if (dns_name_iswildcard(name)) {
6363                 /*
6364                  * NS record owners cannot legally be wild cards.
6365                  */
6366                 if (rdataset->type == dns_rdatatype_ns)
6367                         return (DNS_R_INVALIDNS);
6368                 /*
6369                  * NSEC3 record owners cannot legally be wild cards.
6370                  */
6371                 if (rdataset->type == dns_rdatatype_nsec3)
6372                         return (DNS_R_INVALIDNSEC3);
6373                 result = add_wildcard_magic(rbtdb, name);
6374                 if (result != ISC_R_SUCCESS)
6375                         return (result);
6376         }
6377
6378         node = NULL;
6379         if (rdataset->type == dns_rdatatype_nsec3 ||
6380             rdataset->covers == dns_rdatatype_nsec3) {
6381                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6382                 if (result == ISC_R_SUCCESS)
6383                         node->nsec3 = 1;
6384         } else {
6385                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
6386                 if (result == ISC_R_SUCCESS)
6387                         node->nsec3 = 0;
6388         }
6389         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6390                 return (result);
6391         if (result != ISC_R_EXISTS) {
6392                 dns_name_t foundname;
6393                 dns_name_init(&foundname, NULL);
6394                 dns_rbt_namefromnode(node, &foundname);
6395 #ifdef DNS_RBT_USEHASH
6396                 node->locknum = node->hashval % rbtdb->node_lock_count;
6397 #else
6398                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6399                         rbtdb->node_lock_count;
6400 #endif
6401         }
6402
6403         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6404                                             &region,
6405                                             sizeof(rdatasetheader_t));
6406         if (result != ISC_R_SUCCESS)
6407                 return (result);
6408         newheader = (rdatasetheader_t *)region.base;
6409         init_rdataset(rbtdb, newheader);
6410         set_ttl(rbtdb, newheader,
6411                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6412         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6413                                                 rdataset->covers);
6414         newheader->attributes = 0;
6415         newheader->trust = rdataset->trust;
6416         newheader->serial = 1;
6417         newheader->noqname = NULL;
6418         newheader->closest = NULL;
6419         newheader->count = init_count++;
6420         newheader->additional_auth = NULL;
6421         newheader->additional_glue = NULL;
6422         newheader->last_used = 0;
6423         newheader->node = node;
6424         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6425                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6426                 newheader->resign = rdataset->resign;
6427         } else
6428                 newheader->resign = 0;
6429
6430         result = add(rbtdb, node, rbtdb->current_version, newheader,
6431                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6432         if (result == ISC_R_SUCCESS &&
6433             delegating_type(rbtdb, node, rdataset->type))
6434                 node->find_callback = 1;
6435         else if (result == DNS_R_UNCHANGED)
6436                 result = ISC_R_SUCCESS;
6437
6438         return (result);
6439 }
6440
6441 static isc_result_t
6442 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6443         rbtdb_load_t *loadctx;
6444         dns_rbtdb_t *rbtdb;
6445
6446         rbtdb = (dns_rbtdb_t *)db;
6447
6448         REQUIRE(VALID_RBTDB(rbtdb));
6449
6450         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6451         if (loadctx == NULL)
6452                 return (ISC_R_NOMEMORY);
6453
6454         loadctx->rbtdb = rbtdb;
6455         if (IS_CACHE(rbtdb))
6456                 isc_stdtime_get(&loadctx->now);
6457         else
6458                 loadctx->now = 0;
6459
6460         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6461
6462         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6463                 == 0);
6464         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6465
6466         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6467
6468         *addp = loading_addrdataset;
6469         *dbloadp = loadctx;
6470
6471         return (ISC_R_SUCCESS);
6472 }
6473
6474 static isc_result_t
6475 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6476         rbtdb_load_t *loadctx;
6477         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6478
6479         REQUIRE(VALID_RBTDB(rbtdb));
6480         REQUIRE(dbloadp != NULL);
6481         loadctx = *dbloadp;
6482         REQUIRE(loadctx->rbtdb == rbtdb);
6483
6484         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6485
6486         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6487         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6488
6489         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6490         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6491
6492         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6493
6494         /*
6495          * If there's a KEY rdataset at the zone origin containing a
6496          * zone key, we consider the zone secure.
6497          */
6498         if (! IS_CACHE(rbtdb))
6499                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6500
6501         *dbloadp = NULL;
6502
6503         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6504
6505         return (ISC_R_SUCCESS);
6506 }
6507
6508 static isc_result_t
6509 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6510      dns_masterformat_t masterformat) {
6511         dns_rbtdb_t *rbtdb;
6512
6513         rbtdb = (dns_rbtdb_t *)db;
6514
6515         REQUIRE(VALID_RBTDB(rbtdb));
6516
6517         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6518                                  &dns_master_style_default,
6519                                  filename, masterformat));
6520 }
6521
6522 static void
6523 delete_callback(void *data, void *arg) {
6524         dns_rbtdb_t *rbtdb = arg;
6525         rdatasetheader_t *current, *next;
6526         unsigned int locknum;
6527
6528         current = data;
6529         locknum = current->node->locknum;
6530         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6531         while (current != NULL) {
6532                 next = current->next;
6533                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6534                 current = next;
6535         }
6536         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6537 }
6538
6539 static isc_boolean_t
6540 issecure(dns_db_t *db) {
6541         dns_rbtdb_t *rbtdb;
6542         isc_boolean_t secure;
6543
6544         rbtdb = (dns_rbtdb_t *)db;
6545
6546         REQUIRE(VALID_RBTDB(rbtdb));
6547
6548         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6549         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6550         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6551
6552         return (secure);
6553 }
6554
6555 static isc_boolean_t
6556 isdnssec(dns_db_t *db) {
6557         dns_rbtdb_t *rbtdb;
6558         isc_boolean_t dnssec;
6559
6560         rbtdb = (dns_rbtdb_t *)db;
6561
6562         REQUIRE(VALID_RBTDB(rbtdb));
6563
6564         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6565         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6566         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6567
6568         return (dnssec);
6569 }
6570
6571 static unsigned int
6572 nodecount(dns_db_t *db) {
6573         dns_rbtdb_t *rbtdb;
6574         unsigned int count;
6575
6576         rbtdb = (dns_rbtdb_t *)db;
6577
6578         REQUIRE(VALID_RBTDB(rbtdb));
6579
6580         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6581         count = dns_rbt_nodecount(rbtdb->tree);
6582         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6583
6584         return (count);
6585 }
6586
6587 static void
6588 settask(dns_db_t *db, isc_task_t *task) {
6589         dns_rbtdb_t *rbtdb;
6590
6591         rbtdb = (dns_rbtdb_t *)db;
6592
6593         REQUIRE(VALID_RBTDB(rbtdb));
6594
6595         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6596         if (rbtdb->task != NULL)
6597                 isc_task_detach(&rbtdb->task);
6598         if (task != NULL)
6599                 isc_task_attach(task, &rbtdb->task);
6600         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6601 }
6602
6603 static isc_boolean_t
6604 ispersistent(dns_db_t *db) {
6605         UNUSED(db);
6606         return (ISC_FALSE);
6607 }
6608
6609 static isc_result_t
6610 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6611         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6612         dns_rbtnode_t *onode;
6613         isc_result_t result = ISC_R_SUCCESS;
6614
6615         REQUIRE(VALID_RBTDB(rbtdb));
6616         REQUIRE(nodep != NULL && *nodep == NULL);
6617
6618         /* Note that the access to origin_node doesn't require a DB lock */
6619         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6620         if (onode != NULL) {
6621                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6622                 new_reference(rbtdb, onode);
6623                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6624
6625                 *nodep = rbtdb->origin_node;
6626         } else {
6627                 INSIST(IS_CACHE(rbtdb));
6628                 result = ISC_R_NOTFOUND;
6629         }
6630
6631         return (result);
6632 }
6633
6634 static isc_result_t
6635 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6636                    isc_uint8_t *flags, isc_uint16_t *iterations,
6637                    unsigned char *salt, size_t *salt_length)
6638 {
6639         dns_rbtdb_t *rbtdb;
6640         isc_result_t result = ISC_R_NOTFOUND;
6641         rbtdb_version_t *rbtversion = version;
6642
6643         rbtdb = (dns_rbtdb_t *)db;
6644
6645         REQUIRE(VALID_RBTDB(rbtdb));
6646
6647         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6648
6649         if (rbtversion == NULL)
6650                 rbtversion = rbtdb->current_version;
6651
6652         if (rbtversion->havensec3) {
6653                 if (hash != NULL)
6654                         *hash = rbtversion->hash;
6655                 if (salt != NULL && salt_length != NULL) {
6656                         REQUIRE(*salt_length >= rbtversion->salt_length);
6657                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6658                 }
6659                 if (salt_length != NULL)
6660                         *salt_length = rbtversion->salt_length;
6661                 if (iterations != NULL)
6662                         *iterations = rbtversion->iterations;
6663                 if (flags != NULL)
6664                         *flags = rbtversion->flags;
6665                 result = ISC_R_SUCCESS;
6666         }
6667         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6668
6669         return (result);
6670 }
6671
6672 static isc_result_t
6673 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6674         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6675         isc_stdtime_t oldresign;
6676         isc_result_t result = ISC_R_SUCCESS;
6677         rdatasetheader_t *header;
6678
6679         REQUIRE(VALID_RBTDB(rbtdb));
6680         REQUIRE(!IS_CACHE(rbtdb));
6681         REQUIRE(rdataset != NULL);
6682
6683         header = rdataset->private3;
6684         header--;
6685
6686         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6687                   isc_rwlocktype_write);
6688
6689         oldresign = header->resign;
6690         header->resign = resign;
6691         if (header->heap_index != 0) {
6692                 INSIST(RESIGN(header));
6693                 if (resign == 0) {
6694                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6695                                         header->heap_index);
6696                         header->heap_index = 0;
6697                 } else if (resign < oldresign)
6698                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6699                                            header->heap_index);
6700                 else
6701                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6702                                            header->heap_index);
6703         } else if (resign && header->heap_index == 0) {
6704                 header->attributes |= RDATASET_ATTR_RESIGN;
6705                 result = resign_insert(rbtdb, header->node->locknum, header);
6706         }
6707         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6708                     isc_rwlocktype_write);
6709         return (result);
6710 }
6711
6712 static isc_result_t
6713 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
6714                dns_name_t *foundname)
6715 {
6716         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6717         rdatasetheader_t *header = NULL, *this;
6718         unsigned int i;
6719         isc_result_t result = ISC_R_NOTFOUND;
6720         unsigned int locknum;
6721
6722         REQUIRE(VALID_RBTDB(rbtdb));
6723
6724         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
6725
6726         for (i = 0; i < rbtdb->node_lock_count; i++) {
6727                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
6728                 this = isc_heap_element(rbtdb->heaps[i], 1);
6729                 if (this == NULL) {
6730                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6731                                     isc_rwlocktype_read);
6732                         continue;
6733                 }
6734                 if (header == NULL)
6735                         header = this;
6736                 else if (isc_serial_lt(this->resign, header->resign)) {
6737                         locknum = header->node->locknum;
6738                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
6739                                     isc_rwlocktype_read);
6740                         header = this;
6741                 } else
6742                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
6743                                     isc_rwlocktype_read);
6744         }
6745
6746         if (header == NULL)
6747                 goto unlock;
6748
6749         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
6750
6751         if (foundname != NULL)
6752                 dns_rbt_fullnamefromnode(header->node, foundname);
6753
6754         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6755                     isc_rwlocktype_read);
6756
6757         result = ISC_R_SUCCESS;
6758
6759  unlock:
6760         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
6761
6762         return (result);
6763 }
6764
6765 static void
6766 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
6767 {
6768         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
6769         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6770         dns_rbtnode_t *node;
6771         rdatasetheader_t *header;
6772
6773         REQUIRE(VALID_RBTDB(rbtdb));
6774         REQUIRE(rdataset != NULL);
6775         REQUIRE(rbtdb->future_version == rbtversion);
6776         REQUIRE(rbtversion->writer);
6777
6778         node = rdataset->private2;
6779         header = rdataset->private3;
6780         header--;
6781
6782         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6783         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
6784                   isc_rwlocktype_write);
6785         /*
6786          * Delete from heap and save to re-signed list so that it can
6787          * be restored if we backout of this change.
6788          */
6789         new_reference(rbtdb, node);
6790         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
6791         header->heap_index = 0;
6792         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
6793
6794         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
6795                     isc_rwlocktype_write);
6796         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6797 }
6798
6799 static dns_stats_t *
6800 getrrsetstats(dns_db_t *db) {
6801         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6802
6803         REQUIRE(VALID_RBTDB(rbtdb));
6804         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
6805
6806         return (rbtdb->rrsetstats);
6807 }
6808
6809 static dns_dbmethods_t zone_methods = {
6810         attach,
6811         detach,
6812         beginload,
6813         endload,
6814         dump,
6815         currentversion,
6816         newversion,
6817         attachversion,
6818         closeversion,
6819         findnode,
6820         zone_find,
6821         zone_findzonecut,
6822         attachnode,
6823         detachnode,
6824         expirenode,
6825         printnode,
6826         createiterator,
6827         zone_findrdataset,
6828         allrdatasets,
6829         addrdataset,
6830         subtractrdataset,
6831         deleterdataset,
6832         issecure,
6833         nodecount,
6834         ispersistent,
6835         overmem,
6836         settask,
6837         getoriginnode,
6838         NULL,
6839         getnsec3parameters,
6840         findnsec3node,
6841         setsigningtime,
6842         getsigningtime,
6843         resigned,
6844         isdnssec,
6845         NULL
6846 };
6847
6848 static dns_dbmethods_t cache_methods = {
6849         attach,
6850         detach,
6851         beginload,
6852         endload,
6853         dump,
6854         currentversion,
6855         newversion,
6856         attachversion,
6857         closeversion,
6858         findnode,
6859         cache_find,
6860         cache_findzonecut,
6861         attachnode,
6862         detachnode,
6863         expirenode,
6864         printnode,
6865         createiterator,
6866         cache_findrdataset,
6867         allrdatasets,
6868         addrdataset,
6869         subtractrdataset,
6870         deleterdataset,
6871         issecure,
6872         nodecount,
6873         ispersistent,
6874         overmem,
6875         settask,
6876         getoriginnode,
6877         NULL,
6878         NULL,
6879         NULL,
6880         NULL,
6881         NULL,
6882         NULL,
6883         isdnssec,
6884         getrrsetstats
6885 };
6886
6887 isc_result_t
6888 #ifdef DNS_RBTDB_VERSION64
6889 dns_rbtdb64_create
6890 #else
6891 dns_rbtdb_create
6892 #endif
6893                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
6894                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
6895                  void *driverarg, dns_db_t **dbp)
6896 {
6897         dns_rbtdb_t *rbtdb;
6898         isc_result_t result;
6899         int i;
6900         dns_name_t name;
6901         isc_boolean_t (*sooner)(void *, void *);
6902
6903         /* Keep the compiler happy. */
6904         UNUSED(argc);
6905         UNUSED(argv);
6906         UNUSED(driverarg);
6907
6908         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
6909         if (rbtdb == NULL)
6910                 return (ISC_R_NOMEMORY);
6911
6912         memset(rbtdb, '\0', sizeof(*rbtdb));
6913         dns_name_init(&rbtdb->common.origin, NULL);
6914         rbtdb->common.attributes = 0;
6915         if (type == dns_dbtype_cache) {
6916                 rbtdb->common.methods = &cache_methods;
6917                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
6918         } else if (type == dns_dbtype_stub) {
6919                 rbtdb->common.methods = &zone_methods;
6920                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
6921         } else
6922                 rbtdb->common.methods = &zone_methods;
6923         rbtdb->common.rdclass = rdclass;
6924         rbtdb->common.mctx = NULL;
6925
6926         result = RBTDB_INITLOCK(&rbtdb->lock);
6927         if (result != ISC_R_SUCCESS)
6928                 goto cleanup_rbtdb;
6929
6930         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
6931         if (result != ISC_R_SUCCESS)
6932                 goto cleanup_lock;
6933
6934         /*
6935          * Initialize node_lock_count in a generic way to support future
6936          * extension which allows the user to specify this value on creation.
6937          * Note that when specified for a cache DB it must be larger than 1
6938          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
6939          */
6940         if (rbtdb->node_lock_count == 0) {
6941                 if (IS_CACHE(rbtdb))
6942                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
6943                 else
6944                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
6945         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
6946                 result = ISC_R_RANGE;
6947                 goto cleanup_tree_lock;
6948         }
6949         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
6950         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
6951                                         sizeof(rbtdb_nodelock_t));
6952         if (rbtdb->node_locks == NULL) {
6953                 result = ISC_R_NOMEMORY;
6954                 goto cleanup_tree_lock;
6955         }
6956
6957         rbtdb->rrsetstats = NULL;
6958         if (IS_CACHE(rbtdb)) {
6959                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
6960                 if (result != ISC_R_SUCCESS)
6961                         goto cleanup_node_locks;
6962                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
6963                                                sizeof(rdatasetheaderlist_t));
6964                 if (rbtdb->rdatasets == NULL) {
6965                         result = ISC_R_NOMEMORY;
6966                         goto cleanup_rrsetstats;
6967                 }
6968                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6969                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
6970         } else
6971                 rbtdb->rdatasets = NULL;
6972
6973         /*
6974          * Create the heaps.
6975          */
6976         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
6977                                    sizeof(isc_heap_t *));
6978         if (rbtdb->heaps == NULL) {
6979                 result = ISC_R_NOMEMORY;
6980                 goto cleanup_rdatasets;
6981         }
6982         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
6983                 rbtdb->heaps[i] = NULL;
6984         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
6985         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
6986                 result = isc_heap_create(mctx, sooner, set_index, 0,
6987                                          &rbtdb->heaps[i]);
6988                 if (result != ISC_R_SUCCESS)
6989                         goto cleanup_heaps;
6990         }
6991
6992         /*
6993          * Create deadnode lists.
6994          */
6995         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
6996                                        sizeof(rbtnodelist_t));
6997         if (rbtdb->deadnodes == NULL) {
6998                 result = ISC_R_NOMEMORY;
6999                 goto cleanup_heaps;
7000         }
7001         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7002                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7003
7004         rbtdb->active = rbtdb->node_lock_count;
7005
7006         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7007                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7008                 if (result == ISC_R_SUCCESS) {
7009                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7010                         if (result != ISC_R_SUCCESS)
7011                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7012                 }
7013                 if (result != ISC_R_SUCCESS) {
7014                         while (i-- > 0) {
7015                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7016                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7017                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7018                         }
7019                         goto cleanup_deadnodes;
7020                 }
7021                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7022         }
7023
7024         /*
7025          * Attach to the mctx.  The database will persist so long as there
7026          * are references to it, and attaching to the mctx ensures that our
7027          * mctx won't disappear out from under us.
7028          */
7029         isc_mem_attach(mctx, &rbtdb->common.mctx);
7030
7031         /*
7032          * Must be initialized before free_rbtdb() is called.
7033          */
7034         isc_ondestroy_init(&rbtdb->common.ondest);
7035
7036         /*
7037          * Make a copy of the origin name.
7038          */
7039         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7040         if (result != ISC_R_SUCCESS) {
7041                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7042                 return (result);
7043         }
7044
7045         /*
7046          * Make the Red-Black Trees.
7047          */
7048         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7049         if (result != ISC_R_SUCCESS) {
7050                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7051                 return (result);
7052         }
7053
7054         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7055         if (result != ISC_R_SUCCESS) {
7056                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7057                 return (result);
7058         }
7059
7060         /*
7061          * In order to set the node callback bit correctly in zone databases,
7062          * we need to know if the node has the origin name of the zone.
7063          * In loading_addrdataset() we could simply compare the new name
7064          * to the origin name, but this is expensive.  Also, we don't know the
7065          * node name in addrdataset(), so we need another way of knowing the
7066          * zone's top.
7067          *
7068          * We now explicitly create a node for the zone's origin, and then
7069          * we simply remember the node's address.  This is safe, because
7070          * the top-of-zone node can never be deleted, nor can its address
7071          * change.
7072          */
7073         if (!IS_CACHE(rbtdb)) {
7074                 rbtdb->origin_node = NULL;
7075                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7076                                          &rbtdb->origin_node);
7077                 if (result != ISC_R_SUCCESS) {
7078                         INSIST(result != ISC_R_EXISTS);
7079                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7080                         return (result);
7081                 }
7082                 rbtdb->origin_node->nsec3 = 0;
7083                 /*
7084                  * We need to give the origin node the right locknum.
7085                  */
7086                 dns_name_init(&name, NULL);
7087                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7088 #ifdef DNS_RBT_USEHASH
7089                 rbtdb->origin_node->locknum =
7090                         rbtdb->origin_node->hashval %
7091                         rbtdb->node_lock_count;
7092 #else
7093                 rbtdb->origin_node->locknum =
7094                         dns_name_hash(&name, ISC_TRUE) %
7095                         rbtdb->node_lock_count;
7096 #endif
7097         }
7098
7099         /*
7100          * Misc. Initialization.
7101          */
7102         result = isc_refcount_init(&rbtdb->references, 1);
7103         if (result != ISC_R_SUCCESS) {
7104                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7105                 return (result);
7106         }
7107         rbtdb->attributes = 0;
7108         rbtdb->task = NULL;
7109
7110         /*
7111          * Version Initialization.
7112          */
7113         rbtdb->current_serial = 1;
7114         rbtdb->least_serial = 1;
7115         rbtdb->next_serial = 2;
7116         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7117         if (rbtdb->current_version == NULL) {
7118                 isc_refcount_decrement(&rbtdb->references, NULL);
7119                 isc_refcount_destroy(&rbtdb->references);
7120                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7121                 return (ISC_R_NOMEMORY);
7122         }
7123         rbtdb->current_version->secure = dns_db_insecure;
7124         rbtdb->current_version->havensec3 = ISC_FALSE;
7125         rbtdb->current_version->flags = 0;
7126         rbtdb->current_version->iterations = 0;
7127         rbtdb->current_version->hash = 0;
7128         rbtdb->current_version->salt_length = 0;
7129         memset(rbtdb->current_version->salt, 0,
7130                sizeof(rbtdb->current_version->salt));
7131         rbtdb->future_version = NULL;
7132         ISC_LIST_INIT(rbtdb->open_versions);
7133         /*
7134          * Keep the current version in the open list so that list operation
7135          * won't happen in normal lookup operations.
7136          */
7137         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7138
7139         rbtdb->common.magic = DNS_DB_MAGIC;
7140         rbtdb->common.impmagic = RBTDB_MAGIC;
7141
7142         *dbp = (dns_db_t *)rbtdb;
7143
7144         return (ISC_R_SUCCESS);
7145
7146  cleanup_deadnodes:
7147         isc_mem_put(mctx, rbtdb->deadnodes,
7148                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7149
7150  cleanup_heaps:
7151         if (rbtdb->heaps != NULL) {
7152                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7153                         if (rbtdb->heaps[i] != NULL)
7154                                 isc_heap_destroy(&rbtdb->heaps[i]);
7155                 isc_mem_put(mctx, rbtdb->heaps,
7156                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7157         }
7158
7159  cleanup_rdatasets:
7160         if (rbtdb->rdatasets != NULL)
7161                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7162                             sizeof(rdatasetheaderlist_t));
7163  cleanup_rrsetstats:
7164         if (rbtdb->rrsetstats != NULL)
7165                 dns_stats_detach(&rbtdb->rrsetstats);
7166
7167  cleanup_node_locks:
7168         isc_mem_put(mctx, rbtdb->node_locks,
7169                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7170
7171  cleanup_tree_lock:
7172         isc_rwlock_destroy(&rbtdb->tree_lock);
7173
7174  cleanup_lock:
7175         RBTDB_DESTROYLOCK(&rbtdb->lock);
7176
7177  cleanup_rbtdb:
7178         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7179         return (result);
7180 }
7181
7182
7183 /*
7184  * Slabbed Rdataset Methods
7185  */
7186
7187 static void
7188 rdataset_disassociate(dns_rdataset_t *rdataset) {
7189         dns_db_t *db = rdataset->private1;
7190         dns_dbnode_t *node = rdataset->private2;
7191
7192         detachnode(db, &node);
7193 }
7194
7195 static isc_result_t
7196 rdataset_first(dns_rdataset_t *rdataset) {
7197         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7198         unsigned int count;
7199
7200         count = raw[0] * 256 + raw[1];
7201         if (count == 0) {
7202                 rdataset->private5 = NULL;
7203                 return (ISC_R_NOMORE);
7204         }
7205
7206 #if DNS_RDATASET_FIXED
7207         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7208                 raw += 2 + (4 * count);
7209         else
7210 #endif
7211                 raw += 2;
7212
7213         /*
7214          * The privateuint4 field is the number of rdata beyond the
7215          * cursor position, so we decrement the total count by one
7216          * before storing it.
7217          *
7218          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7219          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7220          * to the first entry in the offset table.
7221          */
7222         count--;
7223         rdataset->privateuint4 = count;
7224         rdataset->private5 = raw;
7225
7226         return (ISC_R_SUCCESS);
7227 }
7228
7229 static isc_result_t
7230 rdataset_next(dns_rdataset_t *rdataset) {
7231         unsigned int count;
7232         unsigned int length;
7233         unsigned char *raw;     /* RDATASLAB */
7234
7235         count = rdataset->privateuint4;
7236         if (count == 0)
7237                 return (ISC_R_NOMORE);
7238         count--;
7239         rdataset->privateuint4 = count;
7240
7241         /*
7242          * Skip forward one record (length + 4) or one offset (4).
7243          */
7244         raw = rdataset->private5;
7245 #if DNS_RDATASET_FIXED
7246         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7247 #endif
7248                 length = raw[0] * 256 + raw[1];
7249                 raw += length;
7250 #if DNS_RDATASET_FIXED
7251         }
7252         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7253 #else
7254         rdataset->private5 = raw + 2;           /* length(2) */
7255 #endif
7256
7257         return (ISC_R_SUCCESS);
7258 }
7259
7260 static void
7261 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7262         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7263 #if DNS_RDATASET_FIXED
7264         unsigned int offset;
7265 #endif
7266         unsigned int length;
7267         isc_region_t r;
7268         unsigned int flags = 0;
7269
7270         REQUIRE(raw != NULL);
7271
7272         /*
7273          * Find the start of the record if not already in private5
7274          * then skip the length and order fields.
7275          */
7276 #if DNS_RDATASET_FIXED
7277         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7278                 offset = (raw[0] << 24) + (raw[1] << 16) +
7279                          (raw[2] << 8) + raw[3];
7280                 raw = rdataset->private3;
7281                 raw += offset;
7282         }
7283 #endif
7284         length = raw[0] * 256 + raw[1];
7285 #if DNS_RDATASET_FIXED
7286         raw += 4;
7287 #else
7288         raw += 2;
7289 #endif
7290         if (rdataset->type == dns_rdatatype_rrsig) {
7291                 if (*raw & DNS_RDATASLAB_OFFLINE)
7292                         flags |= DNS_RDATA_OFFLINE;
7293                 length--;
7294                 raw++;
7295         }
7296         r.length = length;
7297         r.base = raw;
7298         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7299         rdata->flags |= flags;
7300 }
7301
7302 static void
7303 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7304         dns_db_t *db = source->private1;
7305         dns_dbnode_t *node = source->private2;
7306         dns_dbnode_t *cloned_node = NULL;
7307
7308         attachnode(db, node, &cloned_node);
7309         *target = *source;
7310
7311         /*
7312          * Reset iterator state.
7313          */
7314         target->privateuint4 = 0;
7315         target->private5 = NULL;
7316 }
7317
7318 static unsigned int
7319 rdataset_count(dns_rdataset_t *rdataset) {
7320         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7321         unsigned int count;
7322
7323         count = raw[0] * 256 + raw[1];
7324
7325         return (count);
7326 }
7327
7328 static isc_result_t
7329 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7330                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7331 {
7332         dns_db_t *db = rdataset->private1;
7333         dns_dbnode_t *node = rdataset->private2;
7334         dns_dbnode_t *cloned_node;
7335         struct noqname *noqname = rdataset->private6;
7336
7337         cloned_node = NULL;
7338         attachnode(db, node, &cloned_node);
7339         nsec->methods = &rdataset_methods;
7340         nsec->rdclass = db->rdclass;
7341         nsec->type = noqname->type;
7342         nsec->covers = 0;
7343         nsec->ttl = rdataset->ttl;
7344         nsec->trust = rdataset->trust;
7345         nsec->private1 = rdataset->private1;
7346         nsec->private2 = rdataset->private2;
7347         nsec->private3 = noqname->neg;
7348         nsec->privateuint4 = 0;
7349         nsec->private5 = NULL;
7350         nsec->private6 = NULL;
7351         nsec->private7 = NULL;
7352
7353         cloned_node = NULL;
7354         attachnode(db, node, &cloned_node);
7355         nsecsig->methods = &rdataset_methods;
7356         nsecsig->rdclass = db->rdclass;
7357         nsecsig->type = dns_rdatatype_rrsig;
7358         nsecsig->covers = noqname->type;
7359         nsecsig->ttl = rdataset->ttl;
7360         nsecsig->trust = rdataset->trust;
7361         nsecsig->private1 = rdataset->private1;
7362         nsecsig->private2 = rdataset->private2;
7363         nsecsig->private3 = noqname->negsig;
7364         nsecsig->privateuint4 = 0;
7365         nsecsig->private5 = NULL;
7366         nsec->private6 = NULL;
7367         nsec->private7 = NULL;
7368
7369         dns_name_clone(&noqname->name, name);
7370
7371         return (ISC_R_SUCCESS);
7372 }
7373
7374 static isc_result_t
7375 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7376                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7377 {
7378         dns_db_t *db = rdataset->private1;
7379         dns_dbnode_t *node = rdataset->private2;
7380         dns_dbnode_t *cloned_node;
7381         struct noqname *closest = rdataset->private7;
7382
7383         cloned_node = NULL;
7384         attachnode(db, node, &cloned_node);
7385         nsec->methods = &rdataset_methods;
7386         nsec->rdclass = db->rdclass;
7387         nsec->type = closest->type;
7388         nsec->covers = 0;
7389         nsec->ttl = rdataset->ttl;
7390         nsec->trust = rdataset->trust;
7391         nsec->private1 = rdataset->private1;
7392         nsec->private2 = rdataset->private2;
7393         nsec->private3 = closest->neg;
7394         nsec->privateuint4 = 0;
7395         nsec->private5 = NULL;
7396         nsec->private6 = NULL;
7397         nsec->private7 = NULL;
7398
7399         cloned_node = NULL;
7400         attachnode(db, node, &cloned_node);
7401         nsecsig->methods = &rdataset_methods;
7402         nsecsig->rdclass = db->rdclass;
7403         nsecsig->type = dns_rdatatype_rrsig;
7404         nsecsig->covers = closest->type;
7405         nsecsig->ttl = rdataset->ttl;
7406         nsecsig->trust = rdataset->trust;
7407         nsecsig->private1 = rdataset->private1;
7408         nsecsig->private2 = rdataset->private2;
7409         nsecsig->private3 = closest->negsig;
7410         nsecsig->privateuint4 = 0;
7411         nsecsig->private5 = NULL;
7412         nsec->private6 = NULL;
7413         nsec->private7 = NULL;
7414
7415         dns_name_clone(&closest->name, name);
7416
7417         return (ISC_R_SUCCESS);
7418 }
7419
7420 static void
7421 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7422         dns_rbtdb_t *rbtdb = rdataset->private1;
7423         dns_rbtnode_t *rbtnode = rdataset->private2;
7424         rdatasetheader_t *header = rdataset->private3;
7425
7426         header--;
7427         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7428                   isc_rwlocktype_write);
7429         header->trust = rdataset->trust = trust;
7430         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7431                   isc_rwlocktype_write);
7432 }
7433
7434 static void
7435 rdataset_expire(dns_rdataset_t *rdataset) {
7436         dns_rbtdb_t *rbtdb = rdataset->private1;
7437         dns_rbtnode_t *rbtnode = rdataset->private2;
7438         rdatasetheader_t *header = rdataset->private3;
7439
7440         header--;
7441         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7442                   isc_rwlocktype_write);
7443         expire_header(rbtdb, header, ISC_FALSE);
7444         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7445                   isc_rwlocktype_write);
7446 }
7447
7448 /*
7449  * Rdataset Iterator Methods
7450  */
7451
7452 static void
7453 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7454         rbtdb_rdatasetiter_t *rbtiterator;
7455
7456         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7457
7458         if (rbtiterator->common.version != NULL)
7459                 closeversion(rbtiterator->common.db,
7460                              &rbtiterator->common.version, ISC_FALSE);
7461         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7462         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7463                     sizeof(*rbtiterator));
7464
7465         *iteratorp = NULL;
7466 }
7467
7468 static isc_result_t
7469 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7470         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7471         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7472         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7473         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7474         rdatasetheader_t *header, *top_next;
7475         rbtdb_serial_t serial;
7476         isc_stdtime_t now;
7477
7478         if (IS_CACHE(rbtdb)) {
7479                 serial = 1;
7480                 now = rbtiterator->common.now;
7481         } else {
7482                 serial = rbtversion->serial;
7483                 now = 0;
7484         }
7485
7486         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7487                   isc_rwlocktype_read);
7488
7489         for (header = rbtnode->data; header != NULL; header = top_next) {
7490                 top_next = header->next;
7491                 do {
7492                         if (header->serial <= serial && !IGNORE(header)) {
7493                                 /*
7494                                  * Is this a "this rdataset doesn't exist"
7495                                  * record?  Or is it too old in the cache?
7496                                  *
7497                                  * Note: unlike everywhere else, we
7498                                  * check for now > header->rdh_ttl instead
7499                                  * of now >= header->rdh_ttl.  This allows
7500                                  * ANY and RRSIG queries for 0 TTL
7501                                  * rdatasets to work.
7502                                  */
7503                                 if (NONEXISTENT(header) ||
7504                                     (now != 0 && now > header->rdh_ttl))
7505                                         header = NULL;
7506                                 break;
7507                         } else
7508                                 header = header->down;
7509                 } while (header != NULL);
7510                 if (header != NULL)
7511                         break;
7512         }
7513
7514         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7515                     isc_rwlocktype_read);
7516
7517         rbtiterator->current = header;
7518
7519         if (header == NULL)
7520                 return (ISC_R_NOMORE);
7521
7522         return (ISC_R_SUCCESS);
7523 }
7524
7525 static isc_result_t
7526 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7527         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7528         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7529         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7530         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7531         rdatasetheader_t *header, *top_next;
7532         rbtdb_serial_t serial;
7533         isc_stdtime_t now;
7534         rbtdb_rdatatype_t type, negtype;
7535         dns_rdatatype_t rdtype, covers;
7536
7537         header = rbtiterator->current;
7538         if (header == NULL)
7539                 return (ISC_R_NOMORE);
7540
7541         if (IS_CACHE(rbtdb)) {
7542                 serial = 1;
7543                 now = rbtiterator->common.now;
7544         } else {
7545                 serial = rbtversion->serial;
7546                 now = 0;
7547         }
7548
7549         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7550                   isc_rwlocktype_read);
7551
7552         type = header->type;
7553         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7554         if (rdtype == 0) {
7555                 covers = RBTDB_RDATATYPE_EXT(header->type);
7556                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7557         } else
7558                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7559         for (header = header->next; header != NULL; header = top_next) {
7560                 top_next = header->next;
7561                 /*
7562                  * If not walking back up the down list.
7563                  */
7564                 if (header->type != type && header->type != negtype) {
7565                         do {
7566                                 if (header->serial <= serial &&
7567                                     !IGNORE(header)) {
7568                                         /*
7569                                          * Is this a "this rdataset doesn't
7570                                          * exist" record?
7571                                          *
7572                                          * Note: unlike everywhere else, we
7573                                          * check for now > header->ttl instead
7574                                          * of now >= header->ttl.  This allows
7575                                          * ANY and RRSIG queries for 0 TTL
7576                                          * rdatasets to work.
7577                                          */
7578                                         if ((header->attributes &
7579                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7580                                             (now != 0 && now > header->rdh_ttl))
7581                                                 header = NULL;
7582                                         break;
7583                                 } else
7584                                         header = header->down;
7585                         } while (header != NULL);
7586                         if (header != NULL)
7587                                 break;
7588                 }
7589         }
7590
7591         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7592                     isc_rwlocktype_read);
7593
7594         rbtiterator->current = header;
7595
7596         if (header == NULL)
7597                 return (ISC_R_NOMORE);
7598
7599         return (ISC_R_SUCCESS);
7600 }
7601
7602 static void
7603 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7604         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7605         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7606         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7607         rdatasetheader_t *header;
7608
7609         header = rbtiterator->current;
7610         REQUIRE(header != NULL);
7611
7612         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7613                   isc_rwlocktype_read);
7614
7615         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7616                       rdataset);
7617
7618         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7619                     isc_rwlocktype_read);
7620 }
7621
7622
7623 /*
7624  * Database Iterator Methods
7625  */
7626
7627 static inline void
7628 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7629         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7630         dns_rbtnode_t *node = rbtdbiter->node;
7631
7632         if (node == NULL)
7633                 return;
7634
7635         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7636         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7637 }
7638
7639 static inline void
7640 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7641         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7642         dns_rbtnode_t *node = rbtdbiter->node;
7643         nodelock_t *lock;
7644
7645         if (node == NULL)
7646                 return;
7647
7648         lock = &rbtdb->node_locks[node->locknum].lock;
7649         NODE_LOCK(lock, isc_rwlocktype_read);
7650         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7651                             rbtdbiter->tree_locked, ISC_FALSE);
7652         NODE_UNLOCK(lock, isc_rwlocktype_read);
7653
7654         rbtdbiter->node = NULL;
7655 }
7656
7657 static void
7658 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7659         dns_rbtnode_t *node;
7660         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7661         isc_boolean_t was_read_locked = ISC_FALSE;
7662         nodelock_t *lock;
7663         int i;
7664
7665         if (rbtdbiter->delete != 0) {
7666                 /*
7667                  * Note that "%d node of %d in tree" can report things like
7668                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7669                  * That some nodes appear on the deletions list more than
7670                  * once.  Only the last occurence will actually be deleted.
7671                  */
7672                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7673                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7674                               "flush_deletions: %d nodes of %d in tree",
7675                               rbtdbiter->delete,
7676                               dns_rbt_nodecount(rbtdb->tree));
7677
7678                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7679                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7680                         was_read_locked = ISC_TRUE;
7681                 }
7682                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7683                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7684
7685                 for (i = 0; i < rbtdbiter->delete; i++) {
7686                         node = rbtdbiter->deletions[i];
7687                         lock = &rbtdb->node_locks[node->locknum].lock;
7688
7689                         NODE_LOCK(lock, isc_rwlocktype_read);
7690                         decrement_reference(rbtdb, node, 0,
7691                                             isc_rwlocktype_read,
7692                                             rbtdbiter->tree_locked, ISC_FALSE);
7693                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7694                 }
7695
7696                 rbtdbiter->delete = 0;
7697
7698                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7699                 if (was_read_locked) {
7700                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7701                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7702
7703                 } else {
7704                         rbtdbiter->tree_locked = isc_rwlocktype_none;
7705                 }
7706         }
7707 }
7708
7709 static inline void
7710 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
7711         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7712
7713         REQUIRE(rbtdbiter->paused);
7714         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
7715
7716         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7717         rbtdbiter->tree_locked = isc_rwlocktype_read;
7718
7719         rbtdbiter->paused = ISC_FALSE;
7720 }
7721
7722 static void
7723 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
7724         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
7725         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7726         dns_db_t *db = NULL;
7727
7728         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7729                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7730                 rbtdbiter->tree_locked = isc_rwlocktype_none;
7731         } else
7732                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
7733
7734         dereference_iter_node(rbtdbiter);
7735
7736         flush_deletions(rbtdbiter);
7737
7738         dns_db_attach(rbtdbiter->common.db, &db);
7739         dns_db_detach(&rbtdbiter->common.db);
7740
7741         dns_rbtnodechain_reset(&rbtdbiter->chain);
7742         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7743         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
7744         dns_db_detach(&db);
7745
7746         *iteratorp = NULL;
7747 }
7748
7749 static isc_result_t
7750 dbiterator_first(dns_dbiterator_t *iterator) {
7751         isc_result_t result;
7752         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7753         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7754         dns_name_t *name, *origin;
7755
7756         if (rbtdbiter->result != ISC_R_SUCCESS &&
7757             rbtdbiter->result != ISC_R_NOMORE)
7758                 return (rbtdbiter->result);
7759
7760         if (rbtdbiter->paused)
7761                 resume_iteration(rbtdbiter);
7762
7763         dereference_iter_node(rbtdbiter);
7764
7765         name = dns_fixedname_name(&rbtdbiter->name);
7766         origin = dns_fixedname_name(&rbtdbiter->origin);
7767         dns_rbtnodechain_reset(&rbtdbiter->chain);
7768         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7769
7770         if (rbtdbiter->nsec3only) {
7771                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7772                 result = dns_rbtnodechain_first(rbtdbiter->current,
7773                                                 rbtdb->nsec3, name, origin);
7774         } else {
7775                 rbtdbiter->current = &rbtdbiter->chain;
7776                 result = dns_rbtnodechain_first(rbtdbiter->current,
7777                                                 rbtdb->tree, name, origin);
7778                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
7779                         rbtdbiter->current = &rbtdbiter->nsec3chain;
7780                         result = dns_rbtnodechain_first(rbtdbiter->current,
7781                                                         rbtdb->nsec3, name,
7782                                                         origin);
7783                 }
7784         }
7785         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7786                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7787                                                   NULL, &rbtdbiter->node);
7788                 if (result == ISC_R_SUCCESS) {
7789                         rbtdbiter->new_origin = ISC_TRUE;
7790                         reference_iter_node(rbtdbiter);
7791                 }
7792         } else {
7793                 INSIST(result == ISC_R_NOTFOUND);
7794                 result = ISC_R_NOMORE; /* The tree is empty. */
7795         }
7796
7797         rbtdbiter->result = result;
7798
7799         return (result);
7800 }
7801
7802 static isc_result_t
7803 dbiterator_last(dns_dbiterator_t *iterator) {
7804         isc_result_t result;
7805         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7806         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7807         dns_name_t *name, *origin;
7808
7809         if (rbtdbiter->result != ISC_R_SUCCESS &&
7810             rbtdbiter->result != ISC_R_NOMORE)
7811                 return (rbtdbiter->result);
7812
7813         if (rbtdbiter->paused)
7814                 resume_iteration(rbtdbiter);
7815
7816         dereference_iter_node(rbtdbiter);
7817
7818         name = dns_fixedname_name(&rbtdbiter->name);
7819         origin = dns_fixedname_name(&rbtdbiter->origin);
7820         dns_rbtnodechain_reset(&rbtdbiter->chain);
7821         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7822
7823         result = ISC_R_NOTFOUND;
7824         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
7825                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7826                 result = dns_rbtnodechain_last(rbtdbiter->current,
7827                                                rbtdb->nsec3, name, origin);
7828         }
7829         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
7830                 rbtdbiter->current = &rbtdbiter->chain;
7831                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7832                                                name, origin);
7833         }
7834         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
7835                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7836                                                   NULL, &rbtdbiter->node);
7837                 if (result == ISC_R_SUCCESS) {
7838                         rbtdbiter->new_origin = ISC_TRUE;
7839                         reference_iter_node(rbtdbiter);
7840                 }
7841         } else {
7842                 INSIST(result == ISC_R_NOTFOUND);
7843                 result = ISC_R_NOMORE; /* The tree is empty. */
7844         }
7845
7846         rbtdbiter->result = result;
7847
7848         return (result);
7849 }
7850
7851 static isc_result_t
7852 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
7853         isc_result_t result;
7854         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7855         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7856         dns_name_t *iname, *origin;
7857
7858         if (rbtdbiter->result != ISC_R_SUCCESS &&
7859             rbtdbiter->result != ISC_R_NOTFOUND &&
7860             rbtdbiter->result != ISC_R_NOMORE)
7861                 return (rbtdbiter->result);
7862
7863         if (rbtdbiter->paused)
7864                 resume_iteration(rbtdbiter);
7865
7866         dereference_iter_node(rbtdbiter);
7867
7868         iname = dns_fixedname_name(&rbtdbiter->name);
7869         origin = dns_fixedname_name(&rbtdbiter->origin);
7870         dns_rbtnodechain_reset(&rbtdbiter->chain);
7871         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
7872
7873         if (rbtdbiter->nsec3only) {
7874                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7875                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7876                                           &rbtdbiter->node,
7877                                           rbtdbiter->current,
7878                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7879         } else if (rbtdbiter->nonsec3) {
7880                 rbtdbiter->current = &rbtdbiter->chain;
7881                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7882                                           &rbtdbiter->node,
7883                                           rbtdbiter->current,
7884                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7885         } else {
7886                 /*
7887                  * Stay on main chain if not found on either chain.
7888                  */
7889                 rbtdbiter->current = &rbtdbiter->chain;
7890                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
7891                                           &rbtdbiter->node,
7892                                           rbtdbiter->current,
7893                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
7894                 if (result == DNS_R_PARTIALMATCH) {
7895                         dns_rbtnode_t *node = NULL;
7896                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
7897                                                   &node, &rbtdbiter->nsec3chain,
7898                                                   DNS_RBTFIND_EMPTYDATA,
7899                                                   NULL, NULL);
7900                         if (result == ISC_R_SUCCESS) {
7901                                 rbtdbiter->node = node;
7902                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
7903                         }
7904                 }
7905         }
7906
7907 #if 1
7908         if (result == ISC_R_SUCCESS) {
7909                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
7910                                                   origin, NULL);
7911                 if (result == ISC_R_SUCCESS) {
7912                         rbtdbiter->new_origin = ISC_TRUE;
7913                         reference_iter_node(rbtdbiter);
7914                 }
7915         } else if (result == DNS_R_PARTIALMATCH) {
7916                 result = ISC_R_NOTFOUND;
7917                 rbtdbiter->node = NULL;
7918         }
7919
7920         rbtdbiter->result = result;
7921 #else
7922         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7923                 isc_result_t tresult;
7924                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
7925                                                    origin, NULL);
7926                 if (tresult == ISC_R_SUCCESS) {
7927                         rbtdbiter->new_origin = ISC_TRUE;
7928                         reference_iter_node(rbtdbiter);
7929                 } else {
7930                         result = tresult;
7931                         rbtdbiter->node = NULL;
7932                 }
7933         } else
7934                 rbtdbiter->node = NULL;
7935
7936         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
7937                             ISC_R_SUCCESS : result;
7938 #endif
7939
7940         return (result);
7941 }
7942
7943 static isc_result_t
7944 dbiterator_prev(dns_dbiterator_t *iterator) {
7945         isc_result_t result;
7946         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7947         dns_name_t *name, *origin;
7948         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7949
7950         REQUIRE(rbtdbiter->node != NULL);
7951
7952         if (rbtdbiter->result != ISC_R_SUCCESS)
7953                 return (rbtdbiter->result);
7954
7955         if (rbtdbiter->paused)
7956                 resume_iteration(rbtdbiter);
7957
7958         name = dns_fixedname_name(&rbtdbiter->name);
7959         origin = dns_fixedname_name(&rbtdbiter->origin);
7960         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
7961         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
7962             !rbtdbiter->nonsec3 &&
7963             &rbtdbiter->nsec3chain == rbtdbiter->current) {
7964                 rbtdbiter->current = &rbtdbiter->chain;
7965                 dns_rbtnodechain_reset(rbtdbiter->current);
7966                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
7967                                                name, origin);
7968                 if (result == ISC_R_NOTFOUND)
7969                         result = ISC_R_NOMORE;
7970         }
7971
7972         dereference_iter_node(rbtdbiter);
7973
7974         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
7975                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
7976                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
7977                                                   NULL, &rbtdbiter->node);
7978         }
7979
7980         if (result == ISC_R_SUCCESS)
7981                 reference_iter_node(rbtdbiter);
7982
7983         rbtdbiter->result = result;
7984
7985         return (result);
7986 }
7987
7988 static isc_result_t
7989 dbiterator_next(dns_dbiterator_t *iterator) {
7990         isc_result_t result;
7991         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
7992         dns_name_t *name, *origin;
7993         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
7994
7995         REQUIRE(rbtdbiter->node != NULL);
7996
7997         if (rbtdbiter->result != ISC_R_SUCCESS)
7998                 return (rbtdbiter->result);
7999
8000         if (rbtdbiter->paused)
8001                 resume_iteration(rbtdbiter);
8002
8003         name = dns_fixedname_name(&rbtdbiter->name);
8004         origin = dns_fixedname_name(&rbtdbiter->origin);
8005         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8006         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8007             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8008                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8009                 dns_rbtnodechain_reset(rbtdbiter->current);
8010                 result = dns_rbtnodechain_first(rbtdbiter->current,
8011                                                 rbtdb->nsec3, name, origin);
8012                 if (result == ISC_R_NOTFOUND)
8013                         result = ISC_R_NOMORE;
8014         }
8015
8016         dereference_iter_node(rbtdbiter);
8017
8018         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8019                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8020                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8021                                                   NULL, &rbtdbiter->node);
8022         }
8023         if (result == ISC_R_SUCCESS)
8024                 reference_iter_node(rbtdbiter);
8025
8026         rbtdbiter->result = result;
8027
8028         return (result);
8029 }
8030
8031 static isc_result_t
8032 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8033                    dns_name_t *name)
8034 {
8035         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8036         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8037         dns_rbtnode_t *node = rbtdbiter->node;
8038         isc_result_t result;
8039         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8040         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8041
8042         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8043         REQUIRE(rbtdbiter->node != NULL);
8044
8045         if (rbtdbiter->paused)
8046                 resume_iteration(rbtdbiter);
8047
8048         if (name != NULL) {
8049                 if (rbtdbiter->common.relative_names)
8050                         origin = NULL;
8051                 result = dns_name_concatenate(nodename, origin, name, NULL);
8052                 if (result != ISC_R_SUCCESS)
8053                         return (result);
8054                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8055                         result = DNS_R_NEWORIGIN;
8056         } else
8057                 result = ISC_R_SUCCESS;
8058
8059         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8060         new_reference(rbtdb, node);
8061         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8062
8063         *nodep = rbtdbiter->node;
8064
8065         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8066                 isc_result_t expire_result;
8067
8068                 /*
8069                  * If the deletion array is full, flush it before trying
8070                  * to expire the current node.  The current node can't
8071                  * fully deleted while the iteration cursor is still on it.
8072                  */
8073                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8074                         flush_deletions(rbtdbiter);
8075
8076                 expire_result = expirenode(iterator->db, *nodep, 0);
8077
8078                 /*
8079                  * expirenode() currently always returns success.
8080                  */
8081                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8082                         unsigned int refs;
8083
8084                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8085                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8086                         dns_rbtnode_refincrement(node, &refs);
8087                         INSIST(refs != 0);
8088                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8089                 }
8090         }
8091
8092         return (result);
8093 }
8094
8095 static isc_result_t
8096 dbiterator_pause(dns_dbiterator_t *iterator) {
8097         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8098         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8099
8100         if (rbtdbiter->result != ISC_R_SUCCESS &&
8101             rbtdbiter->result != ISC_R_NOMORE)
8102                 return (rbtdbiter->result);
8103
8104         if (rbtdbiter->paused)
8105                 return (ISC_R_SUCCESS);
8106
8107         rbtdbiter->paused = ISC_TRUE;
8108
8109         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8110                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8111                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8112                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8113         }
8114
8115         flush_deletions(rbtdbiter);
8116
8117         return (ISC_R_SUCCESS);
8118 }
8119
8120 static isc_result_t
8121 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8122         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8123         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8124
8125         if (rbtdbiter->result != ISC_R_SUCCESS)
8126                 return (rbtdbiter->result);
8127
8128         return (dns_name_copy(origin, name, NULL));
8129 }
8130
8131 /*%
8132  * Additional cache routines.
8133  */
8134 static isc_result_t
8135 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8136                        dns_rdatatype_t qtype, dns_acache_t *acache,
8137                        dns_zone_t **zonep, dns_db_t **dbp,
8138                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8139                        dns_name_t *fname, dns_message_t *msg,
8140                        isc_stdtime_t now)
8141 {
8142         dns_rbtdb_t *rbtdb = rdataset->private1;
8143         dns_rbtnode_t *rbtnode = rdataset->private2;
8144         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8145         unsigned int current_count = rdataset->privateuint4;
8146         unsigned int count;
8147         rdatasetheader_t *header;
8148         nodelock_t *nodelock;
8149         unsigned int total_count;
8150         acachectl_t *acarray;
8151         dns_acacheentry_t *entry;
8152         isc_result_t result;
8153
8154         UNUSED(qtype); /* we do not use this value at least for now */
8155         UNUSED(acache);
8156
8157         header = (struct rdatasetheader *)(raw - sizeof(*header));
8158
8159         total_count = raw[0] * 256 + raw[1];
8160         INSIST(total_count > current_count);
8161         count = total_count - current_count - 1;
8162
8163         acarray = NULL;
8164
8165         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8166         NODE_LOCK(nodelock, isc_rwlocktype_read);
8167
8168         switch (type) {
8169         case dns_rdatasetadditional_fromauth:
8170                 acarray = header->additional_auth;
8171                 break;
8172         case dns_rdatasetadditional_fromcache:
8173                 acarray = NULL;
8174                 break;
8175         case dns_rdatasetadditional_fromglue:
8176                 acarray = header->additional_glue;
8177                 break;
8178         default:
8179                 INSIST(0);
8180         }
8181
8182         if (acarray == NULL) {
8183                 if (type != dns_rdatasetadditional_fromcache)
8184                         dns_acache_countquerymiss(acache);
8185                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8186                 return (ISC_R_NOTFOUND);
8187         }
8188
8189         if (acarray[count].entry == NULL) {
8190                 dns_acache_countquerymiss(acache);
8191                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8192                 return (ISC_R_NOTFOUND);
8193         }
8194
8195         entry = NULL;
8196         dns_acache_attachentry(acarray[count].entry, &entry);
8197
8198         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8199
8200         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8201                                      nodep, fname, msg, now);
8202
8203         dns_acache_detachentry(&entry);
8204
8205         return (result);
8206 }
8207
8208 static void
8209 acache_callback(dns_acacheentry_t *entry, void **arg) {
8210         dns_rbtdb_t *rbtdb;
8211         dns_rbtnode_t *rbtnode;
8212         nodelock_t *nodelock;
8213         acachectl_t *acarray = NULL;
8214         acache_cbarg_t *cbarg;
8215         unsigned int count;
8216
8217         REQUIRE(arg != NULL);
8218         cbarg = *arg;
8219
8220         /*
8221          * The caller must hold the entry lock.
8222          */
8223
8224         rbtdb = (dns_rbtdb_t *)cbarg->db;
8225         rbtnode = (dns_rbtnode_t *)cbarg->node;
8226
8227         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8228         NODE_LOCK(nodelock, isc_rwlocktype_write);
8229
8230         switch (cbarg->type) {
8231         case dns_rdatasetadditional_fromauth:
8232                 acarray = cbarg->header->additional_auth;
8233                 break;
8234         case dns_rdatasetadditional_fromglue:
8235                 acarray = cbarg->header->additional_glue;
8236                 break;
8237         default:
8238                 INSIST(0);
8239         }
8240
8241         count = cbarg->count;
8242         if (acarray != NULL && acarray[count].entry == entry) {
8243                 acarray[count].entry = NULL;
8244                 INSIST(acarray[count].cbarg == cbarg);
8245                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8246                 acarray[count].cbarg = NULL;
8247         } else
8248                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8249
8250         dns_acache_detachentry(&entry);
8251
8252         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8253
8254         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8255         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8256
8257         *arg = NULL;
8258 }
8259
8260 static void
8261 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8262                       acache_cbarg_t **cbargp)
8263 {
8264         acache_cbarg_t *cbarg;
8265
8266         REQUIRE(mctx != NULL);
8267         REQUIRE(entry != NULL);
8268         REQUIRE(cbargp != NULL && *cbargp != NULL);
8269
8270         cbarg = *cbargp;
8271
8272         dns_acache_cancelentry(entry);
8273         dns_db_detachnode(cbarg->db, &cbarg->node);
8274         dns_db_detach(&cbarg->db);
8275
8276         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
8277
8278         *cbargp = NULL;
8279 }
8280
8281 static isc_result_t
8282 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8283                        dns_rdatatype_t qtype, dns_acache_t *acache,
8284                        dns_zone_t *zone, dns_db_t *db,
8285                        dns_dbversion_t *version, dns_dbnode_t *node,
8286                        dns_name_t *fname)
8287 {
8288         dns_rbtdb_t *rbtdb = rdataset->private1;
8289         dns_rbtnode_t *rbtnode = rdataset->private2;
8290         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8291         unsigned int current_count = rdataset->privateuint4;
8292         rdatasetheader_t *header;
8293         unsigned int total_count, count;
8294         nodelock_t *nodelock;
8295         isc_result_t result;
8296         acachectl_t *acarray;
8297         dns_acacheentry_t *newentry, *oldentry = NULL;
8298         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8299
8300         UNUSED(qtype);
8301
8302         if (type == dns_rdatasetadditional_fromcache)
8303                 return (ISC_R_SUCCESS);
8304
8305         header = (struct rdatasetheader *)(raw - sizeof(*header));
8306
8307         total_count = raw[0] * 256 + raw[1];
8308         INSIST(total_count > current_count);
8309         count = total_count - current_count - 1; /* should be private data */
8310
8311         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8312         if (newcbarg == NULL)
8313                 return (ISC_R_NOMEMORY);
8314         newcbarg->type = type;
8315         newcbarg->count = count;
8316         newcbarg->header = header;
8317         newcbarg->db = NULL;
8318         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8319         newcbarg->node = NULL;
8320         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8321                           &newcbarg->node);
8322         newentry = NULL;
8323         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8324                                         acache_callback, newcbarg, &newentry);
8325         if (result != ISC_R_SUCCESS)
8326                 goto fail;
8327         /* Set cache data in the new entry. */
8328         result = dns_acache_setentry(acache, newentry, zone, db,
8329                                      version, node, fname);
8330         if (result != ISC_R_SUCCESS)
8331                 goto fail;
8332
8333         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8334         NODE_LOCK(nodelock, isc_rwlocktype_write);
8335
8336         acarray = NULL;
8337         switch (type) {
8338         case dns_rdatasetadditional_fromauth:
8339                 acarray = header->additional_auth;
8340                 break;
8341         case dns_rdatasetadditional_fromglue:
8342                 acarray = header->additional_glue;
8343                 break;
8344         default:
8345                 INSIST(0);
8346         }
8347
8348         if (acarray == NULL) {
8349                 unsigned int i;
8350
8351                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8352                                       sizeof(acachectl_t));
8353
8354                 if (acarray == NULL) {
8355                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8356                         goto fail;
8357                 }
8358
8359                 for (i = 0; i < total_count; i++) {
8360                         acarray[i].entry = NULL;
8361                         acarray[i].cbarg = NULL;
8362                 }
8363         }
8364         switch (type) {
8365         case dns_rdatasetadditional_fromauth:
8366                 header->additional_auth = acarray;
8367                 break;
8368         case dns_rdatasetadditional_fromglue:
8369                 header->additional_glue = acarray;
8370                 break;
8371         default:
8372                 INSIST(0);
8373         }
8374
8375         if (acarray[count].entry != NULL) {
8376                 /*
8377                  * Swap the entry.  Delay cleaning-up the old entry since
8378                  * it would require a node lock.
8379                  */
8380                 oldentry = acarray[count].entry;
8381                 INSIST(acarray[count].cbarg != NULL);
8382                 oldcbarg = acarray[count].cbarg;
8383         }
8384         acarray[count].entry = newentry;
8385         acarray[count].cbarg = newcbarg;
8386
8387         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8388
8389         if (oldentry != NULL) {
8390                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8391                 dns_acache_detachentry(&oldentry);
8392         }
8393
8394         return (ISC_R_SUCCESS);
8395
8396  fail:
8397         if (newcbarg != NULL) {
8398                 if (newentry != NULL) {
8399                         acache_cancelentry(rbtdb->common.mctx, newentry,
8400                                            &newcbarg);
8401                         dns_acache_detachentry(&newentry);
8402                 } else {
8403                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8404                         dns_db_detach(&newcbarg->db);
8405                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8406                             sizeof(*newcbarg));
8407                 }
8408         }
8409
8410         return (result);
8411 }
8412
8413 static isc_result_t
8414 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8415                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8416 {
8417         dns_rbtdb_t *rbtdb = rdataset->private1;
8418         dns_rbtnode_t *rbtnode = rdataset->private2;
8419         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8420         unsigned int current_count = rdataset->privateuint4;
8421         rdatasetheader_t *header;
8422         nodelock_t *nodelock;
8423         unsigned int total_count, count;
8424         acachectl_t *acarray;
8425         dns_acacheentry_t *entry;
8426         acache_cbarg_t *cbarg;
8427
8428         UNUSED(qtype);          /* we do not use this value at least for now */
8429         UNUSED(acache);
8430
8431         if (type == dns_rdatasetadditional_fromcache)
8432                 return (ISC_R_SUCCESS);
8433
8434         header = (struct rdatasetheader *)(raw - sizeof(*header));
8435
8436         total_count = raw[0] * 256 + raw[1];
8437         INSIST(total_count > current_count);
8438         count = total_count - current_count - 1;
8439
8440         acarray = NULL;
8441         entry = NULL;
8442
8443         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8444         NODE_LOCK(nodelock, isc_rwlocktype_write);
8445
8446         switch (type) {
8447         case dns_rdatasetadditional_fromauth:
8448                 acarray = header->additional_auth;
8449                 break;
8450         case dns_rdatasetadditional_fromglue:
8451                 acarray = header->additional_glue;
8452                 break;
8453         default:
8454                 INSIST(0);
8455         }
8456
8457         if (acarray == NULL) {
8458                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8459                 return (ISC_R_NOTFOUND);
8460         }
8461
8462         entry = acarray[count].entry;
8463         if (entry == NULL) {
8464                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8465                 return (ISC_R_NOTFOUND);
8466         }
8467
8468         acarray[count].entry = NULL;
8469         cbarg = acarray[count].cbarg;
8470         acarray[count].cbarg = NULL;
8471
8472         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8473
8474         if (entry != NULL) {
8475                 if (cbarg != NULL)
8476                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8477                 dns_acache_detachentry(&entry);
8478         }
8479
8480         return (ISC_R_SUCCESS);
8481 }
8482
8483 /*%
8484  * Routines for LRU-based cache management.
8485  */
8486
8487 /*%
8488  * See if a given cache entry that is being reused needs to be updated
8489  * in the LRU-list.  From the LRU management point of view, this function is
8490  * expected to return true for almost all cases.  When used with threads,
8491  * however, this may cause a non-negligible performance penalty because a
8492  * writer lock will have to be acquired before updating the list.
8493  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8494  * function returns true if the entry has not been updated for some period of
8495  * time.  We differentiate the NS or glue address case and the others since
8496  * experiments have shown that the former tends to be accessed relatively
8497  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8498  * may cause external queries at a higher level zone, involving more
8499  * transactions).
8500  *
8501  * Caller must hold the node (read or write) lock.
8502  */
8503 static inline isc_boolean_t
8504 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8505         if ((header->attributes &
8506              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8507                 return (ISC_FALSE);
8508
8509 #if DNS_RBTDB_LIMITLRUUPDATE
8510         if (header->type == dns_rdatatype_ns ||
8511             (header->trust == dns_trust_glue &&
8512              (header->type == dns_rdatatype_a ||
8513               header->type == dns_rdatatype_aaaa))) {
8514                 /*
8515                  * Glue records are updated if at least 60 seconds have passed
8516                  * since the previous update time.
8517                  */
8518                 return (header->last_used + 60 <= now);
8519         }
8520
8521         /* Other records are updated if 5 minutes have passed. */
8522         return (header->last_used + 300 <= now);
8523 #else
8524         UNUSED(now);
8525
8526         return (ISC_TRUE);
8527 #endif
8528 }
8529
8530 /*%
8531  * Update the timestamp of a given cache entry and move it to the head
8532  * of the corresponding LRU list.
8533  *
8534  * Caller must hold the node (write) lock.
8535  *
8536  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8537  */
8538 static void
8539 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8540               isc_stdtime_t now)
8541 {
8542         INSIST(IS_CACHE(rbtdb));
8543
8544         /* To be checked: can we really assume this? XXXMLG */
8545         INSIST(ISC_LINK_LINKED(header, link));
8546
8547         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8548         header->last_used = now;
8549         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8550 }
8551
8552 /*%
8553  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8554  * under an overmem condition.  To recover from this condition quickly, up to
8555  * 2 entries will be purged.  This process is triggered while adding a new
8556  * entry, and we specifically avoid purging entries in the same LRU bucket as
8557  * the one to which the new entry will belong.  Otherwise, we might purge
8558  * entries of the same name of different RR types while adding RRsets from a
8559  * single response (consider the case where we're adding A and AAAA glue records
8560  * of the same NS name).
8561  */
8562 static void
8563 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8564               isc_stdtime_t now, isc_boolean_t tree_locked)
8565 {
8566         rdatasetheader_t *header, *header_prev;
8567         unsigned int locknum;
8568         int purgecount = 2;
8569
8570         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8571              locknum != locknum_start && purgecount > 0;
8572              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8573                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8574                           isc_rwlocktype_write);
8575
8576                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8577                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8578                         expire_header(rbtdb, header, tree_locked);
8579                         purgecount--;
8580                 }
8581
8582                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8583                      header != NULL && purgecount > 0;
8584                      header = header_prev) {
8585                         header_prev = ISC_LIST_PREV(header, link);
8586                         /*
8587                          * Unlink the entry at this point to avoid checking it
8588                          * again even if it's currently used someone else and
8589                          * cannot be purged at this moment.  This entry won't be
8590                          * referenced any more (so unlinking is safe) since the
8591                          * TTL was reset to 0.
8592                          */
8593                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8594                                         link);
8595                         expire_header(rbtdb, header, tree_locked);
8596                         purgecount--;
8597                 }
8598
8599                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8600                                     isc_rwlocktype_write);
8601         }
8602 }
8603
8604 static void
8605 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8606               isc_boolean_t tree_locked)
8607 {
8608         set_ttl(rbtdb, header, 0);
8609         header->attributes |= RDATASET_ATTR_STALE;
8610         header->node->dirty = 1;
8611
8612         /*
8613          * Caller must hold the node (write) lock.
8614          */
8615
8616         if (dns_rbtnode_refcurrent(header->node) == 0) {
8617                 /*
8618                  * If no one else is using the node, we can clean it up now.
8619                  * We first need to gain a new reference to the node to meet a
8620                  * requirement of decrement_reference().
8621                  */
8622                 new_reference(rbtdb, header->node);
8623                 decrement_reference(rbtdb, header->node, 0,
8624                                     isc_rwlocktype_write,
8625                                     tree_locked ? isc_rwlocktype_write :
8626                                     isc_rwlocktype_none, ISC_FALSE);
8627         }
8628 }